Nginx gets halt on 15K connections !!

shahzaib shahzaib shahzaib.cb at gmail.com
Sat May 2 10:32:51 UTC 2015


Hi,

    We've been running an nginx-1.8 instance on one of our media servers to
serve big static .mp4 files as well as small files such as .jpeg. Nginx
serves well at up to 13K connections/sec with 800Mbps of outgoing network
load, but whenever requests exceed 15K connections, nginx stalls, most of
the nginx workers go into 'D' state, and the network load drops to 400Mbps,
which causes video streaming to get stuck. After 5-10 minutes the load
starts dropping, nginx stabilizes again, and the network load returns to
800Mbps. We've been encountering this fluctuation roughly every 15 minutes.

 We know that 'D' status is most likely due to high disk I/O. To check
whether disk I/O really is the problem at 15K connections, we enabled
apache on port 8080 and tested the same video stream through it during
high load. The stream on apache fluctuated a bit, but it never got stuck
for 5-10 minutes. Meanwhile, the same video performed worse on nginx and
got stuck for 5 minutes while buffering.

We suspect this is related to something other than disk I/O, because the
same video under high load buffers better on apache (on port 8080). Also,
if it were only a matter of high disk I/O, the video should not get stuck
for 5-10 minutes.

It looks to us like nginx stalls when concurrent connections exceed 15K.
We also tried tuning the backlog directive, which slightly improved
performance, but there must be some further nginx optimization that we are
missing. I have included the nginx.conf file, sysctl settings and vhost
file below to give a better understanding of our tweaks.

user  nginx;
worker_processes 48;
worker_rlimit_nofile 600000; #2 filehandlers for each connection
#error_log  logs/error.log;
#error_log  logs/error.log  notice;
error_log  /var/log/nginx/error.log  error;
#error_log /dev/null;
#pid        logs/nginx.pid;


events {
    worker_connections  2048;
    use epoll;
#       use kqueue;
}
http {
     include       mime.types;
     default_type  application/octet-stream;
 #    client_max_body_size 800M;
     client_body_buffer_size 128K;
     output_buffers 1 512k;
     sendfile_max_chunk 128k;
     client_header_buffer_size 256k;
     large_client_header_buffers 4 256k;
#    fastcgi_buffers 512 8k;
#    proxy_buffers   512 8k;
#    fastcgi_read_timeout 300s;
     server_tokens off; #Conceals nginx version
     access_log off;
#    access_log /var/log/nginx/access.log;
     sendfile        off;
#    sendfile         ;
     tcp_nodelay on;
     aio on;
     directio 512;
#    tcp_nopush     on;
     client_header_timeout  120s;
     client_body_timeout 120s;
     send_timeout     120s;
     keepalive_timeout  15;
 gzip on;
    gzip_vary on;
    gzip_disable "MSIE [1-6]\.";
    gzip_proxied any;
    gzip_http_version 1.0;
    gzip_min_length  1280;
    gzip_comp_level  6;
    gzip_buffers  16 8k;
    gzip_types    text/plain text/xml text/css application/x-javascript
image/png image/x-icon image/gif image/jpeg image/jpg application/xml
application/xml+rss text/javascript application/atom+xml;
     include /usr/local/nginx/conf/vhosts/*.conf;
#     open_file_cache          max=2000 inactive=20s;
#     open_file_cache_valid    60s;
#     open_file_cache_min_uses 5;
#     open_file_cache_errors   off;

}

sysctl.conf main config :

fs.file-max = 700000
net.core.wmem_max=6291456
net.core.rmem_max=6291456
net.ipv4.tcp_rmem= 10240 87380 6291456
net.ipv4.tcp_wmem= 10240 87380 6291456
net.ipv4.tcp_window_scaling = 1
net.ipv4.tcp_timestamps = 1
net.ipv4.tcp_sack = 1
net.ipv4.tcp_no_metrics_save = 1
net.core.netdev_max_backlog = 10000

net.ipv6.conf.all.disable_ipv6 = 1
net.ipv6.conf.default.disable_ipv6 = 1
net.ipv6.conf.lo.disable_ipv6 = 1
net.ipv6.conf.eth0.disable_ipv6 = 1
net.ipv6.conf.eth1.disable_ipv6 = 1
net.ipv6.conf.ppp0.disable_ipv6 = 1
net.ipv6.conf.tun0.disable_ipv6 = 1
vm.dirty_background_ratio = 50
vm.dirty_ratio = 80
net.ipv4.tcp_fin_timeout = 30
net.ipv4.ip_local_port_range=1024 65000
net.ipv4.tcp_tw_reuse = 1
net.netfilter.nf_conntrack_tcp_timeout_established = 54000
net.ipv4.netfilter.ip_conntrack_generic_timeout = 120
net.ipv4.tcp_syn_retries=2
net.ipv4.tcp_synack_retries=2
net.ipv4.netfilter.ip_conntrack_max = 90536
net.core.somaxconn = 10000

Vhost :

server {
        listen  80 backlog=10000;
        server_name archive3.domain.com archive3.domain.com
www.archive3.domain.com www.archive3.domain.com;
        access_log off;
        location / {
            root   /content/archive;
            index index.html index.htm index.php;
           autoindex off;
}

location /files/thumbs/ {
        root /data/nginx/archive;
        add_header X-Cache SSD;
        expires max;
}

location ~ \.(flv)$ {
                flv;
                root /content/archive;
#                aio on;
#                directio 512;
#                output_buffers 1 2m;
                expires 7d;
                valid_referers none blocked domain.com *.domain.com
*.facebook.com *.domain.com *.twitter.com *.domain.com *.gear3rd.net
domain.com *.domain.com tunemedia.tv www.tunemedia.tv embed.tunemedia.tv;
                if ($invalid_referer) {
                    return   403;
                }
                }


location ~ \.(mp4)$ {
                mp4;
                mp4_buffer_size 4M;
                mp4_max_buffer_size 10M;
                expires 7d;
                root /content/archive;
                 valid_referers none blocked  domain.com *.domain.com
*.facebook.com *.domain.com *.twitter.com *.domain.com *.gear3rd.net
domain.com *.domain.com tunemedia.tv www.tunemedia.tv embed.tunemedia.tv;
                if ($invalid_referer) {
                    return   403;
                }
                }

 # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
        location ~ \.php$ {
            root /content/archive;
            fastcgi_pass   127.0.0.1:9000;
           fastcgi_index  index.php;
            fastcgi_param  SCRIPT_FILENAME
 $document_root$fastcgi_script_name;
            include        fastcgi_params;
           fastcgi_read_timeout 10000;
        }

        location ~ /\.ht {
            deny  all;
        }


location ~ ^/(status|ping)$ {
     access_log off;
     allow 127.0.0.1;

     deny all;
     fastcgi_param  SCRIPT_FILENAME  $document_root$fastcgi_script_name;
        include fastcgi_params;
     fastcgi_pass 127.0.0.1:9000;
}
}

Server Specs :

L5630 (8cores, 16threads)
RAM 64GB
12 x 3TB @ SATA Hardware Raid-6

Here's the screenshot of server load during 15K connections:

http://prntscr.com/70l68q

Regards.
Shahzaib
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.nginx.org/pipermail/nginx/attachments/20150502/86ef76fa/attachment.html>


More information about the nginx mailing list