Nginx gets halt on 15K connections !!

shahzaib shahzaib shahzaib.cb at gmail.com
Thu May 7 13:38:23 UTC 2015


Hi,

  It looks like we made a wrong calculation of the concurrent connections
per second we need to handle, and the worker_connections limit was set
far too low. I've increased this limit to 16000 and the issue looks to be
fixed. Here's the formula I used to calculate concurrent connections/sec:

 worker_processes * worker_connections / keepalive_timeout == concurrent
connections per second

Concurrent connections on our server are around 15K. Based on this I used
the following values:

 48 * 16000 / 15 == 51200/sec

Can somebody point out to me whether this calculation method is wrong?

Regards.
Shahzaib

On Sun, May 3, 2015 at 3:11 AM, shahzaib shahzaib <shahzaib.cb at gmail.com>
wrote:

> Experts,
>
>         Could you please do me a favor in order to solve this problem ?
>
> Regards.
> Shahzaib
>
> On Sat, May 2, 2015 at 3:32 PM, shahzaib shahzaib <shahzaib.cb at gmail.com>
> wrote:
>
>> Hi,
>>
>>     We've been running an nginx-1.8 instance on one of our media servers to
>> serve big static .mp4 files as well as small files such as .jpeg. Nginx
>> serves well under 13K connections/sec with 800Mbps outgoing network load,
>> but whenever requests exceed 15K connections, nginx stalls and 'D'
>> status shows up across the nginx workers, and the network load
>> drops to 400Mbps, which causes video streaming to get stuck. After
>> 5-10 minutes the load starts dropping, nginx stabilizes again, and the
>> network load gets back to 800Mbps. We've been encountering this
>> fluctuation roughly every 15 minutes.
>>
>>  We know that 'D' status is most likely due to high disk I/O. To check
>> whether disk I/O really is the problem under 15K connections, we
>> enabled apache on port 8080 and tested the same video stream during high
>> load. Buffering through apache, the stream fluctuated a bit but never got
>> stuck for 5-10 minutes. In the meantime the same video was much worse
>> on nginx and got stuck for 5 minutes while buffering.
>>
>> We suspect this is related to something other than disk I/O, because
>> the same video under high load buffers better on apache (on port 8080).
>> Also, if it were related to high disk I/O, it should not be possible for
>> the video to get stuck for 5-10 minutes.
>>
>> It looks to us that nginx stalls when concurrent connections exceed
>> 15K. We also tried tuning the backlog directive, which slightly improved
>> performance, but there must be some further nginx optimization
>> that we are missing. I have included the nginx.conf file, sysctl settings
>> and vhost file to give a better understanding of our tweaks.
>>
>> user  nginx;
>> worker_processes 48;
>> worker_rlimit_nofile 600000; #2 filehandlers for each connection
>> #error_log  logs/error.log;
>> #error_log  logs/error.log  notice;
>> error_log  /var/log/nginx/error.log  error;
>> #error_log /dev/null;
>> #pid        logs/nginx.pid;
>>
>>
>> events {
>>     worker_connections  2048;
>>     use epoll;
>> #       use kqueue;
>> }
>> http {
>>      include       mime.types;
>>      default_type  application/octet-stream;
>>  #    client_max_body_size 800M;
>>      client_body_buffer_size 128K;
>>      output_buffers 1 512k;
>>      sendfile_max_chunk 128k;
>>      client_header_buffer_size 256k;
>>      large_client_header_buffers 4 256k;
>> #    fastcgi_buffers 512 8k;
>> #    proxy_buffers   512 8k;
>> #    fastcgi_read_timeout 300s;
>>      server_tokens off; #Conceals nginx version
>>      access_log off;
>> #    access_log /var/log/nginx/access.log;
>>      sendfile        off;
>> #    sendfile         ;
>>      tcp_nodelay on;
>>      aio on;
>>      directio 512;
>> #    tcp_nopush     on;
>>      client_header_timeout  120s;
>>      client_body_timeout 120s;
>>      send_timeout     120s;
>>      keepalive_timeout  15;
>>  gzip on;
>>     gzip_vary on;
>>     gzip_disable "MSIE [1-6]\.";
>>     gzip_proxied any;
>>     gzip_http_version 1.0;
>>     gzip_min_length  1280;
>>     gzip_comp_level  6;
>>     gzip_buffers  16 8k;
>>     gzip_types    text/plain text/xml text/css application/x-javascript
>> image/png image/x-icon image/gif image/jpeg image/jpg application/xml
>> application/xml+rss text/javascript application/atom+xml;
>>      include /usr/local/nginx/conf/vhosts/*.conf;
>> #     open_file_cache          max=2000 inactive=20s;
>> #     open_file_cache_valid    60s;
>> #     open_file_cache_min_uses 5;
>> #     open_file_cache_errors   off;
>>
>> }
>>
>> sysctl.conf main config :
>>
>> fs.file-max = 700000
>> net.core.wmem_max=6291456
>> net.core.rmem_max=6291456
>> net.ipv4.tcp_rmem= 10240 87380 6291456
>> net.ipv4.tcp_wmem= 10240 87380 6291456
>> net.ipv4.tcp_window_scaling = 1
>> net.ipv4.tcp_timestamps = 1
>> net.ipv4.tcp_sack = 1
>> net.ipv4.tcp_no_metrics_save = 1
>> net.core.netdev_max_backlog = 10000
>>
>> net.ipv6.conf.all.disable_ipv6 = 1
>> net.ipv6.conf.default.disable_ipv6 = 1
>> net.ipv6.conf.lo.disable_ipv6 = 1
>> net.ipv6.conf.eth0.disable_ipv6 = 1
>> net.ipv6.conf.eth1.disable_ipv6 = 1
>> net.ipv6.conf.ppp0.disable_ipv6 = 1
>> net.ipv6.conf.tun0.disable_ipv6 = 1
>> vm.dirty_background_ratio = 50
>> vm.dirty_ratio = 80
>> net.ipv4.tcp_fin_timeout = 30
>> net.ipv4.ip_local_port_range=1024 65000
>> net.ipv4.tcp_tw_reuse = 1
>> net.netfilter.nf_conntrack_tcp_timeout_established = 54000
>> net.ipv4.netfilter.ip_conntrack_generic_timeout = 120
>> net.ipv4.tcp_syn_retries=2
>> net.ipv4.tcp_synack_retries=2
>> net.ipv4.netfilter.ip_conntrack_max = 90536
>> net.core.somaxconn = 10000
>>
>> Vhost :
>>
>> server {
>>         listen  80 backlog=10000;
>>         server_name archive3.domain.com archive3.domain.com
>> www.archive3.domain.com www.archive3.domain.com;
>>         access_log off;
>>         location / {
>>             root   /content/archive;
>>             index index.html index.htm index.php;
>>            autoindex off;
>> }
>>
>> location /files/thumbs/ {
>>         root /data/nginx/archive;
>>         add_header X-Cache SSD;
>>         expires max;
>> }
>>
>> location ~ \.(flv)$ {
>>                 flv;
>>                 root /content/archive;
>> #                aio on;
>> #                directio 512;
>> #                output_buffers 1 2m;
>>                 expires 7d;
>>                 valid_referers none blocked domain.com *.domain.com
>> *.facebook.com *.domain.com *.twitter.com *.domain.com *.gear3rd.net
>> domain.com *.domain.com tunemedia.tv www.tunemedia.tv embed.tunemedia.tv;
>>                 if ($invalid_referer) {
>>                     return   403;
>>                 }
>>                 }
>>
>>
>> location ~ \.(mp4)$ {
>>                 mp4;
>>                 mp4_buffer_size 4M;
>>                 mp4_max_buffer_size 10M;
>>                 expires 7d;
>>                 root /content/archive;
>>                  valid_referers none blocked  domain.com *.domain.com
>> *.facebook.com *.domain.com *.twitter.com *.domain.com *.gear3rd.net
>> domain.com *.domain.com tunemedia.tv www.tunemedia.tv embed.tunemedia.tv;
>>                 if ($invalid_referer) {
>>                     return   403;
>>                 }
>>                 }
>>
>>  # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
>>         location ~ \.php$ {
>>             root /content/archive;
>>             fastcgi_pass   127.0.0.1:9000;
>>            fastcgi_index  index.php;
>>             fastcgi_param  SCRIPT_FILENAME
>>  $document_root$fastcgi_script_name;
>>             include        fastcgi_params;
>>            fastcgi_read_timeout 10000;
>>         }
>>
>>         location ~ /\.ht {
>>             deny  all;
>>         }
>>
>>
>> location ~ ^/(status|ping)$ {
>>      access_log off;
>>      allow 127.0.0.1;
>>
>>      deny all;
>>      fastcgi_param  SCRIPT_FILENAME  $document_root$fastcgi_script_name;
>>         include fastcgi_params;
>>      fastcgi_pass 127.0.0.1:9000;
>> }
>> }
>>
>> Server Specs :
>>
>> L5630 (8cores, 16threads)
>> RAM 64GB
>> 12 x 3TB @ SATA Hardware Raid-6
>>
>> Here's the screenshot of server load during 15K connections:
>>
>> http://prntscr.com/70l68q
>>
>> Regards.
>> Shahzaib
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.nginx.org/pipermail/nginx/attachments/20150507/7cfd28a5/attachment.html>


More information about the nginx mailing list