[NGINX] cache manager process consumes a lot of CPU at reload and has to be restarted

Srebrenko Šehić ssehic at gmail.com
Wed Apr 6 19:55:58 MSD 2011


2011/4/5 Srebrenko Šehić <ssehic at gmail.com>:

> I will try to get a gdb trace of running wild process.

Some more info about this problem. I had the cache manager stuck in a
spinlock, using 100% CPU. I did a ktrace (this is on OpenBSD 4.8/amd64
running nginx 0.8.50); here is the output, edited for brevity:

# ps auxww | grep 28055 | grep -v grep
_proxy   28055 99.1  1.9 646012 39096 ??  R/0    2:28PM   75:07.06
nginx: cache manager process (nginx)
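
For reference, the trace itself was attached roughly like this before the
kdump below (exact flags from memory, so treat it as a sketch):

# attach ktrace to the spinning cache manager, let it run a few seconds,
# then stop tracing that pid so ktrace.out stays small
ktrace -p 28055
sleep 5
ktrace -c -p 28055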

# kdump -R | head -50
 28055 nginx    1302104178.942729 EMUL  "native"
 28055 nginx    0.000051 RET   gettimeofday 0
 28055 nginx    0.000031 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000011 RET   gettimeofday 0
 28055 nginx    0.000204 CALL  getpid()
 28055 nginx    0.000008 RET   getpid 28055/0x6d97
 28055 nginx    0.000026 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000010 RET   gettimeofday 0
 28055 nginx    0.000027 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000020 RET   gettimeofday 0
 28055 nginx    0.000040 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000009 RET   gettimeofday 0
 28055 nginx    0.000027 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000006 RET   gettimeofday 0
 28055 nginx    0.000027 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000006 RET   gettimeofday 0
 28055 nginx    0.000015 CALL  getpid()
 28055 nginx    0.000004 RET   getpid 28055/0x6d97
 28055 nginx    0.000019 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000006 RET   gettimeofday 0
 28055 nginx    0.000026 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000007 RET   gettimeofday 0
 28055 nginx    0.000026 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000007 RET   gettimeofday 0
 28055 nginx    0.000026 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000007 RET   gettimeofday 0
 28055 nginx    0.000026 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000006 RET   gettimeofday 0
 28055 nginx    0.000015 CALL  getpid()
 28055 nginx    0.000011 RET   getpid 28055/0x6d97
 28055 nginx    0.000018 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000006 RET   gettimeofday 0
 28055 nginx    0.000027 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000006 RET   gettimeofday 0
 28055 nginx    0.000027 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000006 RET   gettimeofday 0
 28055 nginx    0.000026 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000006 RET   gettimeofday 0
 28055 nginx    0.000027 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000006 RET   gettimeofday 0
 28055 nginx    0.000016 CALL  getpid()
 28055 nginx    0.000003 RET   getpid 28055/0x6d97
 28055 nginx    0.000018 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000006 RET   gettimeofday 0
 28055 nginx    0.000026 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000007 RET   gettimeofday 0
 28055 nginx    0.000026 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000007 RET   gettimeofday 0
 28055 nginx    0.000026 CALL  gettimeofday(0x7f7ffffea2a0,0)
 28055 nginx    0.000006 RET   gettimeofday 0

As you can see, nginx is stuck calling gettimeofday() and getpid() in an
endless loop. Filtering those two syscalls out of the kdump output gives
this:

# kdump -R | grep -v gettime | grep -v getpid
 28055 nginx    1302104178.942729 EMUL  "native"
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000014 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000014 RET   __sysctl 0
 28055 nginx    0.000009 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000012 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000011 RET   __sysctl 0
 28055 nginx    0.000007 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000017 RET   __sysctl 0
 28055 nginx    0.000018 CALL  sched_yield()
 28055 nginx    0.000013 RET   sched_yield 0
 28055 nginx    0.000007 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000015 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000014 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000022 RET   __sysctl 0
 28055 nginx    0.000007 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000015 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000013 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000013 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000017 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000013 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000014 RET   __sysctl 0
 28055 nginx    0.000007 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000014 RET   __sysctl 0
 28055 nginx    0.000007 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000015 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000013 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000013 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000014 RET   __sysctl 0
 28055 nginx    0.000007 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000013 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000014 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000014 RET   __sysctl 0
 28055 nginx    0.000008 CALL  __sysctl(1.37,0x7f7ffffea130,0x7f7ffffea118,0,0)
 28055 nginx    0.000013 RET   __sysctl 0
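
In case the raw numbers help, a rough one-liner like this should tally
which syscalls dominate the trace (the field number is a guess against
the kdump -R layout above, so it may need adjusting):

# count CALL entries per syscall name; $5 is the syscall column in the
# kdump -R output shown above
kdump -R | awk '$4 == "CALL" { sub(/\(.*/, "", $5); count[$5]++ }
                END { for (s in count) print count[s], s }' | sort -rn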

Something is "rotten" in the cache manager.

I also found out that setting "proxy_cache_path .... max_size=10k" (a
ridiculously small number) while there is traffic on the virtual host
triggers the problem even *without* nginx receiving a SIGHUP.
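
For anyone who wants to try to reproduce it: roughly, shrink max_size far
below the cache's current on-disk size on a busy proxy and watch the
cache manager. The paths and zone name below are placeholders, not my
real config; only max_size=10k mirrors what I actually changed:

# hypothetical config change (placeholder path/zone name):
#   proxy_cache_path /var/nginx/cache levels=1:2 keys_zone=one:10m max_size=10k;
#
# restart nginx (no SIGHUP involved), keep traffic flowing, then watch
# the cache manager's CPU usage
pkill nginx; sleep 1; /usr/local/sbin/nginx
ps auxww | grep 'nginx: cache manager'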

Here is a gdb trace (no debugging symbols - I will try to get that):

(gdb) attach 28055
Attaching to program: /usr/local/sbin/nginx, process 28055
....
....
0x00000002072246fa in getpid () from /usr/lib/libc.so.56.0
(gdb) bt
#0  0x00000002072246fa in getpid () from /usr/lib/libc.so.56.0
#1  0x000000020728ce99 in arc4random_stir () from /usr/lib/libc.so.56.0
#2  0x000000020728cef9 in arc4random_buf () from /usr/lib/libc.so.56.0
#3  0x000000020726a795 in _citrus_utf8_ctype_wcsrtombs () from
/usr/lib/libc.so.56.0
#4  0x000000020726c12d in free () from /usr/lib/libc.so.56.0
#5  0x000000020726cba3 in malloc () from /usr/lib/libc.so.56.0
#6  0x0000000000423c3e in ngx_alloc ()
#7  0x000000000044cde2 in ngx_http_file_cache_set_header ()
#8  0x000000000044d080 in ngx_http_file_cache_set_header ()
#9  0x0000000000427329 in ngx_single_process_cycle ()
#10 0x00000000004219aa in ngx_event_expire_timers ()
#11 0x000000000042189d in ngx_process_events_and_timers ()
#12 0x0000000000427f57 in ngx_master_process_cycle ()
#13 0x0000000000425cff in ngx_spawn_process ()
#14 0x0000000000426833 in ngx_single_process_cycle ()
#15 0x00000000004276c6 in ngx_master_process_cycle ()
#16 0x000000000040e616 in main ()
(gdb) bt
#0  0x00000002072246fa in getpid () from /usr/lib/libc.so.56.0
#1  0x000000020728ce99 in arc4random_stir () from /usr/lib/libc.so.56.0
#2  0x000000020728cef9 in arc4random_buf () from /usr/lib/libc.so.56.0
#3  0x000000020726a795 in _citrus_utf8_ctype_wcsrtombs () from
/usr/lib/libc.so.56.0
#4  0x000000020726c12d in free () from /usr/lib/libc.so.56.0
#5  0x000000020726cba3 in malloc () from /usr/lib/libc.so.56.0
#6  0x0000000000423c3e in ngx_alloc ()
#7  0x000000000044cde2 in ngx_http_file_cache_set_header ()
#8  0x000000000044d080 in ngx_http_file_cache_set_header ()
#9  0x0000000000427329 in ngx_single_process_cycle ()
#10 0x00000000004219aa in ngx_event_expire_timers ()
#11 0x000000000042189d in ngx_process_events_and_timers ()
#12 0x0000000000427f57 in ngx_master_process_cycle ()
#13 0x0000000000425cff in ngx_spawn_process ()
#14 0x0000000000426833 in ngx_single_process_cycle ()
#15 0x00000000004276c6 in ngx_master_process_cycle ()
#16 0x000000000040e616 in main ()
(gdb) c
Continuing.
^C
Program received signal SIGINT, Interrupt.
0x000000020722cc4a in gettimeofday () from /usr/lib/libc.so.56.0
(gdb) bt
#0  0x000000020722cc4a in gettimeofday () from /usr/lib/libc.so.56.0
#1  0x0000000000417982 in ngx_time_update ()
#2  0x000000000044c73a in ngx_http_file_cache_set_header ()
#3  0x000000000044d08a in ngx_http_file_cache_set_header ()
#4  0x0000000000427329 in ngx_single_process_cycle ()
#5  0x00000000004219aa in ngx_event_expire_timers ()
#6  0x000000000042189d in ngx_process_events_and_timers ()
#7  0x0000000000427f57 in ngx_master_process_cycle ()
#8  0x0000000000425cff in ngx_spawn_process ()
#9  0x0000000000426833 in ngx_single_process_cycle ()
#10 0x00000000004276c6 in ngx_master_process_cycle ()
#11 0x000000000040e616 in main ()
(gdb) c
Continuing.
^C
Program received signal SIGINT, Interrupt.
0x000000020722cc4a in gettimeofday () from /usr/lib/libc.so.56.0
(gdb) bt
#0  0x000000020722cc4a in gettimeofday () from /usr/lib/libc.so.56.0
#1  0x0000000000417982 in ngx_time_update ()
#2  0x000000000044c73a in ngx_http_file_cache_set_header ()
#3  0x000000000044d08a in ngx_http_file_cache_set_header ()
#4  0x0000000000427329 in ngx_single_process_cycle ()
#5  0x00000000004219aa in ngx_event_expire_timers ()
#6  0x000000000042189d in ngx_process_events_and_timers ()
#7  0x0000000000427f57 in ngx_master_process_cycle ()
#8  0x0000000000425cff in ngx_spawn_process ()
#9  0x0000000000426833 in ngx_single_process_cycle ()
#10 0x00000000004276c6 in ngx_master_process_cycle ()
#11 0x000000000040e616 in main ()
(gdb) c
Continuing.
^C
Program received signal SIGINT, Interrupt.
0x000000020722cc4a in gettimeofday () from /usr/lib/libc.so.56.0
(gdb) bt
#0  0x000000020722cc4a in gettimeofday () from /usr/lib/libc.so.56.0
#1  0x0000000000417982 in ngx_time_update ()
#2  0x000000000044c73a in ngx_http_file_cache_set_header ()
#3  0x000000000044d08a in ngx_http_file_cache_set_header ()
#4  0x0000000000427329 in ngx_single_process_cycle ()
#5  0x00000000004219aa in ngx_event_expire_timers ()
#6  0x000000000042189d in ngx_process_events_and_timers ()
#7  0x0000000000427f57 in ngx_master_process_cycle ()
#8  0x0000000000425cff in ngx_spawn_process ()
#9  0x0000000000426833 in ngx_single_process_cycle ()
#10 0x00000000004276c6 in ngx_master_process_cycle ()
#11 0x000000000040e616 in main ()
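
Next time I can sample it non-interactively instead of the manual ^C
dance; something along these lines (gdb's -batch/-ex/-p options) should
grab a backtrace every second. A rebuild with --with-cc-opt='-g' and an
unstripped binary should also give real symbols instead of the bogus
frame names above:

# take a backtrace of the running cache manager once a second
while true; do
    gdb -batch -ex bt -p 28055 /usr/local/sbin/nginx
    sleep 1
done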

Any input is really appreciated.

Thanks,
Srebrenko


