[BUG?]fail_timeout/max_fails: code doesn't do what doc says

Dmitry Popov dp at highloadlab.com
Tue May 21 12:43:31 UTC 2013


Sorry, clipboard issue, corrupted patch send, here's a good one:

Upstream bugfix: fail_timeout could be ignored

Upstream could be disabled no matter what fail_timeout is (if requests fail 
each fail_timeout seconds).

Signed-off-by: Dmitry Popov <dp at highloadlab.com>

diff -ur old/src/http/ngx_http_upstream_round_robin.h new/src/http/ngx_http_upstream_round_robin.h
--- old/src/http/ngx_http_upstream_round_robin.h	2013-05-21 13:22:09.478903537 +0400
+++ new/src/http/ngx_http_upstream_round_robin.h	2013-05-21 15:38:05.488236353 +0400
@@ -24,13 +24,17 @@
     ngx_int_t                       weight;
 
     ngx_uint_t                      fails;
-    time_t                          accessed;
-    time_t                          checked;
+    union {
+        time_t                      first_fail;
+        time_t                      unavail_from;
+    };
 
     ngx_uint_t                      max_fails;
     time_t                          fail_timeout;
 
-    ngx_uint_t                      down;          /* unsigned  down:1; */
+    unsigned                        avail:1;   
+    unsigned                        avail_check:1;   
+    unsigned                        down:1;
 
 #if (NGX_HTTP_SSL)
     ngx_ssl_session_t              *ssl_session;   /* local to a process */
diff -ur old/src/http/modules/ngx_http_upstream_ip_hash_module.c new/src/http/modules/ngx_http_upstream_ip_hash_module.c
--- old/src/http/modules/ngx_http_upstream_ip_hash_module.c	2013-05-21 13:22:09.475570203 +0400
+++ new/src/http/modules/ngx_http_upstream_ip_hash_module.c	2013-05-21 15:44:22.671564821 +0400
@@ -208,12 +208,14 @@
 
             if (!peer->down) {
 
-                if (peer->max_fails == 0 || peer->fails < peer->max_fails) {
+                if (peer->avail) {
                     break;
                 }
 
-                if (now - peer->checked > peer->fail_timeout) {
-                    peer->checked = now;
+                if (now - peer->unavail_from >= peer->fail_timeout
+                    && !peer->avail_check)
+                {
+                    peer->avail_check = 1;
                     break;
                 }
             }
diff -ur old/src/http/modules/ngx_http_upstream_least_conn_module.c new/src/http/modules/ngx_http_upstream_least_conn_module.c
--- old/src/http/modules/ngx_http_upstream_least_conn_module.c	2013-05-21 13:22:09.472236869 +0400
+++ new/src/http/modules/ngx_http_upstream_least_conn_module.c	2013-05-21 15:45:35.254897219 +0400
@@ -203,9 +203,9 @@
             continue;
         }
 
-        if (peer->max_fails
-            && peer->fails >= peer->max_fails
-            && now - peer->checked <= peer->fail_timeout)
+        if (!peer->avail
+            && (now - peer->unavail_from < peer->fail_timeout
+                || peer->avail_check))
         {
             continue;
         }
@@ -260,9 +260,9 @@
                 continue;
             }
 
-            if (peer->max_fails
-                && peer->fails >= peer->max_fails
-                && now - peer->checked <= peer->fail_timeout)
+            if (!peer->avail
+                && (now - peer->unavail_from < peer->fail_timeout
+                    || peer->avail_check))
             {
                 continue;
             }
@@ -282,7 +282,9 @@
     }
 
     best->current_weight -= total;
-    best->checked = now;
+    if (!best->avail) {
+        best->avail_check = 1;
+    }
 
     pc->sockaddr = best->sockaddr;
     pc->socklen = best->socklen;
@@ -331,6 +333,9 @@
 
     for (i = 0; i < peers->number; i++) {
         peers->peer[i].fails = 0;
+        peers->peer[i].avail = 1;
+        peers->peer[i].avail_check = 0;
+        /* peers->peer[i].first_fail = 0; */
     }
 
     pc->name = peers->name;
diff -ur old/src/http/ngx_http_upstream_round_robin.c new/src/http/ngx_http_upstream_round_robin.c
--- old/src/http/ngx_http_upstream_round_robin.c	2013-05-21 13:22:09.472236869 +0400
+++ new/src/http/ngx_http_upstream_round_robin.c	2013-05-21 15:46:07.171563474 +0400
@@ -85,6 +85,8 @@
                 peers->peer[n].weight = server[i].weight;
                 peers->peer[n].effective_weight = server[i].weight;
                 peers->peer[n].current_weight = 0;
+                peers->peer[n].avail = 1;
+                peers->peer[n].avail_check = 0;
                 n++;
             }
         }
@@ -139,6 +141,8 @@
                 backup->peer[n].max_fails = server[i].max_fails;
                 backup->peer[n].fail_timeout = server[i].fail_timeout;
                 backup->peer[n].down = server[i].down;
+                backup->peer[n].avail = 1;
+                backup->peer[n].avail_check = 0;
                 n++;
             }
         }
@@ -196,6 +200,8 @@
         peers->peer[i].current_weight = 0;
         peers->peer[i].max_fails = 1;
         peers->peer[i].fail_timeout = 10;
+        peers->peer[i].avail = 1;
+        peers->peer[i].avail_check = 0;
     }
 
     us->peer.data = peers;
@@ -301,6 +307,8 @@
         peers->peer[0].current_weight = 0;
         peers->peer[0].max_fails = 1;
         peers->peer[0].fail_timeout = 10;
+        peers->peer[0].avail = 1;
+        peers->peer[0].avail_check = 0;
 
     } else {
 
@@ -334,6 +342,8 @@
             peers->peer[i].current_weight = 0;
             peers->peer[i].max_fails = 1;
             peers->peer[i].fail_timeout = 10;
+            peers->peer[i].avail = 1;
+            peers->peer[i].avail_check = 0;
         }
     }
 
@@ -451,6 +461,9 @@
 
     for (i = 0; i < peers->number; i++) {
         peers->peer[i].fails = 0;
+        peers->peer[i].avail = 1;
+        peers->peer[i].avail_check = 0;
+        /* peers->peer[i].first_fail = 0; */
     }
 
     /* ngx_unlock_mutex(peers->mutex); */
@@ -490,9 +503,9 @@
             continue;
         }
 
-        if (peer->max_fails
-            && peer->fails >= peer->max_fails
-            && now - peer->checked <= peer->fail_timeout)
+        if (!peer->avail
+            && (now - peer->unavail_from < peer->fail_timeout
+                || peer->avail_check))
         {
             continue;
         }
@@ -523,7 +536,9 @@
     rrp->tried[n] |= m;
 
     best->current_weight -= total;
-    best->checked = now;
+    if (!best->avail) {
+        best->avail_check = 1;
+    }
 
     return best;
 }
@@ -550,35 +565,49 @@
 
     peer = &rrp->peers->peer[rrp->current];
 
-    if (state & NGX_PEER_FAILED) {
+    if (peer->max_fails) {
         now = ngx_time();
 
-        /* ngx_lock_mutex(rrp->peers->mutex); */
+        if (peer->avail && now - peer->first_fail >= peer->fail_timeout) {
+            peer->fails = 0;
+        }
+
+        if (state & NGX_PEER_FAILED) {
+            /* ngx_lock_mutex(rrp->peers->mutex); */
+
+            if (peer->fails++ == 0) {
+                peer->first_fail = now;
+            }
+
+            if (peer->fails >= peer->max_fails) {
+                peer->avail = 0;
+                peer->unavail_from = now;
+            }
 
-        peer->fails++;
-        peer->accessed = now;
-        peer->checked = now;
+            peer->avail_check = 0;
 
-        if (peer->max_fails) {
             peer->effective_weight -= peer->weight / peer->max_fails;
-        }
 
-        ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,
-                       "free rr peer failed: %ui %i",
-                       rrp->current, peer->effective_weight);
+            ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,
+                           "free rr peer failed: %ui %i",
+                           rrp->current, peer->effective_weight);
 
-        if (peer->effective_weight < 0) {
-            peer->effective_weight = 0;
-        }
+            if (peer->effective_weight < 0) {
+                peer->effective_weight = 0;
+            }
 
-        /* ngx_unlock_mutex(rrp->peers->mutex); */
+            /* ngx_unlock_mutex(rrp->peers->mutex); */
 
-    } else {
+        } else {
 
-        /* mark peer live if check passed */
+            /* mark peer live if check passed */
 
-        if (peer->accessed < peer->checked) {
-            peer->fails = 0;
+            if (peer->avail_check) {
+                peer->fails = 0;
+                peer->avail = 1;
+                peer->avail_check = 0;
+                /* peers->peer[i].first_fail = 0; */
+            }
         }
     }



More information about the nginx-devel mailing list