[PATCH] always process short preread body

Maxim Dounin mdounin at mdounin.ru
Tue Sep 30 23:13:50 MSD 2008


Hello!

On Thu, Sep 25, 2008 at 06:40:56PM +0400, Maxim Dounin wrote:

> On Thu, Sep 25, 2008 at 09:25:08PM +0900, John Fremlin wrote:
>
> [...]
>>>   From strace in your message, it looks like the real problems are:
>>>
>>> 1. Backend, which hasn't closed connection after sending reply.
>>
>> Yes, the backend does not close.
>
> Ok, so I was right and backend isn't http-complaint.
>
>>> 2. The fact that nginx waits for connection close before it
>>> realizes that request was completed, even if got 'Content-Length'
>>> header and appropriate number of body bytes.
>>
>> And 3. nginx never forwards the body of the message after the time out,
>> (even though it has read it) sending only the header
>> with content-length!
>
> Yep, thats another problem in nginx with handling such buggy backends.

[...]

> The problem is that patch penalizes configurations with non-buggy  
> backends in an attempt to solve problem with buggy ones.
>
> I'm not SPEWS^W^W not Igor, but it looks for me that the chances the 
> patch will be accepted is rather small.
>
> On the other hand I've seen at least two similar reports in last month or 
> two, and it will be good for nginx to accept such backend replies.  And 
> handling of this situation correctly is anyway required to support 
> persistent connections in future (if any).
>
> I believe I will be able to reproduce the problem and I'll try to  
> produce more correct patch as time permits.

I've finally build two patches for this, attached.

The patch-nginx-proxy-flush.txt patch adds flushing of partially 
filled buffers in pipe if upstream times out.  This makes nginx 
replies as close as possible to upstream's ones.

The patch-nginx-proxy-length.txt allows nginx to use 
Content-Length it got from upstream as a hint that there is no 
need to wait for upstream's connection close.

Patches are independent and apply cleanly in any order.

Igor, could you please take a look?

Maxim Dounin
-------------- next part --------------
# HG changeset patch
# User Maxim Dounin <mdounin at mdounin.ru>
# Date 1222799784 -14400
# Node ID 6c9923c9a9f90c335adccd0563858d181369c959
# Parent  35409f39a096a1575bb24e34c61ee1b4b553aa94
Proxy: flush partially filled buffers on upstream timeout.

Fix typo in comment while here.

diff --git a/src/event/ngx_event_pipe.c b/src/event/ngx_event_pipe.c
--- a/src/event/ngx_event_pipe.c
+++ b/src/event/ngx_event_pipe.c
@@ -10,7 +10,8 @@
 #include <ngx_event_pipe.h>
 
 
-static ngx_int_t ngx_event_pipe_read_upstream(ngx_event_pipe_t *p);
+static ngx_int_t ngx_event_pipe_read_upstream(ngx_event_pipe_t *p,
+                                              ngx_int_t do_flush);
 static ngx_int_t ngx_event_pipe_write_to_downstream(ngx_event_pipe_t *p);
 
 static ngx_int_t ngx_event_pipe_write_chain_to_temp_file(ngx_event_pipe_t *p);
@@ -40,7 +41,7 @@ ngx_event_pipe(ngx_event_pipe_t *p, ngx_
 
         p->log->action = "reading upstream";
 
-        if (ngx_event_pipe_read_upstream(p) == NGX_ABORT) {
+        if (ngx_event_pipe_read_upstream(p, 0) == NGX_ABORT) {
             return NGX_ABORT;
         }
 
@@ -88,8 +89,27 @@ ngx_event_pipe(ngx_event_pipe_t *p, ngx_
 }
 
 
+ngx_int_t
+ngx_event_pipe_flush(ngx_event_pipe_t *p)
+{
+    p->log->action = "reading upstream";
+
+    if (ngx_event_pipe_read_upstream(p, 1) == NGX_ABORT) {
+        return NGX_ABORT;
+    }
+
+    p->log->action = "sending to client";
+
+    if (ngx_event_pipe_write_to_downstream(p) == NGX_ABORT) {
+        return NGX_ABORT;
+    }
+
+    return NGX_OK;
+}
+
+
 static ngx_int_t
-ngx_event_pipe_read_upstream(ngx_event_pipe_t *p)
+ngx_event_pipe_read_upstream(ngx_event_pipe_t *p, ngx_int_t do_flush)
 {
     ssize_t       n, size;
     ngx_int_t     rc;
@@ -97,6 +117,9 @@ ngx_event_pipe_read_upstream(ngx_event_p
     ngx_chain_t  *chain, *cl, *ln;
 
     if (p->upstream_eof || p->upstream_error || p->upstream_done) {
+        if (do_flush) {
+            goto flush;
+        }
         return NGX_OK;
     }
 
@@ -330,6 +353,8 @@ ngx_event_pipe_read_upstream(ngx_event_p
             p->free_raw_bufs = cl;
         }
     }
+
+flush:
 
 #if (NGX_DEBUG)
 
@@ -928,7 +953,7 @@ ngx_event_pipe_add_free_buf(ngx_event_pi
         return NGX_OK;
     }
 
-    /* the first free buf is partialy filled, thus add the free buf after it */
+    /* the first free buf is partially filled, thus add the free buf after it */
 
     cl->next = p->free_raw_bufs->next;
     p->free_raw_bufs->next = cl;
diff --git a/src/event/ngx_event_pipe.h b/src/event/ngx_event_pipe.h
--- a/src/event/ngx_event_pipe.h
+++ b/src/event/ngx_event_pipe.h
@@ -87,6 +87,7 @@ struct ngx_event_pipe_s {
 
 
 ngx_int_t ngx_event_pipe(ngx_event_pipe_t *p, ngx_int_t do_write);
+ngx_int_t ngx_event_pipe_flush(ngx_event_pipe_t *p);
 ngx_int_t ngx_event_pipe_copy_input_filter(ngx_event_pipe_t *p, ngx_buf_t *buf);
 ngx_int_t ngx_event_pipe_add_free_buf(ngx_event_pipe_t *p, ngx_buf_t *b);
 
diff --git a/src/http/ngx_http_upstream.c b/src/http/ngx_http_upstream.c
--- a/src/http/ngx_http_upstream.c
+++ b/src/http/ngx_http_upstream.c
@@ -2034,6 +2034,16 @@ ngx_http_upstream_process_body(ngx_event
         } else {
             p->upstream_error = 1;
             ngx_connection_error(c, NGX_ETIMEDOUT, "upstream timed out");
+
+            if (ngx_event_pipe_flush(p) == NGX_ABORT) {
+
+                if (downstream->destroyed) {
+                    return;
+                }
+
+                ngx_http_upstream_finalize_request(r, u, 0);
+                return;
+            }
         }
 
     } else {
-------------- next part --------------
# HG changeset patch
# User Maxim Dounin <mdounin at mdounin.ru>
# Date 1222800716 -14400
# Node ID 2316a58455cd955e0acbfcc36af2fcbf23e273fe
# Parent  6c9923c9a9f90c335adccd0563858d181369c959
Proxy: don't wait for upstream close if we know length.

This change basically deligates right to control how many bytes may be
buffered by ngx_event_pipe to input filter by means of changing p->length.

Currently p->length is set to NGX_MAX_OFF_T_VALUE if content length isn't
known (and thus input must be terminated by upstream connection close), or
to content length if it's known.

Futher improvement may involve correct parsing of chunked transfer encoding -
by setting p->length in input filter as approprate.  E.g. set it to 2 if you
just parsed chunk end and expect other chunk to start (chunk takes at least
3 bytes, "0" CRLF, but RFC 2616 recomends to be tolerant and recognize bare
LF too).

diff --git a/src/event/ngx_event_pipe.c b/src/event/ngx_event_pipe.c
--- a/src/event/ngx_event_pipe.c
+++ b/src/event/ngx_event_pipe.c
@@ -328,22 +328,26 @@ ngx_event_pipe_read_upstream(ngx_event_p
 
             if (n >= size) {
                 cl->buf->last = cl->buf->end;
-
-                /* STUB */ cl->buf->num = p->num++;
-
-                if (p->input_filter(p, cl->buf) == NGX_ERROR) {
-                    return NGX_ABORT;
-                }
-
                 n -= size;
-                ln = cl;
-                cl = cl->next;
-                ngx_free_chain(p->pool, ln);
 
             } else {
                 cl->buf->last += n;
                 n = 0;
+
+                if (cl->buf->last - cl->buf->pos < p->length) {
+                    continue;
+                }
             }
+
+            /* STUB */ cl->buf->num = p->num++;
+
+            if (p->input_filter(p, cl->buf) == NGX_ERROR) {
+                return NGX_ABORT;
+            }
+
+            ln = cl;
+            cl = cl->next;
+            ngx_free_chain(p->pool, ln);
         }
 
         if (cl) {
@@ -409,6 +413,9 @@ flush:
                        cl->buf->file_pos,
                        cl->buf->file_last - cl->buf->file_pos);
     }
+
+    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, p->log, 0,
+                   "pipe length: %O", p->length);
 
 #endif
 
@@ -856,6 +863,16 @@ ngx_event_pipe_copy_input_filter(ngx_eve
     }
     p->last_in = &cl->next;
 
+    if (p->length == NGX_MAX_OFF_T_VALUE) {
+        return NGX_OK;
+    }
+
+    p->length -= b->last - b->pos;
+
+    if (p->length <= 0) {
+        p->upstream_done = 1;
+    }
+
     return NGX_OK;
 }
 
diff --git a/src/event/ngx_event_pipe.h b/src/event/ngx_event_pipe.h
--- a/src/event/ngx_event_pipe.h
+++ b/src/event/ngx_event_pipe.h
@@ -65,6 +65,7 @@ struct ngx_event_pipe_s {
     ssize_t            busy_size;
 
     off_t              read_length;
+    off_t              length;
 
     off_t              max_temp_file_size;
     ssize_t            temp_file_write_size;
diff --git a/src/http/ngx_http_upstream.c b/src/http/ngx_http_upstream.c
--- a/src/http/ngx_http_upstream.c
+++ b/src/http/ngx_http_upstream.c
@@ -1668,6 +1668,13 @@ ngx_http_upstream_send_response(ngx_http
     p->pool = r->pool;
     p->log = c->log;
 
+    if (r->headers_out.content_length_n != -1) {
+        p->length = r->headers_out.content_length_n;
+
+    } else {
+        p->length = NGX_MAX_OFF_T_VALUE;
+    }
+
     p->cacheable = u->cacheable || u->store;
 
     p->temp_file = ngx_pcalloc(r->pool, sizeof(ngx_temp_file_t));


More information about the nginx mailing list