keepalive not work with grpc

Maxim Dounin mdounin at mdounin.ru
Wed Aug 22 01:57:50 UTC 2018


Hello!

On Fri, Aug 17, 2018 at 03:36:15AM -0400, hunterqin wrote:

> I used protobuf v310, grpc v181, nginx v1152 and v1140.
> The server is just same as the example async_server on grpc website. 
> https://grpc.io/docs/tutorials/async/helloasync-cpp.html
> You can use it directly.
> https://github.com/grpc/grpc/blob/v1.14.1/examples/cpp/helloworld/greeter_async_server.cc

Thank you for details, I was able to reproduce the problem.  
Please try the patch below.

> By the way, there is another problem found in nginx. Maybe you are intersted
> in it.
> After fixing the problem above, I tried more. The rpc service is simple, it
> just transfers a short string(less than 10 bytes). At first, the grpc client
> , nginx and grpc server works well together. But after some time, the
> service stopped. I used the wireshark to check the problem. The last DATA
> frame from nginx to grpc server is wrong. For example, in correct packet,
> the DATA length is 13, message in protobuf is "Hunter", so the length of
> string in protobuf is 6. 
> protobuf part:
> 0a 06 48 75 6e 74 65 72.
> But the last one is wrong, its DATA frame length is 10, it didn't send the
> message completely
> 0a 06 48 75 6e.
> Then the grpc server just send back a TCP ACK and ignore the wrong message,
> finally nginx stop the TCP connection.
> Then I test for longer string. When the string length is 1000bytes, it can
> finish for about 4000 times. When the string length is 500bytes, it turns to
> 8000+times. I guess some problems may happen in nginx buffer. You could
> check it , too.

The "After fixing the problem above" part is most likely the 
culprit.  If you've simply modified nginx to keep connections 
alive despite the fact that there were unparsed control frames, 
this will quickly result in problems due to incorrect flow 
control because of lost window update frames.  And this matches 
symptoms you've observed.

Please try the following patch to see if it helps:

# HG changeset patch
# User Maxim Dounin <mdounin at mdounin.ru>
# Date 1534897290 -10800
#      Wed Aug 22 03:21:30 2018 +0300
# Node ID 2cc5ef571e7a9a4c299bb3ffd6234529cc2183f1
# Parent  70c6b08973a02551612da4a4273757dc77c70ae2
gRPC: improved keepalive handling.

The code is now able to parse additional control frames after
the response is received, and can send control frames as well.
This fixes keepalive problems as observed with grpc-c, which can
send window update and ping frames after the response.

diff --git a/src/http/modules/ngx_http_grpc_module.c b/src/http/modules/ngx_http_grpc_module.c
--- a/src/http/modules/ngx_http_grpc_module.c
+++ b/src/http/modules/ngx_http_grpc_module.c
@@ -111,6 +111,7 @@ typedef struct {
     unsigned                   output_closed:1;
     unsigned                   parsing_headers:1;
     unsigned                   end_stream:1;
+    unsigned                   done:1;
     unsigned                   status:1;
 
     ngx_http_request_t        *request;
@@ -1074,6 +1075,7 @@ ngx_http_grpc_reinit_request(ngx_http_re
     ctx->output_closed = 0;
     ctx->parsing_headers = 0;
     ctx->end_stream = 0;
+    ctx->done = 0;
     ctx->status = 0;
     ctx->connection = NULL;
 
@@ -1407,6 +1409,27 @@ ngx_http_grpc_body_output_filter(void *d
         rc = NGX_AGAIN;
     }
 
+    if (ctx->done) {
+
+        /*
+         * We have already got the response and were sending some additional
+         * control frames.  Even if there is still something unsent, stop
+         * here anyway.
+         */
+
+        r->upstream->length = 0;
+
+        if (ctx->in == NULL
+            && ctx->out == NULL
+            && ctx->output_closed
+            && ctx->state == ngx_http_grpc_st_start)
+        {
+            r->upstream->keepalive = 1;
+        }
+
+        ngx_post_event(r->upstream->peer.connection->read, &ngx_posted_events);
+    }
+
     return rc;
 }
 
@@ -1832,6 +1855,33 @@ ngx_http_grpc_filter(void *data, ssize_t
             rc = ngx_http_grpc_parse_frame(r, ctx, b);
 
             if (rc == NGX_AGAIN) {
+
+                if (ctx->done) {
+
+                    /*
+                     * We have finished parsing the response and the
+                     * remaining control frames.  If there are unsent
+                     * control frames, post a write event to sent them.
+                     */
+
+                    if (ctx->out) {
+                        ngx_post_event(u->peer.connection->write,
+                                       &ngx_posted_events);
+                        return NGX_AGAIN;
+                    }
+
+                    u->length = 0;
+
+                    if (ctx->in == NULL
+                        && ctx->output_closed
+                        && ctx->state == ngx_http_grpc_st_start)
+                    {
+                        u->keepalive = 1;
+                    }
+   
+                    return NGX_OK;
+                }
+
                 return NGX_AGAIN;
             }
 
@@ -1898,6 +1948,13 @@ ngx_http_grpc_filter(void *data, ssize_t
                 return NGX_ERROR;
             }
 
+            if (ctx->stream_id && ctx->done) {
+                ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
+                              "upstream sent frame for closed stream %ui",
+                              ctx->stream_id);
+                return NGX_ERROR;
+            }
+
             ctx->padding = 0;
         }
 
@@ -1914,17 +1971,7 @@ ngx_http_grpc_filter(void *data, ssize_t
             ctx->state = ngx_http_grpc_st_start;
 
             if (ctx->flags & NGX_HTTP_V2_END_STREAM_FLAG) {
-                u->length = 0;
-
-                if (ctx->in == NULL
-                    && ctx->out == NULL
-                    && ctx->output_closed
-                    && b->last == b->pos)
-                {
-                    u->keepalive = 1;
-                }
-
-                break;
+                ctx->done = 1;
             }
 
             continue;
@@ -2094,17 +2141,8 @@ ngx_http_grpc_filter(void *data, ssize_t
                                    "grpc trailer done");
 
                     if (ctx->end_stream) {
-                        u->length = 0;
-
-                        if (ctx->in == NULL
-                            && ctx->out == NULL
-                            && ctx->output_closed
-                            && b->last == b->pos)
-                        {
-                            u->keepalive = 1;
-                        }
-
-                        return NGX_OK;
+                        ctx->done = 1;
+                        break;
                     }
 
                     ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
@@ -2121,6 +2159,10 @@ ngx_http_grpc_filter(void *data, ssize_t
                 return NGX_ERROR;
             }
 
+            if (rc == NGX_HTTP_PARSE_HEADER_DONE) {
+                continue;
+            }
+
             /* rc == NGX_AGAIN */
 
             if (ctx->rest == 0) {
@@ -2237,17 +2279,7 @@ ngx_http_grpc_filter(void *data, ssize_t
         ctx->state = ngx_http_grpc_st_start;
 
         if (ctx->flags & NGX_HTTP_V2_END_STREAM_FLAG) {
-            u->length = 0;
-
-            if (ctx->in == NULL
-                && ctx->out == NULL
-                && ctx->output_closed
-                && b->last == b->pos)
-            {
-                u->keepalive = 1;
-            }
-
-            break;
+            ctx->done = 1;
         }
     }
 

-- 
Maxim Dounin
http://mdounin.ru/


More information about the nginx mailing list