[PATCH] Range filter: always support ascending multipart ranges.

Robert Burke sharpobject at gmail.com
Thu Apr 11 12:51:57 UTC 2019


Hello,

At Cloudflare we'd like to support multiple byte-ranges if they are
non-overlapping and in ascending order on cache miss or when using
slice. This patch adds support for that. It preserves the existing
multipart range support for response bodies that are in a single
buffer. It does not remove buffers from the passed-in chain, which
seems like it was the biggest issue mdounin raised with the previous
patch.

I was not sure what semantics proxy_cache_max_range_offset should have
for multipart range requests, but the TODO tests seemed to think that
it should apply to requests that have any range beyond the limit. This
could get silly, like it could apply to bytes=5-6,7-8 but not apply to
bytes=5-8. Oh well. I just made it work like the tests say it should.

On second thought, maybe it would be better not to make a totally new
chain here. I could instead set unused buffers to length 0 and sync
and leave them in the chain as we found them.

Below is first the patch for nginx, then the patch for nginx-tests.
Looking forward to your thoughts.

Thanks!

# HG changeset patch
# User Robert Burke <sharpobject at gmail.com>
# Date 1554985755 25200
#      Thu Apr 11 05:29:15 2019 -0700
# Node ID 75b953a2f40d0083ff9aa9585dd6f5e061cf964c
# Parent  a6e23e343081b79eb924da985a414909310aa7a3
Range filter: always support ascending multipart ranges.

Arbitrary multipart ranges still work if the body is in a single buffer.
This patch is based on hucongcong's similar patch from 2017.

diff -r a6e23e343081 -r 75b953a2f40d src/http/modules/ngx_http_mp4_module.c
--- a/src/http/modules/ngx_http_mp4_module.c Tue Apr 09 16:00:30 2019 +0300
+++ b/src/http/modules/ngx_http_mp4_module.c Thu Apr 11 05:29:15 2019 -0700
@@ -568,7 +568,7 @@
     }

     if (start >= 0) {
-        r->single_range = 1;
+        r->ascending_ranges = 1;

         mp4 = ngx_pcalloc(r->pool, sizeof(ngx_http_mp4_file_t));
         if (mp4 == NULL) {
diff -r a6e23e343081 -r 75b953a2f40d
src/http/modules/ngx_http_range_filter_module.c
--- a/src/http/modules/ngx_http_range_filter_module.c Tue Apr 09
16:00:30 2019 +0300
+++ b/src/http/modules/ngx_http_range_filter_module.c Thu Apr 11
05:29:15 2019 -0700
@@ -54,6 +54,8 @@

 typedef struct {
     off_t        offset;
+    ngx_uint_t   index;
+    ngx_uint_t   descending;
     ngx_str_t    boundary_header;
     ngx_array_t  ranges;
 } ngx_http_range_filter_ctx_t;
@@ -72,6 +74,10 @@
     ngx_http_range_filter_ctx_t *ctx, ngx_chain_t *in);
 static ngx_int_t ngx_http_range_multipart_body(ngx_http_request_t *r,
     ngx_http_range_filter_ctx_t *ctx, ngx_chain_t *in);
+static ngx_int_t ngx_http_range_link_boundary_header(ngx_http_request_t *r,
+    ngx_http_range_filter_ctx_t *ctx, ngx_chain_t **ll);
+static ngx_int_t ngx_http_range_link_last_boundary(ngx_http_request_t *r,
+    ngx_http_range_filter_ctx_t *ctx, ngx_chain_t **ll);

 static ngx_int_t ngx_http_range_header_filter_init(ngx_conf_t *cf);
 static ngx_int_t ngx_http_range_body_filter_init(ngx_conf_t *cf);
@@ -148,7 +154,6 @@
 {
     time_t                        if_range_time;
     ngx_str_t                    *if_range, *etag;
-    ngx_uint_t                    ranges;
     ngx_http_core_loc_conf_t     *clcf;
     ngx_http_range_filter_ctx_t  *ctx;

@@ -222,11 +227,10 @@
         return NGX_ERROR;
     }

+    ctx->index = (ngx_uint_t) -1;
     ctx->offset = r->headers_out.content_offset;

-    ranges = r->single_range ? 1 : clcf->max_ranges;
-
-    switch (ngx_http_range_parse(r, ctx, ranges)) {
+    switch (ngx_http_range_parse(r, ctx, clcf->max_ranges)) {

     case NGX_OK:
         ngx_http_set_ctx(r, ctx, ngx_http_range_body_filter_module);
@@ -281,6 +285,7 @@
                                        ngx_http_range_body_filter_module);
         if (mctx) {
             ctx->ranges = mctx->ranges;
+            ctx->boundary_header = mctx->boundary_header;
             return NGX_OK;
         }
     }
@@ -293,6 +298,8 @@

     p = r->headers_in.range->value.data + 6;
     size = 0;
+    range = NULL;
+    ctx->descending = 0;
     content_length = r->headers_out.content_length_n;

     cutoff = NGX_MAX_OFF_T_VALUE / 10;
@@ -369,6 +376,10 @@
     found:

         if (start < end) {
+            if (range && start < range->end) {
+                ctx->descending = 1;
+            }
+
             range = ngx_array_push(&ctx->ranges);
             if (range == NULL) {
                 return NGX_ERROR;
@@ -383,10 +394,6 @@

             size += end - start;

-            if (ranges-- == 0) {
-                return NGX_DECLINED;
-            }
-
         } else if (start == 0) {
             return NGX_DECLINED;
         }
@@ -400,7 +407,9 @@
         return NGX_HTTP_RANGE_NOT_SATISFIABLE;
     }

-    if (size > content_length) {
+    if (ctx->ranges.nelts > ranges ||
+        (ctx->descending && r->ascending_ranges) ||
+        size > content_length) {
         return NGX_DECLINED;
     }

@@ -469,6 +478,22 @@
     ngx_http_range_t   *range;
     ngx_atomic_uint_t   boundary;

+    if (ctx->index == (ngx_uint_t) -1) {
+        ctx->index = 0;
+        range = ctx->ranges.elts;
+
+        for (i = 0; i < ctx->ranges.nelts; i++) {
+            if (ctx->offset < range[i].end) {
+                ctx->index = i;
+                break;
+            }
+        }
+    }
+
+    if (r != r->main) {
+        return ngx_http_next_header_filter(r);
+    }
+
     size = sizeof(CRLF "--") - 1 + NGX_ATOMIC_T_LEN
            + sizeof(CRLF "Content-Type: ") - 1
            + r->headers_out.content_type.len
@@ -574,6 +599,7 @@
     }

     r->headers_out.content_length_n = len;
+    r->headers_out.content_offset = range[0].start;

     if (r->headers_out.content_length) {
         r->headers_out.content_length->hash = 0;
@@ -640,14 +666,16 @@
     }

     /*
-     * multipart ranges are supported only if whole body is in a single buffer
+     * descending or overlapping ranges are supported only if whole body is
+     * in a single buffer
      */

-    if (ngx_buf_special(in->buf)) {
+    if (ctx->descending && ngx_buf_special(in->buf)) {
         return ngx_http_next_body_filter(r, in);
     }

-    if (ngx_http_range_test_overlapped(r, ctx, in) != NGX_OK) {
+    if (ctx->descending &&
+            ngx_http_range_test_overlapped(r, ctx, in) != NGX_OK) {
         return NGX_ERROR;
     }

@@ -659,7 +687,7 @@
 ngx_http_range_test_overlapped(ngx_http_request_t *r,
     ngx_http_range_filter_ctx_t *ctx, ngx_chain_t *in)
 {
-    off_t              start, last;
+    off_t              last;
     ngx_buf_t         *buf;
     ngx_uint_t         i;
     ngx_http_range_t  *range;
@@ -671,19 +699,16 @@
     buf = in->buf;

     if (!buf->last_buf) {
-        start = ctx->offset;
-        last = ctx->offset + ngx_buf_size(buf);
+        last = ngx_buf_size(buf);

         range = ctx->ranges.elts;
         for (i = 0; i < ctx->ranges.nelts; i++) {
-            if (start > range[i].start || last < range[i].end) {
+            if (last < range[i].end) {
                 goto overlapped;
             }
         }
     }

-    ctx->offset = ngx_buf_size(buf);
-
     return NGX_OK;

 overlapped:
@@ -701,7 +726,7 @@
 {
     off_t              start, last;
     ngx_buf_t         *buf;
-    ngx_chain_t       *out, *cl, **ll;
+    ngx_chain_t       *out, *cl, *tl, **ll;
     ngx_http_range_t  *range;

     out = NULL;
@@ -721,8 +746,16 @@
                        "http range body buf: %O-%O", start, last);

         if (ngx_buf_special(buf)) {
-            *ll = cl;
-            ll = &cl->next;
+            tl = ngx_alloc_chain_link(r->pool);
+            if (tl == NULL) {
+                return NGX_ERROR;
+            }
+
+            tl->buf = buf;
+            tl->next = NULL;
+            *ll = tl;
+            ll = &tl->next;
+
             continue;
         }

@@ -741,6 +774,14 @@
             continue;
         }

+        tl = ngx_alloc_chain_link(r->pool);
+        if (tl == NULL) {
+            return NGX_ERROR;
+        }
+
+        tl->buf = buf;
+        tl->next = NULL;
+
         if (range->start > start) {

             if (buf->in_file) {
@@ -764,14 +805,13 @@

             buf->last_buf = (r == r->main) ? 1 : 0;
             buf->last_in_chain = 1;
-            *ll = cl;
-            cl->next = NULL;
+            *ll = tl;

             break;
         }

-        *ll = cl;
-        ll = &cl->next;
+        *ll = tl;
+        ll = &tl->next;
     }

     if (out == NULL) {
@@ -787,95 +827,193 @@
     ngx_http_range_filter_ctx_t *ctx, ngx_chain_t *in)
 {
     ngx_buf_t         *b, *buf;
-    ngx_uint_t         i;
-    ngx_chain_t       *out, *hcl, *rcl, *dcl, **ll;
-    ngx_http_range_t  *range;
+    off_t              start, last;
+    ngx_chain_t       *out, *cl, *tl, **ll;
+    ngx_http_range_t  *range, *tail;

+    b = NULL;
+    out = NULL;
     ll = &out;
-    buf = in->buf;
+
     range = ctx->ranges.elts;
-
-    for (i = 0; i < ctx->ranges.nelts; i++) {
+    tail = range + ctx->ranges.nelts;
+    range += ctx->index;

-        /*
-         * The boundary header of the range:
-         * CRLF
-         * "--0123456789" CRLF
-         * "Content-Type: image/jpeg" CRLF
-         * "Content-Range: bytes "
-         */
+    for (cl = in; cl; cl = cl->next) {
+
+        buf = cl->buf;
+
+        start = ctx->offset;
+        last = ctx->offset + ngx_buf_size(buf);

-        b = ngx_calloc_buf(r->pool);
-        if (b == NULL) {
-            return NGX_ERROR;
+        ctx->offset = last;
+
+        ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+                       "http range multipart body buf: %O-%O", start, last);
+
+        if (ngx_buf_special(buf)) {
+            continue;
         }

-        b->memory = 1;
-        b->pos = ctx->boundary_header.data;
-        b->last = ctx->boundary_header.data + ctx->boundary_header.len;
+        if (range->end <= start || range->start >= last) {

-        hcl = ngx_alloc_chain_link(r->pool);
-        if (hcl == NULL) {
-            return NGX_ERROR;
-        }
+            ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+                           "http range multipart body skip");

-        hcl->buf = b;
-
+            if (buf->in_file) {
+                buf->file_pos = buf->file_last;
+            }

-        /* "SSSS-EEEE/TTTT" CRLF CRLF */
+            buf->pos = buf->last;
+            buf->sync = 1;

-        b = ngx_calloc_buf(r->pool);
-        if (b == NULL) {
-            return NGX_ERROR;
+            continue;
         }

-        b->temporary = 1;
-        b->pos = range[i].content_range.data;
-        b->last = range[i].content_range.data + range[i].content_range.len;
+        buf->last_in_chain = 0;
+        buf->last_buf = 0;
+
+        while (range < tail && range->start < last) {
+
+            if (range + 1 < tail && range[1].start < last) {
+                b = ngx_alloc_buf(r->pool);
+                if (b == NULL) {
+                    return NGX_ERROR;
+                }

-        rcl = ngx_alloc_chain_link(r->pool);
-        if (rcl == NULL) {
-            return NGX_ERROR;
-        }
+                ngx_memcpy(b, buf, sizeof(ngx_buf_t));
+            } else {
+                b = buf;
+            }
+
+            tl = ngx_alloc_chain_link(r->pool);
+            if (tl == NULL) {
+                return NGX_ERROR;
+            }
+
+            tl->buf = b;
+            tl->next = NULL;
+
+            if (range->start >= start) {

-        rcl->buf = b;
+                if (ngx_http_range_link_boundary_header(r, ctx, ll)
!= NGX_OK) {
+                    return NGX_ERROR;
+                }
+
+                ll = &(*ll)->next->next;

+                if (b->in_file) {
+                    b->file_pos += range->start - start;
+                }
+
+                if (ngx_buf_in_memory(b)) {
+                    b->pos += (size_t) (range->start - start);
+                }
+            }

-        /* the range data */
+            *ll = tl;
+            ll = &tl->next;
+
+            if (range->end <= last) {
+                if (b->in_file) {
+                    b->file_last -= last - range->end;
+                }

-        b = ngx_calloc_buf(r->pool);
-        if (b == NULL) {
-            return NGX_ERROR;
+                if (ngx_buf_in_memory(b)) {
+                    b->last -= (size_t) (last - range->end);
+                }
+
+                ctx->index++;
+                range++;
+            } else {
+                break;
+            }
         }

-        b->in_file = buf->in_file;
-        b->temporary = buf->temporary;
-        b->memory = buf->memory;
-        b->mmap = buf->mmap;
-        b->file = buf->file;
+        if (range == tail) {
+            if (ngx_http_range_link_last_boundary(r, ctx, ll) != NGX_OK) {
+                return NGX_ERROR;
+            }
+
+            break;
+        }
+    }
+
+    if (out == NULL) {
+        return NGX_OK;
+    }
+
+    return ngx_http_next_body_filter(r, out);
+}
+

-        if (buf->in_file) {
-            b->file_pos = buf->file_pos + range[i].start;
-            b->file_last = buf->file_pos + range[i].end;
-        }
+static ngx_int_t
+ngx_http_range_link_boundary_header(ngx_http_request_t *r,
+    ngx_http_range_filter_ctx_t *ctx, ngx_chain_t **ll)
+{
+    ngx_buf_t         *b;
+    ngx_chain_t       *hcl, *rcl;
+    ngx_http_range_t  *range;
+
+    /*
+     * The boundary header of the range:
+     * CRLF
+     * "--0123456789" CRLF
+     * "Content-Type: image/jpeg" CRLF
+     * "Content-Range: bytes "
+     */
+
+    b = ngx_calloc_buf(r->pool);
+    if (b == NULL) {
+        return NGX_ERROR;
+    }
+
+    b->memory = 1;
+    b->pos = ctx->boundary_header.data;
+    b->last = ctx->boundary_header.data + ctx->boundary_header.len;

-        if (ngx_buf_in_memory(buf)) {
-            b->pos = buf->pos + (size_t) range[i].start;
-            b->last = buf->pos + (size_t) range[i].end;
-        }
+    hcl = ngx_alloc_chain_link(r->pool);
+    if (hcl == NULL) {
+        return NGX_ERROR;
+    }
+
+    hcl->buf = b;
+
+
+    /* "SSSS-EEEE/TTTT" CRLF CRLF */
+
+    b = ngx_calloc_buf(r->pool);
+    if (b == NULL) {
+        return NGX_ERROR;
+    }
+
+    range = ctx->ranges.elts;
+    b->temporary = 1;
+    b->pos = range[ctx->index].content_range.data;
+    b->last = range[ctx->index].content_range.data
+              + range[ctx->index].content_range.len;

-        dcl = ngx_alloc_chain_link(r->pool);
-        if (dcl == NULL) {
-            return NGX_ERROR;
-        }
+    rcl = ngx_alloc_chain_link(r->pool);
+    if (rcl == NULL) {
+        return NGX_ERROR;
+    }
+
+    rcl->buf = b;
+
+    rcl->next = NULL;
+    hcl->next = rcl;
+    *ll = hcl;

-        dcl->buf = b;
+    return NGX_OK;
+}
+

-        *ll = hcl;
-        hcl->next = rcl;
-        rcl->next = dcl;
-        ll = &dcl->next;
-    }
+static ngx_int_t
+ngx_http_range_link_last_boundary(ngx_http_request_t *r,
+    ngx_http_range_filter_ctx_t *ctx, ngx_chain_t **ll)
+{
+    ngx_buf_t    *b;
+    ngx_chain_t  *hcl;

     /* the last boundary CRLF "--0123456789--" CRLF  */

@@ -885,7 +1023,8 @@
     }

     b->temporary = 1;
-    b->last_buf = 1;
+    b->last_in_chain = 1;
+    b->last_buf = (r == r->main) ? 1 : 0;

     b->pos = ngx_pnalloc(r->pool, sizeof(CRLF "--") - 1 + NGX_ATOMIC_T_LEN
                                   + sizeof("--" CRLF) - 1);
@@ -908,7 +1047,7 @@

     *ll = hcl;

-    return ngx_http_next_body_filter(r, out);
+    return NGX_OK;
 }


diff -r a6e23e343081 -r 75b953a2f40d
src/http/modules/ngx_http_slice_filter_module.c
--- a/src/http/modules/ngx_http_slice_filter_module.c Tue Apr 09
16:00:30 2019 +0300
+++ b/src/http/modules/ngx_http_slice_filter_module.c Thu Apr 11
05:29:15 2019 -0700
@@ -182,7 +182,7 @@

     r->allow_ranges = 1;
     r->subrequest_ranges = 1;
-    r->single_range = 1;
+    r->ascending_ranges = 1;

     rc = ngx_http_next_header_filter(r);

diff -r a6e23e343081 -r 75b953a2f40d src/http/ngx_http_request.h
--- a/src/http/ngx_http_request.h Tue Apr 09 16:00:30 2019 +0300
+++ b/src/http/ngx_http_request.h Thu Apr 11 05:29:15 2019 -0700
@@ -547,7 +547,7 @@
     unsigned                          preserve_body:1;
     unsigned                          allow_ranges:1;
     unsigned                          subrequest_ranges:1;
-    unsigned                          single_range:1;
+    unsigned                          ascending_ranges:1;
     unsigned                          disable_not_modified:1;
     unsigned                          stat_reading:1;
     unsigned                          stat_writing:1;
diff -r a6e23e343081 -r 75b953a2f40d src/http/ngx_http_upstream.c
--- a/src/http/ngx_http_upstream.c Tue Apr 09 16:00:30 2019 +0300
+++ b/src/http/ngx_http_upstream.c Thu Apr 11 05:29:15 2019 -0700
@@ -1157,20 +1157,42 @@

     p = h->value.data + 6;

-    while (*p == ' ') { p++; }
-
-    if (*p == '-') {
-        return NGX_DECLINED;
-    }
-
-    start = p;
-
-    while (*p >= '0' && *p <= '9') { p++; }
-
-    offset = ngx_atoof(start, p - start);
-
-    if (offset >= u->conf->cache_max_range_offset) {
-        return NGX_DECLINED;
+    for ( ;; ) {
+        while (*p == ' ') { p++; }
+
+        if (*p == '-') {
+            return NGX_DECLINED;
+        }
+
+        start = p;
+
+        while (*p >= '0' && *p <= '9') { p++; }
+
+        offset = ngx_atoof(start, p - start);
+
+        if (offset >= u->conf->cache_max_range_offset) {
+            return NGX_DECLINED;
+        }
+
+        while (*p == ' ') { p++; }
+
+        if (*p++ != '-') {
+            return NGX_DECLINED;
+        }
+
+        while (*p == ' ') { p++; }
+
+        while (*p >= '0' && *p <= '9') { p++; }
+
+        while (*p == ' ') { p++; }
+
+        if (*p == '\0') {
+            break;
+        }
+
+        if (*p++ != ',') {
+            return NGX_DECLINED;
+        }
     }

     return NGX_OK;
@@ -2839,11 +2861,11 @@

     if (u->conf->force_ranges) {
         r->allow_ranges = 1;
-        r->single_range = 1;
+        r->ascending_ranges = 1;

 #if (NGX_HTTP_CACHE)
         if (r->cached) {
-            r->single_range = 0;
+            r->ascending_ranges = 0;
         }
 #endif
     }
@@ -5204,7 +5226,7 @@

     if (r->upstream->cacheable) {
         r->allow_ranges = 1;
-        r->single_range = 1;
+        r->ascending_ranges = 1;
         return NGX_OK;
     }


# HG changeset patch
# User Robert Burke <sharpobject at gmail.com>
# Date 1554986947 25200
#      Thu Apr 11 05:49:07 2019 -0700
# Node ID 118b3d5f356f5d881029b6b42c49625310ec7de1
# Parent  fe886814b25a5842f4ee3844ac68e3bafbfd8458
Tests: uncomment multipart range tests

Fix mp4 multipart range test.

diff -r fe886814b25a -r 118b3d5f356f proxy_cache_max_range_offset.t
--- a/proxy_cache_max_range_offset.t Tue Apr 09 18:52:53 2019 +0300
+++ b/proxy_cache_max_range_offset.t Thu Apr 11 05:49:07 2019 -0700
@@ -85,14 +85,7 @@
 unlike(get('/t.html?1', 'bytes=1-'), qr/X-Range/, 'range - below');
 like(get('/t.html?2', 'bytes=3-'), qr/X-Range/, 'range - above');
 like(get('/t.html?3', 'bytes=-1'), qr/X-Range/, 'range - last');
-
-TODO: {
-local $TODO = 'not yet';
-
 like(get('/t.html?4', 'bytes=1-1,3-'), qr/X-Range/, 'range - multipart above');
-
-}
-
 like(get('/zero/t.html?5', 'bytes=0-0'), qr/X-Range/, 'always non-cacheable');
 like(get('/min_uses/t.html?6', 'bytes=1-'), qr/X-Range/, 'below min_uses');

diff -r fe886814b25a -r 118b3d5f356f proxy_cache_range.t
--- a/proxy_cache_range.t Tue Apr 09 18:52:53 2019 +0300
+++ b/proxy_cache_range.t Thu Apr 11 05:49:07 2019 -0700
@@ -84,12 +84,8 @@
 like(http_get_range('/t.html?1', 'Range: bytes=4-'), qr/^THIS/m,
  'range on first request');

-{
-local $TODO = 'not yet';
-
 like(http_get_range('/t.html?2', 'Range: bytes=0-2,4-'), qr/^SEE.*^THIS/ms,
  'multipart range on first request');
-}

 like(http_get_range('/t.html?1', 'Range: bytes=4-'), qr/^THIS/m,
  'cached range');
diff -r fe886814b25a -r 118b3d5f356f range_mp4.t
--- a/range_mp4.t Tue Apr 09 18:52:53 2019 +0300
+++ b/range_mp4.t Thu Apr 11 05:49:07 2019 -0700
@@ -54,7 +54,7 @@
  . "-pix_fmt yuv420p -c:v libx264 ${\($t->testdir())}/test.mp4") == 0
  or die "Can't create mp4 file: $!";

-$t->run()->plan(13);
+$t->run()->plan(14);

 ###############################################################################

@@ -88,23 +88,20 @@
 like($t1, qr/Content-Range: bytes 0-99\/$fsz/,
  'multi buffers - content range');

-TODO: {
-local $TODO = 'multipart range on mp4';
-
 $t1 = http_get_range('/test.mp4?start=1', 'Range: bytes=0-10,11-99');
 like($t1, qr/ 206 /, 'multipart range - 206 partial reply');
-like($t1, qr/Content-Length: 100/, 'multipart range - content length');
-like($t1, qr/Content-Range: bytes 0-10,11-99\/$fsz/,
+like($t1, qr/Content-Length: 303/, 'multipart range - content length');
+like($t1, qr/Content-Range: bytes 0-10\/$fsz/,
  'multipart range - content range');
-
-}
+like($t1, qr/Content-Range: bytes 11-99\/$fsz/,
+ 'multipart range - content range');

 ###############################################################################

 sub http_get_range {
  my ($url, $extra) = @_;
  return http(<<EOF);
-HEAD $url HTTP/1.1
+GET $url HTTP/1.1
 Host: localhost
 Connection: close
 $extra


More information about the nginx-devel mailing list