[PATCH] Make ngx_http_parse_unsafe_uri() to be able to unescape uri

Tue Dec 10 17:32:03 UTC 2013

Hello!

On Sun, Dec 08, 2013 at 02:40:26AM +0300, Raman Shishniou wrote:

> # HG changeset patch
> # User Raman Shishniou <rommer at activecloud.com>
> # Date 1386459301 -10800
> # Node ID 54f3670e04e82e00aa424d9773868749026bc693
> # Parent  58716fd3bd2d63c93b0c04fa121232b7126e724b
> Make ngx_http_parse_unsafe_uri() to be able to unescape uri
> 
> It makes possible to use an escaped uri with "X-Accel-Redirect" header in
> http_upstream module and "Destination" header http_dav module. No need
> to unescape uri in http_ssi_filter module any more.
> 
> diff --git a/src/http/modules/ngx_http_dav_module.c b/src/http/modules/ngx_http_dav_module.c
> --- a/src/http/modules/ngx_http_dav_module.c
> +++ b/src/http/modules/ngx_http_dav_module.c
> @@ -517,7 +517,7 @@
>      size_t                    len, root;
>      ngx_err_t                 err;
>      ngx_int_t                 rc, depth;
> -    ngx_uint_t                overwrite, slash, dir, flags;
> +    ngx_uint_t                overwrite, slash, dir;
>      ngx_str_t                 path, uri, duri, args;
>      ngx_tree_ctx_t            tree;
>      ngx_copy_file_t           cf;
> @@ -604,9 +604,8 @@
>  
>      duri.len = last - p;
>      duri.data = p;
> -    flags = 0;
>  
> -    if (ngx_http_parse_unsafe_uri(r, &duri, &args, &flags) != NGX_OK) {
> +    if (ngx_http_parse_unsafe_uri(r, &duri, &args, 0) != NGX_OK) {
>          goto invalid_destination;
>      }

Please avoid doing multiple things in the same patch.  If you want 
to change the interface of the ngx_http_parse_unsafe_uri() to 
accept flags as a value, not as a pointer, please submit a 
separate patch for this.

[...]

> diff --git a/src/http/ngx_http_parse.c b/src/http/ngx_http_parse.c
> --- a/src/http/ngx_http_parse.c
> +++ b/src/http/ngx_http_parse.c
> @@ -1778,57 +1778,233 @@
>  
>  ngx_int_t
>  ngx_http_parse_unsafe_uri(ngx_http_request_t *r, ngx_str_t *uri,
> -    ngx_str_t *args, ngx_uint_t *flags)
> +    ngx_str_t *args, ngx_uint_t flags)
>  {
> -    u_char  ch, *p;
> -    size_t  len;
> -
> +    u_char   *src, *dst, *newuri, ch, c, decoded;
> +    size_t    len;
> +    enum {
> +        sw_usual = 0,
> +        sw_quoted,
> +        sw_quoted_second
> +    } state;
> +
> +    dst = NULL;
> +    newuri = NULL;
> +    src = uri->data;
>      len = uri->len;
> -    p = uri->data;
> -
> -    if (len == 0 || p[0] == '?') {
> +
> +    state = 0;
> +    decoded = 0;
> +
> +    if (len == 0 || src[0] == '?') {
>          goto unsafe;
>      }
>  
> -    if (p[0] == '.' && len == 3 && p[1] == '.' && (ngx_path_separator(p[2]))) {
> -        goto unsafe;
> -    }
> -
> -    for ( /* void */ ; len; len--) {
> -
> -        ch = *p++;
> -
> -        if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
> -            continue;
> -        }
> -
> -        if (ch == '?') {
> -            args->len = len - 1;
> -            args->data = p;
> -            uri->len -= len;
> -
> -            return NGX_OK;
> -        }
> -
> -        if (ch == '\0') {
> -            goto unsafe;
> -        }
> -
> -        if (ngx_path_separator(ch) && len > 2) {
> -
> -            /* detect "/../" */
> -
> -            if (p[0] == '.' && p[1] == '.' && ngx_path_separator(p[2])) {
> +    while (len--) {
> +

Changing a 

    for ( /* void */ ; len; len--) {

to 

    while (len--) {

seems to be something not really needed.

> +        ch = *src++;
> +
> +        switch (state) {
> +        case sw_usual:
> +            if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
> +                if (dst != NULL) {
> +                    *dst++ = ch;
> +                }
> +                break;
> +            }
> +
> +            if (ch == '\0') {
>                  goto unsafe;
>              }
> +
> +            if (ch == '?') {
> +                goto args;
> +            }
> +
> +            if (ch == '%') {
> +                if (dst == NULL) {
> +                    newuri = ngx_pnalloc(r->pool, uri->len);
> +                    if (newuri == NULL) {
> +                        return NGX_ERROR;
> +                    }
> +                    if (src - 1 > uri->data) {
> +                        ngx_memcpy(newuri, uri->data, src - uri->data - 1);
> +                        dst = newuri + (src - uri->data - 1);
> +                    } else {
> +                        dst = newuri;
> +                    }
> +                }
> +                state = sw_quoted;
> +                break;
> +            }
> +
> +            /* detect "../" from begin or "/../" in the middle */
> +            if (ngx_path_separator(ch)) {
> +                if (dst == NULL) {
> +
> +                    if (src - 2 == uri->data &&
> +                        *(src - 2) == '.' && *(src - 1) == '.') {
> +                        goto unsafe;
> +                    }
> +
> +                    if (src - 2 > uri->data &&
> +                        ngx_path_separator(*(dst - 3)) &&
> +                        *(src - 2) == '.' && *(src - 1) == '.') {
> +                        goto unsafe;
> +                    }
> +
> +                } else {
> +
> +                    if (dst - 2 == newuri &&
> +                        newuri[0] == '.' && newuri[1] == '.') {
> +                        goto unsafe;
> +                    }
> +
> +                    if (dst - 2 > newuri &&
> +                        ngx_path_separator(*(dst - 3)) &&
> +                        *(dst - 2) == '.' && *(dst - 1) == '.') {
> +                        goto unsafe;
> +                    }
> +
> +                }
> +            }
> +
> +            if (dst != NULL) {
> +                *dst++ = ch;
> +            }
> +            break;
> +
> +        case sw_quoted:
> +            if (ch >= '0' && ch <= '9') {
> +                decoded = (u_char) (ch - '0');
> +                state = sw_quoted_second;
> +                break;
> +            }
> +
> +            c = (u_char) (ch | 0x20);
> +            if (c >= 'a' && c <= 'f') {
> +                decoded = (u_char) (c - 'a' + 10);
> +                state = sw_quoted_second;
> +                break;
> +            }
> +
> +            if (ch == '\0') {
> +                goto unsafe;
> +            }
> +
> +            if (ch == '%') {
> +                *dst++ = '%';
> +                break;
> +            }
> +
> +            if (ch == '?') {
> +                *dst++ = '%';
> +                goto args;
> +            }
> +
> +            *dst++ = '%';
> +            *dst++ = ch;
> +            state = sw_usual;
> +            break;
> +
> +        case sw_quoted_second:
> +
> +            state = sw_usual;
> +
> +            if (ch >= '0' && ch <= '9') {
> +                ch = (u_char) ((decoded << 4) + ch - '0');
> +
> +                if (ch == '\0') {
> +                    goto unsafe;
> +                }
> +
> +                *dst++ = ch;
> +                break;
> +            }
> +
> +            c = (u_char) (ch | 0x20);
> +            if (c >= 'a' && c <= 'f') {
> +                ch = (u_char) ((decoded << 4) + c - 'a' + 10);
> +
> +                /* detect "../" from begin or "/../" in the middle */
> +                if (ngx_path_separator(ch)) {
> +
> +                    if (dst - 2 == newuri &&
> +                        newuri[0] == '.' && newuri[1] == '.') {
> +                        goto unsafe;
> +                    }
> +
> +                    if (dst - 2 > newuri &&
> +                        ngx_path_separator(*(dst - 3)) &&
> +                        *(dst - 2) == '.' && *(dst - 1) == '.') {
> +                        goto unsafe;
> +                    }
> +                }
> +
> +                *dst++ = ch;
> +                break;
> +            }
> +
> +            if (ch == '\0') {
> +                goto unsafe;
> +            }
> +
> +            if (ch == '%') {
> +                *dst++ = '%';
> +                *dst++ = *(src - 2);
> +                state = sw_quoted;
> +                break;
> +            }
> +
> +            if (ch == '?') {
> +                *dst++ = '%';
> +                *dst++ = *(src - 2);
> +                goto args;
> +            }
> +
> +            *dst++ = '%';
> +            *dst++ = *(src - 2);
> +            *dst++ = ch;
> +            break;
>          }
>      }

The code seems to be overcomplicated (and there are lots of minor 
style problems).  It should really be a way to write the code 
better.

E.g., you may consider writing two loops for cases with unmodified 
uri and new uri allocated.  Or, alternatively, one loop to parse 
simple case without unescaping to find out request arguments and 
to detect if unescaping is needed, then unescape uri call, and 
then another loop to check if unescaped uri is still safe.

[...]

-- 
Maxim Dounin
http://nginx.org/