[PATCH] Make ngx_http_parse_unsafe_uri() to be able to unescape uri
Maxim Dounin
mdounin at mdounin.ru
Tue Dec 10 17:32:03 UTC 2013
Hello!
On Sun, Dec 08, 2013 at 02:40:26AM +0300, Raman Shishniou wrote:
> # HG changeset patch
> # User Raman Shishniou <rommer at activecloud.com>
> # Date 1386459301 -10800
> # Node ID 54f3670e04e82e00aa424d9773868749026bc693
> # Parent 58716fd3bd2d63c93b0c04fa121232b7126e724b
> Make ngx_http_parse_unsafe_uri() to be able to unescape uri
>
> It makes possible to use an escaped uri with "X-Accel-Redirect" header in
> http_upstream module and "Destination" header http_dav module. No need
> to unescape uri in http_ssi_filter module any more.
>
> diff --git a/src/http/modules/ngx_http_dav_module.c b/src/http/modules/ngx_http_dav_module.c
> --- a/src/http/modules/ngx_http_dav_module.c
> +++ b/src/http/modules/ngx_http_dav_module.c
> @@ -517,7 +517,7 @@
> size_t len, root;
> ngx_err_t err;
> ngx_int_t rc, depth;
> - ngx_uint_t overwrite, slash, dir, flags;
> + ngx_uint_t overwrite, slash, dir;
> ngx_str_t path, uri, duri, args;
> ngx_tree_ctx_t tree;
> ngx_copy_file_t cf;
> @@ -604,9 +604,8 @@
>
> duri.len = last - p;
> duri.data = p;
> - flags = 0;
>
> - if (ngx_http_parse_unsafe_uri(r, &duri, &args, &flags) != NGX_OK) {
> + if (ngx_http_parse_unsafe_uri(r, &duri, &args, 0) != NGX_OK) {
> goto invalid_destination;
> }
Please avoid doing multiple things in the same patch. If you want
to change the interface of the ngx_http_parse_unsafe_uri() to
accept flags as a value, not as a pointer, please submit a
separate patch for this.
[...]
> diff --git a/src/http/ngx_http_parse.c b/src/http/ngx_http_parse.c
> --- a/src/http/ngx_http_parse.c
> +++ b/src/http/ngx_http_parse.c
> @@ -1778,57 +1778,233 @@
>
> ngx_int_t
> ngx_http_parse_unsafe_uri(ngx_http_request_t *r, ngx_str_t *uri,
> - ngx_str_t *args, ngx_uint_t *flags)
> + ngx_str_t *args, ngx_uint_t flags)
> {
> - u_char ch, *p;
> - size_t len;
> -
> + u_char *src, *dst, *newuri, ch, c, decoded;
> + size_t len;
> + enum {
> + sw_usual = 0,
> + sw_quoted,
> + sw_quoted_second
> + } state;
> +
> + dst = NULL;
> + newuri = NULL;
> + src = uri->data;
> len = uri->len;
> - p = uri->data;
> -
> - if (len == 0 || p[0] == '?') {
> +
> + state = 0;
> + decoded = 0;
> +
> + if (len == 0 || src[0] == '?') {
> goto unsafe;
> }
>
> - if (p[0] == '.' && len == 3 && p[1] == '.' && (ngx_path_separator(p[2]))) {
> - goto unsafe;
> - }
> -
> - for ( /* void */ ; len; len--) {
> -
> - ch = *p++;
> -
> - if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
> - continue;
> - }
> -
> - if (ch == '?') {
> - args->len = len - 1;
> - args->data = p;
> - uri->len -= len;
> -
> - return NGX_OK;
> - }
> -
> - if (ch == '\0') {
> - goto unsafe;
> - }
> -
> - if (ngx_path_separator(ch) && len > 2) {
> -
> - /* detect "/../" */
> -
> - if (p[0] == '.' && p[1] == '.' && ngx_path_separator(p[2])) {
> + while (len--) {
> +
Changing a
for ( /* void */ ; len; len--) {
to
while (len--) {
seems to be something not really needed.
> + ch = *src++;
> +
> + switch (state) {
> + case sw_usual:
> + if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
> + if (dst != NULL) {
> + *dst++ = ch;
> + }
> + break;
> + }
> +
> + if (ch == '\0') {
> goto unsafe;
> }
> +
> + if (ch == '?') {
> + goto args;
> + }
> +
> + if (ch == '%') {
> + if (dst == NULL) {
> + newuri = ngx_pnalloc(r->pool, uri->len);
> + if (newuri == NULL) {
> + return NGX_ERROR;
> + }
> + if (src - 1 > uri->data) {
> + ngx_memcpy(newuri, uri->data, src - uri->data - 1);
> + dst = newuri + (src - uri->data - 1);
> + } else {
> + dst = newuri;
> + }
> + }
> + state = sw_quoted;
> + break;
> + }
> +
> + /* detect "../" from begin or "/../" in the middle */
> + if (ngx_path_separator(ch)) {
> + if (dst == NULL) {
> +
> + if (src - 2 == uri->data &&
> + *(src - 2) == '.' && *(src - 1) == '.') {
> + goto unsafe;
> + }
> +
> + if (src - 2 > uri->data &&
> + ngx_path_separator(*(dst - 3)) &&
> + *(src - 2) == '.' && *(src - 1) == '.') {
> + goto unsafe;
> + }
> +
> + } else {
> +
> + if (dst - 2 == newuri &&
> + newuri[0] == '.' && newuri[1] == '.') {
> + goto unsafe;
> + }
> +
> + if (dst - 2 > newuri &&
> + ngx_path_separator(*(dst - 3)) &&
> + *(dst - 2) == '.' && *(dst - 1) == '.') {
> + goto unsafe;
> + }
> +
> + }
> + }
> +
> + if (dst != NULL) {
> + *dst++ = ch;
> + }
> + break;
> +
> + case sw_quoted:
> + if (ch >= '0' && ch <= '9') {
> + decoded = (u_char) (ch - '0');
> + state = sw_quoted_second;
> + break;
> + }
> +
> + c = (u_char) (ch | 0x20);
> + if (c >= 'a' && c <= 'f') {
> + decoded = (u_char) (c - 'a' + 10);
> + state = sw_quoted_second;
> + break;
> + }
> +
> + if (ch == '\0') {
> + goto unsafe;
> + }
> +
> + if (ch == '%') {
> + *dst++ = '%';
> + break;
> + }
> +
> + if (ch == '?') {
> + *dst++ = '%';
> + goto args;
> + }
> +
> + *dst++ = '%';
> + *dst++ = ch;
> + state = sw_usual;
> + break;
> +
> + case sw_quoted_second:
> +
> + state = sw_usual;
> +
> + if (ch >= '0' && ch <= '9') {
> + ch = (u_char) ((decoded << 4) + ch - '0');
> +
> + if (ch == '\0') {
> + goto unsafe;
> + }
> +
> + *dst++ = ch;
> + break;
> + }
> +
> + c = (u_char) (ch | 0x20);
> + if (c >= 'a' && c <= 'f') {
> + ch = (u_char) ((decoded << 4) + c - 'a' + 10);
> +
> + /* detect "../" from begin or "/../" in the middle */
> + if (ngx_path_separator(ch)) {
> +
> + if (dst - 2 == newuri &&
> + newuri[0] == '.' && newuri[1] == '.') {
> + goto unsafe;
> + }
> +
> + if (dst - 2 > newuri &&
> + ngx_path_separator(*(dst - 3)) &&
> + *(dst - 2) == '.' && *(dst - 1) == '.') {
> + goto unsafe;
> + }
> + }
> +
> + *dst++ = ch;
> + break;
> + }
> +
> + if (ch == '\0') {
> + goto unsafe;
> + }
> +
> + if (ch == '%') {
> + *dst++ = '%';
> + *dst++ = *(src - 2);
> + state = sw_quoted;
> + break;
> + }
> +
> + if (ch == '?') {
> + *dst++ = '%';
> + *dst++ = *(src - 2);
> + goto args;
> + }
> +
> + *dst++ = '%';
> + *dst++ = *(src - 2);
> + *dst++ = ch;
> + break;
> }
> }
The code seems to be overcomplicated (and there are lots of minor
style problems). It should really be a way to write the code
better.
E.g., you may consider writing two loops for cases with unmodified
uri and new uri allocated. Or, alternatively, one loop to parse
simple case without unescaping to find out request arguments and
to detect if unescaping is needed, then unescape uri call, and
then another loop to check if unescaped uri is still safe.
[...]
--
Maxim Dounin
http://nginx.org/
More information about the nginx-devel
mailing list