[PATCH] HTTP: Add new uri_normalization_percent_decode option

Michael Kourlas michael.kourlas at solace.com
Wed Feb 15 16:50:13 UTC 2023


# HG changeset patch
# User Michael Kourlas <michael.kourlas at solace.com>
# Date 1676408746 18000
#      Tue Feb 14 16:05:46 2023 -0500
# Node ID 129437ade41b14a584fb4b7558accc1b8dee7f45
# Parent  cffaf3f2eec8fd33605c2a37814f5ffc30371989
HTTP: Add new uri_normalization_percent_decode option

This patch addresses ticket #2225 by adding a new
uri_normalization_percent_decode configuration option that controls which
characters are percent-decoded by nginx as part of its URI normalization.

The option has two values: "all" and "all-except-reserved". "all" is the
default value and is the current behaviour. When the option is set to
"all-except-reserved", nginx percent-decodes all characters except those in the
reserved set defined by RFC 3986:

      reserved    = gen-delims / sub-delims

      gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"

      sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
                  / "*" / "+" / "," / ";" / "="

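In addition, when "all-except-reserved" is used and nginx later re-escapes the
normalized URI (for example, when building a proxied request or a redirect), it
leaves a "%" as is, rather than re-encoding it as "%25", if that "%" begins a
percent-encoded reserved character.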

When nginx percent-decodes reserved characters, this can often change the
request URI's semantics, making it impossible to use a normalized URI for
certain use cases. "uri_normalization_percent_decode" gives the configuration
author the freedom to determine which reserved characters are semantically
relevant and which are not.

For example, consider the following location block, which handles part of a
hypothetical API:

location ~ ^/api/objects/[^/]+/subobjects(/.*)?$ {
    ...
}

Because nginx always normalizes "%2F" to "/", this location block will not
match a path of /api/objects/sample%2Fname/subobjects, even if the API permits
"/" to appear percent-encoded in the URI as part of object names. nginx will
instead interpret this as /api/objects/sample/name/subobjects, a completely
different path. Setting "uri_normalization_percent_decode" to
"all-except-reserved" will leave "%2F" encoded, resulting in the expected
behaviour.

diff -r cffaf3f2eec8 -r 129437ade41b src/core/ngx_string.c
--- a/src/core/ngx_string.c     Thu Feb 02 23:38:48 2023 +0300
+++ b/src/core/ngx_string.c     Tue Feb 14 16:05:46 2023 -0500
@@ -1487,7 +1487,8 @@


 uintptr_t
-ngx_escape_uri(u_char *dst, u_char *src, size_t size, ngx_uint_t type)
+ngx_escape_uri(u_char *dst, u_char *src, size_t size, ngx_uint_t type,
+    ngx_uint_t skip_preencoded_type)
 {
     ngx_uint_t      n;
     uint32_t       *escape;
@@ -1641,7 +1642,11 @@
         n = 0;

         while (size) {
-            if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
+            if ((escape[*src >> 5] & (1U << (*src & 0x1f)))
+                && !(*src == '%' && size >= 3
+                     && ngx_escape_uri_skip_preencoded_character(
+                                               src + 1, skip_preencoded_type)))
+            {
                 n++;
             }
             src++;
@@ -1652,7 +1657,11 @@
     }

     while (size) {
-        if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
+        if ((escape[*src >> 5] & (1U << (*src & 0x1f)))
+            && !(*src == '%' && size >= 3
+                 && ngx_escape_uri_skip_preencoded_character(
+                                               src + 1, skip_preencoded_type)))
+        {
             *dst++ = '%';
             *dst++ = hex[*src >> 4];
             *dst++ = hex[*src & 0xf];
@@ -1668,6 +1677,87 @@
 }


+/*
+ * Tells ngx_escape_uri() whether a '%' must be left unescaped.
+ * hex_component points just past the '%'; the caller must guarantee that two
+ * bytes are readable there.  Returns 1 if they are hex digits that encode a
+ * character selected by skip_preencoded_type, and 0 otherwise.
+ */
+ngx_uint_t
+ngx_escape_uri_skip_preencoded_character(u_char *hex_component,
+    ngx_uint_t skip_preencoded_type)
+{
+    u_char       ch, decoded_ch;
+    uint32_t    *skip;
+    ngx_uint_t   i;
+
+    static uint32_t   none[] = {
+        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+
+                    /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
+        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+
+                    /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
+        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+
+                    /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
+        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+
+        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x00000000  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+    };
+
+    static uint32_t   reserved_only[] = {
+        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+
+                    /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
+        0xac009fda, /* 1010 1100 0000 0000  1001 1111 1101 1010 */
+
+                    /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
+        0x28000001, /* 0010 1000 0000 0000  0000 0000 0000 0001 */
+
+                    /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
+        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+
+        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x00000000  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+    };
+
+    static uint32_t  *skip_map[] = { none, reserved_only };
+
+    if (skip_preencoded_type >= sizeof(skip_map) / sizeof(skip_map[0])) {
+        /* unknown type: skip nothing, as with NONE */
+        return 0;
+    }
+
+    skip = skip_map[skip_preencoded_type];
+    decoded_ch = 0;
+
+    for (i = 0; i < 2; i++) {
+        ch = hex_component[i];
+        if (ch >= '0' && ch <= '9') {
+            decoded_ch = (u_char) ((decoded_ch << 4) + (ch - '0'));
+            continue;
+        }
+
+        ch = (u_char) (ch | 0x20);
+
+        if (ch < 'a' || ch > 'f') {
+            /* not part of a percent-encoded character */
+            return 0;
+        }
+
+        decoded_ch = (u_char) ((decoded_ch << 4) + (ch - 'a') + 10);
+    }
+
+    return (skip[decoded_ch >> 5] & (1U << (decoded_ch & 0x1f))) ? 1 : 0;
+}
+
+
 void
 ngx_unescape_uri(u_char **dst, u_char **src, size_t size, ngx_uint_t type)
 {
diff -r cffaf3f2eec8 -r 129437ade41b src/core/ngx_string.h
--- a/src/core/ngx_string.h     Thu Feb 02 23:38:48 2023 +0300
+++ b/src/core/ngx_string.h     Tue Feb 14 16:05:46 2023 -0500
@@ -204,11 +204,20 @@
 #define NGX_ESCAPE_MEMCACHED      5
 #define NGX_ESCAPE_MAIL_AUTH      6

+/*
+ * these values must match NGX_HTTP_URI_NORMALIZATION_PERCENT_DECODE_*, since
+ * the server configuration value is passed as skip_preencoded_type as is
+ */
+#define NGX_ESCAPE_SKIP_PREENCODED_NONE      0
+#define NGX_ESCAPE_SKIP_PREENCODED_RESERVED  1
+
 #define NGX_UNESCAPE_URI       1
 #define NGX_UNESCAPE_REDIRECT  2

 uintptr_t ngx_escape_uri(u_char *dst, u_char *src, size_t size,
-    ngx_uint_t type);
+    ngx_uint_t type, ngx_uint_t skip_preencoded_type);
+ngx_uint_t ngx_escape_uri_skip_preencoded_character(u_char *seq,
+    ngx_uint_t skip_preencoded_type);
 void ngx_unescape_uri(u_char **dst, u_char **src, size_t size, ngx_uint_t type);
 uintptr_t ngx_escape_html(u_char *dst, u_char *src, size_t size);
 uintptr_t ngx_escape_json(u_char *dst, u_char *src, size_t size);
diff -r cffaf3f2eec8 -r 129437ade41b src/event/ngx_event_openssl.c
--- a/src/event/ngx_event_openssl.c     Thu Feb 02 23:38:48 2023 +0300
+++ b/src/event/ngx_event_openssl.c     Tue Feb 14 16:05:46 2023 -0500
@@ -5366,7 +5366,8 @@
         return NGX_OK;
     }

-    n = ngx_escape_uri(NULL, cert.data, cert.len, NGX_ESCAPE_URI_COMPONENT);
+    n = ngx_escape_uri(NULL, cert.data, cert.len, NGX_ESCAPE_URI_COMPONENT,
+                       NGX_ESCAPE_SKIP_PREENCODED_NONE);

     s->len = cert.len + n * 2;
     s->data = ngx_pnalloc(pool, s->len);
@@ -5374,7 +5375,8 @@
         return NGX_ERROR;
     }

-    ngx_escape_uri(s->data, cert.data, cert.len, NGX_ESCAPE_URI_COMPONENT);
+    ngx_escape_uri(s->data, cert.data, cert.len, NGX_ESCAPE_URI_COMPONENT,
+                   NGX_ESCAPE_SKIP_PREENCODED_NONE);

     return NGX_OK;
 }
diff -r cffaf3f2eec8 -r 129437ade41b src/event/ngx_event_openssl_stapling.c
--- a/src/event/ngx_event_openssl_stapling.c    Thu Feb 02 23:38:48 2023 +0300
+++ b/src/event/ngx_event_openssl_stapling.c    Tue Feb 14 16:05:46 2023 -0500
@@ -1747,7 +1747,8 @@
     ngx_encode_base64(&base64, &binary);

     escape = ngx_escape_uri(NULL, base64.data, base64.len,
-                            NGX_ESCAPE_URI_COMPONENT);
+                            NGX_ESCAPE_URI_COMPONENT,
+                            NGX_ESCAPE_SKIP_PREENCODED_NONE);

     ngx_log_debug2(NGX_LOG_DEBUG_EVENT, ctx->log, 0,
                    "ssl ocsp request length %z, escape %d",
@@ -1777,7 +1778,8 @@

     } else {
         p = (u_char *) ngx_escape_uri(p, base64.data, base64.len,
-                                      NGX_ESCAPE_URI_COMPONENT);
+                                      NGX_ESCAPE_URI_COMPONENT,
+                                      NGX_ESCAPE_SKIP_PREENCODED_NONE);
     }

     p = ngx_cpymem(p, " HTTP/1.0" CRLF, sizeof(" HTTP/1.0" CRLF) - 1);
diff -r cffaf3f2eec8 -r 129437ade41b src/http/modules/ngx_http_autoindex_module.c
--- a/src/http/modules/ngx_http_autoindex_module.c      Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/modules/ngx_http_autoindex_module.c      Tue Feb 14 16:05:46 2023 -0500
@@ -487,7 +487,8 @@
     for (i = 0; i < entries->nelts; i++) {
         entry[i].escape = 2 * ngx_escape_uri(NULL, entry[i].name.data,
                                              entry[i].name.len,
-                                             NGX_ESCAPE_URI_COMPONENT);
+                                             NGX_ESCAPE_URI_COMPONENT,
+                                             NGX_ESCAPE_SKIP_PREENCODED_NONE);

         entry[i].escape_html = ngx_escape_html(NULL, entry[i].name.data,
                                                entry[i].name.len);
@@ -549,7 +550,8 @@

         if (entry[i].escape) {
             ngx_escape_uri(b->last, entry[i].name.data, entry[i].name.len,
-                           NGX_ESCAPE_URI_COMPONENT);
+                           NGX_ESCAPE_URI_COMPONENT,
+                           NGX_ESCAPE_SKIP_PREENCODED_NONE);

             b->last += entry[i].name.len + entry[i].escape;

diff -r cffaf3f2eec8 -r 129437ade41b src/http/modules/ngx_http_dav_module.c
--- a/src/http/modules/ngx_http_dav_module.c    Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/modules/ngx_http_dav_module.c    Tue Feb 14 16:05:46 2023 -0500
@@ -1072,9 +1072,12 @@
 static ngx_int_t
 ngx_http_dav_location(ngx_http_request_t *r)
 {
-    u_char     *p;
-    size_t      len;
-    uintptr_t   escape;
+    u_char                   *p;
+    size_t                    len;
+    uintptr_t                 escape;
+    ngx_http_core_srv_conf_t *cscf;
+
+    cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);

     r->headers_out.location = ngx_list_push(&r->headers_out.headers);
     if (r->headers_out.location == NULL) {
@@ -1085,7 +1088,8 @@
     r->headers_out.location->next = NULL;
     ngx_str_set(&r->headers_out.location->key, "Location");

-    escape = 2 * ngx_escape_uri(NULL, r->uri.data, r->uri.len, NGX_ESCAPE_URI);
+    escape = 2 * ngx_escape_uri(NULL, r->uri.data, r->uri.len, NGX_ESCAPE_URI,
+                                cscf->uri_normalization_percent_decode);

     if (escape) {
         len = r->uri.len + escape;
@@ -1099,7 +1103,8 @@
         r->headers_out.location->value.len = len;
         r->headers_out.location->value.data = p;

-        ngx_escape_uri(p, r->uri.data, r->uri.len, NGX_ESCAPE_URI);
+        ngx_escape_uri(p, r->uri.data, r->uri.len, NGX_ESCAPE_URI,
+                       cscf->uri_normalization_percent_decode);

     } else {
         r->headers_out.location->value = r->uri;
diff -r cffaf3f2eec8 -r 129437ade41b src/http/modules/ngx_http_grpc_module.c
--- a/src/http/modules/ngx_http_grpc_module.c   Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/modules/ngx_http_grpc_module.c   Tue Feb 14 16:05:46 2023 -0500
@@ -720,12 +720,15 @@
     ngx_http_upstream_t          *u;
     ngx_http_grpc_frame_t        *f;
     ngx_http_script_code_pt       code;
+    ngx_http_core_srv_conf_t     *cscf;
     ngx_http_grpc_loc_conf_t     *glcf;
     ngx_http_script_engine_t      e, le;
     ngx_http_script_len_code_pt   lcode;

     u = r->upstream;

+    cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
+
     glcf = ngx_http_get_module_loc_conf(r, ngx_http_grpc_module);

     ctx = ngx_http_get_module_ctx(r, ngx_http_grpc_module);
@@ -756,7 +759,8 @@

     } else {
         escape = 2 * ngx_escape_uri(NULL, r->uri.data, r->uri.len,
-                                    NGX_ESCAPE_URI);
+                                    NGX_ESCAPE_URI,
+                                    cscf->uri_normalization_percent_decode);
         uri_len = r->uri.len + escape + sizeof("?") - 1 + r->args.len;
     }

@@ -950,7 +954,7 @@

         if (escape) {
             p = (u_char *) ngx_escape_uri(p, r->uri.data, r->uri.len,
-                                          NGX_ESCAPE_URI);
+                       NGX_ESCAPE_URI, cscf->uri_normalization_percent_decode);

         } else {
             p = ngx_copy(p, r->uri.data, r->uri.len);
diff -r cffaf3f2eec8 -r 129437ade41b src/http/modules/ngx_http_memcached_module.c
--- a/src/http/modules/ngx_http_memcached_module.c      Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/modules/ngx_http_memcached_module.c      Tue Feb 14 16:05:46 2023 -0500
@@ -255,7 +255,8 @@
         return NGX_ERROR;
     }

-    escape = 2 * ngx_escape_uri(NULL, vv->data, vv->len, NGX_ESCAPE_MEMCACHED);
+    escape = 2 * ngx_escape_uri(NULL, vv->data, vv->len, NGX_ESCAPE_MEMCACHED,
+                                NGX_ESCAPE_SKIP_PREENCODED_NONE);

     len = sizeof("get ") - 1 + vv->len + escape + sizeof(CRLF) - 1;

@@ -285,7 +286,7 @@

     } else {
         b->last = (u_char *) ngx_escape_uri(b->last, vv->data, vv->len,
-                                            NGX_ESCAPE_MEMCACHED);
+                        NGX_ESCAPE_MEMCACHED, NGX_ESCAPE_SKIP_PREENCODED_NONE);
     }

     ctx->key.len = b->last - ctx->key.data;
diff -r cffaf3f2eec8 -r 129437ade41b src/http/modules/ngx_http_proxy_module.c
--- a/src/http/modules/ngx_http_proxy_module.c  Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/modules/ngx_http_proxy_module.c  Tue Feb 14 16:05:46 2023 -0500
@@ -1143,10 +1143,13 @@
     ngx_str_t                  *key;
     ngx_http_upstream_t        *u;
     ngx_http_proxy_ctx_t       *ctx;
+    ngx_http_core_srv_conf_t   *cscf;
     ngx_http_proxy_loc_conf_t  *plcf;

     u = r->upstream;

+    cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
+
     plcf = ngx_http_get_module_loc_conf(r, ngx_http_proxy_module);

     ctx = ngx_http_get_module_ctx(r, ngx_http_proxy_module);
@@ -1190,7 +1193,8 @@

     if (r->quoted_uri || r->internal) {
         escape = 2 * ngx_escape_uri(NULL, r->uri.data + loc_len,
-                                    r->uri.len - loc_len, NGX_ESCAPE_URI);
+                                    r->uri.len - loc_len, NGX_ESCAPE_URI,
+                                    cscf->uri_normalization_percent_decode);
     } else {
         escape = 0;
     }
@@ -1211,7 +1215,8 @@

     if (escape) {
         ngx_escape_uri(p, r->uri.data + loc_len,
-                       r->uri.len - loc_len, NGX_ESCAPE_URI);
+                       r->uri.len - loc_len, NGX_ESCAPE_URI,
+                       cscf->uri_normalization_percent_decode);
         p += r->uri.len - loc_len + escape;

     } else {
@@ -1249,11 +1254,14 @@
     ngx_http_script_code_pt       code;
     ngx_http_proxy_headers_t     *headers;
     ngx_http_script_engine_t      e, le;
+    ngx_http_core_srv_conf_t     *cscf;
     ngx_http_proxy_loc_conf_t    *plcf;
     ngx_http_script_len_code_pt   lcode;

     u = r->upstream;

+    cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
+
     plcf = ngx_http_get_module_loc_conf(r, ngx_http_proxy_module);

 #if (NGX_HTTP_CACHE)
@@ -1303,7 +1311,7 @@

         if (r->quoted_uri || r->internal) {
             escape = 2 * ngx_escape_uri(NULL, r->uri.data + loc_len,
-                                        r->uri.len - loc_len, NGX_ESCAPE_URI);
+ r->uri.len - loc_len, NGX_ESCAPE_URI, cscf->uri_normalization_percent_decode);
         }

         uri_len = ctx->vars.uri.len + r->uri.len - loc_len + escape
@@ -1428,7 +1436,8 @@

         if (escape) {
             ngx_escape_uri(b->last, r->uri.data + loc_len,
-                           r->uri.len - loc_len, NGX_ESCAPE_URI);
+                           r->uri.len - loc_len, NGX_ESCAPE_URI,
+                           cscf->uri_normalization_percent_decode);
             b->last += r->uri.len - loc_len + escape;

         } else {
diff -r cffaf3f2eec8 -r 129437ade41b src/http/modules/ngx_http_ssi_filter_module.c
--- a/src/http/modules/ngx_http_ssi_filter_module.c     Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/modules/ngx_http_ssi_filter_module.c     Tue Feb 14 16:05:46 2023 -0500
@@ -2348,7 +2348,8 @@

     case NGX_HTTP_SSI_URL_ENCODING:
         len = 2 * ngx_escape_uri(NULL, value->data, value->len,
-                                 NGX_ESCAPE_HTML);
+                                 NGX_ESCAPE_HTML,
+                                 NGX_ESCAPE_SKIP_PREENCODED_NONE);

         if (len) {
             p = ngx_pnalloc(r->pool, value->len + len);
@@ -2356,7 +2357,8 @@
                 return NGX_HTTP_SSI_ERROR;
             }

-            (void) ngx_escape_uri(p, value->data, value->len, NGX_ESCAPE_HTML);
+            (void) ngx_escape_uri(p, value->data, value->len, NGX_ESCAPE_HTML,
+                                  NGX_ESCAPE_SKIP_PREENCODED_NONE);
         }

         len += value->len;
diff -r cffaf3f2eec8 -r 129437ade41b src/http/modules/ngx_http_static_module.c
--- a/src/http/modules/ngx_http_static_module.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/modules/ngx_http_static_module.c Tue Feb 14 16:05:46 2023 -0500
@@ -58,6 +58,7 @@
     ngx_buf_t                 *b;
     ngx_chain_t                out;
     ngx_open_file_info_t       of;
+    ngx_http_core_srv_conf_t  *cscf;
     ngx_http_core_loc_conf_t  *clcf;

     if (!(r->method & (NGX_HTTP_GET|NGX_HTTP_HEAD|NGX_HTTP_POST))) {
@@ -85,6 +86,8 @@
     ngx_log_debug1(NGX_LOG_DEBUG_HTTP, log, 0,
                    "http filename: \"%s\"", path.data);

+    cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
+
     clcf = ngx_http_get_module_loc_conf(r, ngx_http_core_module);

     ngx_memzero(&of, sizeof(ngx_open_file_info_t));
@@ -157,7 +160,8 @@
         }

         escape = 2 * ngx_escape_uri(NULL, r->uri.data, r->uri.len,
-                                    NGX_ESCAPE_URI);
+                                    NGX_ESCAPE_URI,
+                                    cscf->uri_normalization_percent_decode);

         if (!clcf->alias && r->args.len == 0 && escape == 0) {
             len = r->uri.len + 1;
@@ -180,7 +184,7 @@

             if (escape) {
                 last = (u_char *) ngx_escape_uri(location, r->uri.data,
-                                                 r->uri.len, NGX_ESCAPE_URI);
+           r->uri.len, NGX_ESCAPE_URI, cscf->uri_normalization_percent_decode);

             } else {
                 last = ngx_copy(location, r->uri.data, r->uri.len);
diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http.c
--- a/src/http/ngx_http.c       Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http.c       Tue Feb 14 16:05:46 2023 -0500
@@ -900,7 +900,8 @@
     uintptr_t   escape;

     escape = 2 * ngx_escape_uri(NULL, clcf->name.data, clcf->name.len,
-                                NGX_ESCAPE_URI);
+                                NGX_ESCAPE_URI,
+                                NGX_ESCAPE_SKIP_PREENCODED_NONE);

     if (escape) {
         len = clcf->name.len + escape;
@@ -913,7 +914,8 @@
         clcf->escaped_name.len = len;
         clcf->escaped_name.data = p;

-        ngx_escape_uri(p, clcf->name.data, clcf->name.len, NGX_ESCAPE_URI);
+        ngx_escape_uri(p, clcf->name.data, clcf->name.len, NGX_ESCAPE_URI,
+                       NGX_ESCAPE_SKIP_PREENCODED_NONE);

     } else {
         clcf->escaped_name = clcf->name;
diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http.h
--- a/src/http/ngx_http.h       Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http.h       Tue Feb 14 16:05:46 2023 -0500
@@ -96,7 +96,7 @@
 ngx_int_t ngx_http_parse_request_line(ngx_http_request_t *r, ngx_buf_t *b);
 ngx_int_t ngx_http_parse_uri(ngx_http_request_t *r);
 ngx_int_t ngx_http_parse_complex_uri(ngx_http_request_t *r,
-    ngx_uint_t merge_slashes);
+    ngx_uint_t merge_slashes, ngx_uint_t uri_normalization_percent_decode);
 ngx_int_t ngx_http_parse_status_line(ngx_http_request_t *r, ngx_buf_t *b,
     ngx_http_status_t *status);
 ngx_int_t ngx_http_parse_unsafe_uri(ngx_http_request_t *r, ngx_str_t *uri,
diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http_core_module.c
--- a/src/http/ngx_http_core_module.c   Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http_core_module.c   Tue Feb 14 16:05:46 2023 -0500
@@ -180,6 +180,14 @@
 #endif


+static ngx_conf_enum_t  ngx_http_core_uri_normalization_percent_decode[] = {
+    { ngx_string("all"), NGX_HTTP_URI_NORMALIZATION_PERCENT_DECODE_ALL },
+    { ngx_string("all-except-reserved"),
+      NGX_HTTP_URI_NORMALIZATION_PERCENT_DECODE_ALL_EXCEPT_RESERVED },
+    { ngx_null_string, 0 }
+};
+
+
 static ngx_command_t  ngx_http_core_commands[] = {

     { ngx_string("variables_hash_max_size"),
@@ -778,6 +786,13 @@

 #endif

+    { ngx_string("uri_normalization_percent_decode"),
+      NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_CONF_TAKE1,
+      ngx_conf_set_enum_slot,
+      NGX_HTTP_SRV_CONF_OFFSET,
+      offsetof(ngx_http_core_srv_conf_t, uri_normalization_percent_decode),
+      &ngx_http_core_uri_normalization_percent_decode },
+
       ngx_null_command
 };

@@ -3462,6 +3477,7 @@
     cscf->ignore_invalid_headers = NGX_CONF_UNSET;
     cscf->merge_slashes = NGX_CONF_UNSET;
     cscf->underscores_in_headers = NGX_CONF_UNSET;
+    cscf->uri_normalization_percent_decode = NGX_CONF_UNSET;

     cscf->file_name = cf->conf_file->file.name.data;
     cscf->line = cf->conf_file->line;
@@ -3539,6 +3555,10 @@
         return NGX_CONF_ERROR;
     }

+    ngx_conf_merge_uint_value(conf->uri_normalization_percent_decode,
+                              prev->uri_normalization_percent_decode,
+                              NGX_HTTP_URI_NORMALIZATION_PERCENT_DECODE_ALL);
+
     return NGX_CONF_OK;
 }

diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http_core_module.h
--- a/src/http/ngx_http_core_module.h   Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http_core_module.h   Tue Feb 14 16:05:46 2023 -0500
@@ -60,6 +60,14 @@
 #define NGX_HTTP_SERVER_TOKENS_BUILD    2


+/*
+ * these values must match NGX_ESCAPE_SKIP_PREENCODED_*, since they are passed
+ * to ngx_escape_uri() unchanged as its skip_preencoded_type argument
+ */
+#define NGX_HTTP_URI_NORMALIZATION_PERCENT_DECODE_ALL                  0
+#define NGX_HTTP_URI_NORMALIZATION_PERCENT_DECODE_ALL_EXCEPT_RESERVED  1
+
+
 typedef struct ngx_http_location_tree_node_s  ngx_http_location_tree_node_t;
 typedef struct ngx_http_core_loc_conf_s  ngx_http_core_loc_conf_t;

@@ -200,6 +208,8 @@
     ngx_flag_t                  merge_slashes;
     ngx_flag_t                  underscores_in_headers;

+    ngx_uint_t                  uri_normalization_percent_decode;
+
     unsigned                    listen:1;
 #if (NGX_PCRE)
     unsigned                    captures:1;
diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http_parse.c
--- a/src/http/ngx_http_parse.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http_parse.c Tue Feb 14 16:05:46 2023 -0500
@@ -1245,9 +1245,11 @@


 ngx_int_t
-ngx_http_parse_complex_uri(ngx_http_request_t *r, ngx_uint_t merge_slashes)
+ngx_http_parse_complex_uri(ngx_http_request_t *r, ngx_uint_t merge_slashes,
+    ngx_uint_t uri_normalization_percent_decode)
 {
     u_char  c, ch, decoded, *p, *u;
+    uint32_t* decode;
     enum {
         sw_usual = 0,
         sw_slash,
@@ -1257,6 +1259,44 @@
         sw_quoted_second
     } state, quoted_state;

+    static uint32_t   all[] = {
+        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
+
+                    /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
+        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
+
+                    /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
+        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
+
+                    /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
+        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
+
+        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
+        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
+        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
+        0xffffffff  /* 1111 1111 1111 1111  1111 1111 1111 1111 */
+    };
+
+    static uint32_t   all_except_reserved[] = {
+        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
+
+                    /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
+        0x53ff6025, /* 0101 0011 1111 1111  0110 0000 0010 0101 */
+
+                    /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
+        0xd7fffffe, /* 1101 0111 1111 1111  1111 1111 1111 1110 */
+
+                    /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
+        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
+
+        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
+        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
+        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
+        0xffffffff  /* 1111 1111 1111 1111  1111 1111 1111 1111 */
+    };
+
+    static uint32_t  *decode_map[] = { all, all_except_reserved };
+
 #if (NGX_SUPPRESS_WARN)
     decoded = '\0';
     quoted_state = sw_usual;
@@ -1267,6 +1307,7 @@
     u = r->uri.data;
     r->uri_ext = NULL;
     r->args_start = NULL;
+    decode = decode_map[uri_normalization_percent_decode];

     if (r->empty_path_in_uri) {
         *u++ = '/';
@@ -1520,6 +1561,14 @@
             if (ch >= '0' && ch <= '9') {
                 ch = (u_char) ((decoded << 4) + (ch - '0'));

+                if (!(decode[ch >> 5] & (1U << (ch & 0x1f)))) {
+                    state = sw_usual;
+                    ngx_memcpy(u, p - 3, 3);
+                    u += 3;
+                    ch = *p++;
+                    break;
+                }
+
                 if (ch == '%' || ch == '#') {
                     state = sw_usual;
                     *u++ = ch;
@@ -1538,6 +1587,14 @@
             if (c >= 'a' && c <= 'f') {
                 ch = (u_char) ((decoded << 4) + (c - 'a') + 10);

+                if (!(decode[ch >> 5] & (1U << (ch & 0x1f)))) {
+                    state = sw_usual;
+                    ngx_memcpy(u, p - 3, 3);
+                    u += 3;
+                    ch = *p++;
+                    break;
+                }
+
                 if (ch == '?') {
                     state = sw_usual;
                     *u++ = ch;
diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http_request.c
--- a/src/http/ngx_http_request.c       Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http_request.c       Tue Feb 14 16:05:46 2023 -0500
@@ -1234,7 +1234,9 @@

         cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);

-        if (ngx_http_parse_complex_uri(r, cscf->merge_slashes) != NGX_OK) {
+        if (ngx_http_parse_complex_uri(r, cscf->merge_slashes,
+                             cscf->uri_normalization_percent_decode) != NGX_OK)
+        {
             r->uri.len = 0;

             ngx_log_error(NGX_LOG_INFO, r->connection->log, 0,
diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http_script.c
--- a/src/http/ngx_http_script.c        Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http_script.c        Tue Feb 14 16:05:46 2023 -0500
@@ -1044,11 +1044,14 @@
     ngx_http_script_engine_t       le;
     ngx_http_script_len_code_pt    lcode;
     ngx_http_script_regex_code_t  *code;
+    ngx_http_core_srv_conf_t      *cscf;

     code = (ngx_http_script_regex_code_t *) e->ip;

     r = e->request;

+    cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
+
     ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
                    "http script regex: \"%V\"", &code->name);

@@ -1146,7 +1149,7 @@
         if (code->uri) {
             if (r->ncaptures && (r->quoted_uri || r->plus_in_uri)) {
                 e->buf.len += 2 * ngx_escape_uri(NULL, r->uri.data, r->uri.len,
-                                                 NGX_ESCAPE_ARGS);
+                      NGX_ESCAPE_ARGS, cscf->uri_normalization_percent_decode);
             }
         }

@@ -1339,9 +1342,12 @@
     ngx_uint_t                            n;
     ngx_http_request_t                   *r;
     ngx_http_script_copy_capture_code_t  *code;
+    ngx_http_core_srv_conf_t             *cscf;

     r = e->request;

+    cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
+
     code = (ngx_http_script_copy_capture_code_t *) e->ip;

     e->ip += sizeof(ngx_http_script_copy_capture_code_t);
@@ -1359,7 +1365,7 @@

             return cap[n + 1] - cap[n]
                    + 2 * ngx_escape_uri(NULL, &p[cap[n]], cap[n + 1] - cap[n],
-                                        NGX_ESCAPE_ARGS);
+                      NGX_ESCAPE_ARGS, cscf->uri_normalization_percent_decode);
         } else {
             return cap[n + 1] - cap[n];
         }
@@ -1377,9 +1383,12 @@
     ngx_uint_t                            n;
     ngx_http_request_t                   *r;
     ngx_http_script_copy_capture_code_t  *code;
+    ngx_http_core_srv_conf_t             *cscf;

     r = e->request;

+    cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
+
     code = (ngx_http_script_copy_capture_code_t *) e->ip;

     e->ip += sizeof(ngx_http_script_copy_capture_code_t);
@@ -1397,8 +1406,7 @@
             && (e->request->quoted_uri || e->request->plus_in_uri))
         {
             e->pos = (u_char *) ngx_escape_uri(pos, &p[cap[n]],
-                                               cap[n + 1] - cap[n],
-                                               NGX_ESCAPE_ARGS);
+ cap[n + 1] - cap[n], NGX_ESCAPE_ARGS, cscf->uri_normalization_percent_decode);
         } else {
             e->pos = ngx_copy(pos, &p[cap[n]], cap[n + 1] - cap[n]);
         }
diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http_special_response.c
--- a/src/http/ngx_http_special_response.c      Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http_special_response.c      Tue Feb 14 16:05:46 2023 -0500
@@ -797,7 +797,8 @@
     len = r->headers_out.location->value.len;
     location = r->headers_out.location->value.data;

-    escape = 2 * ngx_escape_uri(NULL, location, len, NGX_ESCAPE_REFRESH);
+    escape = 2 * ngx_escape_uri(NULL, location, len, NGX_ESCAPE_REFRESH,
+                                NGX_ESCAPE_SKIP_PREENCODED_NONE);

     size = sizeof(ngx_http_msie_refresh_head) - 1
            + escape + len
@@ -841,7 +842,8 @@
         p = ngx_cpymem(p, location, len);

     } else {
-        p = (u_char *) ngx_escape_uri(p, location, len, NGX_ESCAPE_REFRESH);
+        p = (u_char *) ngx_escape_uri(p, location, len, NGX_ESCAPE_REFRESH,
+                                      NGX_ESCAPE_SKIP_PREENCODED_NONE);
     }

     b->last = ngx_cpymem(p, ngx_http_msie_refresh_tail,
diff -r cffaf3f2eec8 -r 129437ade41b src/mail/ngx_mail_auth_http_module.c
--- a/src/mail/ngx_mail_auth_http_module.c      Thu Feb 02 23:38:48 2023 +0300
+++ b/src/mail/ngx_mail_auth_http_module.c      Tue Feb 14 16:05:46 2023 -0500
@@ -1478,7 +1478,8 @@
     u_char     *p;
     uintptr_t   n;

-    n = ngx_escape_uri(NULL, text->data, text->len, NGX_ESCAPE_MAIL_AUTH);
+    n = ngx_escape_uri(NULL, text->data, text->len, NGX_ESCAPE_MAIL_AUTH,
+                       NGX_ESCAPE_SKIP_PREENCODED_NONE);

     if (n == 0) {
         *escaped = *text;
@@ -1492,7 +1493,8 @@
         return NGX_ERROR;
     }

-    (void) ngx_escape_uri(p, text->data, text->len, NGX_ESCAPE_MAIL_AUTH);
+    (void) ngx_escape_uri(p, text->data, text->len, NGX_ESCAPE_MAIL_AUTH,
+                          NGX_ESCAPE_SKIP_PREENCODED_NONE);

     escaped->data = p;

________________________________
 Confidentiality notice

This e-mail message and any attachment hereto contain confidential information which may be privileged and which is intended for the exclusive use of its addressee(s). If you receive this message in error, please inform sender immediately and destroy any copy thereof. Furthermore, any disclosure, distribution or copying of this message and/or any attachment hereto without the consent of the sender is strictly prohibited. Thank you.


More information about the nginx-devel mailing list