[nginx] Core: escaping of chars not allowed in URIs per RFC 3986.

Maxim Dounin mdounin at mdounin.ru
Mon Jun 28 18:36:25 UTC 2021


details:   https://hg.nginx.org/nginx/rev/dfd8dfb436e5
branches:  
changeset: 7880:dfd8dfb436e5
user:      Maxim Dounin <mdounin at mdounin.ru>
date:      Mon Jun 28 18:01:11 2021 +0300
description:
Core: escaping of chars not allowed in URIs per RFC 3986.

Per RFC 3986 only the following characters are allowed in URIs unescaped:

unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
              / "*" / "+" / "," / ";" / "="

And "%" can appear as a part of escaping itself.  The following
characters are not allowed and need to be escaped: %00-%1F, %7F-%FF,
" ", """, "<", ">", "\", "^", "`", "{", "|", "}".

Not escaping ">" is known to cause problems at least with MS Exchange (see
http://nginx.org/pipermail/nginx-ru/2010-January/031261.html) and in
Tomcat (ticket #2191).

The patch adds escaping of the following chars in all URI parts: """, "<",
">", "\", "^", "`", "{", "|", "}".  Note that comments are mostly preserved
to outline important characters being escaped.

diffstat:

 src/core/ngx_string.c |  45 +++++++++++++++++++++++++++++----------------
 1 files changed, 29 insertions(+), 16 deletions(-)

diffs (112 lines):

diff -r b7407334c60d -r dfd8dfb436e5 src/core/ngx_string.c
--- a/src/core/ngx_string.c	Mon Jun 28 18:01:09 2021 +0300
+++ b/src/core/ngx_string.c	Mon Jun 28 18:01:11 2021 +0300
@@ -1493,19 +1493,32 @@ ngx_escape_uri(u_char *dst, u_char *src,
     uint32_t       *escape;
     static u_char   hex[] = "0123456789ABCDEF";
 
-                    /* " ", "#", "%", "?", %00-%1F, %7F-%FF */
+    /*
+     * Per RFC 3986 only the following chars are allowed in URIs unescaped:
+     *
+     * unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+     * gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+     * sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
+     *               / "*" / "+" / "," / ";" / "="
+     *
+     * And "%" can appear as a part of escaping itself.  The following
+     * characters are not allowed and need to be escaped: %00-%1F, %7F-%FF,
+     * " ", """, "<", ">", "\", "^", "`", "{", "|", "}".
+     */
+
+                    /* " ", "#", "%", "?", not allowed */
 
     static uint32_t   uri[] = {
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
 
                     /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
-        0x80000029, /* 1000 0000 0000 0000  0000 0000 0010 1001 */
+        0xd000002d, /* 1101 0000 0000 0000  0000 0000 0010 1101 */
 
                     /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
-        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x50000000, /* 0101 0000 0000 0000  0000 0000 0000 0000 */
 
                     /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
-        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */
+        0xb8000001, /* 1011 1000 0000 0000  0000 0000 0000 0001 */
 
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
@@ -1513,19 +1526,19 @@ ngx_escape_uri(u_char *dst, u_char *src,
         0xffffffff  /* 1111 1111 1111 1111  1111 1111 1111 1111 */
     };
 
-                    /* " ", "#", "%", "&", "+", ";", "?", %00-%1F, %7F-%FF */
+                    /* " ", "#", "%", "&", "+", ";", "?", not allowed */
 
     static uint32_t   args[] = {
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
 
                     /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
-        0x88000869, /* 1000 1000 0000 0000  0000 1000 0110 1001 */
+        0xd800086d, /* 1101 1000 0000 0000  0000 1000 0110 1101 */
 
                     /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
-        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x50000000, /* 0101 0000 0000 0000  0000 0000 0000 0000 */
 
                     /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
-        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */
+        0xb8000001, /* 1011 1000 0000 0000  0000 0000 0000 0001 */
 
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
@@ -1553,19 +1566,19 @@ ngx_escape_uri(u_char *dst, u_char *src,
         0xffffffff  /* 1111 1111 1111 1111  1111 1111 1111 1111 */
     };
 
-                    /* " ", "#", """, "%", "'", %00-%1F, %7F-%FF */
+                    /* " ", "#", """, "%", "'", not allowed */
 
     static uint32_t   html[] = {
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
 
                     /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
-        0x000000ad, /* 0000 0000 0000 0000  0000 0000 1010 1101 */
+        0x500000ad, /* 0101 0000 0000 0000  0000 0000 1010 1101 */
 
                     /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
-        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x50000000, /* 0101 0000 0000 0000  0000 0000 0000 0000 */
 
                     /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
-        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */
+        0xb8000001, /* 1011 1000 0000 0000  0000 0000 0000 0001 */
 
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
@@ -1573,19 +1586,19 @@ ngx_escape_uri(u_char *dst, u_char *src,
         0xffffffff  /* 1111 1111 1111 1111  1111 1111 1111 1111 */
     };
 
-                    /* " ", """, "'", %00-%1F, %7F-%FF */
+                    /* " ", """, "'", not allowed */
 
     static uint32_t   refresh[] = {
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
 
                     /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
-        0x00000085, /* 0000 0000 0000 0000  0000 0000 1000 0101 */
+        0x50000085, /* 0101 0000 0000 0000  0000 0000 1000 0101 */
 
                     /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
-        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x50000000, /* 0101 0000 0000 0000  0000 0000 0000 0000 */
 
                     /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
-        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */
+        0xd8000001, /* 1011 1000 0000 0000  0000 0000 0000 0001 */
 
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */


More information about the nginx-devel mailing list