[PATCH] Core: escape chars not allowed in URI per RFC 3986

Maxim Dounin mdounin at mdounin.ru
Sun Apr 18 22:13:57 MSD 2010


# HG changeset patch
# User Maxim Dounin <mdounin at mdounin.ru>
# Date 1271614181 -14400
# Node ID d12d04e37d354fb51e5fac0b03633521a86a3b90
# Parent  c47f46ea4c3eda5666f6e1b87d88ae15a7395a8c
Core: escape chars not allowed in URI per RFC 3986.

Per RFC 3986 only the following chars are allowed in URI unescaped:

unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
              / "*" / "+" / "," / ";" / "="

The patch adds escaping of the following chars in all URI parts: """, "<",
">", "\", "^", "`", "{", "|", "}".

Not escaping ">" known to cause problems with MS Exchange at least, see
http://nginx.org/pipermail/nginx-ru/2010-January/031261.html.

diff --git a/src/core/ngx_string.c b/src/core/ngx_string.c
--- a/src/core/ngx_string.c
+++ b/src/core/ngx_string.c
@@ -1258,19 +1258,34 @@ ngx_escape_uri(u_char *dst, u_char *src,
     uint32_t       *escape;
     static u_char   hex[] = "0123456789abcdef";
 
-                    /* " ", "#", "%", "?", %00-%1F, %7F-%FF */
+    /*
+     * per RFC 3986 only allowed chars in URI:
+     *
+     * unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+     * gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+     * sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
+     *               / "*" / "+" / "," / ";" / "="
+     *
+     * (and "%" for escaping), i.e. the following should be escaped in all
+     * parts:
+     *
+     * %00-%1F, %7F-%FF, " ", """, "%", "<", ">", "\", "^", "`", "{", "|", "}"
+     *
+     */
+
+                    /* "#", "?" */
 
     static uint32_t   uri[] = {
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
 
                     /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
-        0x80000029, /* 1000 0000 0000 0000  0000 0000 0010 1001 */
+        0xd000002d, /* 1101 0000 0000 0000  0000 0000 0010 1101 */
 
                     /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
-        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x50000000, /* 0101 0000 0000 0000  0000 0000 0000 0000 */
 
                     /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
-        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */
+        0xb8000001, /* 1011 1000 0000 0000  0000 0000 0000 0001 */
 
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
@@ -1278,19 +1293,19 @@ ngx_escape_uri(u_char *dst, u_char *src,
         0xffffffff  /* 1111 1111 1111 1111  1111 1111 1111 1111 */
     };
 
-                    /* " ", "#", "%", "&", "+", "?", %00-%1F, %7F-%FF */
+                    /* "#", "&", "+", "?" */
 
     static uint32_t   args[] = {
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
 
                     /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
-        0x80000869, /* 1000 0000 0000 0000  0000 1000 0110 1001 */
+        0xd000086d, /* 1101 0000 0000 0000  0000 1000 0110 1101 */
 
                     /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
-        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x50000000, /* 0101 0000 0000 0000  0000 0000 0000 0000 */
 
                     /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
-        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */
+        0xb8000001, /* 1011 1000 0000 0000  0000 0000 0000 0001 */
 
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
@@ -1298,19 +1313,19 @@ ngx_escape_uri(u_char *dst, u_char *src,
         0xffffffff  /* 1111 1111 1111 1111  1111 1111 1111 1111 */
     };
 
-                    /* " ", "#", """, "%", "'", %00-%1F, %7F-%FF */
+                    /* "#", "'" */
 
     static uint32_t   html[] = {
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
 
                     /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
-        0x000000ad, /* 0000 0000 0000 0000  0000 0000 1010 1101 */
+        0x500000ad, /* 0101 0000 0000 0000  0000 0000 1010 1101 */
 
                     /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
-        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x50000000, /* 0101 0000 0000 0000  0000 0000 0000 0000 */
 
                     /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
-        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */
+        0xb8000001, /* 1011 1000 0000 0000  0000 0000 0000 0001 */
 
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
@@ -1318,19 +1333,19 @@ ngx_escape_uri(u_char *dst, u_char *src,
         0xffffffff  /* 1111 1111 1111 1111  1111 1111 1111 1111 */
     };
 
-                    /* " ", """, "%", "'", %00-%1F, %7F-%FF */
+                    /* "'" */
 
     static uint32_t   refresh[] = {
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
 
                     /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
-        0x00000085, /* 0000 0000 0000 0000  0000 0000 1000 0101 */
+        0x50000085, /* 0101 0000 0000 0000  0000 0000 1000 0101 */
 
                     /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
-        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x50000000, /* 0101 0000 0000 0000  0000 0000 0000 0000 */
 
                     /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
-        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */
+        0xb8000001, /* 1011 1000 0000 0000  0000 0000 0000 0001 */
 
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */



More information about the nginx-devel mailing list