[njs] decodeURI() and decodeURIComponent() functions.

Igor Sysoev igor at sysoev.ru
Wed Aug 31 12:54:33 UTC 2016


details:   http://hg.nginx.org/njs/rev/323f00dc9879
branches:  
changeset: 161:323f00dc9879
user:      Igor Sysoev <igor at sysoev.ru>
date:      Tue Aug 30 12:05:46 2016 +0300
description:
decodeURI() and decodeURIComponent() functions.

diffstat:

 njs/njs_builtin.c        |    4 +
 njs/njs_generator.c      |    2 +
 njs/njs_lexer_keyword.c  |    2 +
 njs/njs_parser.c         |    2 +
 njs/njs_parser.h         |   10 +-
 njs/njs_string.c         |  206 +++++++++++++++++++++++++++++++++++++++++++++++
 njs/njs_string.h         |    4 +
 njs/njs_vm.c             |    1 +
 njs/njs_vm.h             |    5 +-
 njs/test/njs_unit_test.c |   33 +++++++
 10 files changed, 264 insertions(+), 5 deletions(-)

diffs (403 lines):

diff -r d63ecb57f164 -r 323f00dc9879 njs/njs_builtin.c
--- a/njs/njs_builtin.c	Tue Aug 30 12:02:31 2016 +0300
+++ b/njs/njs_builtin.c	Tue Aug 30 12:05:46 2016 +0300
@@ -90,6 +90,8 @@ njs_builtin_objects_create(njs_vm_t *vm)
         NULL,                         /* parseFloat         */
         NULL,                         /* encodeURI          */
         NULL,                         /* encodeURIComponent */
+        NULL,                         /* decodeURI          */
+        NULL,                         /* decodeURIComponent */
     };
 
     static const njs_function_init_t  native_functions[] = {
@@ -103,6 +105,8 @@ njs_builtin_objects_create(njs_vm_t *vm)
         { njs_number_parse_float,          { NJS_SKIP_ARG, NJS_STRING_ARG } },
         { njs_string_encode_uri,           { NJS_SKIP_ARG, NJS_STRING_ARG } },
         { njs_string_encode_uri_component, { NJS_SKIP_ARG, NJS_STRING_ARG } },
+        { njs_string_decode_uri,           { NJS_SKIP_ARG, NJS_STRING_ARG } },
+        { njs_string_decode_uri_component, { NJS_SKIP_ARG, NJS_STRING_ARG } },
     };
 
     static const njs_object_prop_t    null_proto_property = {
diff -r d63ecb57f164 -r 323f00dc9879 njs/njs_generator.c
--- a/njs/njs_generator.c	Tue Aug 30 12:02:31 2016 +0300
+++ b/njs/njs_generator.c	Tue Aug 30 12:05:46 2016 +0300
@@ -302,6 +302,8 @@ njs_generator(njs_vm_t *vm, njs_parser_t
     case NJS_TOKEN_PARSE_FLOAT:
     case NJS_TOKEN_ENCODE_URI:
     case NJS_TOKEN_ENCODE_URI_COMPONENT:
+    case NJS_TOKEN_DECODE_URI:
+    case NJS_TOKEN_DECODE_URI_COMPONENT:
         return njs_generate_builtin_object(vm, parser, node);
 
     case NJS_TOKEN_FUNCTION:
diff -r d63ecb57f164 -r 323f00dc9879 njs/njs_lexer_keyword.c
--- a/njs/njs_lexer_keyword.c	Tue Aug 30 12:02:31 2016 +0300
+++ b/njs/njs_lexer_keyword.c	Tue Aug 30 12:05:46 2016 +0300
@@ -95,6 +95,8 @@ static const njs_keyword_t  njs_keywords
     { nxt_string("parseFloat"),    NJS_TOKEN_PARSE_FLOAT, 0 },
     { nxt_string("encodeURI"),     NJS_TOKEN_ENCODE_URI, 0 },
     { nxt_string("encodeURIComponent"),  NJS_TOKEN_ENCODE_URI_COMPONENT, 0 },
+    { nxt_string("decodeURI"),     NJS_TOKEN_DECODE_URI, 0 },
+    { nxt_string("decodeURIComponent"),  NJS_TOKEN_DECODE_URI_COMPONENT, 0 },
 
     /* Reserved words. */
 
diff -r d63ecb57f164 -r 323f00dc9879 njs/njs_parser.c
--- a/njs/njs_parser.c	Tue Aug 30 12:02:31 2016 +0300
+++ b/njs/njs_parser.c	Tue Aug 30 12:05:46 2016 +0300
@@ -1673,6 +1673,8 @@ njs_parser_terminal(njs_vm_t *vm, njs_pa
     case NJS_TOKEN_PARSE_FLOAT:
     case NJS_TOKEN_ENCODE_URI:
     case NJS_TOKEN_ENCODE_URI_COMPONENT:
+    case NJS_TOKEN_DECODE_URI:
+    case NJS_TOKEN_DECODE_URI_COMPONENT:
         return njs_parser_builtin_function(vm, parser, node);
 
     default:
diff -r d63ecb57f164 -r 323f00dc9879 njs/njs_parser.h
--- a/njs/njs_parser.h	Tue Aug 30 12:02:31 2016 +0300
+++ b/njs/njs_parser.h	Tue Aug 30 12:05:46 2016 +0300
@@ -183,14 +183,16 @@ typedef enum {
     NJS_TOKEN_PARSE_FLOAT,
     NJS_TOKEN_ENCODE_URI,
     NJS_TOKEN_ENCODE_URI_COMPONENT,
+    NJS_TOKEN_DECODE_URI,
+    NJS_TOKEN_DECODE_URI_COMPONENT,
 
     NJS_TOKEN_RESERVED,
 } njs_token_t;
 
 
 typedef struct {
-    njs_token_t                     token:8;
-    njs_token_t                     prev_token:8;
+    njs_token_t                     token:16;
+    njs_token_t                     prev_token:16;
     uint8_t                         property;      /* 1 bit */
     uint32_t                        key_hash;
 
@@ -222,8 +224,8 @@ typedef enum {
 typedef struct njs_parser_node_s    njs_parser_node_t;
 
 struct njs_parser_node_s {
-    njs_token_t                     token:8;
-    njs_variable_node_state_t       state:8;    /* 2 bits */
+    njs_token_t                     token:16;
+    njs_variable_node_state_t       state:2;    /* 2 bits */
     uint8_t                         ctor:1;     /* 1 bit  */
     uint8_t                         temporary;  /* 1 bit  */
     uint32_t                        token_line;
diff -r d63ecb57f164 -r 323f00dc9879 njs/njs_string.c
--- a/njs/njs_string.c	Tue Aug 30 12:02:31 2016 +0300
+++ b/njs/njs_string.c	Tue Aug 30 12:05:46 2016 +0300
@@ -47,6 +47,8 @@ static njs_ret_t njs_string_split_part_a
     u_char *start, size_t size, nxt_uint_t utf8);
 static njs_ret_t njs_string_encode(njs_vm_t *vm, njs_value_t *value,
     const uint32_t *escape);
+static njs_ret_t njs_string_decode(njs_vm_t *vm, njs_value_t *value,
+    const uint32_t *reserve);
 
 
 njs_ret_t
@@ -2238,6 +2240,210 @@ njs_string_encode(njs_vm_t *vm, njs_valu
 }
 
 
+/*
+ * decodeURI(string): decodes "%XX" escapes except those of "#$&+,/:;=?@".
+ */
+
+njs_ret_t
+njs_string_decode_uri(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs,
+    njs_index_t unused)
+{
+    static const uint32_t  reserve[] = {
+        0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+
+                     /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
+        0xac009858,  /* 1010 1100 0000 0000  1001 1000 0101 1000 */
+
+                     /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
+        0x00000001,  /* 0000 0000 0000 0000  0000 0000 0000 0001 */
+
+                     /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
+        0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+
+        0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+    };
+
+    if (nargs > 1) {
+        return njs_string_decode(vm, &args[1], reserve);
+    }
+
+    vm->retval = njs_string_void;    /* nargs <= 1: nothing to decode. */
+
+    return NXT_OK;
+}
+
+
+/*
+ * decodeURIComponent(string): the bitmap is empty, every escape is decoded.
+ */
+
+njs_ret_t
+njs_string_decode_uri_component(njs_vm_t *vm, njs_value_t *args,
+    nxt_uint_t nargs, njs_index_t unused)
+{
+    static const uint32_t  reserve[] = {
+        0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+
+                     /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
+        0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+
+                     /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
+        0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+
+                     /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
+        0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+
+        0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+    };
+
+    if (nargs > 1) {
+        return njs_string_decode(vm, &args[1], reserve);
+    }
+
+    vm->retval = njs_string_void;    /* nargs <= 1: nothing to decode. */
+
+    return NXT_OK;
+}
+
+
+/*
+ * Decodes "%XX" escapes of a string value into vm->retval; escapes of bytes
+ * flagged in the "reserve" bitmap are copied verbatim.  The value itself is
+ * returned if nothing is decoded.  A truncated or non-hex escape raises
+ * URIError; NXT_ERROR is returned then and on allocation failure.
+ */
+
+static njs_ret_t
+njs_string_decode(njs_vm_t *vm, njs_value_t *value, const uint32_t *reserve)
+{
+    int8_t               d0, d1;
+    u_char               byte, *start, *src, *dst;
+    size_t               n, size;
+    ssize_t              length;
+    nxt_bool_t           utf8;
+    njs_string_prop_t    string;
+
+    /* Hex digit values indexed by character, -1 for non-hex characters. */
+    static const int8_t  hex[256] nxt_aligned(32) = {
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
+        -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    };
+
+    nxt_prefetch(&hex['0']);
+    nxt_prefetch(reserve);
+
+    (void) njs_string_prop(&string, value);
+    src = string.start;
+    n = 0;
+
+    /* Pass 1: validate escapes and count the bytes saved by decoding. */
+    for (size = string.size; size != 0; size--) {
+        byte = *src++;
+
+        if (byte == '%') {
+            if (size < 3) {
+                goto uri_error;
+            }
+            d0 = hex[*src++];
+            d1 = hex[*src++];
+            if (d0 < 0 || d1 < 0) {
+                goto uri_error;
+            }
+
+            /* The two hex digits have been consumed in addition to '%'. */
+            size -= 2;
+            byte = (d0 << 4) + d1;
+
+            if ((reserve[byte >> 5] & ((uint32_t) 1 << (byte & 0x1f))) == 0) {
+                n += 2;
+            }
+        }
+    }
+
+    if (n == 0) {
+        /* GC: retain src. */
+        vm->retval = *value;
+        return NXT_OK;
+    }
+
+    n = string.size - n;
+
+    start = njs_string_alloc(vm, &vm->retval, n, n);
+    if (nxt_slow_path(start == NULL)) {
+        return NXT_ERROR;
+    }
+
+    utf8 = 0;
+    dst = start;
+    src = string.start;
+    /* Pass 2: "size" counts the output bytes left, hence it starts at n. */
+    size = n;
+
+    do {
+        byte = *src++;
+
+        if (byte == '%') {
+            d0 = hex[*src++];
+            d1 = hex[*src++];
+            byte = (d0 << 4) + d1;
+            utf8 |= (byte >= 0x80);
+
+            if ((reserve[byte >> 5] & ((uint32_t) 1 << (byte & 0x1f))) != 0) {
+                /* Reserved: keep the escape as is, i.e. 3 output bytes. */
+                size -= 2;
+                *dst++ = '%';
+                *dst++ = src[-2];
+                byte = src[-1];
+            }
+        }
+
+        *dst++ = byte;
+        size--;
+    } while (size != 0);
+
+    if (utf8) {
+        length = nxt_utf8_length(start, n);
+
+        if (length < 0) {
+            length = 0;
+        }
+
+        if (vm->retval.short_string.size != NJS_STRING_LONG) {
+            vm->retval.short_string.length = length;
+        } else {
+            vm->retval.data.u.string->length = length;
+        }
+    }
+
+    return NXT_OK;
+
+uri_error:
+    vm->exception = &njs_exception_uri_error;
+
+    return NXT_ERROR;
+}
+
+
 static nxt_int_t
 njs_values_hash_test(nxt_lvlhsh_query_t *lhq, void *data)
 {
diff -r d63ecb57f164 -r 323f00dc9879 njs/njs_string.h
--- a/njs/njs_string.h	Tue Aug 30 12:02:31 2016 +0300
+++ b/njs/njs_string.h	Tue Aug 30 12:05:46 2016 +0300
@@ -108,6 +108,10 @@ njs_ret_t njs_string_encode_uri(njs_vm_t
     nxt_uint_t nargs, njs_index_t unused);
 njs_ret_t njs_string_encode_uri_component(njs_vm_t *vm, njs_value_t *args,
     nxt_uint_t nargs, njs_index_t unused);
+njs_ret_t njs_string_decode_uri(njs_vm_t *vm, njs_value_t *args,
+    nxt_uint_t nargs, njs_index_t unused);
+njs_ret_t njs_string_decode_uri_component(njs_vm_t *vm, njs_value_t *args,
+    nxt_uint_t nargs, njs_index_t unused);
 
 njs_index_t njs_value_index(njs_vm_t *vm, njs_parser_t *parser,
     const njs_value_t *src);
diff -r d63ecb57f164 -r 323f00dc9879 njs/njs_vm.c
--- a/njs/njs_vm.c	Tue Aug 30 12:02:31 2016 +0300
+++ b/njs/njs_vm.c	Tue Aug 30 12:05:46 2016 +0300
@@ -141,6 +141,7 @@ const njs_value_t  njs_exception_syntax_
 const njs_value_t  njs_exception_reference_error = njs_string("ReferenceError");
 const njs_value_t  njs_exception_type_error =      njs_string("TypeError");
 const njs_value_t  njs_exception_range_error =     njs_string("RangeError");
+const njs_value_t  njs_exception_uri_error =       njs_string("URIError");
 const njs_value_t  njs_exception_memory_error =    njs_string("MemoryError");
 const njs_value_t  njs_exception_internal_error =  njs_string("InternalError");
 
diff -r d63ecb57f164 -r 323f00dc9879 njs/njs_vm.h
--- a/njs/njs_vm.h	Tue Aug 30 12:02:31 2016 +0300
+++ b/njs/njs_vm.h	Tue Aug 30 12:05:46 2016 +0300
@@ -712,7 +712,9 @@ enum njs_function_e {
     NJS_FUNCTION_PARSE_FLOAT,
     NJS_FUNCTION_STRING_ENCODE_URI,
     NJS_FUNCTION_STRING_ENCODE_URI_COMPONENT,
-#define NJS_FUNCTION_MAX       (NJS_FUNCTION_STRING_ENCODE_URI_COMPONENT + 1)
+    NJS_FUNCTION_STRING_DECODE_URI,
+    NJS_FUNCTION_STRING_DECODE_URI_COMPONENT,
+#define NJS_FUNCTION_MAX       (NJS_FUNCTION_STRING_DECODE_URI_COMPONENT + 1)
 };
 
 
@@ -1005,6 +1007,7 @@ extern const njs_value_t  njs_exception_
 extern const njs_value_t  njs_exception_reference_error;
 extern const njs_value_t  njs_exception_type_error;
 extern const njs_value_t  njs_exception_range_error;
+extern const njs_value_t  njs_exception_uri_error;
 extern const njs_value_t  njs_exception_memory_error;
 extern const njs_value_t  njs_exception_internal_error;
 
diff -r d63ecb57f164 -r 323f00dc9879 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c	Tue Aug 30 12:02:31 2016 +0300
+++ b/njs/test/njs_unit_test.c	Tue Aug 30 12:05:46 2016 +0300
@@ -3309,6 +3309,39 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("encodeURIComponent('~}|{`_^]\\\\[@?>=<;:/.-,+*)(\\\'&%$#\"! ')"),
       nxt_string("~%7D%7C%7B%60_%5E%5D%5C%5B%40%3F%3E%3D%3C%3B%3A%2F.-%2C%2B*)('%26%25%24%23%22!%20")},
 
+    { nxt_string("decodeURI()"),
+      nxt_string("undefined")},
+
+    { nxt_string("decodeURI('%QQ')"),
+      nxt_string("URIError")},
+
+    { nxt_string("decodeURI('%')"),
+      nxt_string("URIError")},
+
+    { nxt_string("decodeURI('%0')"),
+      nxt_string("URIError")},
+
+    { nxt_string("decodeURI('%00')"),
+      nxt_string("\0")},
+
+    { nxt_string("decodeURI('%3012%D0%B0%D0%B1%D0%B2')"),
+      nxt_string("012абв")},
+
+    { nxt_string("decodeURI('%7e%7d%7c%7b%60%5f%5e%5d%5c%5b%40%3f%3e%3d%3c%3b%3a%2f%2e%2c%2b%2a%29%28%27%26%25%24%23%22%21%20')"),
+      nxt_string("~}|{`_^]\\[%40%3f>%3d<%3b%3a%2f.%2c%2b*)('%26%%24%23\"! ")},
+
+    { nxt_string("decodeURIComponent('%7e%7d%7c%7b%60%5f%5e%5d%5c%5b%40%3f%3e%3d%3c%3b%3a%2f%2e%2c%2b%2a%29%28%27%26%25%24%23%22%21%20')"),
+      nxt_string("~}|{`_^]\\[@?>=<;:/.,+*)('&%$#\"! ")},
+
+    { nxt_string("decodeURI('%41%42%43').length"),
+      nxt_string("3")},
+
+    { nxt_string("decodeURI('%D0%B0%D0%B1%D0%B2').length"),
+      nxt_string("3")},
+
+    { nxt_string("decodeURI('%80%81%82').length"),
+      nxt_string("3")},
+
     /* Functions. */
 
     { nxt_string("return"),


More information about the nginx-devel mailing list