[njs] Fixed encodeURI() and decodeURI() according to the spec.

Dmitry Volyntsev xeioex at nginx.com
Tue Feb 18 15:56:40 UTC 2020


details:   https://hg.nginx.org/njs/rev/c60911765952
branches:  
changeset: 1332:c60911765952
user:      Dmitry Volyntsev <xeioex at nginx.com>
date:      Tue Feb 18 18:55:50 2020 +0300
description:
Fixed encodeURI() and decodeURI() according to the spec.

diffstat:

 src/njs_builtin.c         |    8 +-
 src/njs_json.c            |    8 +-
 src/njs_parser_terminal.c |   12 +-
 src/njs_string.c          |  509 +++++++++++++++++++++++++--------------------
 src/njs_string.h          |   14 +-
 src/test/njs_unit_test.c  |   54 +++-
 6 files changed, 342 insertions(+), 263 deletions(-)

diffs (840 lines):

diff -r 9b70f038abfa -r c60911765952 src/njs_builtin.c
--- a/src/njs_builtin.c	Tue Feb 18 18:42:37 2020 +0300
+++ b/src/njs_builtin.c	Tue Feb 18 18:55:50 2020 +0300
@@ -1018,7 +1018,7 @@ static const njs_object_prop_t  njs_glob
     {
         .type = NJS_PROPERTY,
         .name = njs_string("encodeURI"),
-        .value = njs_native_function(njs_string_encode_uri, 1),
+        .value = njs_native_function2(njs_string_encode_uri, 1, 0),
         .writable = 1,
         .configurable = 1,
     },
@@ -1026,7 +1026,7 @@ static const njs_object_prop_t  njs_glob
     {
         .type = NJS_PROPERTY,
         .name = njs_long_string("encodeURIComponent"),
-        .value = njs_native_function(njs_string_encode_uri_component, 1),
+        .value = njs_native_function2(njs_string_encode_uri, 1, 1),
         .writable = 1,
         .configurable = 1,
     },
@@ -1034,7 +1034,7 @@ static const njs_object_prop_t  njs_glob
     {
         .type = NJS_PROPERTY,
         .name = njs_string("decodeURI"),
-        .value = njs_native_function(njs_string_decode_uri, 1),
+        .value = njs_native_function2(njs_string_decode_uri, 1, 0),
         .writable = 1,
         .configurable = 1,
     },
@@ -1042,7 +1042,7 @@ static const njs_object_prop_t  njs_glob
     {
         .type = NJS_PROPERTY,
         .name = njs_long_string("decodeURIComponent"),
-        .value = njs_native_function(njs_string_decode_uri_component, 1),
+        .value = njs_native_function2(njs_string_decode_uri, 1, 1),
         .writable = 1,
         .configurable = 1,
     },
diff -r 9b70f038abfa -r c60911765952 src/njs_json.c
--- a/src/njs_json.c	Tue Feb 18 18:42:37 2020 +0300
+++ b/src/njs_json.c	Tue Feb 18 18:55:50 2020 +0300
@@ -684,9 +684,7 @@ njs_json_parse_string(njs_json_parse_ctx
             utf = njs_json_unicode(p);
             p += 4;
 
-            if (utf >= 0xd800 && utf <= 0xdfff) {
-
-                /* Surrogate pair. */
+            if (njs_surrogate_any(utf)) {
 
                 if (utf > 0xdbff || p[0] != '\\' || p[1] != 'u') {
                     s = njs_utf8_encode(s, NJS_UTF8_REPLACEMENT);
@@ -698,10 +696,10 @@ njs_json_parse_string(njs_json_parse_ctx
                 utf_low = njs_json_unicode(p);
                 p += 4;
 
-                if (njs_fast_path(utf_low >= 0xdc00 && utf_low <= 0xdfff)) {
+                if (njs_fast_path(njs_surrogate_trailing(utf_low))) {
                     utf = njs_string_surrogate_pair(utf, utf_low);
 
-                } else if (utf_low >= 0xd800 && utf_low <= 0xdbff) {
+                } else if (njs_surrogate_leading(utf_low)) {
                     utf = NJS_UTF8_REPLACEMENT;
                     s = njs_utf8_encode(s, NJS_UTF8_REPLACEMENT);
 
diff -r 9b70f038abfa -r c60911765952 src/njs_parser_terminal.c
--- a/src/njs_parser_terminal.c	Tue Feb 18 18:42:37 2020 +0300
+++ b/src/njs_parser_terminal.c	Tue Feb 18 18:55:50 2020 +0300
@@ -1111,10 +1111,10 @@ njs_parser_escape_string_create(njs_vm_t
         }
 
         if (cp_pair != 0) {
-            if (njs_fast_path(cp >= 0xdc00 && cp <= 0xdfff)) {
+            if (njs_fast_path(njs_surrogate_trailing(cp))) {
                 cp = njs_string_surrogate_pair(cp_pair, cp);
 
-            } else if (njs_slow_path(cp >= 0xd800 && cp <= 0xdbff)) {
+            } else if (njs_slow_path(njs_surrogate_leading(cp))) {
                 cp = NJS_UTF8_REPLACEMENT;
 
                 dst = njs_utf8_encode(dst, (uint32_t) cp);
@@ -1125,7 +1125,7 @@ njs_parser_escape_string_create(njs_vm_t
 
             cp_pair = 0;
 
-        } else if (cp >= 0xd800 && cp <= 0xdfff) {
+        } else if (njs_surrogate_any(cp)) {
             if (cp <= 0xdbff && src[0] == '\\' && src[1] == 'u') {
                 cp_pair = cp;
                 continue;
@@ -1256,10 +1256,10 @@ njs_parser_escape_string_calc_length(njs
         }
 
         if (cp_pair != 0) {
-            if (njs_fast_path(cp >= 0xdc00 && cp <= 0xdfff)) {
+            if (njs_fast_path(njs_surrogate_trailing(cp))) {
                 cp = njs_string_surrogate_pair(cp_pair, cp);
 
-            } else if (njs_slow_path(cp >= 0xd800 && cp <= 0xdbff)) {
+            } else if (njs_slow_path(njs_surrogate_leading(cp))) {
                 cp = NJS_UTF8_REPLACEMENT;
 
                 size += njs_utf8_size(cp);
@@ -1272,7 +1272,7 @@ njs_parser_escape_string_calc_length(njs
 
             cp_pair = 0;
 
-        } else if (cp >= 0xd800 && cp <= 0xdfff) {
+        } else if (njs_surrogate_any(cp)) {
             if (cp <= 0xdbff && src[0] == '\\' && src[1] == 'u') {
                 cp_pair = cp;
                 continue;
diff -r 9b70f038abfa -r c60911765952 src/njs_string.c
--- a/src/njs_string.c	Tue Feb 18 18:42:37 2020 +0300
+++ b/src/njs_string.c	Tue Feb 18 18:55:50 2020 +0300
@@ -91,10 +91,6 @@ static njs_int_t njs_string_replace_subs
 static njs_int_t njs_string_replace_join(njs_vm_t *vm, njs_string_replace_t *r);
 static void njs_string_replacement_copy(njs_string_replace_part_t *string,
     const njs_value_t *value);
-static njs_int_t njs_string_encode(njs_vm_t *vm, njs_value_t *value,
-    const uint32_t *escape);
-static njs_int_t njs_string_decode(njs_vm_t *vm, njs_value_t *value,
-    const uint32_t *reserve);
 
 
 #define njs_base64_encoded_length(len)  (((len + 2) / 3) * 4)
@@ -4698,14 +4694,55 @@ const njs_object_init_t  njs_string_inst
 };
 
 
+njs_inline njs_bool_t
+njs_need_escape(const uint32_t *escape, uint32_t byte)
+{
+    return ((escape[byte >> 5] & ((uint32_t) 1 << (byte & 0x1f))) != 0);
+}
+
+
+njs_inline u_char *
+njs_string_encode(const uint32_t *escape, size_t size, const u_char *src,
+    u_char *dst)
+{
+    uint8_t              byte;
+    static const u_char  hex[16] = "0123456789ABCDEF";
+
+    do {
+        byte = *src++;
+
+        if (njs_need_escape(escape, byte)) {
+            *dst++ = '%';
+            *dst++ = hex[byte >> 4];
+            *dst++ = hex[byte & 0xf];
+
+        } else {
+            *dst++ = byte;
+        }
+
+        size--;
+
+    } while (size != 0);
+
+    return dst;
+}
+
+
 njs_int_t
 njs_string_encode_uri(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
-    njs_index_t unused)
+    njs_index_t component)
 {
-    njs_int_t    ret;
-    njs_value_t  *value;
-
-    static const uint32_t  escape[] = {
+    u_char             byte, *dst;
+    uint64_t           size;
+    uint32_t           cp, cp_low;
+    njs_int_t          ret;
+    njs_value_t        *value;
+    const u_char       *src, *end;
+    const uint32_t     *escape;
+    njs_string_prop_t  string;
+    u_char             encode[4];
+
+    static const uint32_t  escape_uri[] = {
         0xffffffff,  /* 1111 1111 1111 1111  1111 1111 1111 1111 */
 
                      /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
@@ -4723,33 +4760,7 @@ njs_string_encode_uri(njs_vm_t *vm, njs_
         0xffffffff,  /* 1111 1111 1111 1111  1111 1111 1111 1111 */
     };
 
-    if (nargs < 2) {
-        njs_set_undefined(&vm->retval);
-
-        return NJS_OK;
-    }
-
-    value = njs_argument(args, 1);
-
-    if (!njs_is_string(value)) {
-        ret = njs_value_to_string(vm, value, value);
-        if (njs_slow_path(ret != NJS_OK)) {
-            return ret;
-        }
-    }
-
-    return njs_string_encode(vm, value, escape);
-}
-
-
-njs_int_t
-njs_string_encode_uri_component(njs_vm_t *vm, njs_value_t *args,
-    njs_uint_t nargs, njs_index_t unused)
-{
-    njs_int_t    ret;
-    njs_value_t  *value;
-
-    static const uint32_t  escape[] = {
+    static const uint32_t  escape_uri_component[] = {
         0xffffffff,  /* 1111 1111 1111 1111  1111 1111 1111 1111 */
 
                      /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
@@ -4768,91 +4779,172 @@ njs_string_encode_uri_component(njs_vm_t
     };
 
     if (nargs < 2) {
-        njs_set_undefined(&vm->retval);
-
+        vm->retval = njs_string_undefined;
         return NJS_OK;
     }
 
     value = njs_argument(args, 1);
-
-    if (!njs_is_string(value)) {
-        ret = njs_value_to_string(vm, value, value);
-        if (njs_slow_path(ret != NJS_OK)) {
-            return ret;
-        }
-    }
-
-    return njs_string_encode(vm, value, escape);
-}
-
-
-static njs_int_t
-njs_string_encode(njs_vm_t *vm, njs_value_t *value, const uint32_t *escape)
-{
-    u_char               byte, *src, *dst;
-    size_t               n, size;
-    njs_str_t            string;
-    static const u_char  hex[16] = "0123456789ABCDEF";
+    ret = njs_value_to_string(vm, value, value);
+    if (njs_slow_path(ret != NJS_OK)) {
+        return ret;
+    }
+
+    escape = (component) ? escape_uri_component : escape_uri;
 
     njs_prefetch(escape);
 
-    njs_string_get(value, &string);
-
+    (void) njs_string_prop(&string, value);
+
+    size = 0;
     src = string.start;
-    n = 0;
-
-    for (size = string.length; size != 0; size--) {
-        byte = *src++;
-
-        if ((escape[byte >> 5] & ((uint32_t) 1 << (byte & 0x1f))) != 0) {
-            n += 2;
+    end = src + string.size;
+
+    if (string.length == 0 || string.length == string.size) {
+        /* Byte or ASCII string. */
+
+        while (src < end) {
+            byte = *src++;
+            size += njs_need_escape(escape, byte) ? 3 : 1;
         }
-    }
-
-    if (n == 0) {
+
+    } else {
+        /* UTF-8 string. */
+
+        while (src < end) {
+            cp = njs_utf8_decode(&src, end);
+
+            if (cp < 0x80 && !njs_need_escape(escape, cp)) {
+                size++;
+                continue;
+            }
+
+            if (njs_slow_path(njs_surrogate_any(cp))) {
+                if (src == end) {
+                    goto uri_error;
+                }
+
+                if (njs_surrogate_leading(cp)) {
+                    cp_low = njs_utf8_decode(&src, end);
+
+                    if (njs_slow_path(!njs_surrogate_trailing(cp_low))) {
+                        goto uri_error;
+                    }
+
+                    cp = njs_string_surrogate_pair(cp, cp_low);
+                    size += njs_utf8_size(cp) * 3;
+                    continue;
+                }
+
+                goto uri_error;
+            }
+
+            size += njs_utf8_size(cp) * 3;
+        }
+    }
+
+    if (size == 0) {
         /* GC: retain src. */
         vm->retval = *value;
         return NJS_OK;
     }
 
-    size = string.length + n;
-
     dst = njs_string_alloc(vm, &vm->retval, size, size);
     if (njs_slow_path(dst == NULL)) {
         return NJS_ERROR;
     }
 
-    size = string.length;
     src = string.start;
 
-    do {
-        byte = *src++;
-
-        if ((escape[byte >> 5] & ((uint32_t) 1 << (byte & 0x1f))) != 0) {
-            *dst++ = '%';
-            *dst++ = hex[byte >> 4];
-            *dst++ = hex[byte & 0xf];
-
-        } else {
-            *dst++ = byte;
+    if (string.length == 0 || string.length == string.size) {
+        /* Byte or ASCII string. */
+        (void) njs_string_encode(escape, string.size, src, dst);
+        return NJS_OK;
+    }
+
+    /* UTF-8 string. */
+
+    while (src < end) {
+        cp = njs_utf8_decode(&src, end);
+
+        if (njs_slow_path(njs_surrogate_leading(cp))) {
+            cp_low = njs_utf8_decode(&src, end);
+            cp = njs_string_surrogate_pair(cp, cp_low);
         }
 
-        size--;
-
-    } while (size != 0);
+        njs_utf8_encode(encode, cp);
+
+        dst = njs_string_encode(escape, njs_utf8_size(cp), encode, dst);
+    }
 
     return NJS_OK;
+
+uri_error:
+
+    njs_uri_error(vm, "malformed URI");
+
+    return NJS_ERROR;
+}
+
+
+njs_inline uint32_t
+njs_string_decode_uri_cp(const int8_t *hex, const u_char **start,
+    const u_char *end, njs_bool_t expect_percent)
+{
+    int8_t        d0, d1;
+    uint32_t      cp;
+    const u_char  *p;
+
+    cp = njs_utf8_decode(start, end);
+    if (njs_fast_path(cp != '%')) {
+        return expect_percent ? 0xFFFFFFFF: cp;
+    }
+
+    p = *start;
+
+    if (njs_slow_path((p + 1) >= end)) {
+        return 0xFFFFFFFF;
+    }
+
+    d0 = hex[*p++];
+    if (njs_slow_path(d0 < 0)) {
+        return 0xFFFFFFFF;
+    }
+
+    d1 = hex[*p++];
+    if (njs_slow_path(d1 < 0)) {
+        return 0xFFFFFFFF;
+    }
+
+    *start += 2;
+    return (d0 << 4) + d1;
+}
+
+
+njs_inline njs_bool_t
+njs_reserved(const uint32_t *reserve, uint32_t byte)
+{
+    return ((reserve[byte >> 5] & ((uint32_t) 1 << (byte & 0x1f))) != 0);
 }
 
 
 njs_int_t
 njs_string_decode_uri(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
-    njs_index_t unused)
+    njs_index_t component)
 {
-    njs_int_t    ret;
-    njs_value_t  *value;
-
-    static const uint32_t  reserve[] = {
+    u_char             *dst;
+    int64_t            size, length;
+    uint32_t           cp;
+    njs_int_t          ret;
+    njs_chb_t          chain;
+    njs_uint_t         i, n;
+    njs_bool_t         percent;
+    njs_value_t        *value;
+    const u_char       *src, *p, *end;
+    const uint32_t     *reserve;
+    njs_string_prop_t  string;
+    u_char             encode[4];
+
+    static const uint32_t  reserve_uri[] = {
         0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
 
                      /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
@@ -4870,33 +4962,7 @@ njs_string_decode_uri(njs_vm_t *vm, njs_
         0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
     };
 
-    if (nargs < 2) {
-        njs_set_undefined(&vm->retval);
-
-        return NJS_OK;
-    }
-
-    value = njs_argument(args, 1);
-
-    if (njs_slow_path(!njs_is_string(value))) {
-        ret = njs_value_to_string(vm, value, value);
-        if (njs_slow_path(ret != NJS_OK)) {
-            return ret;
-        }
-    }
-
-    return njs_string_decode(vm, value, reserve);
-}
-
-
-njs_int_t
-njs_string_decode_uri_component(njs_vm_t *vm, njs_value_t *args,
-    njs_uint_t nargs, njs_index_t unused)
-{
-    njs_int_t    ret;
-    njs_value_t  *value;
-
-    static const uint32_t  reserve[] = {
+    static const uint32_t  reserve_uri_component[] = {
         0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
 
                      /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
@@ -4914,35 +4980,6 @@ njs_string_decode_uri_component(njs_vm_t
         0x00000000,  /* 0000 0000 0000 0000  0000 0000 0000 0000 */
     };
 
-    if (nargs < 2) {
-        njs_set_undefined(&vm->retval);
-
-        return NJS_OK;
-    }
-
-    value = njs_argument(args, 1);
-
-    if (njs_slow_path(!njs_is_string(value))) {
-        ret = njs_value_to_string(vm, value, value);
-        if (njs_slow_path(ret != NJS_OK)) {
-            return ret;
-        }
-    }
-
-    return njs_string_decode(vm, &args[1], reserve);
-}
-
-
-static njs_int_t
-njs_string_decode(njs_vm_t *vm, njs_value_t *value, const uint32_t *reserve)
-{
-    int8_t               d0, d1;
-    u_char               byte, *start, *src, *dst;
-    size_t               n;
-    ssize_t              size, length;
-    njs_str_t            string;
-    njs_bool_t           utf8;
-
     static const int8_t  hex[256]
         njs_aligned(32) =
     {
@@ -4964,104 +5001,126 @@ njs_string_decode(njs_vm_t *vm, njs_valu
         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     };
 
-    njs_prefetch(&hex['0']);
+    if (nargs < 2) {
+        vm->retval = njs_string_undefined;
+        return NJS_OK;
+    }
+
+    value = njs_argument(args, 1);
+    ret = njs_value_to_string(vm, value, value);
+    if (njs_slow_path(ret != NJS_OK)) {
+        return ret;
+    }
+
+    reserve = component ? reserve_uri_component : reserve_uri;
+
     njs_prefetch(reserve);
-
-    njs_string_get(value, &string);
-
+    njs_prefetch(&hex['0']);
+
+    (void) njs_string_prop(&string, value);
+
+    length = 0;
     src = string.start;
-    n = 0;
-
-    for (size = string.length; size != 0; size--) {
-        byte = *src++;
-
-        if (byte == '%') {
-            size -= 2;
-
-            if (size <= 0) {
+    end = string.start + string.size;
+
+    njs_chb_init(&chain, vm->mem_pool);
+
+    while (src < end) {
+        percent = (src[0] == '%');
+        cp = njs_string_decode_uri_cp(hex, &src, end, 0);
+        if (njs_slow_path(cp == 0xFFFFFFFF)) {
+            goto uri_error;
+        }
+
+        if (!percent) {
+            length += 1;
+            dst = njs_chb_reserve(&chain, 4);
+            if (dst != NULL) {
+                njs_utf8_encode(dst, cp);
+                njs_chb_written(&chain, njs_utf8_size(cp));
+            }
+
+            continue;
+        }
+
+        if (cp < 0x80) {
+            if (njs_reserved(reserve, cp)) {
+                length += 3;
+                njs_chb_append(&chain, &src[-3], 3);
+
+            } else {
+                length += 1;
+                dst = njs_chb_reserve(&chain, 1);
+                if (dst != NULL) {
+                    *dst = cp;
+                    njs_chb_written(&chain, 1);
+                }
+            }
+
+            continue;
+        }
+
+        n = 1;
+
+        do {
+            n++;
+        } while (((cp << n) & 0x80));
+
+        if (njs_slow_path(n > 4)) {
+            goto uri_error;
+        }
+
+        encode[0] = cp;
+
+        for (i = 1; i < n; i++) {
+            cp = njs_string_decode_uri_cp(hex, &src, end, 1);
+            if (njs_slow_path(cp == 0xFFFFFFFF)) {
                 goto uri_error;
             }
 
-            d0 = hex[*src++];
-            if (d0 < 0) {
-                goto uri_error;
-            }
-
-            d1 = hex[*src++];
-            if (d1 < 0) {
-                goto uri_error;
-            }
-
-            byte = (d0 << 4) + d1;
-
-            if ((reserve[byte >> 5] & ((uint32_t) 1 << (byte & 0x1f))) == 0) {
-                n += 2;
-            }
+            encode[i] = cp;
+        }
+
+        p = encode;
+        cp = njs_utf8_decode(&p, p + n);
+        if (njs_slow_path(cp == 0xFFFFFFFF)) {
+            goto uri_error;
         }
-    }
-
-    if (n == 0) {
+
+        dst = njs_chb_reserve(&chain, 4);
+        if (dst != NULL) {
+            njs_utf8_encode(dst, cp);
+            njs_chb_written(&chain, njs_utf8_size(cp));
+        }
+
+        length += 1;
+    }
+
+    size = njs_chb_size(&chain);
+    if (njs_slow_path(size < 0)) {
+        njs_memory_error(vm);
+        return NJS_ERROR;
+    }
+
+    if (size == 0) {
         /* GC: retain src. */
         vm->retval = *value;
         return NJS_OK;
     }
 
-    n = string.length - n;
-
-    start = njs_string_alloc(vm, &vm->retval, n, n);
-    if (njs_slow_path(start == NULL)) {
+    dst = njs_string_alloc(vm, &vm->retval, size, length);
+    if (njs_slow_path(dst == NULL)) {
         return NJS_ERROR;
     }
 
-    utf8 = 0;
-    dst = start;
-    size = string.length;
-    src = string.start;
-
-    do {
-        byte = *src++;
-
-        if (byte == '%') {
-            size -= 2;
-
-            d0 = hex[*src++];
-            d1 = hex[*src++];
-            byte = (d0 << 4) + d1;
-
-            utf8 |= (byte >= 0x80);
-
-            if ((reserve[byte >> 5] & ((uint32_t) 1 << (byte & 0x1f))) != 0) {
-                *dst++ = '%';
-                *dst++ = src[-2];
-                byte = src[-1];
-            }
-        }
-
-        *dst++ = byte;
-        size--;
-
-    } while (size != 0);
-
-    if (utf8) {
-        length = njs_utf8_length(start, n);
-
-        if (length < 0) {
-            length = 0;
-        }
-
-        if (vm->retval.short_string.size != NJS_STRING_LONG) {
-            vm->retval.short_string.length = length;
-
-        } else {
-            vm->retval.long_string.data->length = length;
-        }
-    }
+    njs_chb_join_to(&chain, dst);
+    njs_chb_destroy(&chain);
 
     return NJS_OK;
 
 uri_error:
 
-    njs_uri_error(vm, NULL);
+    njs_uri_error(vm, "malformed URI");
 
     return NJS_ERROR;
 }
diff -r 9b70f038abfa -r c60911765952 src/njs_string.h
--- a/src/njs_string.h	Tue Feb 18 18:42:37 2020 +0300
+++ b/src/njs_string.h	Tue Feb 18 18:55:50 2020 +0300
@@ -26,6 +26,12 @@
 /* The maximum signed int32_t. */
 #define NJS_STRING_MAX_LENGTH  0x7fffffff
 
+#define njs_surrogate_leading(cp)    ((cp) >= 0xd800 && (cp) <= 0xdbff)
+
+#define njs_surrogate_trailing(cp)   ((cp) >= 0xdc00 && (cp) <= 0xdfff)
+
+#define njs_surrogate_any(cp)        ((cp) >= 0xd800 && (cp) <= 0xdfff)
+
 /* Converting surrogate pair to code point.  */
 #define njs_string_surrogate_pair(high, low)                                  \
     (0x10000 + ((high - 0xd800) << 10) + (low - 0xdc00))
@@ -184,13 +190,9 @@ void njs_string_offset_map_init(const u_
 double njs_string_to_index(const njs_value_t *value);
 const char *njs_string_to_c_string(njs_vm_t *vm, njs_value_t *value);
 njs_int_t njs_string_encode_uri(njs_vm_t *vm, njs_value_t *args,
-    njs_uint_t nargs, njs_index_t unused);
-njs_int_t njs_string_encode_uri_component(njs_vm_t *vm, njs_value_t *args,
-    njs_uint_t nargs, njs_index_t unused);
+    njs_uint_t nargs, njs_index_t component);
 njs_int_t njs_string_decode_uri(njs_vm_t *vm, njs_value_t *args,
-    njs_uint_t nargs, njs_index_t unused);
-njs_int_t njs_string_decode_uri_component(njs_vm_t *vm, njs_value_t *args,
-    njs_uint_t nargs, njs_index_t unused);
+    njs_uint_t nargs, njs_index_t component);
 
 njs_index_t njs_value_index(njs_vm_t *vm, const njs_value_t *src,
     njs_uint_t runtime);
diff -r 9b70f038abfa -r c60911765952 src/test/njs_unit_test.c
--- a/src/test/njs_unit_test.c	Tue Feb 18 18:42:37 2020 +0300
+++ b/src/test/njs_unit_test.c	Tue Feb 18 18:55:50 2020 +0300
@@ -7979,12 +7979,20 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("encodeURI.length"),
       njs_str("1")},
 
-    { njs_str("encodeURI()"),
-      njs_str("undefined")},
-
     { njs_str("encodeURI('012абв')"),
       njs_str("012%D0%B0%D0%B1%D0%B2")},
 
+    { njs_str("["
+              " String.fromCharCode(0xD800),"
+              " String.fromCharCode(0xD800) + 'a',"
+              " String.fromCharCode(0xDC00),"
+              " String.fromCharCode(0xDC00) + 'a',"
+              "].every(v=>{try { encodeURI(v)} catch(e) {return e.name == 'URIError'}})"),
+      njs_str("true")},
+
+    { njs_str("encodeURI(String.fromCharCode(0xD800)+String.fromCharCode(0xDC00))"),
+      njs_str("%F0%90%80%80")},
+
     { njs_str("encodeURI('~}|{`_^]\\\\[@?>=<;:/.-,+*)(\\\'&%$#\"! ')"),
       njs_str("~%7D%7C%7B%60_%5E%5D%5C%5B@?%3E=%3C;:/.-,+*)('&%25$#%22!%20")},
 
@@ -8003,18 +8011,6 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("decodeURI.length"),
       njs_str("1")},
 
-    { njs_str("decodeURI()"),
-      njs_str("undefined")},
-
-    { njs_str("decodeURI('%QQ')"),
-      njs_str("URIError")},
-
-    { njs_str("decodeURI('%')"),
-      njs_str("URIError")},
-
-    { njs_str("decodeURI('%0')"),
-      njs_str("URIError")},
-
     { njs_str("decodeURI('%00')"),
       njs_str("\0")},
 
@@ -8039,8 +8035,32 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("decodeURI('%D0%B0%D0%B1%D0%B2').length"),
       njs_str("3")},
 
-    { njs_str("decodeURI('%80%81%82').length"),
-      njs_str("3")},
+    { njs_str("["
+              " '%',"
+              " '%0',"
+              " '%QQ',"
+              " '%C0%10',"
+              " '%DC%C7',"
+              " '%80%81%82',"
+              " '%EF%5C%A0',"
+              " '%EF%A0%5E',"
+              " '%E0%EF%A0',"
+              " '%E0%A0%EF',"
+              " '%FF%A2%95%BB',"
+              "].every(v=>{try { decodeURI(v)} catch(e) {return e.name == 'URIError'}})"),
+      njs_str("true")},
+
+    { njs_str("["
+              " 'abc',"
+              " 'αβγ',"
+              " '𝟘𝟙𝟚𝟛',"
+              " String.fromCodePoint(0x20000),"
+              "].every(v=>decodeURI(encodeURI(v)) === v)"),
+      njs_str("true")},
+
+    { njs_str("[encodeURI, encodeURIComponent, decodeURI, decodeURIComponent]"
+              ".every(v=>{var r = v(); return (typeof r === 'string') && r === 'undefined';})"),
+      njs_str("true")},
 
     /* Functions. */
 


More information about the nginx-devel mailing list