[njs] Fixed Unicode argument trimming in parseInt().

Dmitry Volyntsev xeioex at nginx.com
Thu Jun 23 06:40:56 UTC 2022


details:   https://hg.nginx.org/njs/rev/20ee5213e3c4
branches:  
changeset: 1898:20ee5213e3c4
user:      Dmitry Volyntsev <xeioex at nginx.com>
date:      Wed Jun 22 23:37:27 2022 -0700
description:
Fixed Unicode argument trimming in parseInt().

diffstat:

 src/njs_number.c         |   67 ++++++++-------------
 src/njs_string.c         |  144 ++++++++++++++++++++++------------------------
 src/njs_string.h         |    8 ++
 src/test/njs_unit_test.c |    6 +
 4 files changed, 110 insertions(+), 115 deletions(-)

diffs (355 lines):

diff -r 33b1637aad33 -r 20ee5213e3c4 src/njs_number.c
--- a/src/njs_number.c	Wed Jun 22 23:37:16 2022 -0700
+++ b/src/njs_number.c	Wed Jun 22 23:37:27 2022 -0700
@@ -1064,42 +1064,31 @@ njs_int_t
 njs_number_parse_int(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
     njs_index_t unused)
 {
-    double        num;
-    int32_t       radix;
-    njs_int_t     ret;
-    njs_str_t     string;
-    njs_bool_t    minus, test_prefix;
-    njs_value_t   *value;
-    const u_char  *p, *end;
+    double             num;
+    int32_t            radix;
+    njs_int_t          ret;
+    njs_bool_t         minus, test_prefix;
+    njs_value_t        *value;
+    const u_char       *p, *end;
+    njs_string_prop_t  string;
 
     num = NAN;
 
-    if (nargs < 2) {
+    value = njs_arg(args, nargs, 1);
+
+    ret = njs_value_to_string(vm, value, value);
+    if (njs_slow_path(ret != NJS_OK)) {
+        return ret;
+    }
+
+    (void) njs_string_trim(value, &string, NJS_TRIM_START);
+
+    if (string.size == 0) {
         goto done;
     }
 
-    value = njs_argument(args, 1);
-
-    if (!njs_is_string(value)) {
-        ret = njs_value_to_string(vm, value, value);
-        if (njs_slow_path(ret != NJS_OK)) {
-            return ret;
-        }
-    }
-
-    njs_string_get(value, &string);
-
-    end = string.start + string.length;
-
-    for (p = string.start; p < end; p++) {
-        if (*p != ' ') {
-            goto found;
-        }
-    }
-
-    goto done;
-
-found:
+    p = string.start;
+    end = p + string.size;
 
     minus = 0;
 
@@ -1156,21 +1145,17 @@ njs_int_t
 njs_number_parse_float(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
     njs_index_t unused)
 {
-    double     num;
-    njs_int_t  ret;
-
-    num = NAN;
+    njs_int_t    ret;
+    njs_value_t  *value;
 
-    if (nargs > 1) {
-        ret = njs_value_to_string(vm, &args[1], &args[1]);
-        if (njs_slow_path(ret != NJS_OK)) {
-            return ret;
-        }
+    value = njs_arg(args, nargs, 1);
 
-        num = njs_string_to_number(&args[1], 1);
+    ret = njs_value_to_string(vm, value, value);
+    if (njs_slow_path(ret != NJS_OK)) {
+        return ret;
     }
 
-    njs_set_number(&vm->retval, num);
+    njs_set_number(&vm->retval, njs_string_to_number(value, 1));
 
     return NJS_OK;
 }
diff -r 33b1637aad33 -r 20ee5213e3c4 src/njs_string.c
--- a/src/njs_string.c	Wed Jun 22 23:37:16 2022 -0700
+++ b/src/njs_string.c	Wed Jun 22 23:37:27 2022 -0700
@@ -8,10 +8,6 @@
 #include <njs_main.h>
 
 
-#define NJS_TRIM_START  1
-#define NJS_TRIM_END    2
-
-
 static u_char   njs_basis64[] = {
     77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77,
     77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77,
@@ -2761,36 +2757,27 @@ njs_string_prototype_to_upper_case(njs_v
 }
 
 
-static njs_int_t
-njs_string_prototype_trim(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
-    njs_index_t mode)
+uint32_t
+njs_string_trim(const njs_value_t *value, njs_string_prop_t *string,
+    unsigned mode)
 {
-    uint32_t              u, trim, length;
-    njs_int_t             ret;
-    njs_value_t           *value;
+    uint32_t              cp, trim;
     const u_char          *p, *prev, *start, *end;
-    njs_string_prop_t     string;
     njs_unicode_decode_t  ctx;
 
-    value = njs_argument(args, 0);
-    ret = njs_string_object_validate(vm, value);
-    if (njs_slow_path(ret != NJS_OK)) {
-        return ret;
-    }
-
     trim = 0;
 
-    njs_string_prop(&string, value);
-
-    start = string.start;
-    end = string.start + string.size;
-
-    if (njs_is_byte_or_ascii_string(&string)) {
+    njs_string_prop(string, value);
+
+    start = string->start;
+    end = string->start + string->size;
+
+    if (njs_is_byte_or_ascii_string(string)) {
 
         if (mode & NJS_TRIM_START) {
             for ( ;; ) {
                 if (start == end) {
-                    goto empty;
+                    break;
                 }
 
                 if (njs_is_whitespace(*start)) {
@@ -2806,7 +2793,7 @@ njs_string_prototype_trim(njs_vm_t *vm, 
         if (mode & NJS_TRIM_END) {
             for ( ;; ) {
                 if (start == end) {
-                    goto empty;
+                    break;
                 }
 
                 end--;
@@ -2829,13 +2816,13 @@ njs_string_prototype_trim(njs_vm_t *vm, 
 
             for ( ;; ) {
                 if (start == end) {
-                    goto empty;
+                    break;
                 }
 
                 p = start;
-                u = njs_utf8_decode(&ctx, &start, end);
-
-                if (njs_utf8_is_whitespace(u)) {
+                cp = njs_utf8_decode(&ctx, &start, end);
+
+                if (njs_utf8_is_whitespace(cp)) {
                     trim++;
                     continue;
                 }
@@ -2852,14 +2839,14 @@ njs_string_prototype_trim(njs_vm_t *vm, 
 
             for ( ;; ) {
                 if (start == prev) {
-                    goto empty;
+                    break;
                 }
 
                 prev = njs_utf8_prev(prev);
                 p = prev;
-                u = njs_utf8_decode(&ctx, &p, end);
-
-                if (njs_utf8_is_whitespace(u)) {
+                cp = njs_utf8_decode(&ctx, &p, end);
+
+                if (njs_utf8_is_whitespace(cp)) {
                     trim++;
                     continue;
                 }
@@ -2870,22 +2857,52 @@ njs_string_prototype_trim(njs_vm_t *vm, 
         }
     }
 
+    if (start == end) {
+        string->length = 0;
+        string->size = 0;
+        return trim;
+    }
+
+    string->start = (u_char *) start;
+    string->size = end - start;
+
+    if (string->length != 0) {
+        string->length -= trim;
+    }
+
+    return trim;
+}
+
+
+static njs_int_t
+njs_string_prototype_trim(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
+    njs_index_t mode)
+{
+    uint32_t           trim;
+    njs_int_t          ret;
+    njs_value_t        *value;
+    njs_string_prop_t  string;
+
+    value = njs_argument(args, 0);
+    ret = njs_string_object_validate(vm, value);
+    if (njs_slow_path(ret != NJS_OK)) {
+        return ret;
+    }
+
+    trim = njs_string_trim(value, &string, mode);
+
     if (trim == 0) {
-        /* GC: retain. */
-        vm->retval = *value;
-
+        njs_value_assign(&vm->retval, value);
         return NJS_OK;
     }
 
-    length = (string.length != 0) ? string.length - trim : 0;
-
-    return njs_string_new(vm, &vm->retval, start, end - start, length);
-
-empty:
-
-    vm->retval = njs_string_empty;
-
-    return NJS_OK;
+    if (string.size == 0) {
+        njs_value_assign(&vm->retval, &njs_string_empty);
+        return NJS_OK;
+    }
+
+    return njs_string_new(vm, &vm->retval, string.start, string.size,
+                          string.length);
 }
 
 
@@ -3811,38 +3828,17 @@ njs_string_prototype_iterator_obj(njs_vm
 double
 njs_string_to_number(const njs_value_t *value, njs_bool_t parse_float)
 {
-    double                num;
-    size_t                size;
-    uint32_t              u;
-    njs_bool_t            minus;
-    const u_char          *p, *start, *end;
-    njs_unicode_decode_t  ctx;
+    double             num;
+    njs_bool_t         minus;
+    const u_char       *p, *start, *end;
+    njs_string_prop_t  string;
 
     const size_t  infinity = njs_length("Infinity");
 
-    size = value->short_string.size;
-
-    if (size != NJS_STRING_LONG) {
-        p = value->short_string.start;
-
-    } else {
-        size = value->long_string.size;
-        p = value->long_string.data->start;
-    }
-
-    end = p + size;
-
-    njs_utf8_decode_init(&ctx);
-
-    while (p < end) {
-        start = p;
-        u = njs_utf8_decode(&ctx, &p, end);
-
-        if (!njs_utf8_is_whitespace(u)) {
-            p = start;
-            break;
-        }
-    }
+    (void) njs_string_trim(value, &string, NJS_TRIM_START);
+
+    p = string.start;
+    end = p + string.size;
 
     if (p == end) {
         return parse_float ? NAN : 0.0;
diff -r 33b1637aad33 -r 20ee5213e3c4 src/njs_string.h
--- a/src/njs_string.h	Wed Jun 22 23:37:16 2022 -0700
+++ b/src/njs_string.h	Wed Jun 22 23:37:27 2022 -0700
@@ -98,6 +98,12 @@ typedef enum {
 } njs_utf8_t;
 
 
+typedef enum {
+    NJS_TRIM_START = 1,
+    NJS_TRIM_END = 2,
+} njs_trim_t;
+
+
 njs_inline njs_bool_t
 njs_is_byte_string(njs_string_prop_t *string)
 {
@@ -224,6 +230,8 @@ njs_int_t njs_string_decode_base64(njs_v
 njs_int_t njs_string_decode_base64url(njs_vm_t *vm, njs_value_t *value,
     const njs_str_t *src);
 void njs_string_truncate(njs_value_t *value, uint32_t size, uint32_t length);
+uint32_t njs_string_trim(const njs_value_t *value, njs_string_prop_t *string,
+    unsigned mode);
 void njs_string_copy(njs_value_t *dst, njs_value_t *src);
 njs_int_t njs_string_validate(njs_vm_t *vm, njs_string_prop_t *string,
     njs_value_t *value);
diff -r 33b1637aad33 -r 20ee5213e3c4 src/test/njs_unit_test.c
--- a/src/test/njs_unit_test.c	Wed Jun 22 23:37:16 2022 -0700
+++ b/src/test/njs_unit_test.c	Wed Jun 22 23:37:27 2022 -0700
@@ -16744,6 +16744,12 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("parseInt(' 123')"),
       njs_str("123") },
 
+    { njs_str("parseInt('\\u0009123')"),
+      njs_str("123") },
+
+    { njs_str("parseInt('\\u200A123')"),
+      njs_str("123") },
+
     { njs_str("parseInt('1010', 2)"),
       njs_str("10") },
 



More information about the nginx-devel mailing list