[njs] Fixed creation of long UTF8 strings.

Dmitry Volyntsev xeioex at nginx.com
Wed Feb 20 18:05:16 UTC 2019


details:   https://hg.nginx.org/njs/rev/46144bf0a378
branches:  
changeset: 789:46144bf0a378
user:      Dmitry Volyntsev <xeioex at nginx.com>
date:      Wed Feb 20 18:25:01 2019 +0300
description:
Fixed creation of long UTF8 strings.

This closes issue #98 on GitHub.

diffstat:

 njs/njs_array.c          |   4 ++--
 njs/njs_extern.c         |   4 ++--
 njs/njs_json.c           |  35 +++++++++++++++++++++++++++--------
 njs/njs_regexp.c         |   2 +-
 njs/njs_string.c         |  18 +++++-------------
 njs/test/njs_unit_test.c |  19 +++++++++++++++++--
 6 files changed, 54 insertions(+), 28 deletions(-)

diffs (235 lines):

diff -r 82794813ce4c -r 46144bf0a378 njs/njs_array.c
--- a/njs/njs_array.c	Sat Feb 16 23:18:43 2019 +0800
+++ b/njs/njs_array.c	Wed Feb 20 18:25:01 2019 +0300
@@ -192,8 +192,8 @@ njs_array_string_add(njs_vm_t *vm, njs_a
     ret = njs_array_expand(vm, array, 0, 1);
 
     if (nxt_fast_path(ret == NXT_OK)) {
-        return njs_string_create(vm, &array->start[array->length++],
-                                 start, size, length);
+        return njs_string_new(vm, &array->start[array->length++], start, size,
+                              length);
     }
 
     return ret;
diff -r 82794813ce4c -r 46144bf0a378 njs/njs_extern.c
--- a/njs/njs_extern.c	Sat Feb 16 23:18:43 2019 +0800
+++ b/njs/njs_extern.c	Wed Feb 20 18:25:01 2019 +0300
@@ -291,8 +291,8 @@ njs_extern_keys_array(njs_vm_t *vm, cons
             break;
         }
 
-        ret = njs_string_create(vm, &keys->start[n++], ext->name.start,
-                                ext->name.length, 0);
+        ret = njs_string_new(vm, &keys->start[n++], ext->name.start,
+                             ext->name.length, 0);
 
         if (ret != NXT_OK) {
             return NULL;
diff -r 82794813ce4c -r 46144bf0a378 njs/njs_json.c
--- a/njs/njs_json.c	Sat Feb 16 23:18:43 2019 +0800
+++ b/njs/njs_json.c	Wed Feb 20 18:25:01 2019 +0300
@@ -628,6 +628,7 @@ njs_json_parse_string(njs_json_parse_ctx
 
     start = p + 1;
 
+    dst = NULL;
     state = 0;
     surplus = 0;
 
@@ -807,11 +808,15 @@ njs_json_parse_string(njs_json_parse_ctx
         length = 0;
     }
 
-    ret = njs_string_create(ctx->vm, value, (u_char *) start, size, length);
+    ret = njs_string_new(ctx->vm, value, (u_char *) start, size, length);
     if (nxt_slow_path(ret != NXT_OK)) {
         return NULL;
     }
 
+    if (dst != NULL) {
+        nxt_mp_free(ctx->pool, dst);
+    }
+
     return last + 1;
 }
 
@@ -1188,6 +1193,8 @@ static njs_ret_t
 njs_json_stringify_continuation(njs_vm_t *vm, njs_value_t *args,
     nxt_uint_t nargs, njs_index_t unused)
 {
+    u_char                *start;
+    size_t                size;
     ssize_t               length;
     nxt_int_t             i;
     njs_ret_t             ret;
@@ -1406,25 +1413,37 @@ done:
      */
     if (str.length <= nxt_length("{\n\n}")) {
         vm->retval = njs_value_void;
-        return NXT_OK;
+        goto release;
     }
 
     /* Stripping the wrapper's data. */
 
-    str.start += nxt_length("{\"\":");
-    str.length -= nxt_length("{\"\":}");
+    start = str.start;
+    size = str.length;
+
+    start += nxt_length("{\"\":");
+    size -= nxt_length("{\"\":}");
 
     if (stringify->space.length != 0) {
-        str.start += nxt_length("\n ");
-        str.length -= nxt_length("\n \n");
+        start += nxt_length("\n ");
+        size -= nxt_length("\n \n");
     }
 
-    length = nxt_utf8_length(str.start, str.length);
+    length = nxt_utf8_length(start, size);
     if (nxt_slow_path(length < 0)) {
         length = 0;
     }
 
-    return njs_string_create(vm, &vm->retval, str.start, str.length, length);
+    ret = njs_string_new(vm, &vm->retval, start, size, length);
+    if (nxt_slow_path(ret != NXT_OK)) {
+        goto memory_error;
+    }
+
+release:
+
+    nxt_mp_free(vm->mem_pool, str.start);
+
+    return NXT_OK;
 
 memory_error:
 
diff -r 82794813ce4c -r 46144bf0a378 njs/njs_regexp.c
--- a/njs/njs_regexp.c	Sat Feb 16 23:18:43 2019 +0800
+++ b/njs/njs_regexp.c	Wed Feb 20 18:25:01 2019 +0300
@@ -798,7 +798,7 @@ njs_regexp_string_create(njs_vm_t *vm, n
 {
     length = (length >= 0) ? length : 0;
 
-    return njs_string_create(vm, value, start, size, length);
+    return njs_string_new(vm, value, start, size, length);
 }
 
 
diff -r 82794813ce4c -r 46144bf0a378 njs/njs_string.c
--- a/njs/njs_string.c	Sat Feb 16 23:18:43 2019 +0800
+++ b/njs/njs_string.c	Wed Feb 20 18:25:01 2019 +0300
@@ -907,14 +907,6 @@ njs_string_prototype_from_utf8(njs_vm_t 
     length = nxt_utf8_length(string.start, slice.length);
 
     if (length >= 0) {
-
-        if (length < NJS_STRING_MAP_STRIDE || (size_t) length == slice.length) {
-            /* ASCII or short UTF-8 string. */
-            return njs_string_create(vm, &vm->retval, string.start,
-                                     slice.length, length);
-        }
-
-        /* Long UTF-8 string. */
         return njs_string_new(vm, &vm->retval, string.start, slice.length,
                               length);
     }
@@ -2699,8 +2691,8 @@ njs_string_match_multiple(njs_vm_t *vm, 
 
                 length = njs_string_length(utf8, start, size);
 
-                ret = njs_string_create(vm, &array->start[array->length],
-                                        start, size, length);
+                ret = njs_string_new(vm, &array->start[array->length],
+                                     start, size, length);
                 if (nxt_slow_path(ret != NXT_OK)) {
                     return ret;
                 }
@@ -3136,7 +3128,7 @@ njs_string_replace_regexp_function(njs_v
 
         length = njs_string_length(r->utf8, start, size);
 
-        ret = njs_string_create(vm, &arguments[i], start, size, length);
+        ret = njs_string_new(vm, &arguments[i], start, size, length);
         if (nxt_slow_path(ret != NXT_OK)) {
             return NXT_ERROR;
         }
@@ -3148,8 +3140,8 @@ njs_string_replace_regexp_function(njs_v
     /* The whole string being examined. */
     length = njs_string_length(r->utf8, r->part[0].start, r->part[0].size);
 
-    ret = njs_string_create(vm, &arguments[n + 2], r->part[0].start,
-                            r->part[0].size, length);
+    ret = njs_string_new(vm, &arguments[n + 2], r->part[0].start,
+                         r->part[0].size, length);
 
     if (nxt_slow_path(ret != NXT_OK)) {
         return NXT_ERROR;
diff -r 82794813ce4c -r 46144bf0a378 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c	Sat Feb 16 23:18:43 2019 +0800
+++ b/njs/test/njs_unit_test.c	Wed Feb 20 18:25:01 2019 +0300
@@ -4993,6 +4993,9 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("'$1,$2'.replace(/(\\$(\\d))/g, '$$1-$1$2')"),
       nxt_string("$1-$11,$1-$22") },
 
+    { nxt_string("('β' + 'α'.repeat(33)+'β').replace(/(α+)(β+)/, function(m, p1) { return p1[32]; })"),
+      nxt_string("βα") },
+
     { nxt_string("'abcdefgh'.match()"),
       nxt_string("") },
 
@@ -5062,6 +5065,9 @@ static njs_unit_test_t  njs_test[] =
                  "a +' '+ a.length"),
       nxt_string("αα 2") },
 
+    { nxt_string("('β' + 'α'.repeat(33) +'β').match(/α+/g)[0][32]"),
+      nxt_string("α") },
+
     { nxt_string("var a = '\\u00CE\\u00B1'.toBytes().match(/α/g)[0] + 'α';"
                  "a +' '+ a.length"),
       nxt_string("αα 4") },
@@ -5090,6 +5096,9 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("'囲α碁α織'.split('α')"),
       nxt_string("囲,碁,織") },
 
+    { nxt_string("('α'+'β'.repeat(33)).repeat(2).split('α')[1][32]"),
+      nxt_string("β") },
+
     { nxt_string("'abc'.split('abc')"),
       nxt_string(",") },
 
@@ -6364,6 +6373,9 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("var r = new RegExp('abc', 'i'); r.test('00ABC11')"),
       nxt_string("true") },
 
+    { nxt_string("RegExp('α'.repeat(33)).toString()[32]"),
+      nxt_string("α") },
+
     { nxt_string("new RegExp('', 'x')"),
       nxt_string("SyntaxError: Invalid RegExp flags \"x\"") },
 
@@ -10154,8 +10166,8 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("JSON.parse('\"абвгдеёжзийкл\"').length"),
       nxt_string("13") },
 
-    { nxt_string("JSON.parse('\"абвгдеёжзийкл\"').length"),
-      nxt_string("13") },
+    { nxt_string("JSON.parse('[\"' + 'α'.repeat(33) + '\"]')[0][32]"),
+      nxt_string("α") },
 
     { nxt_string("JSON.parse('\"\\\\u03B1\"')"),
       nxt_string("α") },
@@ -10509,6 +10521,9 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("JSON.stringify('α𐐀z'.repeat(10)).length"),
       nxt_string("32") },
 
+    { nxt_string("JSON.stringify('α'.repeat(33))[32]"),
+      nxt_string("α") },
+
     { nxt_string("JSON.stringify('a\\nbc')"),
       nxt_string("\"a\\nbc\"") },
 


More information about the nginx-devel mailing list