[njs] Removed code for byte strings in built-in functions.

Dmitry Volyntsev xeioex at nginx.com
Wed May 22 16:40:55 UTC 2024


details:   https://hg.nginx.org/njs/rev/2d098d2a1c85
branches:  
changeset: 2333:2d098d2a1c85
user:      Dmitry Volyntsev <xeioex at nginx.com>
date:      Tue May 21 23:41:10 2024 -0700
description:
Removed code for byte strings in built-in functions.

diffstat:

 src/njs_array.c          |  52 ++++++++------------------------------
 src/njs_iterator.c       |   2 +-
 src/njs_json.c           |   9 +-----
 src/njs_object.c         |   4 +-
 src/njs_regexp.c         |  16 +++--------
 src/njs_string.c         |  65 +++++++++++++++++------------------------------
 src/njs_string.h         |  20 ++-----------
 src/njs_vmcode.c         |   9 ------
 src/test/njs_unit_test.c |  30 +---------------------
 9 files changed, 49 insertions(+), 158 deletions(-)

diffs (596 lines):

diff -r 27da19960b72 -r 2d098d2a1c85 src/njs_array.c
--- a/src/njs_array.c	Tue May 21 23:38:19 2024 -0700
+++ b/src/njs_array.c	Tue May 21 23:41:10 2024 -0700
@@ -826,29 +826,15 @@ njs_array_prototype_slice_copy(njs_vm_t 
             src = string.start;
             end = src + string.size;
 
-            if (string.length == 0) {
-                /* Byte string. */
-                do {
-                    value = &array->start[n++];
-                    dst = njs_string_short_start(value);
-                    *dst = *src++;
-                    njs_string_short_set(value, 1, 0);
-
-                    length--;
-                } while (length != 0);
-
-            } else {
-                /* UTF-8 or ASCII string. */
-                do {
-                    value = &array->start[n++];
-                    dst = njs_string_short_start(value);
-                    dst = njs_utf8_copy(dst, &src, end);
-                    size = dst - njs_string_short_start(value);
-                    njs_string_short_set(value, size, 1);
-
-                    length--;
-                } while (length != 0);
-            }
+            do {
+                value = &array->start[n++];
+                dst = njs_string_short_start(value);
+                dst = njs_utf8_copy(dst, &src, end);
+                size = dst - njs_string_short_start(value);
+                njs_string_short_set(value, size, 1);
+
+                length--;
+            } while (length != 0);
 
         } else if (njs_is_object(this)) {
 
@@ -1647,11 +1633,10 @@ static njs_int_t
 njs_array_prototype_join(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
     njs_index_t unused, njs_value_t *retval)
 {
-    u_char             *p, *last;
+    u_char             *p;
     int64_t            i, size, len, length;
     njs_int_t          ret;
     njs_chb_t          chain;
-    njs_utf8_t         utf8;
     njs_value_t        *value, *this, entry;
     njs_string_prop_t  separator, string;
 
@@ -1684,7 +1669,6 @@ njs_array_prototype_join(njs_vm_t *vm, n
     }
 
     length = 0;
-    utf8 = njs_is_byte_string(&separator) ? NJS_STRING_BYTE : NJS_STRING_UTF8;
 
     ret = njs_object_length(vm, this, &len);
     if (njs_slow_path(ret == NJS_ERROR)) {
@@ -1708,29 +1692,15 @@ njs_array_prototype_join(njs_vm_t *vm, n
 
         if (!njs_is_null_or_undefined(value)) {
             if (!njs_is_string(value)) {
-                last = njs_chb_current(&chain);
-
                 ret = njs_value_to_chain(vm, &chain, value);
                 if (njs_slow_path(ret < NJS_OK)) {
                     return ret;
                 }
 
-                if (last != njs_chb_current(&chain) && ret == 0) {
-                    /*
-                     * Appended values was a byte string.
-                     */
-                    utf8 = NJS_STRING_BYTE;
-                }
-
                 length += ret;
 
             } else {
                 (void) njs_string_prop(&string, value);
-
-                if (njs_is_byte_string(&string)) {
-                    utf8 = NJS_STRING_BYTE;
-                }
-
                 length += string.length;
                 njs_chb_append(&chain, string.start, string.size);
             }
@@ -1755,7 +1725,7 @@ njs_array_prototype_join(njs_vm_t *vm, n
 
     length -= separator.length;
 
-    p = njs_string_alloc(vm, retval, size, utf8 ? length : 0);
+    p = njs_string_alloc(vm, retval, size, length);
     if (njs_slow_path(p == NULL)) {
         return NJS_ERROR;
     }
diff -r 27da19960b72 -r 2d098d2a1c85 src/njs_iterator.c
--- a/src/njs_iterator.c	Tue May 21 23:38:19 2024 -0700
+++ b/src/njs_iterator.c	Tue May 21 23:41:10 2024 -0700
@@ -355,7 +355,7 @@ njs_object_iterate(njs_vm_t *vm, njs_ite
         end = p + string_prop.size;
 
         if ((size_t) length == string_prop.size) {
-            /* Byte or ASCII string. */
+            /* ASCII string. */
 
             for (i = from; i < to; i++) {
                 /* This cannot fail. */
diff -r 27da19960b72 -r 2d098d2a1c85 src/njs_json.c
--- a/src/njs_json.c	Tue May 21 23:38:19 2024 -0700
+++ b/src/njs_json.c	Tue May 21 23:41:10 2024 -0700
@@ -226,13 +226,6 @@ njs_json_stringify(njs_vm_t *vm, njs_val
     switch (space->type) {
     case NJS_STRING:
         length = njs_string_prop(&prop, space);
-
-        if (njs_is_byte_string(&prop)) {
-            njs_internal_error(vm, "space argument cannot be"
-                               " a byte string");
-            return NJS_ERROR;
-        }
-
         p = njs_string_offset(&prop, njs_min(length, 10));
 
         stringify->space.start = prop.start;
@@ -1552,7 +1545,7 @@ njs_json_append_string(njs_chb_t *chain,
             dst = njs_utf8_copy(dst, &p, end);
 
         } else {
-            /* Byte or ASCII string. */
+            /* ASCII string. */
             *dst++ = *p++;
         }
 
diff -r 27da19960b72 -r 2d098d2a1c85 src/njs_object.c
--- a/src/njs_object.c	Tue May 21 23:38:19 2024 -0700
+++ b/src/njs_object.c	Tue May 21 23:41:10 2024 -0700
@@ -741,7 +741,7 @@ njs_object_enumerate_string(njs_vm_t *vm
 
     case NJS_ENUM_VALUES:
         if (str_prop.size == (size_t) len) {
-            /* Byte or ASCII string. */
+            /* ASCII string. */
 
             for (i = 0; i < len; i++) {
                 begin = njs_string_short_start(item);
@@ -774,7 +774,7 @@ njs_object_enumerate_string(njs_vm_t *vm
 
     case NJS_ENUM_BOTH:
         if (str_prop.size == (size_t) len) {
-            /* Byte or ASCII string. */
+            /* ASCII string. */
 
             for (i = 0; i < len; i++) {
 
diff -r 27da19960b72 -r 2d098d2a1c85 src/njs_regexp.c
--- a/src/njs_regexp.c	Tue May 21 23:38:19 2024 -0700
+++ b/src/njs_regexp.c	Tue May 21 23:41:10 2024 -0700
@@ -748,12 +748,6 @@ njs_regexp_prototype_to_string(njs_vm_t 
     size = source_string.size + flags_string.size + njs_length("//");
     length = source_string.length + flags_string.length + njs_length("//");
 
-    if (njs_is_byte_string(&source_string)
-        || njs_is_byte_string(&flags_string))
-    {
-        length = 0;
-    }
-
     p = njs_string_alloc(vm, retval, size, length);
     if (njs_slow_path(p == NULL)) {
         return NJS_ERROR;
@@ -908,13 +902,13 @@ njs_regexp_builtin_exec(njs_vm_t *vm, nj
         goto not_found;
     }
 
-    utf8 = NJS_STRING_BYTE;
+    utf8 = NJS_STRING_ASCII;
     type = NJS_REGEXP_BYTE;
 
     if (string.length != 0) {
         type = NJS_REGEXP_UTF8;
 
-        if (string.length != string.size) {
+        if (!njs_is_ascii_string(&string)) {
             utf8 = NJS_STRING_UTF8;
         }
     }
@@ -1693,7 +1687,7 @@ njs_regexp_prototype_symbol_split(njs_vm
     arguments[0] = *rx;
 
     if (!sticky) {
-        length = njs_is_byte_string(&s) ? 0 : s.length + 1;
+        length = s.length + 1;
 
         dst = njs_string_alloc(vm, &arguments[1], s.size + 1, length);
         if (njs_slow_path(dst == NULL)) {
@@ -1749,9 +1743,9 @@ njs_regexp_prototype_symbol_split(njs_vm
         goto single;
     }
 
-    utf8 = NJS_STRING_BYTE;
+    utf8 = NJS_STRING_ASCII;
 
-    if (!njs_is_byte_or_ascii_string(&s)) {
+    if (!njs_is_ascii_string(&s)) {
         utf8 = NJS_STRING_UTF8;
     }
 
diff -r 27da19960b72 -r 2d098d2a1c85 src/njs_string.c
--- a/src/njs_string.c	Tue May 21 23:38:19 2024 -0700
+++ b/src/njs_string.c	Tue May 21 23:41:10 2024 -0700
@@ -186,6 +186,8 @@ njs_string_new(njs_vm_t *vm, njs_value_t
 {
     u_char  *p;
 
+    njs_assert((size == 0 && length == 0) || (size != 0 && length != 0));
+
     p = njs_string_alloc(vm, value, size, length);
 
     if (njs_fast_path(p != NULL)) {
@@ -880,10 +882,6 @@ njs_string_prototype_concat(njs_vm_t *vm
 
         size += string.size;
         length += string.length;
-
-        if (njs_is_byte_string(&string)) {
-            mask = 0;
-        }
     }
 
     length &= mask;
@@ -1238,15 +1236,10 @@ njs_string_slice_string_prop(njs_string_
     start = string->start;
 
     if (string->size == slice->string_length) {
-        /* Byte or ASCII string. */
+        /* ASCII string. */
         start += slice->start;
         size = slice->length;
 
-        if (string->length == 0) {
-            /* Byte string. */
-            length = 0;
-        }
-
     } else {
         /* UTF-8 string. */
         end = start + string->size;
@@ -1327,7 +1320,7 @@ njs_string_prototype_char_code_at(njs_vm
     }
 
     if (length == string.size) {
-        /* Byte or ASCII string. */
+        /* ASCII string. */
         code = string.start[index];
 
     } else {
@@ -1715,20 +1708,20 @@ njs_string_index_of(njs_string_prop_t *s
     size_t        index, length, search_length;
     const u_char  *p, *end;
 
-    length = (string->length == 0) ? string->size : string->length;
-
-    if (njs_slow_path(search->size == 0)) {
+    length = string->length;
+
+    if (njs_slow_path(search->length == 0)) {
         return (from < length) ? from : length;
     }
 
     index = from;
-    search_length = (search->length == 0) ? search->size : search->length;
+    search_length = search->length;
 
     if (length - index >= search_length) {
         end = string->start + string->size;
 
         if (string->size == length) {
-            /* Byte or ASCII string. */
+            /* ASCII string. */
 
             end -= (search->size - 1);
 
@@ -1863,7 +1856,7 @@ njs_string_prototype_last_index_of(njs_v
     end = string.start + string.size;
 
     if (string.size == (size_t) length) {
-        /* Byte or ASCII string. */
+        /* ASCII string. */
 
         p = &string.start[index];
 
@@ -2209,7 +2202,7 @@ njs_string_prototype_to_lower_case(njs_v
 
     (void) njs_string_prop(&string, njs_argument(args, 0));
 
-    if (njs_is_byte_or_ascii_string(&string)) {
+    if (njs_is_ascii_string(&string)) {
 
         p = njs_string_alloc(vm, retval, string.size, string.length);
         if (njs_slow_path(p == NULL)) {
@@ -2280,7 +2273,7 @@ njs_string_prototype_to_upper_case(njs_v
 
     (void) njs_string_prop(&string, njs_argument(args, 0));
 
-    if (njs_is_byte_or_ascii_string(&string)) {
+    if (njs_is_ascii_string(&string)) {
 
         p = njs_string_alloc(vm, retval, string.size, string.length);
         if (njs_slow_path(p == NULL)) {
@@ -2343,7 +2336,7 @@ njs_string_trim(const njs_value_t *value
     start = string->start;
     end = string->start + string->size;
 
-    if (njs_is_byte_or_ascii_string(string)) {
+    if (njs_is_ascii_string(string)) {
 
         if (mode & NJS_TRIM_START) {
             for ( ;; ) {
@@ -2831,14 +2824,13 @@ njs_string_match_multiple(njs_vm_t *vm, 
 
     (void) njs_string_prop(&string, &args[0]);
 
-    utf8 = NJS_STRING_BYTE;
+    utf8 = NJS_STRING_ASCII;
     type = NJS_REGEXP_BYTE;
 
     if (string.length != 0) {
-        utf8 = NJS_STRING_ASCII;
         type = NJS_REGEXP_UTF8;
 
-        if (string.length != string.size) {
+        if (!njs_is_ascii_string(&string)) {
             utf8 = NJS_STRING_UTF8;
         }
     }
@@ -2877,7 +2869,7 @@ njs_string_match_multiple(njs_vm_t *vm, 
 
             if (c1 == 0) {
                 if (start < end) {
-                    p = (utf8 != NJS_STRING_BYTE) ? njs_utf8_next(start, end)
+                    p = (utf8 == NJS_STRING_UTF8) ? njs_utf8_next(start, end)
                                                   : start + 1;
                     string.size = end - p;
 
@@ -3006,12 +2998,10 @@ njs_string_prototype_split(njs_vm_t *vm,
         goto done;
     }
 
-    utf8 = NJS_STRING_BYTE;
+    utf8 = NJS_STRING_ASCII;
 
     if (string.length != 0) {
-        utf8 = NJS_STRING_ASCII;
-
-        if (string.length != string.size) {
+        if (!njs_is_ascii_string(&string)) {
             utf8 = NJS_STRING_UTF8;
         }
     }
@@ -3037,7 +3027,7 @@ found:
         /* Empty split string. */
 
         if (p == next) {
-            p = (utf8 != NJS_STRING_BYTE) ? njs_utf8_next(p, end)
+            p = (utf8 == NJS_STRING_UTF8) ? njs_utf8_next(p, end)
                                           : p + 1;
             next = p;
         }
@@ -3241,7 +3231,7 @@ njs_string_prototype_replace(njs_vm_t *v
     njs_int_t          ret;
     njs_str_t          str;
     njs_chb_t          chain;
-    njs_bool_t         is_byte_or_ascii_string;
+    njs_bool_t         is_ascii_string;
     njs_value_t        *this, *search, *replace;
     njs_value_t        search_lvalue, replace_lvalue, replacer, value,
                        arguments[3];
@@ -3372,13 +3362,6 @@ njs_string_prototype_replace(njs_vm_t *v
         size = string.size + ret_string.size - s.size;
         length = string.length + ret_string.length - s.length;
 
-        if (njs_is_byte_string(&string)
-            || njs_is_byte_string(&s)
-            || njs_is_byte_string(&ret_string))
-        {
-            length = 0;
-        }
-
         r = njs_string_alloc(vm, retval, size, length);
         if (njs_slow_path(r == NULL)) {
             return NJS_ERROR;
@@ -3395,7 +3378,7 @@ njs_string_prototype_replace(njs_vm_t *v
 
     p_start = string.start;
     increment = s.length != 0 ? s.length : 1;
-    is_byte_or_ascii_string = njs_is_byte_or_ascii_string(&string);
+    is_ascii_string = njs_is_ascii_string(&string);
 
     do {
         if (func_replace == NULL) {
@@ -3422,7 +3405,7 @@ njs_string_prototype_replace(njs_vm_t *v
             }
         }
 
-        if (is_byte_or_ascii_string) {
+        if (is_ascii_string) {
             p = string.start + pos;
 
         } else {
@@ -3815,7 +3798,7 @@ njs_string_encode_uri(njs_vm_t *vm, njs_
     src = string.start;
     end = src + string.size;
 
-    if (njs_is_byte_or_ascii_string(&string)) {
+    if (njs_is_ascii_string(&string)) {
 
         while (src < end) {
             byte = *src++;
@@ -3871,7 +3854,7 @@ njs_string_encode_uri(njs_vm_t *vm, njs_
 
     src = string.start;
 
-    if (njs_is_byte_or_ascii_string(&string)) {
+    if (njs_is_ascii_string(&string)) {
         (void) njs_string_encode(escape, string.size, src, dst);
         return NJS_OK;
     }
diff -r 27da19960b72 -r 2d098d2a1c85 src/njs_string.h
--- a/src/njs_string.h	Tue May 21 23:38:19 2024 -0700
+++ b/src/njs_string.h	Tue May 21 23:41:10 2024 -0700
@@ -91,8 +91,7 @@ typedef struct {
 
 
 typedef enum {
-    NJS_STRING_BYTE = 0,
-    NJS_STRING_ASCII,
+    NJS_STRING_ASCII = 0,
     NJS_STRING_UTF8,
 } njs_utf8_t;
 
@@ -179,16 +178,9 @@ njs_int_t njs_string_get_substitution(nj
 
 
 njs_inline njs_bool_t
-njs_is_byte_string(njs_string_prop_t *string)
+njs_is_ascii_string(njs_string_prop_t *string)
 {
-    return (string->length == 0 && string->size != 0);
-}
-
-
-njs_inline njs_bool_t
-njs_is_byte_or_ascii_string(njs_string_prop_t *string)
-{
-    return (string->length == 0 || string->length == string->size);
+    return string->length == string->size;
 }
 
 
@@ -198,10 +190,6 @@ njs_string_calc_length(njs_utf8_t utf8, 
     ssize_t  length;
 
     switch (utf8) {
-
-    case NJS_STRING_BYTE:
-        return 0;
-
     case NJS_STRING_ASCII:
         return size;
 
@@ -251,7 +239,7 @@ njs_string_encode(const uint32_t *escape
 njs_inline const u_char *
 njs_string_offset(njs_string_prop_t *string, int64_t index)
 {
-    if (njs_is_byte_or_ascii_string(string)) {
+    if (njs_is_ascii_string(string)) {
         return string->start + index;
     }
 
diff -r 27da19960b72 -r 2d098d2a1c85 src/njs_vmcode.c
--- a/src/njs_vmcode.c	Tue May 21 23:38:19 2024 -0700
+++ b/src/njs_vmcode.c	Tue May 21 23:41:10 2024 -0700
@@ -2318,15 +2318,6 @@ njs_string_concat(njs_vm_t *vm, njs_valu
     (void) njs_string_prop(&string2, val2);
 
     length = string1.length + string2.length;
-
-    /*
-     * A result of concatenation of Byte and ASCII or UTF-8 strings
-     * is a Byte string.
-     */
-    if (njs_is_byte_string(&string1) || njs_is_byte_string(&string2)) {
-        length = 0;
-    }
-
     size = string1.size + string2.size;
 
     start = njs_string_alloc(vm, retval, size, length);
diff -r 27da19960b72 -r 2d098d2a1c85 src/test/njs_unit_test.c
--- a/src/test/njs_unit_test.c	Tue May 21 23:38:19 2024 -0700
+++ b/src/test/njs_unit_test.c	Tue May 21 23:41:10 2024 -0700
@@ -4372,10 +4372,9 @@ static njs_unit_test_t  njs_test[] =
 
     { njs_str("["
               "  'α'.repeat(33),"
-              "  $262.byteString(Array(16).fill(0x9d)),"
               "]"
               ".map(v=>{var out = ['β', 'γ'].join(v); return out.length})"),
-      njs_str("35,20") },
+      njs_str("35") },
 
     { njs_str("["
               "  [],"
@@ -4393,9 +4392,6 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("var a = ['β','γ']; a.join('').length"),
       njs_str("2") },
 
-    { njs_str("var a = ['β', $262.byteString([0x9d]),'γ']; a.join('').length"),
-      njs_str("5") },
-
     { njs_str("var a = []; a[5] = 5; a.join()"),
       njs_str(",,,,,5") },
 
@@ -4764,9 +4760,6 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("Array.prototype.slice.call('αβZγ')"),
       njs_str("α,β,Z,γ") },
 
-    { njs_str("Array.prototype.slice.call($262.byteString(Array(16).fill(0x9d)))[0].charCodeAt(0)"),
-      njs_str("157") },
-
     { njs_str("Array.prototype.slice.call('αβZγ', 1)"),
       njs_str("β,Z,γ") },
 
@@ -9941,10 +9934,6 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("'abc'.padEnd(10, Symbol())"),
       njs_str("TypeError: Cannot convert a Symbol value to a string") },
 
-    { njs_str("[undefined, null, Symbol()]"
-              ".every(v=> { try {$262.byteString(v);} catch(e) {return e.name == 'TypeError'} })"),
-      njs_str("true") },
-
     { njs_str("encodeURI.name"),
       njs_str("encodeURI")},
 
@@ -11796,10 +11785,6 @@ static njs_unit_test_t  njs_test[] =
       njs_str("3 БВ бв 2 /бв/gi") },
 #endif
 
-    { njs_str("var r = /_/g; var index = r.exec($262.byteString([255,149,15,97,95])).index;"
-              "[index, r.lastIndex]"),
-      njs_str("4,5") },
-
     { njs_str("var descs = Object.getOwnPropertyDescriptors(RegExp('a'));"
               "Object.keys(descs)"),
       njs_str("lastIndex") },
@@ -12103,13 +12088,6 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("Error('e').name + ': ' + Error('e').message"),
       njs_str("Error: e") },
 
-    { njs_str("Error($262.byteString(Array(1).fill(0x9d))).toString().length"),
-      njs_str("8") },
-
-    { njs_str("var e = Error('α'); e.name = $262.byteString(Array(1).fill(0x9d)); "
-              "e.toString().length"),
-      njs_str("5") },
-
     { njs_str("Error(1)"),
       njs_str("Error: 1") },
 
@@ -18565,9 +18543,6 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("JSON.stringify('абв'.repeat(100)).length"),
       njs_str("302") },
 
-    { njs_str("JSON.stringify($262.byteString([0xCE, 0xB1, 0xC2, 0xB6]))"),
-      njs_str("\"α¶\"") },
-
     /* Optional arguments. */
 
     { njs_str("JSON.stringify(undefined, undefined, 1)"),
@@ -18601,9 +18576,6 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("JSON.stringify([1], null, '!!βββββββββββββββββ').length"),
       njs_str("15") },
 
-    { njs_str("JSON.stringify([1], null, $262.byteString([0x9d])).length"),
-      njs_str("InternalError: space argument cannot be a byte string") },
-
     { njs_str("JSON.stringify([1], null, 11)"),
       njs_str("[\n          1\n]") },
 


More information about the nginx-devel mailing list