[njs] Introduced njs_string_offset() which supports any string.

Dmitry Volyntsev xeioex at nginx.com
Wed Mar 8 05:09:22 UTC 2023


details:   https://hg.nginx.org/njs/rev/8170f061bbde
branches:  
changeset: 2066:8170f061bbde
user:      Dmitry Volyntsev <xeioex at nginx.com>
date:      Tue Mar 07 20:35:00 2023 -0800
description:
Introduced njs_string_offset() which supports any string.

diffstat:

 src/njs_iterator.c |   2 +-
 src/njs_json.c     |   7 +-----
 src/njs_parser.c   |   5 ++-
 src/njs_regexp.c   |  16 ++++++++------
 src/njs_string.c   |  57 +++++++++++++++--------------------------------------
 src/njs_string.h   |  18 +++++++++++++++-
 6 files changed, 46 insertions(+), 59 deletions(-)

diffs (297 lines):

diff -r e3a609ff9001 -r 8170f061bbde src/njs_iterator.c
--- a/src/njs_iterator.c	Fri Mar 03 18:50:23 2023 -0800
+++ b/src/njs_iterator.c	Tue Mar 07 20:35:00 2023 -0800
@@ -558,7 +558,7 @@ njs_object_iterate_reverse(njs_vm_t *vm,
             i = from + 1;
 
             if (i > to) {
-                p = njs_string_offset(string_prop.start, end, from);
+                p = njs_string_utf8_offset(string_prop.start, end, from);
                 p = njs_utf8_next(p, end);
             }
 
diff -r e3a609ff9001 -r 8170f061bbde src/njs_json.c
--- a/src/njs_json.c	Fri Mar 03 18:50:23 2023 -0800
+++ b/src/njs_json.c	Tue Mar 07 20:35:00 2023 -0800
@@ -233,12 +233,7 @@ njs_json_stringify(njs_vm_t *vm, njs_val
             return NJS_ERROR;
         }
 
-        if (length > 10) {
-            p = njs_string_offset(prop.start, prop.start + prop.size, 10);
-
-        } else {
-            p = prop.start + prop.size;
-        }
+        p = njs_string_offset(&prop, njs_min(length, 10));
 
         stringify->space.start = prop.start;
         stringify->space.length = p - prop.start;
diff -r e3a609ff9001 -r 8170f061bbde src/njs_parser.c
--- a/src/njs_parser.c	Fri Mar 03 18:50:23 2023 -0800
+++ b/src/njs_parser.c	Tue Mar 07 20:35:00 2023 -0800
@@ -8589,7 +8589,8 @@ njs_parser_string_create(njs_vm_t *vm, n
     njs_decode_utf8(&dst, &token->text);
 
     if (length > NJS_STRING_MAP_STRIDE && dst.length != length) {
-        njs_string_offset_map_init(value->long_string.data->start, dst.length);
+        njs_string_utf8_offset_map_init(value->long_string.data->start,
+                                        dst.length);
     }
 
     return NJS_OK;
@@ -8833,7 +8834,7 @@ next_char:
     }
 
     if (length > NJS_STRING_MAP_STRIDE && length != size) {
-        njs_string_offset_map_init(start, size);
+        njs_string_utf8_offset_map_init(start, size);
     }
 
     return NJS_TOKEN_STRING;
diff -r e3a609ff9001 -r 8170f061bbde src/njs_regexp.c
--- a/src/njs_regexp.c	Fri Mar 03 18:50:23 2023 -0800
+++ b/src/njs_regexp.c	Tue Mar 07 20:35:00 2023 -0800
@@ -891,9 +891,9 @@ njs_regexp_builtin_exec(njs_vm_t *vm, nj
         offset = last_index;
 
     } else {
-        /* UTF-8 string. */
-        offset = njs_string_offset(string.start, string.start + string.size,
-                                   last_index) - string.start;
+        offset = njs_string_utf8_offset(string.start,
+                                        string.start + string.size, last_index)
+                 - string.start;
     }
 
     ret = njs_regexp_match(vm, &pattern->regex[type], string.start, offset,
@@ -1360,7 +1360,8 @@ njs_regexp_prototype_symbol_replace(njs_
 
         if ((size_t) length != s.size) {
             /* UTF-8 string. */
-            pos = njs_string_offset(s.start, s.start + s.size, pos) - s.start;
+            pos = njs_string_utf8_offset(s.start, s.start + s.size, pos)
+                  - s.start;
         }
 
         pos = njs_max(njs_min(pos, (int64_t) s.size), 0);
@@ -1643,8 +1644,8 @@ njs_regexp_prototype_symbol_split(njs_vm
         }
 
         if (utf8 == NJS_STRING_UTF8) {
-            start = njs_string_offset(s.start, s.start + s.size, p);
-            end = njs_string_offset(s.start, s.start + s.size, q);
+            start = njs_string_utf8_offset(s.start, s.start + s.size, p);
+            end = njs_string_utf8_offset(s.start, s.start + s.size, q);
 
         } else {
             start = &s.start[p];
@@ -1691,7 +1692,8 @@ njs_regexp_prototype_symbol_split(njs_vm
     end = &s.start[s.size];
 
     if (utf8 == NJS_STRING_UTF8) {
-        start = (p < length) ? njs_string_offset(s.start, s.start + s.size, p)
+        start = (p < length) ? njs_string_utf8_offset(s.start, s.start + s.size,
+                                                      p)
                              : end;
 
     } else {
diff -r e3a609ff9001 -r 8170f061bbde src/njs_string.c
--- a/src/njs_string.c	Fri Mar 03 18:50:23 2023 -0800
+++ b/src/njs_string.c	Tue Mar 07 20:35:00 2023 -0800
@@ -1146,7 +1146,7 @@ njs_string_prototype_to_bytes(njs_vm_t *
             /* UTF-8 string. */
             end = string.start + string.size;
 
-            s = njs_string_offset(string.start, end, slice.start);
+            s = njs_string_utf8_offset(string.start, end, slice.start);
 
             length = slice.length;
 
@@ -1503,7 +1503,7 @@ njs_string_slice_string_prop(njs_string_
         end = start + string->size;
 
         if (slice->start < slice->string_length) {
-            start = njs_string_offset(start, end, slice->start);
+            start = njs_string_utf8_offset(start, end, slice->start);
 
             /* Evaluate size of the slice in bytes and adjust length. */
             p = start;
@@ -1584,9 +1584,8 @@ njs_string_prototype_char_code_at(njs_vm
     } else {
         njs_utf8_decode_init(&ctx);
 
-        /* UTF-8 string. */
         end = string.start + string.size;
-        start = njs_string_offset(string.start, end, index);
+        start = njs_string_utf8_offset(string.start, end, index);
         code = njs_utf8_decode(&ctx, &start, end);
     }
 
@@ -2151,7 +2150,7 @@ njs_string_index_of(njs_string_prop_t *s
         } else {
             /* UTF-8 string. */
 
-            p = njs_string_offset(string->start, end, index);
+            p = njs_string_utf8_offset(string->start, end, index);
             end -= search->size - 1;
 
             while (p < end) {
@@ -2296,7 +2295,7 @@ njs_string_prototype_last_index_of(njs_v
             goto done;
         }
 
-        p = njs_string_offset(string.start, end, index);
+        p = njs_string_utf8_offset(string.start, end, index);
 
         for (; p >= string.start; p = njs_utf8_prev(p)) {
             if ((p + s.size) <= end && memcmp(p, s.start, s.size) == 0) {
@@ -2376,15 +2375,7 @@ njs_string_prototype_includes(njs_vm_t *
 
         if (length - index >= search_length) {
             end = string.start + string.size;
-
-            if (string.size == (size_t) length) {
-                /* Byte or ASCII string. */
-                p = string.start + index;
-
-            } else {
-                /* UTF-8 string. */
-                p = njs_string_offset(string.start, end, index);
-            }
+            p = njs_string_offset(&string, index);
 
             end -= search.size - 1;
 
@@ -2482,15 +2473,7 @@ njs_string_prototype_starts_or_ends_with
         }
 
         end = string.start + string.size;
-
-        if (string.size == (size_t) length) {
-            /* Byte or ASCII string. */
-            p = string.start + index;
-
-        } else {
-            /* UTF-8 string. */
-            p = njs_string_offset(string.start, end, index);
-        }
+        p = njs_string_offset(&string, index);
 
         if ((size_t) (end - p) >= search.size
             && memcmp(p, search.start, search.size) == 0)
@@ -2512,11 +2495,11 @@ done:
 
 
 /*
- * njs_string_offset() assumes that index is correct.
+ * njs_string_utf8_offset() assumes that index is correct.
  */
 
 const u_char *
-njs_string_offset(const u_char *start, const u_char *end, size_t index)
+njs_string_utf8_offset(const u_char *start, const u_char *end, size_t index)
 {
     uint32_t    *map;
     njs_uint_t  skip;
@@ -2525,7 +2508,7 @@ njs_string_offset(const u_char *start, c
         map = njs_string_map_start(end);
 
         if (map[0] == 0) {
-            njs_string_offset_map_init(start, end - start);
+            njs_string_utf8_offset_map_init(start, end - start);
         }
 
         start += map[index / NJS_STRING_MAP_STRIDE - 1];
@@ -2562,7 +2545,7 @@ njs_string_index(njs_string_prop_t *stri
         map = njs_string_map_start(end);
 
         if (map[0] == 0) {
-            njs_string_offset_map_init(string->start, string->size);
+            njs_string_utf8_offset_map_init(string->start, string->size);
         }
 
         while (index + NJS_STRING_MAP_STRIDE < string->length
@@ -2587,7 +2570,7 @@ njs_string_index(njs_string_prop_t *stri
 
 
 void
-njs_string_offset_map_init(const u_char *start, size_t size)
+njs_string_utf8_offset_map_init(const u_char *start, size_t size)
 {
     size_t        offset;
     uint32_t      *map;
@@ -3055,7 +3038,7 @@ njs_string_prototype_pad(njs_vm_t *vm, n
         if (pad_string.size != (size_t) pad_length) {
             /* UTF-8 string. */
             end = pad_string.start + pad_string.size;
-            end = njs_string_offset(pad_string.start, end, trunc);
+            end = njs_string_utf8_offset(pad_string.start, end, trunc);
 
             trunc = end - pad_string.start;
             padding = pad_string.size * n + trunc;
@@ -3799,14 +3782,7 @@ njs_string_prototype_replace(njs_vm_t *v
             }
         }
 
-        if (njs_is_byte_or_ascii_string(&string)) {
-            p = string.start + pos;
-
-        } else {
-            /* UTF-8 string. */
-            p = njs_string_offset(string.start, string.start + string.size,
-                                  pos);
-        }
+        p = njs_string_offset(&string, pos);
 
         (void) njs_string_prop(&ret_string, &retval);
 
@@ -3867,9 +3843,8 @@ njs_string_prototype_replace(njs_vm_t *v
             p = string.start + pos;
 
         } else {
-            /* UTF-8 string. */
-            p = njs_string_offset(string.start, string.start + string.size,
-                                  pos);
+            p = njs_string_utf8_offset(string.start, string.start + string.size,
+                                       pos);
         }
 
         (void) njs_string_prop(&ret_string, &retval);
diff -r e3a609ff9001 -r 8170f061bbde src/njs_string.h
--- a/src/njs_string.h	Fri Mar 03 18:50:23 2023 -0800
+++ b/src/njs_string.h	Tue Mar 07 20:35:00 2023 -0800
@@ -243,10 +243,10 @@ void njs_string_slice_string_prop(njs_st
     const njs_string_prop_t *string, const njs_slice_prop_t *slice);
 njs_int_t njs_string_slice(njs_vm_t *vm, njs_value_t *dst,
     const njs_string_prop_t *string, const njs_slice_prop_t *slice);
-const u_char *njs_string_offset(const u_char *start, const u_char *end,
+const u_char *njs_string_utf8_offset(const u_char *start, const u_char *end,
     size_t index);
 uint32_t njs_string_index(njs_string_prop_t *string, uint32_t offset);
-void njs_string_offset_map_init(const u_char *start, size_t size);
+void njs_string_utf8_offset_map_init(const u_char *start, size_t size);
 double njs_string_to_index(const njs_value_t *value);
 njs_int_t njs_string_encode_uri(njs_vm_t *vm, njs_value_t *args,
     njs_uint_t nargs, njs_index_t component);
@@ -266,6 +266,20 @@ njs_int_t njs_string_get_substitution(nj
     njs_value_t *groups, njs_value_t *replacement, njs_value_t *retval);
 
 
+njs_inline const u_char *
+njs_string_offset(njs_string_prop_t *string, int64_t index)
+{
+    if (njs_is_byte_or_ascii_string(string)) {
+        return string->start + index;
+    }
+
+    /* UTF-8 string. */
+
+    return njs_string_utf8_offset(string->start, string->start + string->size,
+                                  index);
+}
+
+
 extern const njs_object_init_t  njs_string_instance_init;
 extern const njs_object_type_init_t  njs_string_type_init;
 


More information about the nginx-devel mailing list