[njs] Improved UTF-8 offset map related macros (no functional changes).

Valentin Bartenev vbart at nginx.com
Thu Nov 10 15:55:09 UTC 2016


details:   http://hg.nginx.org/njs/rev/187882f1895a
branches:  
changeset: 249:187882f1895a
user:      Valentin Bartenev <vbart at nginx.com>
date:      Thu Nov 10 18:45:10 2016 +0300
description:
Improved UTF-8 offset map related macros (no functional changes).

diffstat:

 njs/njs_parser.c |   4 +-
 njs/njs_string.c |  61 ++++++++++++++++++++++++++-----------------------------
 njs/njs_string.h |  14 ++++++++++--
 njs/njs_vm.c     |   2 +-
 4 files changed, 43 insertions(+), 38 deletions(-)

diffs (250 lines):

diff -r 60c2930eb951 -r 187882f1895a njs/njs_parser.c
--- a/njs/njs_parser.c	Thu Nov 10 16:47:52 2016 +0300
+++ b/njs/njs_parser.c	Thu Nov 10 18:45:10 2016 +0300
@@ -1984,7 +1984,7 @@ njs_parser_string_create(njs_vm_t *vm, n
     if (nxt_fast_path(p != NULL)) {
         memcpy(p, src->start, src->length);
 
-        if (length > NJS_STRING_MAP_OFFSET && (size_t) length != src->length) {
+        if (length > NJS_STRING_MAP_STRIDE && (size_t) length != src->length) {
             njs_string_offset_map_init(p, src->length);
         }
 
@@ -2144,7 +2144,7 @@ njs_parser_escape_string_create(njs_vm_t
         }
 
         if (start != NULL) {
-            if (length > NJS_STRING_MAP_OFFSET && length != size) {
+            if (length > NJS_STRING_MAP_STRIDE && length != size) {
                 njs_string_offset_map_init(start, size);
             }
 
diff -r 60c2930eb951 -r 187882f1895a njs/njs_string.c
--- a/njs/njs_string.c	Thu Nov 10 16:47:52 2016 +0300
+++ b/njs/njs_string.c	Thu Nov 10 18:45:10 2016 +0300
@@ -179,7 +179,7 @@ njs_string_new(njs_vm_t *vm, njs_value_t
     if (nxt_fast_path(p != NULL)) {
         memcpy(p, start, size);
 
-        if (size != length && length >= NJS_STRING_MAP_OFFSET) {
+        if (size != length && length >= NJS_STRING_MAP_STRIDE) {
             njs_string_offset_map_init(p, size);
         }
 
@@ -216,9 +216,8 @@ njs_string_alloc(njs_vm_t *vm, njs_value
     value->data.external0 = 0;
     value->data.string_size = size;
 
-    if (size != length && length > NJS_STRING_MAP_OFFSET) {
-        total = nxt_align_size(size, sizeof(uint32_t));
-        total += ((length - 1) / NJS_STRING_MAP_OFFSET) * sizeof(uint32_t);
+    if (size != length && length > NJS_STRING_MAP_STRIDE) {
+        total = njs_string_map_offset(size) + njs_string_map_size(length);
 
     } else {
         total = size;
@@ -293,14 +292,13 @@ njs_string_validate(njs_vm_t *vm, njs_st
                     return length;
                 }
 
-                if (length > NJS_STRING_MAP_OFFSET) {
+                if (length > NJS_STRING_MAP_STRIDE) {
                     /*
                      * Reallocate the long string with offset map
                      * after the string.
                      */
-                    new_size = nxt_align_size(size, sizeof(uint32_t));
-                    new_size += ((length - 1) / NJS_STRING_MAP_OFFSET)
-                                * sizeof(uint32_t);
+                    new_size = njs_string_map_offset(size)
+                               + njs_string_map_size(length);
 
                     start = nxt_mem_cache_alloc(vm->mem_cache_pool, new_size);
                     if (nxt_slow_path(start == NULL)) {
@@ -473,15 +471,15 @@ njs_string_offset_map_init(const u_char 
     const u_char  *p, *end;
 
     end = start + size;
-    map = (uint32_t *) nxt_align_ptr(end, sizeof(uint32_t));
+    map = njs_string_map_start(end);
     p = start;
     n = 0;
-    offset = NJS_STRING_MAP_OFFSET;
+    offset = NJS_STRING_MAP_STRIDE;
 
     do {
         if (offset == 0) {
             map[n++] = p - start;
-            offset = NJS_STRING_MAP_OFFSET;
+            offset = NJS_STRING_MAP_STRIDE;
         }
 
         /* The UTF-8 string should be valid since its length is known. */
@@ -651,7 +649,7 @@ njs_string_prototype_concat(njs_vm_t *vm
         p += string.size;
     }
 
-    if (length >= NJS_STRING_MAP_OFFSET && size != length) {
+    if (length >= NJS_STRING_MAP_STRIDE && size != length) {
         njs_string_offset_map_init(start, size);
     }
 
@@ -685,7 +683,7 @@ njs_string_prototype_from_utf8(njs_vm_t 
 
     if (length >= 0) {
 
-        if (length < NJS_STRING_MAP_OFFSET || (size_t) length == slice.length) {
+        if (length < NJS_STRING_MAP_STRIDE || (size_t) length == slice.length) {
             /* ASCII or short UTF-8 string. */
             return njs_string_create(vm, &vm->retval, string.start,
                                      slice.length, length);
@@ -769,7 +767,7 @@ njs_string_prototype_from_bytes(njs_vm_t
                 s = nxt_utf8_encode(s, *p);
             }
 
-            if (slice.length >= NJS_STRING_MAP_OFFSET || size != slice.length) {
+            if (slice.length >= NJS_STRING_MAP_STRIDE || size != slice.length) {
                 njs_string_offset_map_init(start, size);
             }
         }
@@ -1530,13 +1528,13 @@ njs_string_offset(const u_char *start, c
     uint32_t    *map;
     nxt_uint_t  skip;
 
-    if (index >= NJS_STRING_MAP_OFFSET) {
-        map = (uint32_t *) nxt_align_ptr(end, sizeof(uint32_t));
-
-        start += map[index / NJS_STRING_MAP_OFFSET - 1];
+    if (index >= NJS_STRING_MAP_STRIDE) {
+        map = njs_string_map_start(end);
+
+        start += map[index / NJS_STRING_MAP_STRIDE - 1];
     }
 
-    for (skip = index % NJS_STRING_MAP_OFFSET; skip != 0; skip--) {
+    for (skip = index % NJS_STRING_MAP_STRIDE; skip != 0; skip--) {
         start = nxt_utf8_next(start, end);
     }
 
@@ -1562,16 +1560,16 @@ njs_string_index(njs_string_prop_t *stri
     last = 0;
     index = 0;
 
-    if (string->length >= NJS_STRING_MAP_OFFSET) {
+    if (string->length >= NJS_STRING_MAP_STRIDE) {
 
         end = string->start + string->size;
-        map = (uint32_t *) nxt_align_ptr(end, sizeof(uint32_t));
-
-        while (index + NJS_STRING_MAP_OFFSET < string->length
+        map = njs_string_map_start(end);
+
+        while (index + NJS_STRING_MAP_STRIDE < string->length
                && *map <= offset)
         {
             last = *map++;
-            index += NJS_STRING_MAP_OFFSET;
+            index += NJS_STRING_MAP_STRIDE;
         }
     }
 
@@ -1631,7 +1629,7 @@ njs_string_prototype_to_lower_case(njs_v
             size--;
         }
 
-        if (string.length >= NJS_STRING_MAP_OFFSET) {
+        if (string.length >= NJS_STRING_MAP_STRIDE) {
             njs_string_offset_map_init(start, string.size);
         }
     }
@@ -1683,7 +1681,7 @@ njs_string_prototype_to_upper_case(njs_v
             size--;
         }
 
-        if (string.length >= NJS_STRING_MAP_OFFSET) {
+        if (string.length >= NJS_STRING_MAP_STRIDE) {
             njs_string_offset_map_init(start, string.size);
         }
     }
@@ -1867,7 +1865,7 @@ njs_string_prototype_repeat(njs_vm_t *vm
         n--;
     }
 
-    if (length >= NJS_STRING_MAP_OFFSET && size != length) {
+    if (length >= NJS_STRING_MAP_STRIDE && size != length) {
         njs_string_offset_map_init(start, size);
     }
 
@@ -2884,7 +2882,7 @@ njs_string_replace_join(njs_vm_t *vm, nj
         /* GC: release valid values. */
     }
 
-    if (length >= NJS_STRING_MAP_OFFSET && size != length) {
+    if (length >= NJS_STRING_MAP_STRIDE && size != length) {
         njs_string_offset_map_init(string, size);
     }
 
@@ -3655,10 +3653,9 @@ njs_value_index(njs_vm_t *vm, njs_parser
 
             length = src->data.u.string->length;
 
-            if (size != length && length > NJS_STRING_MAP_OFFSET) {
-                size = nxt_align_size(size, sizeof(uint32_t));
-                size += ((length - 1) / NJS_STRING_MAP_OFFSET)
-                        * sizeof(uint32_t);
+            if (size != length && length > NJS_STRING_MAP_STRIDE) {
+                size = njs_string_map_offset(size)
+                       + njs_string_map_size(length);
             }
         }
 
diff -r 60c2930eb951 -r 187882f1895a njs/njs_string.h
--- a/njs/njs_string.h	Thu Nov 10 16:47:52 2016 +0300
+++ b/njs/njs_string.h	Thu Nov 10 18:45:10 2016 +0300
@@ -31,7 +31,15 @@
  * division and remainder operations but no less than 16 because the maximum
  * length of short string inlined in njs_value_t is less than 16 bytes.
  */
-#define NJS_STRING_MAP_OFFSET  32
+#define NJS_STRING_MAP_STRIDE  32
+
+#define njs_string_map_offset(size)  nxt_align_size((size), sizeof(uint32_t))
+
+#define njs_string_map_start(p)                                               \
+    ((uint32_t *) nxt_align_ptr((p), sizeof(uint32_t)))
+
+#define njs_string_map_size(length)                                           \
+    ((((length) - 1) / NJS_STRING_MAP_STRIDE) * sizeof(uint32_t))
 
 /*
  * The JavaScript standard states that strings are stored in UTF-16.
@@ -44,7 +52,7 @@
  * encoding does not allow to get quickly a character at specified position.
  * To speed up this search a map of offsets is stored after the UTF-8 string.
  * The map is aligned to uint32_t and contains byte positions of each
- * NJS_STRING_MAP_OFFSET UTF-8 character except zero position.  The map
+ * NJS_STRING_MAP_STRIDE UTF-8 character except zero position.  The map
  * can be allocated and updated on demand.  If a string come outside
  * JavaScript as byte sequnece just to be concatenated or to be used in
  * regular expressions the offset map is not required.
@@ -53,7 +61,7 @@
  * 1) if the length is zero hence it is a byte string;
  * 2) if the size and length are equal so the string contains only ASCII
  *    characters map is not required;
- * 3) if the length is less than NJS_STRING_MAP_OFFSET.
+ * 3) if the length is less than NJS_STRING_MAP_STRIDE.
  *
  * The current implementation does not support Unicode surrogate pairs.
  * If offset in map points to surrogate pair then the previous offset
diff -r 60c2930eb951 -r 187882f1895a njs/njs_vm.c
--- a/njs/njs_vm.c	Thu Nov 10 16:47:52 2016 +0300
+++ b/njs/njs_vm.c	Thu Nov 10 18:45:10 2016 +0300
@@ -1556,7 +1556,7 @@ njs_vmcode_addition(njs_vm_t *vm, njs_va
         (void) memcpy(start, string1.start, string1.size);
         (void) memcpy(start + string1.size, string2.start, string2.size);
 
-        if (length >= NJS_STRING_MAP_OFFSET && size != length) {
+        if (length >= NJS_STRING_MAP_STRIDE && size != length) {
             njs_string_offset_map_init(start, size);
         }
 



More information about the nginx-devel mailing list