[njs] On-demand initialization of UTF-8 strings offset map.

Valentin Bartenev vbart at nginx.com
Thu Nov 10 15:55:11 UTC 2016


details:   http://hg.nginx.org/njs/rev/adf61ca4267b
branches:  
changeset: 250:adf61ca4267b
user:      Valentin Bartenev <vbart at nginx.com>
date:      Thu Nov 10 18:54:28 2016 +0300
description:
On-demand initialization of UTF-8 strings offset map.

diffstat:

 njs/njs_string.c |  70 ++++++++++++++++++++++---------------------------------
 njs/njs_string.h |   6 ++--
 njs/njs_vm.c     |   4 ---
 3 files changed, 31 insertions(+), 49 deletions(-)

diffs (229 lines):

diff -r 187882f1895a -r adf61ca4267b njs/njs_string.c
--- a/njs/njs_string.c	Thu Nov 10 18:45:10 2016 +0300
+++ b/njs/njs_string.c	Thu Nov 10 18:54:28 2016 +0300
@@ -178,11 +178,6 @@ njs_string_new(njs_vm_t *vm, njs_value_t
 
     if (nxt_fast_path(p != NULL)) {
         memcpy(p, start, size);
-
-        if (size != length && length >= NJS_STRING_MAP_STRIDE) {
-            njs_string_offset_map_init(p, size);
-        }
-
         return NXT_OK;
     }
 
@@ -194,7 +189,7 @@ nxt_noinline u_char *
 njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint32_t size,
     uint32_t length)
 {
-    uint32_t      total;
+    uint32_t      total, map_offset, *map;
     njs_string_t  *string;
 
     value->type = NJS_STRING;
@@ -217,9 +212,11 @@ njs_string_alloc(njs_vm_t *vm, njs_value
     value->data.string_size = size;
 
     if (size != length && length > NJS_STRING_MAP_STRIDE) {
-        total = njs_string_map_offset(size) + njs_string_map_size(length);
+        map_offset = njs_string_map_offset(size);
+        total = map_offset + njs_string_map_size(length);
 
     } else {
+        map_offset = 0;
         total = size;
     }
 
@@ -233,6 +230,11 @@ njs_string_alloc(njs_vm_t *vm, njs_value
         string->length = length;
         string->retain = 1;
 
+        if (map_offset != 0) {
+            map = (uint32_t *) (string->start + map_offset);
+            map[0] = 0;
+        }
+
         return string->start;
     }
 
@@ -251,15 +253,16 @@ njs_string_copy(njs_value_t *dst, njs_va
 
 /*
  * njs_string_validate() validates an UTF-8 string, evaluates its length,
- * sets njs_string_prop_t struct, and initializes offset map if it is required.
+ * sets njs_string_prop_t struct.
  */
 
 nxt_noinline njs_ret_t
 njs_string_validate(njs_vm_t *vm, njs_string_prop_t *string, njs_value_t *value)
 {
-    u_char   *start;
-    size_t   new_size;
-    ssize_t  size, length;
+    u_char    *start;
+    size_t    new_size, map_offset;
+    ssize_t   size, length;
+    uint32_t  *map;
 
     size = value->short_string.size;
 
@@ -297,8 +300,8 @@ njs_string_validate(njs_vm_t *vm, njs_st
                      * Reallocate the long string with offset map
                      * after the string.
                      */
-                    new_size = njs_string_map_offset(size)
-                               + njs_string_map_size(length);
+                    map_offset = njs_string_map_offset(size);
+                    new_size = map_offset + njs_string_map_size(length);
 
                     start = nxt_mem_cache_alloc(vm->mem_cache_pool, new_size);
                     if (nxt_slow_path(start == NULL)) {
@@ -309,7 +312,8 @@ njs_string_validate(njs_vm_t *vm, njs_st
                     string->start = start;
                     value->data.u.string->start = start;
 
-                    njs_string_offset_map_init(start, size);
+                    map = (uint32_t *) (start + map_offset);
+                    map[0] = 0;
                 }
             }
 
@@ -649,10 +653,6 @@ njs_string_prototype_concat(njs_vm_t *vm
         p += string.size;
     }
 
-    if (length >= NJS_STRING_MAP_STRIDE && size != length) {
-        njs_string_offset_map_init(start, size);
-    }
-
     return NXT_OK;
 }
 
@@ -766,10 +766,6 @@ njs_string_prototype_from_bytes(njs_vm_t
             for (p = string.start; p < end; p++) {
                 s = nxt_utf8_encode(s, *p);
             }
-
-            if (slice.length >= NJS_STRING_MAP_STRIDE || size != slice.length) {
-                njs_string_offset_map_init(start, size);
-            }
         }
 
         return NXT_OK;
@@ -1518,8 +1514,7 @@ done:
 
 
 /*
- * njs_string_offset() assumes that index is correct
- * and the optional offset map has been initialized.
+ * njs_string_offset() assumes that index is correct.
  */
 
 nxt_noinline const u_char *
@@ -1531,6 +1526,10 @@ njs_string_offset(const u_char *start, c
     if (index >= NJS_STRING_MAP_STRIDE) {
         map = njs_string_map_start(end);
 
+        if (map[0] == 0) {
+            njs_string_offset_map_init(start, end - start);
+        }
+
         start += map[index / NJS_STRING_MAP_STRIDE - 1];
     }
 
@@ -1543,8 +1542,7 @@ njs_string_offset(const u_char *start, c
 
 
 /*
- * njs_string_index() assumes that offset is correct
- * and the optional offset map has been initialized.
+ * njs_string_index() assumes that offset is correct.
  */
 
 nxt_noinline uint32_t
@@ -1565,6 +1563,10 @@ njs_string_index(njs_string_prop_t *stri
         end = string->start + string->size;
         map = njs_string_map_start(end);
 
+        if (map[0] == 0) {
+            njs_string_offset_map_init(string->start, string->size);
+        }
+
         while (index + NJS_STRING_MAP_STRIDE < string->length
                && *map <= offset)
         {
@@ -1628,10 +1630,6 @@ njs_string_prototype_to_lower_case(njs_v
             p = nxt_utf8_encode(p, nxt_utf8_lower_case(&s, end));
             size--;
         }
-
-        if (string.length >= NJS_STRING_MAP_STRIDE) {
-            njs_string_offset_map_init(start, string.size);
-        }
     }
 
     return NXT_OK;
@@ -1680,10 +1678,6 @@ njs_string_prototype_to_upper_case(njs_v
             p = nxt_utf8_encode(p, nxt_utf8_upper_case(&s, end));
             size--;
         }
-
-        if (string.length >= NJS_STRING_MAP_STRIDE) {
-            njs_string_offset_map_init(start, string.size);
-        }
     }
 
     return NXT_OK;
@@ -1865,10 +1859,6 @@ njs_string_prototype_repeat(njs_vm_t *vm
         n--;
     }
 
-    if (length >= NJS_STRING_MAP_STRIDE && size != length) {
-        njs_string_offset_map_init(start, size);
-    }
-
     return NXT_OK;
 }
 
@@ -2882,10 +2872,6 @@ njs_string_replace_join(njs_vm_t *vm, nj
         /* GC: release valid values. */
     }
 
-    if (length >= NJS_STRING_MAP_STRIDE && size != length) {
-        njs_string_offset_map_init(string, size);
-    }
-
     nxt_array_destroy(&r->parts, &njs_array_mem_proto, vm->mem_cache_pool);
 
     return NXT_OK;
diff -r 187882f1895a -r adf61ca4267b njs/njs_string.h
--- a/njs/njs_string.h	Thu Nov 10 18:45:10 2016 +0300
+++ b/njs/njs_string.h	Thu Nov 10 18:54:28 2016 +0300
@@ -53,9 +53,9 @@
  * To speed up this search a map of offsets is stored after the UTF-8 string.
  * The map is aligned to uint32_t and contains byte positions of each
  * NJS_STRING_MAP_STRIDE UTF-8 character except zero position.  The map
- * can be allocated and updated on demand.  If a string come outside
- * JavaScript as byte sequnece just to be concatenated or to be used in
- * regular expressions the offset map is not required.
+ * can be initialized on demand.  If a string comes from outside JavaScript
+ * as a byte sequence just to be concatenated or to be used in regular
+ * expressions, the offset map is not required.
  *
  * The map is not allocated:
  * 1) if the length is zero hence it is a byte string;
diff -r 187882f1895a -r adf61ca4267b njs/njs_vm.c
--- a/njs/njs_vm.c	Thu Nov 10 18:45:10 2016 +0300
+++ b/njs/njs_vm.c	Thu Nov 10 18:54:28 2016 +0300
@@ -1556,10 +1556,6 @@ njs_vmcode_addition(njs_vm_t *vm, njs_va
         (void) memcpy(start, string1.start, string1.size);
         (void) memcpy(start + string1.size, string2.start, string2.size);
 
-        if (length >= NJS_STRING_MAP_STRIDE && size != length) {
-            njs_string_offset_map_init(start, size);
-        }
-
         return sizeof(njs_vmcode_3addr_t);
     }
 



More information about the nginx-devel mailing list