[njs] On-demand initialization of UTF-8 strings offset map.
Valentin Bartenev
vbart at nginx.com
Thu Nov 10 15:55:11 UTC 2016
details: http://hg.nginx.org/njs/rev/adf61ca4267b
branches:
changeset: 250:adf61ca4267b
user: Valentin Bartenev <vbart at nginx.com>
date: Thu Nov 10 18:54:28 2016 +0300
description:
On-demand initialization of UTF-8 strings offset map.
diffstat:
njs/njs_string.c | 70 ++++++++++++++++++++++---------------------------------
njs/njs_string.h | 6 ++--
njs/njs_vm.c | 4 ---
3 files changed, 31 insertions(+), 49 deletions(-)
diffs (229 lines):
diff -r 187882f1895a -r adf61ca4267b njs/njs_string.c
--- a/njs/njs_string.c Thu Nov 10 18:45:10 2016 +0300
+++ b/njs/njs_string.c Thu Nov 10 18:54:28 2016 +0300
@@ -178,11 +178,6 @@ njs_string_new(njs_vm_t *vm, njs_value_t
if (nxt_fast_path(p != NULL)) {
memcpy(p, start, size);
-
- if (size != length && length >= NJS_STRING_MAP_STRIDE) {
- njs_string_offset_map_init(p, size);
- }
-
return NXT_OK;
}
@@ -194,7 +189,7 @@ nxt_noinline u_char *
njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint32_t size,
uint32_t length)
{
- uint32_t total;
+ uint32_t total, map_offset, *map;
njs_string_t *string;
value->type = NJS_STRING;
@@ -217,9 +212,11 @@ njs_string_alloc(njs_vm_t *vm, njs_value
value->data.string_size = size;
if (size != length && length > NJS_STRING_MAP_STRIDE) {
- total = njs_string_map_offset(size) + njs_string_map_size(length);
+ map_offset = njs_string_map_offset(size);
+ total = map_offset + njs_string_map_size(length);
} else {
+ map_offset = 0;
total = size;
}
@@ -233,6 +230,11 @@ njs_string_alloc(njs_vm_t *vm, njs_value
string->length = length;
string->retain = 1;
+ if (map_offset != 0) {
+ map = (uint32_t *) (string->start + map_offset);
+ map[0] = 0;
+ }
+
return string->start;
}
@@ -251,15 +253,16 @@ njs_string_copy(njs_value_t *dst, njs_va
/*
* njs_string_validate() validates an UTF-8 string, evaluates its length,
- * sets njs_string_prop_t struct, and initializes offset map if it is required.
+ * sets njs_string_prop_t struct.
*/
nxt_noinline njs_ret_t
njs_string_validate(njs_vm_t *vm, njs_string_prop_t *string, njs_value_t *value)
{
- u_char *start;
- size_t new_size;
- ssize_t size, length;
+ u_char *start;
+ size_t new_size, map_offset;
+ ssize_t size, length;
+ uint32_t *map;
size = value->short_string.size;
@@ -297,8 +300,8 @@ njs_string_validate(njs_vm_t *vm, njs_st
* Reallocate the long string with offset map
* after the string.
*/
- new_size = njs_string_map_offset(size)
- + njs_string_map_size(length);
+ map_offset = njs_string_map_offset(size);
+ new_size = map_offset + njs_string_map_size(length);
start = nxt_mem_cache_alloc(vm->mem_cache_pool, new_size);
if (nxt_slow_path(start == NULL)) {
@@ -309,7 +312,8 @@ njs_string_validate(njs_vm_t *vm, njs_st
string->start = start;
value->data.u.string->start = start;
- njs_string_offset_map_init(start, size);
+ map = (uint32_t *) (start + map_offset);
+ map[0] = 0;
}
}
@@ -649,10 +653,6 @@ njs_string_prototype_concat(njs_vm_t *vm
p += string.size;
}
- if (length >= NJS_STRING_MAP_STRIDE && size != length) {
- njs_string_offset_map_init(start, size);
- }
-
return NXT_OK;
}
@@ -766,10 +766,6 @@ njs_string_prototype_from_bytes(njs_vm_t
for (p = string.start; p < end; p++) {
s = nxt_utf8_encode(s, *p);
}
-
- if (slice.length >= NJS_STRING_MAP_STRIDE || size != slice.length) {
- njs_string_offset_map_init(start, size);
- }
}
return NXT_OK;
@@ -1518,8 +1514,7 @@ done:
/*
- * njs_string_offset() assumes that index is correct
- * and the optional offset map has been initialized.
+ * njs_string_offset() assumes that index is correct.
*/
nxt_noinline const u_char *
@@ -1531,6 +1526,10 @@ njs_string_offset(const u_char *start, c
if (index >= NJS_STRING_MAP_STRIDE) {
map = njs_string_map_start(end);
+ if (map[0] == 0) {
+ njs_string_offset_map_init(start, end - start);
+ }
+
start += map[index / NJS_STRING_MAP_STRIDE - 1];
}
@@ -1543,8 +1542,7 @@ njs_string_offset(const u_char *start, c
/*
- * njs_string_index() assumes that offset is correct
- * and the optional offset map has been initialized.
+ * njs_string_index() assumes that offset is correct.
*/
nxt_noinline uint32_t
@@ -1565,6 +1563,10 @@ njs_string_index(njs_string_prop_t *stri
end = string->start + string->size;
map = njs_string_map_start(end);
+ if (map[0] == 0) {
+ njs_string_offset_map_init(string->start, string->size);
+ }
+
while (index + NJS_STRING_MAP_STRIDE < string->length
&& *map <= offset)
{
@@ -1628,10 +1630,6 @@ njs_string_prototype_to_lower_case(njs_v
p = nxt_utf8_encode(p, nxt_utf8_lower_case(&s, end));
size--;
}
-
- if (string.length >= NJS_STRING_MAP_STRIDE) {
- njs_string_offset_map_init(start, string.size);
- }
}
return NXT_OK;
@@ -1680,10 +1678,6 @@ njs_string_prototype_to_upper_case(njs_v
p = nxt_utf8_encode(p, nxt_utf8_upper_case(&s, end));
size--;
}
-
- if (string.length >= NJS_STRING_MAP_STRIDE) {
- njs_string_offset_map_init(start, string.size);
- }
}
return NXT_OK;
@@ -1865,10 +1859,6 @@ njs_string_prototype_repeat(njs_vm_t *vm
n--;
}
- if (length >= NJS_STRING_MAP_STRIDE && size != length) {
- njs_string_offset_map_init(start, size);
- }
-
return NXT_OK;
}
@@ -2882,10 +2872,6 @@ njs_string_replace_join(njs_vm_t *vm, nj
/* GC: release valid values. */
}
- if (length >= NJS_STRING_MAP_STRIDE && size != length) {
- njs_string_offset_map_init(string, size);
- }
-
nxt_array_destroy(&r->parts, &njs_array_mem_proto, vm->mem_cache_pool);
return NXT_OK;
diff -r 187882f1895a -r adf61ca4267b njs/njs_string.h
--- a/njs/njs_string.h Thu Nov 10 18:45:10 2016 +0300
+++ b/njs/njs_string.h Thu Nov 10 18:54:28 2016 +0300
@@ -53,9 +53,9 @@
* To speed up this search a map of offsets is stored after the UTF-8 string.
* The map is aligned to uint32_t and contains byte positions of each
* NJS_STRING_MAP_STRIDE UTF-8 character except zero position. The map
- * can be allocated and updated on demand. If a string come outside
- * JavaScript as byte sequnece just to be concatenated or to be used in
- * regular expressions the offset map is not required.
+ * can be initialized on demand. If a string comes from outside JavaScript
+ * as a byte sequence just to be concatenated or to be used in regular
+ * expressions, the offset map is not required.
*
* The map is not allocated:
* 1) if the length is zero hence it is a byte string;
diff -r 187882f1895a -r adf61ca4267b njs/njs_vm.c
--- a/njs/njs_vm.c Thu Nov 10 18:45:10 2016 +0300
+++ b/njs/njs_vm.c Thu Nov 10 18:54:28 2016 +0300
@@ -1556,10 +1556,6 @@ njs_vmcode_addition(njs_vm_t *vm, njs_va
(void) memcpy(start, string1.start, string1.size);
(void) memcpy(start + string1.size, string2.start, string2.size);
- if (length >= NJS_STRING_MAP_STRIDE && size != length) {
- njs_string_offset_map_init(start, size);
- }
-
return sizeof(njs_vmcode_3addr_t);
}
More information about the nginx-devel
mailing list