[njs] Introduced njs_string_offset() which supports any string.
Dmitry Volyntsev
xeioex at nginx.com
Wed Mar 8 05:09:22 UTC 2023
details: https://hg.nginx.org/njs/rev/8170f061bbde
branches:
changeset: 2066:8170f061bbde
user: Dmitry Volyntsev <xeioex at nginx.com>
date: Tue Mar 07 20:35:00 2023 -0800
description:
Introduced njs_string_offset() which supports any string.
diffstat:
src/njs_iterator.c | 2 +-
src/njs_json.c | 7 +-----
src/njs_parser.c | 5 ++-
src/njs_regexp.c | 16 ++++++++------
src/njs_string.c | 57 +++++++++++++++--------------------------------------
src/njs_string.h | 18 +++++++++++++++-
6 files changed, 46 insertions(+), 59 deletions(-)
diffs (297 lines):
diff -r e3a609ff9001 -r 8170f061bbde src/njs_iterator.c
--- a/src/njs_iterator.c Fri Mar 03 18:50:23 2023 -0800
+++ b/src/njs_iterator.c Tue Mar 07 20:35:00 2023 -0800
@@ -558,7 +558,7 @@ njs_object_iterate_reverse(njs_vm_t *vm,
i = from + 1;
if (i > to) {
- p = njs_string_offset(string_prop.start, end, from);
+ p = njs_string_utf8_offset(string_prop.start, end, from);
p = njs_utf8_next(p, end);
}
diff -r e3a609ff9001 -r 8170f061bbde src/njs_json.c
--- a/src/njs_json.c Fri Mar 03 18:50:23 2023 -0800
+++ b/src/njs_json.c Tue Mar 07 20:35:00 2023 -0800
@@ -233,12 +233,7 @@ njs_json_stringify(njs_vm_t *vm, njs_val
return NJS_ERROR;
}
- if (length > 10) {
- p = njs_string_offset(prop.start, prop.start + prop.size, 10);
-
- } else {
- p = prop.start + prop.size;
- }
+ p = njs_string_offset(&prop, njs_min(length, 10));
stringify->space.start = prop.start;
stringify->space.length = p - prop.start;
diff -r e3a609ff9001 -r 8170f061bbde src/njs_parser.c
--- a/src/njs_parser.c Fri Mar 03 18:50:23 2023 -0800
+++ b/src/njs_parser.c Tue Mar 07 20:35:00 2023 -0800
@@ -8589,7 +8589,8 @@ njs_parser_string_create(njs_vm_t *vm, n
njs_decode_utf8(&dst, &token->text);
if (length > NJS_STRING_MAP_STRIDE && dst.length != length) {
- njs_string_offset_map_init(value->long_string.data->start, dst.length);
+ njs_string_utf8_offset_map_init(value->long_string.data->start,
+ dst.length);
}
return NJS_OK;
@@ -8833,7 +8834,7 @@ next_char:
}
if (length > NJS_STRING_MAP_STRIDE && length != size) {
- njs_string_offset_map_init(start, size);
+ njs_string_utf8_offset_map_init(start, size);
}
return NJS_TOKEN_STRING;
diff -r e3a609ff9001 -r 8170f061bbde src/njs_regexp.c
--- a/src/njs_regexp.c Fri Mar 03 18:50:23 2023 -0800
+++ b/src/njs_regexp.c Tue Mar 07 20:35:00 2023 -0800
@@ -891,9 +891,9 @@ njs_regexp_builtin_exec(njs_vm_t *vm, nj
offset = last_index;
} else {
- /* UTF-8 string. */
- offset = njs_string_offset(string.start, string.start + string.size,
- last_index) - string.start;
+ offset = njs_string_utf8_offset(string.start,
+ string.start + string.size, last_index)
+ - string.start;
}
ret = njs_regexp_match(vm, &pattern->regex[type], string.start, offset,
@@ -1360,7 +1360,8 @@ njs_regexp_prototype_symbol_replace(njs_
if ((size_t) length != s.size) {
/* UTF-8 string. */
- pos = njs_string_offset(s.start, s.start + s.size, pos) - s.start;
+ pos = njs_string_utf8_offset(s.start, s.start + s.size, pos)
+ - s.start;
}
pos = njs_max(njs_min(pos, (int64_t) s.size), 0);
@@ -1643,8 +1644,8 @@ njs_regexp_prototype_symbol_split(njs_vm
}
if (utf8 == NJS_STRING_UTF8) {
- start = njs_string_offset(s.start, s.start + s.size, p);
- end = njs_string_offset(s.start, s.start + s.size, q);
+ start = njs_string_utf8_offset(s.start, s.start + s.size, p);
+ end = njs_string_utf8_offset(s.start, s.start + s.size, q);
} else {
start = &s.start[p];
@@ -1691,7 +1692,8 @@ njs_regexp_prototype_symbol_split(njs_vm
end = &s.start[s.size];
if (utf8 == NJS_STRING_UTF8) {
- start = (p < length) ? njs_string_offset(s.start, s.start + s.size, p)
+ start = (p < length) ? njs_string_utf8_offset(s.start, s.start + s.size,
+ p)
: end;
} else {
diff -r e3a609ff9001 -r 8170f061bbde src/njs_string.c
--- a/src/njs_string.c Fri Mar 03 18:50:23 2023 -0800
+++ b/src/njs_string.c Tue Mar 07 20:35:00 2023 -0800
@@ -1146,7 +1146,7 @@ njs_string_prototype_to_bytes(njs_vm_t *
/* UTF-8 string. */
end = string.start + string.size;
- s = njs_string_offset(string.start, end, slice.start);
+ s = njs_string_utf8_offset(string.start, end, slice.start);
length = slice.length;
@@ -1503,7 +1503,7 @@ njs_string_slice_string_prop(njs_string_
end = start + string->size;
if (slice->start < slice->string_length) {
- start = njs_string_offset(start, end, slice->start);
+ start = njs_string_utf8_offset(start, end, slice->start);
/* Evaluate size of the slice in bytes and adjust length. */
p = start;
@@ -1584,9 +1584,8 @@ njs_string_prototype_char_code_at(njs_vm
} else {
njs_utf8_decode_init(&ctx);
- /* UTF-8 string. */
end = string.start + string.size;
- start = njs_string_offset(string.start, end, index);
+ start = njs_string_utf8_offset(string.start, end, index);
code = njs_utf8_decode(&ctx, &start, end);
}
@@ -2151,7 +2150,7 @@ njs_string_index_of(njs_string_prop_t *s
} else {
/* UTF-8 string. */
- p = njs_string_offset(string->start, end, index);
+ p = njs_string_utf8_offset(string->start, end, index);
end -= search->size - 1;
while (p < end) {
@@ -2296,7 +2295,7 @@ njs_string_prototype_last_index_of(njs_v
goto done;
}
- p = njs_string_offset(string.start, end, index);
+ p = njs_string_utf8_offset(string.start, end, index);
for (; p >= string.start; p = njs_utf8_prev(p)) {
if ((p + s.size) <= end && memcmp(p, s.start, s.size) == 0) {
@@ -2376,15 +2375,7 @@ njs_string_prototype_includes(njs_vm_t *
if (length - index >= search_length) {
end = string.start + string.size;
-
- if (string.size == (size_t) length) {
- /* Byte or ASCII string. */
- p = string.start + index;
-
- } else {
- /* UTF-8 string. */
- p = njs_string_offset(string.start, end, index);
- }
+ p = njs_string_offset(&string, index);
end -= search.size - 1;
@@ -2482,15 +2473,7 @@ njs_string_prototype_starts_or_ends_with
}
end = string.start + string.size;
-
- if (string.size == (size_t) length) {
- /* Byte or ASCII string. */
- p = string.start + index;
-
- } else {
- /* UTF-8 string. */
- p = njs_string_offset(string.start, end, index);
- }
+ p = njs_string_offset(&string, index);
if ((size_t) (end - p) >= search.size
&& memcmp(p, search.start, search.size) == 0)
@@ -2512,11 +2495,11 @@ done:
/*
- * njs_string_offset() assumes that index is correct.
+ * njs_string_utf8_offset() assumes that index is correct.
*/
const u_char *
-njs_string_offset(const u_char *start, const u_char *end, size_t index)
+njs_string_utf8_offset(const u_char *start, const u_char *end, size_t index)
{
uint32_t *map;
njs_uint_t skip;
@@ -2525,7 +2508,7 @@ njs_string_offset(const u_char *start, c
map = njs_string_map_start(end);
if (map[0] == 0) {
- njs_string_offset_map_init(start, end - start);
+ njs_string_utf8_offset_map_init(start, end - start);
}
start += map[index / NJS_STRING_MAP_STRIDE - 1];
@@ -2562,7 +2545,7 @@ njs_string_index(njs_string_prop_t *stri
map = njs_string_map_start(end);
if (map[0] == 0) {
- njs_string_offset_map_init(string->start, string->size);
+ njs_string_utf8_offset_map_init(string->start, string->size);
}
while (index + NJS_STRING_MAP_STRIDE < string->length
@@ -2587,7 +2570,7 @@ njs_string_index(njs_string_prop_t *stri
void
-njs_string_offset_map_init(const u_char *start, size_t size)
+njs_string_utf8_offset_map_init(const u_char *start, size_t size)
{
size_t offset;
uint32_t *map;
@@ -3055,7 +3038,7 @@ njs_string_prototype_pad(njs_vm_t *vm, n
if (pad_string.size != (size_t) pad_length) {
/* UTF-8 string. */
end = pad_string.start + pad_string.size;
- end = njs_string_offset(pad_string.start, end, trunc);
+ end = njs_string_utf8_offset(pad_string.start, end, trunc);
trunc = end - pad_string.start;
padding = pad_string.size * n + trunc;
@@ -3799,14 +3782,7 @@ njs_string_prototype_replace(njs_vm_t *v
}
}
- if (njs_is_byte_or_ascii_string(&string)) {
- p = string.start + pos;
-
- } else {
- /* UTF-8 string. */
- p = njs_string_offset(string.start, string.start + string.size,
- pos);
- }
+ p = njs_string_offset(&string, pos);
(void) njs_string_prop(&ret_string, &retval);
@@ -3867,9 +3843,8 @@ njs_string_prototype_replace(njs_vm_t *v
p = string.start + pos;
} else {
- /* UTF-8 string. */
- p = njs_string_offset(string.start, string.start + string.size,
- pos);
+ p = njs_string_utf8_offset(string.start, string.start + string.size,
+ pos);
}
(void) njs_string_prop(&ret_string, &retval);
diff -r e3a609ff9001 -r 8170f061bbde src/njs_string.h
--- a/src/njs_string.h Fri Mar 03 18:50:23 2023 -0800
+++ b/src/njs_string.h Tue Mar 07 20:35:00 2023 -0800
@@ -243,10 +243,10 @@ void njs_string_slice_string_prop(njs_st
const njs_string_prop_t *string, const njs_slice_prop_t *slice);
njs_int_t njs_string_slice(njs_vm_t *vm, njs_value_t *dst,
const njs_string_prop_t *string, const njs_slice_prop_t *slice);
-const u_char *njs_string_offset(const u_char *start, const u_char *end,
+const u_char *njs_string_utf8_offset(const u_char *start, const u_char *end,
size_t index);
uint32_t njs_string_index(njs_string_prop_t *string, uint32_t offset);
-void njs_string_offset_map_init(const u_char *start, size_t size);
+void njs_string_utf8_offset_map_init(const u_char *start, size_t size);
double njs_string_to_index(const njs_value_t *value);
njs_int_t njs_string_encode_uri(njs_vm_t *vm, njs_value_t *args,
njs_uint_t nargs, njs_index_t component);
@@ -266,6 +266,20 @@ njs_int_t njs_string_get_substitution(nj
njs_value_t *groups, njs_value_t *replacement, njs_value_t *retval);
+njs_inline const u_char *
+njs_string_offset(njs_string_prop_t *string, int64_t index)
+{
+ if (njs_is_byte_or_ascii_string(string)) {
+ return string->start + index;
+ }
+
+ /* UTF-8 string. */
+
+ return njs_string_utf8_offset(string->start, string->start + string->size,
+ index);
+}
+
+
extern const njs_object_init_t njs_string_instance_init;
extern const njs_object_type_init_t njs_string_type_init;
More information about the nginx-devel
mailing list