[njs] String.prototype.lastIndexOf() method fixes and optimizati...
Igor Sysoev
igor at sysoev.ru
Thu Oct 27 09:51:54 UTC 2016
details: http://hg.nginx.org/njs/rev/da63e61ecef4
branches:
changeset: 223:da63e61ecef4
user: Igor Sysoev <igor at sysoev.ru>
date: Thu Oct 27 10:56:54 2016 +0300
description:
String.prototype.lastIndexOf() method fixes and optimizations.
In collaboration with Valentin Bartenev.
diffstat:
njs/njs_string.c | 129 ++++++++++++++++++++++------------------------
njs/test/njs_unit_test.c | 41 ++++++++++++++-
nxt/nxt_utf8.h | 27 ++++++++-
3 files changed, 123 insertions(+), 74 deletions(-)
diffs (274 lines):
diff -r 74785cebd8df -r da63e61ecef4 njs/njs_string.c
--- a/njs/njs_string.c Thu Oct 27 10:47:48 2016 +0300
+++ b/njs/njs_string.c Thu Oct 27 10:56:54 2016 +0300
@@ -83,8 +83,6 @@ static nxt_noinline void njs_string_slic
njs_value_t *args, nxt_uint_t nargs);
static njs_ret_t njs_string_from_char_code(njs_vm_t *vm,
njs_value_t *args, nxt_uint_t nargs, njs_index_t unused);
-static nxt_noinline ssize_t njs_string_index_of(njs_vm_t *vm,
- njs_value_t *src, njs_value_t *search_string, size_t index);
static njs_ret_t njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args,
njs_regexp_pattern_t *pattern);
static njs_ret_t njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array,
@@ -1288,91 +1286,86 @@ static njs_ret_t
njs_string_prototype_last_index_of(njs_vm_t *vm, njs_value_t *args,
nxt_uint_t nargs, njs_index_t unused)
{
- ssize_t ret, index, last;
-
- index = -1;
-
- if (nargs > 1) {
- last = NJS_STRING_MAX_LENGTH;
-
- if (nargs > 2) {
- last = args[2].data.u.number;
-
- if (last < 0) {
- last = 0;
- }
- }
-
- ret = 0;
-
- for ( ;; ) {
- ret = njs_string_index_of(vm, &args[0], &args[1], ret);
-
- if (ret < 0 || ret >= last) {
- break;
- }
-
- index = ret++;
- }
- }
-
- njs_number_set(&vm->retval, index);
-
- return NXT_OK;
-}
-
-
-static nxt_noinline ssize_t
-njs_string_index_of(njs_vm_t *vm, njs_value_t *src, njs_value_t *search_string,
- size_t index)
-{
- size_t length;
+ ssize_t index, start, length, search_length;
const u_char *p, *end;
njs_string_prop_t string, search;
- (void) njs_string_prop(&search, search_string);
-
- length = njs_string_prop(&string, src);
-
- if (index < length) {
-
- if (string.size == length) {
+ if (nargs > 1) {
+ length = njs_string_prop(&string, &args[0]);
+ search_length = njs_string_prop(&search, &args[1]);
+
+ if (length < search_length) {
+ goto small;
+ }
+
+ index = NJS_STRING_MAX_LENGTH;
+
+ if (nargs > 2) {
+ index = args[2].data.u.number;
+
+ if (index < 0) {
+ index = 0;
+ }
+ }
+
+ if (index > length) {
+ index = length;
+ }
+
+ if (string.size == (size_t) length) {
/* Byte or ASCII string. */
+
+ start = length - search.size;
+
+ if (index > start) {
+ index = start;
+ }
+
p = string.start + index;
- end = (string.start + string.size) - (search.size - 1);
-
- while (p < end) {
+
+ do {
if (memcmp(p, search.start, search.size) == 0) {
- return index;
+ goto done;
}
- index++;
- p++;
- }
+ p--;
+ index--;
+
+ } while (index >= 0);
} else {
/* UTF-8 string. */
+
end = string.start + string.size;
-
p = njs_string_offset(string.start, end, index);
-
- end -= search.size - 1;
-
- while (p < end) {
+ end -= search.size;
+
+ while (p > end) {
+ index--;
+ p = nxt_utf8_prev(p);
+ }
+
+ do {
if (memcmp(p, search.start, search.size) == 0) {
- return index;
+ goto done;
}
- index++;
- p = nxt_utf8_next(p, end);
- }
+ p = nxt_utf8_prev(p);
+ index--;
+
+ } while (index >= 0);
}
-
- } else if (search.size == 0) {
- return length;
}
- return -1;
+small:
+
+ index = -1;
+
+done:
+
+ njs_number_set(&vm->retval, index);
+
+ return NXT_OK;
}
diff -r 74785cebd8df -r da63e61ecef4 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c Thu Oct 27 10:47:48 2016 +0300
+++ b/njs/test/njs_unit_test.c Thu Oct 27 10:56:54 2016 +0300
@@ -3224,6 +3224,9 @@ static njs_unit_test_t njs_test[] =
{ nxt_string("''.indexOf.call(12345, 45, '0')"),
nxt_string("3") },
+ { nxt_string("'abc'.lastIndexOf('abcdef')"),
+ nxt_string("-1") },
+
{ nxt_string("'abc abc abc abc'.lastIndexOf('abc')"),
nxt_string("12") },
@@ -3231,7 +3234,43 @@ static njs_unit_test_t njs_test[] =
nxt_string("8") },
{ nxt_string("'abc abc abc abc'.lastIndexOf('abc', 0)"),
- nxt_string("-1") },
+ nxt_string("0") },
+
+ { nxt_string("'abc abc abc abc'.lastIndexOf('', 0)"),
+ nxt_string("0") },
+
+ { nxt_string("'abc abc abc abc'.lastIndexOf('', 5)"),
+ nxt_string("5") },
+
+ { nxt_string("'abc abc абвгд abc'.lastIndexOf('абвгд')"),
+ nxt_string("8") },
+
+ { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд')"),
+ nxt_string("8") },
+
+ { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 11)"),
+ nxt_string("8") },
+
+ { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 12)"),
+ nxt_string("8") },
+
+ { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 13)"),
+ nxt_string("8") },
+
+ { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 14)"),
+ nxt_string("8") },
+
+ { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 15)"),
+ nxt_string("8") },
+
+ { nxt_string("'abc abc абвгдежз'.lastIndexOf('')"),
+ nxt_string("16") },
+
+ { nxt_string("'abc abc абвгдежз'.lastIndexOf('', 12)"),
+ nxt_string("12") },
+
+ { nxt_string("''.lastIndexOf('')"),
+ nxt_string("0") },
{ nxt_string("'ABC'.toLowerCase()"),
nxt_string("abc") },
diff -r 74785cebd8df -r da63e61ecef4 nxt/nxt_utf8.h
--- a/nxt/nxt_utf8.h Thu Oct 27 10:47:48 2016 +0300
+++ b/nxt/nxt_utf8.h Thu Oct 27 10:56:54 2016 +0300
@@ -29,7 +29,12 @@ NXT_EXPORT ssize_t nxt_utf8_length(const
NXT_EXPORT nxt_bool_t nxt_utf8_is_valid(const u_char *p, size_t len);
-/* nxt_utf8_next() expects a valid UTF-8 string. */
+/*
+ * nxt_utf8_next() and nxt_utf8_prev() expect a valid UTF-8 string.
+ *
+ * The leading UTF-8 byte is either 0xxxxxxx or 11xxxxxx.
+ * The continuation UTF-8 bytes are 10xxxxxx.
+ */
nxt_inline const u_char *
nxt_utf8_next(const u_char *p, const u_char *end)
@@ -41,10 +46,6 @@ nxt_utf8_next(const u_char *p, const u_c
if ((c & 0x80) != 0) {
do {
- /*
- * The first UTF-8 byte is either 0xxxxxxx or 11xxxxxx.
- * The next UTF-8 bytes are 10xxxxxx.
- */
c = *p;
if ((c & 0xC0) != 0x80) {
@@ -60,6 +61,22 @@ nxt_utf8_next(const u_char *p, const u_c
}
+nxt_inline const u_char *
+nxt_utf8_prev(const u_char *p)
+{
+ u_char c;
+
+ do {
+ p--;
+ c = *p;
+
+ } while ((c & 0xC0) == 0x80);
+
+ return p;
+}
+
+
+
#define nxt_utf8_size(u) \
((u < 0x80) ? 1 : ((u < 0x0800) ? 2 : ((u < 0x10000) ? 3 : 4)))
More information about the nginx-devel
mailing list