[njs] String.prototype.lastIndexOf() method fixes and optimizati...

Igor Sysoev igor at sysoev.ru
Thu Oct 27 09:51:54 UTC 2016


details:   http://hg.nginx.org/njs/rev/da63e61ecef4
branches:  
changeset: 223:da63e61ecef4
user:      Igor Sysoev <igor at sysoev.ru>
date:      Thu Oct 27 10:56:54 2016 +0300
description:
String.prototype.lastIndexOf() method fixes and optimizations.

In collaboration with Valentin Bartenev.

diffstat:

 njs/njs_string.c         |  129 ++++++++++++++++++++++------------------------
 njs/test/njs_unit_test.c |   41 ++++++++++++++-
 nxt/nxt_utf8.h           |   27 ++++++++-
 3 files changed, 123 insertions(+), 74 deletions(-)

diffs (274 lines):

diff -r 74785cebd8df -r da63e61ecef4 njs/njs_string.c
--- a/njs/njs_string.c	Thu Oct 27 10:47:48 2016 +0300
+++ b/njs/njs_string.c	Thu Oct 27 10:56:54 2016 +0300
@@ -83,8 +83,6 @@ static nxt_noinline void njs_string_slic
     njs_value_t *args, nxt_uint_t nargs);
 static njs_ret_t njs_string_from_char_code(njs_vm_t *vm,
     njs_value_t *args, nxt_uint_t nargs, njs_index_t unused);
-static nxt_noinline ssize_t njs_string_index_of(njs_vm_t *vm,
-    njs_value_t *src, njs_value_t *search_string, size_t index);
 static njs_ret_t njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args,
     njs_regexp_pattern_t *pattern);
 static njs_ret_t njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array,
@@ -1288,91 +1286,86 @@ static njs_ret_t
 njs_string_prototype_last_index_of(njs_vm_t *vm, njs_value_t *args,
     nxt_uint_t nargs, njs_index_t unused)
 {
-    ssize_t  ret, index, last;
-
-    index = -1;
-
-    if (nargs > 1) {
-        last = NJS_STRING_MAX_LENGTH;
-
-        if (nargs > 2) {
-            last = args[2].data.u.number;
-
-            if (last < 0) {
-                last = 0;
-            }
-        }
-
-        ret = 0;
-
-        for ( ;; ) {
-            ret = njs_string_index_of(vm, &args[0], &args[1], ret);
-
-            if (ret < 0 || ret >= last) {
-                break;
-            }
-
-            index = ret++;
-        }
-    }
-
-    njs_number_set(&vm->retval, index);
-
-    return NXT_OK;
-}
-
-
-static nxt_noinline ssize_t
-njs_string_index_of(njs_vm_t *vm, njs_value_t *src, njs_value_t *search_string,
-    size_t index)
-{
-    size_t             length;
+    ssize_t            index, start, length, search_length;
     const u_char       *p, *end;
     njs_string_prop_t  string, search;
 
-    (void) njs_string_prop(&search, search_string);
-
-    length = njs_string_prop(&string, src);
-
-    if (index < length) {
-
-        if (string.size == length) {
+    if (nargs > 1) {
+        length = njs_string_prop(&string, &args[0]);
+        search_length = njs_string_prop(&search, &args[1]);
+
+        if (length < search_length) {
+            goto small;
+        }
+
+        index = NJS_STRING_MAX_LENGTH;
+
+        if (nargs > 2) {
+            index = args[2].data.u.number;
+
+            if (index < 0) {
+                index = 0;
+            }
+        }
+
+        if (index > length) {
+            index = length;
+        }
+
+        if (string.size == (size_t) length) {
             /* Byte or ASCII string. */
+
+            start = length - search.size;
+
+            if (index > start) {
+                index = start;
+            }
+
             p = string.start + index;
-            end = (string.start + string.size) - (search.size - 1);
-
-            while (p < end) {
+
+            do {
                 if (memcmp(p, search.start, search.size) == 0) {
-                    return index;
+                    goto done;
                 }
 
-                index++;
-                p++;
-            }
+                p--;
+                index--;
+
+            } while (index >= 0);
 
         } else {
             /* UTF-8 string. */
+
             end = string.start + string.size;
-
             p = njs_string_offset(string.start, end, index);
-
-            end -= search.size - 1;
-
-            while (p < end) {
+            end -= search.size;
+
+            while (p > end) {
+                index--;
+                p = nxt_utf8_prev(p);
+            }
+
+            do {
                 if (memcmp(p, search.start, search.size) == 0) {
-                    return index;
+                    goto done;
                 }
 
-                index++;
-                p = nxt_utf8_next(p, end);
-            }
+                p = nxt_utf8_prev(p);
+                index--;
+
+            } while (index >= 0);
         }
-
-    } else if (search.size == 0) {
-        return length;
     }
 
-    return -1;
+small:
+
+    index = -1;
+
+done:
+
+    njs_number_set(&vm->retval, index);
+
+    return NXT_OK;
 }
 
 
diff -r 74785cebd8df -r da63e61ecef4 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c	Thu Oct 27 10:47:48 2016 +0300
+++ b/njs/test/njs_unit_test.c	Thu Oct 27 10:56:54 2016 +0300
@@ -3224,6 +3224,9 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("''.indexOf.call(12345, 45, '0')"),
       nxt_string("3") },
 
+    { nxt_string("'abc'.lastIndexOf('abcdef')"),
+      nxt_string("-1") },
+
     { nxt_string("'abc abc abc abc'.lastIndexOf('abc')"),
       nxt_string("12") },
 
@@ -3231,7 +3234,43 @@ static njs_unit_test_t  njs_test[] =
       nxt_string("8") },
 
     { nxt_string("'abc abc abc abc'.lastIndexOf('abc', 0)"),
-      nxt_string("-1") },
+      nxt_string("0") },
+
+    { nxt_string("'abc abc abc abc'.lastIndexOf('', 0)"),
+      nxt_string("0") },
+
+    { nxt_string("'abc abc abc abc'.lastIndexOf('', 5)"),
+      nxt_string("5") },
+
+    { nxt_string("'abc abc абвгд abc'.lastIndexOf('абвгд')"),
+      nxt_string("8") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд')"),
+      nxt_string("8") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 11)"),
+      nxt_string("8") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 12)"),
+      nxt_string("8") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 13)"),
+      nxt_string("8") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 14)"),
+      nxt_string("8") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 15)"),
+      nxt_string("8") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('')"),
+      nxt_string("16") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('', 12)"),
+      nxt_string("12") },
+
+    { nxt_string("''.lastIndexOf('')"),
+      nxt_string("0") },
 
     { nxt_string("'ABC'.toLowerCase()"),
       nxt_string("abc") },
diff -r 74785cebd8df -r da63e61ecef4 nxt/nxt_utf8.h
--- a/nxt/nxt_utf8.h	Thu Oct 27 10:47:48 2016 +0300
+++ b/nxt/nxt_utf8.h	Thu Oct 27 10:56:54 2016 +0300
@@ -29,7 +29,12 @@ NXT_EXPORT ssize_t nxt_utf8_length(const
 NXT_EXPORT nxt_bool_t nxt_utf8_is_valid(const u_char *p, size_t len);
 
 
-/* nxt_utf8_next() expects a valid UTF-8 string. */
+/*
+ * nxt_utf8_next() and nxt_utf8_prev() expect a valid UTF-8 string.
+ *
+ * The leading UTF-8 byte is either 0xxxxxxx or 11xxxxxx.
+ * The continuation UTF-8 bytes are 10xxxxxx.
+ */
 
 nxt_inline const u_char *
 nxt_utf8_next(const u_char *p, const u_char *end)
@@ -41,10 +46,6 @@ nxt_utf8_next(const u_char *p, const u_c
     if ((c & 0x80) != 0) {
 
         do {
-            /*
-             * The first UTF-8 byte is either 0xxxxxxx or 11xxxxxx.
-             * The next UTF-8 bytes are 10xxxxxx.
-             */
             c = *p;
 
             if ((c & 0xC0) != 0x80) {
@@ -60,6 +61,22 @@ nxt_utf8_next(const u_char *p, const u_c
 }
 
 
+nxt_inline const u_char *
+nxt_utf8_prev(const u_char *p)
+{
+   u_char  c;
+
+   do {
+       p--;
+       c = *p;
+
+   } while ((c & 0xC0) == 0x80);
+
+   return p;
+}
+
+
+
 #define nxt_utf8_size(u)                                                      \
     ((u < 0x80) ? 1 : ((u < 0x0800) ? 2 : ((u < 0x10000) ? 3 : 4)))
 


More information about the nginx-devel mailing list