[njs] String.match() fixes.

Igor Sysoev igor at sysoev.ru
Tue Aug 16 16:14:37 UTC 2016


details:   http://hg.nginx.org/njs/rev/5d9e4adf25c2
branches:  
changeset: 157:5d9e4adf25c2
user:      Igor Sysoev <igor at sysoev.ru>
date:      Tue Aug 16 18:58:30 2016 +0300
description:
String.match() fixes.

diffstat:

 njs/njs_regexp.c         |   69 +++++++++++++----------
 njs/njs_regexp.h         |    2 +
 njs/njs_string.c         |  133 +++++++++++++++++++++-------------------------
 njs/njs_vm.h             |    1 +
 njs/test/njs_unit_test.c |    9 +++
 5 files changed, 112 insertions(+), 102 deletions(-)

diffs (331 lines):

diff -r dcd8a105b5e7 -r 5d9e4adf25c2 njs/njs_regexp.c
--- a/njs/njs_regexp.c	Tue Aug 16 18:09:35 2016 +0300
+++ b/njs/njs_regexp.c	Tue Aug 16 18:58:30 2016 +0300
@@ -65,7 +65,8 @@ njs_regexp_init(njs_vm_t *vm)
 
     vm->regex_context->trace = &vm->trace;
 
-    return NXT_OK;
+    return njs_regexp_create(vm, &vm->empty_regexp, (u_char *) "(?:)",
+                             sizeof("(?:)") - 1, 0);
 }
 
 
@@ -87,20 +88,22 @@ njs_ret_t
 njs_regexp_constructor(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs,
     njs_index_t unused)
 {
-    size_t                length;
-    njs_regexp_t          *regexp;
-    njs_string_prop_t     string;
-    njs_regexp_flags_t    flags;
-    njs_regexp_pattern_t  *pattern;
+    njs_string_prop_t   string;
+    njs_regexp_flags_t  flags;
 
     flags = 0;
 
     switch (nargs) {
 
+    case 1:
+        string.start = NULL;
+        string.size = 0;
+        break;
+
     default:
-        length = njs_string_prop(&string, &args[2]);
+        (void) njs_string_prop(&string, &args[2]);
 
-        flags = njs_regexp_flags(&string.start, string.start + length, 1);
+        flags = njs_regexp_flags(&string.start, string.start + string.size, 1);
         if (nxt_slow_path(flags < 0)) {
             return NXT_ERROR;
         }
@@ -108,36 +111,42 @@ njs_regexp_constructor(njs_vm_t *vm, njs
         /* Fall through. */
 
     case 2:
-        string.length = njs_string_prop(&string, &args[1]);
-
-        if (string.length != 0) {
-            break;
-        }
-
-        /* Fall through. */
-
-    case 1:
-        string.start = (u_char *) "(?:)";
-        string.length = sizeof("(?:)") - 1;
+        (void) njs_string_prop(&string, &args[1]);
         break;
     }
 
-    pattern = njs_regexp_pattern_create(vm, string.start, string.length, flags);
-
-    if (nxt_fast_path(pattern != NULL)) {
-
-        regexp = njs_regexp_alloc(vm, pattern);
+    return njs_regexp_create(vm, &vm->retval, string.start, string.size, flags);
+}
 
-        if (nxt_fast_path(regexp != NULL)) {
-            vm->retval.data.u.regexp = regexp;
-            vm->retval.type = NJS_REGEXP;
-            vm->retval.data.truth = 1;
 
-            return NXT_OK;
+nxt_int_t
+njs_regexp_create(njs_vm_t *vm, njs_value_t *value, u_char *start,
+    size_t length, njs_regexp_flags_t flags)
+{
+    njs_regexp_t          *regexp;
+    njs_regexp_pattern_t  *pattern;
+
+    if (length != 0) {
+        pattern = njs_regexp_pattern_create(vm, start, length, flags);
+
+        if (nxt_fast_path(pattern != NULL)) {
+            regexp = njs_regexp_alloc(vm, pattern);
+
+            if (nxt_fast_path(regexp != NULL)) {
+                value->data.u.regexp = regexp;
+                value->type = NJS_REGEXP;
+                value->data.truth = 1;
+
+                return NXT_OK;
+            }
         }
+
+        return NXT_ERROR;
     }
 
-    return NXT_ERROR;
+    *value = vm->empty_regexp;
+
+    return NXT_OK;
 }
 
 
diff -r dcd8a105b5e7 -r 5d9e4adf25c2 njs/njs_regexp.h
--- a/njs/njs_regexp.h	Tue Aug 16 18:09:35 2016 +0300
+++ b/njs/njs_regexp.h	Tue Aug 16 18:58:30 2016 +0300
@@ -35,6 +35,8 @@ struct njs_regexp_s {
 njs_ret_t njs_regexp_init(njs_vm_t *vm);
 njs_ret_t njs_regexp_constructor(njs_vm_t *vm, njs_value_t *args,
     nxt_uint_t nargs, njs_index_t unused);
+nxt_int_t njs_regexp_create(njs_vm_t *vm, njs_value_t *value, u_char *start,
+    size_t length, njs_regexp_flags_t flags);
 njs_token_t njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser,
     njs_value_t *value);
 njs_regexp_pattern_t *njs_regexp_pattern_create(njs_vm_t *vm,
diff -r dcd8a105b5e7 -r 5d9e4adf25c2 njs/njs_string.c
--- a/njs/njs_string.c	Tue Aug 16 18:09:35 2016 +0300
+++ b/njs/njs_string.c	Tue Aug 16 18:58:30 2016 +0300
@@ -41,6 +41,8 @@ static njs_ret_t njs_string_from_char_co
     njs_value_t *args, nxt_uint_t nargs, njs_index_t unused);
 static nxt_noinline ssize_t njs_string_index_of(njs_vm_t *vm,
     njs_value_t *src, njs_value_t *search_string, size_t index);
+static njs_ret_t njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args,
+    njs_regexp_pattern_t *pattern);
 static njs_ret_t njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array,
     u_char *start, size_t size, nxt_uint_t utf8);
 
@@ -1429,26 +1431,27 @@ njs_string_prototype_search(njs_vm_t *vm
 
         switch (args[1].type) {
 
-        case NJS_VOID:
-            goto done;
+        case NJS_REGEXP:
+            pattern = args[1].data.u.regexp->pattern;
+            break;
 
         case NJS_STRING:
             (void) njs_string_prop(&string, &args[1]);
 
-            if (string.size == 0) {
-                goto done;
+            if (string.size != 0) {
+                pattern = njs_regexp_pattern_create(vm, string.start,
+                                                    string.size, 0);
+                if (nxt_slow_path(pattern == NULL)) {
+                    return NXT_ERROR;
+                }
+
+                break;
             }
 
-            pattern = njs_regexp_pattern_create(vm, string.start,
-                                                string.length, 0);
-            if (nxt_slow_path(pattern == NULL)) {
-                return NXT_ERROR;
-            }
-
-            break;
-
-        default:  /* NJS_REGEXP */
-            pattern = args[1].data.u.regexp->pattern;
+            goto done;
+
+        default:  /* NJS_VOID */
+            goto done;
         }
 
         index = -1;
@@ -1486,55 +1489,61 @@ static njs_ret_t
 njs_string_prototype_match(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs,
     njs_index_t unused)
 {
-    int                   *captures;
-    u_char                *start;
-    int32_t               size, length;
     njs_ret_t             ret;
-    nxt_uint_t            n, utf8;
-    njs_value_t           tmp;
-    njs_array_t           *array;
+    njs_value_t           arguments[2];
     njs_string_prop_t     string;
     njs_regexp_pattern_t  *pattern;
 
-    if (nargs == 1) {
-        goto empty;
-    }
-
-    switch (args[1].type) {
-
-    case NJS_VOID:
-        goto empty;
-
-    case NJS_STRING:
-        (void) njs_string_prop(&string, &args[1]);
-
-        if (string.size == 0) {
-            goto empty;
-        }
-
-        pattern = njs_regexp_pattern_create(vm, string.start, string.length, 0);
-        if (nxt_slow_path(pattern == NULL)) {
-            return NXT_ERROR;
-        }
-
-        break;
-
-    default:  /* NJS_REGEXP */
-        pattern = args[1].data.u.regexp->pattern;
-
-        if (!pattern->global) {
+    arguments[0] = vm->empty_regexp;
+    arguments[1] = args[0];
+
+    if (nargs > 1) {
+
+        if (njs_is_regexp(&args[1])) {
+            pattern = args[1].data.u.regexp->pattern;
+
+            if (pattern->global) {
+                return njs_string_match_multiple(vm, args, pattern);
+            }
+
             /*
              * string.match(regexp) is the same as regexp.exec(string)
              * if the regexp has no global flag.
              */
-            tmp = args[0];
-            args[0] = args[1];
-            args[1] = tmp;
-
-            return njs_regexp_prototype_exec(vm, args, nargs, unused);
+            arguments[0] = args[1];
+
+        } else if (njs_is_string(&args[1])) {
+            /* string1.match(string2) is the same as /string2/.exec(string1). */
+
+            (void) njs_string_prop(&string, &args[1]);
+
+            ret = njs_regexp_create(vm, &arguments[0], string.start,
+                                    string.size, 0);
+            if (nxt_slow_path(ret != NXT_OK)) {
+                return ret;
+            }
         }
+
+        /* A void value. */
     }
 
+    return njs_regexp_prototype_exec(vm, arguments, nargs, unused);
+}
+
+
+static njs_ret_t
+njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args,
+    njs_regexp_pattern_t *pattern)
+{
+    int                *captures;
+    u_char             *start;
+    int32_t            size, length;
+    njs_ret_t          ret;
+    nxt_uint_t         n, utf8;
+    njs_array_t        *array;
+    njs_string_prop_t  string;
+
+    args[1].data.u.regexp->last_index = 0;
     vm->retval = njs_value_null;
 
     (void) njs_string_prop(&string, &args[0]);
@@ -1626,26 +1635,6 @@ njs_string_prototype_match(njs_vm_t *vm,
         } while (string.size > 0);
     }
 
-    if (njs_is_regexp(&args[1])) {
-        args[1].data.u.regexp->last_index = 0;
-    }
-
-    return NXT_OK;
-
-empty:
-
-    array = njs_array_alloc(vm, 1, 0);
-    if (nxt_slow_path(array == NULL)) {
-        return NXT_ERROR;
-    }
-
-    array->length = 1;
-    array->start[0] = njs_string_empty;
-
-    vm->retval.data.u.array = array;
-    vm->retval.type = NJS_ARRAY;
-    vm->retval.data.truth = 1;
-
     return NXT_OK;
 }
 
diff -r dcd8a105b5e7 -r 5d9e4adf25c2 njs/njs_vm.h
--- a/njs/njs_vm.h	Tue Aug 16 18:09:35 2016 +0300
+++ b/njs/njs_vm.h	Tue Aug 16 18:58:30 2016 +0300
@@ -797,6 +797,7 @@ struct njs_vm_s {
 
     nxt_regex_context_t      *regex_context;
     nxt_regex_match_data_t   *single_match_data;
+    njs_value_t              empty_regexp;
 
     nxt_array_t              *code;  /* of njs_vm_code_t */
 
diff -r dcd8a105b5e7 -r 5d9e4adf25c2 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c	Tue Aug 16 18:09:35 2016 +0300
+++ b/njs/test/njs_unit_test.c	Tue Aug 16 18:58:30 2016 +0300
@@ -3229,6 +3229,15 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("'abcdefgh'.match(/def/)"),
       nxt_string("def") },
 
+    { nxt_string("'abc abc abc'.match('abc')"),
+      nxt_string("abc") },
+
+    { nxt_string("'abc abc abc'.match(/abc/)"),
+      nxt_string("abc") },
+
+    { nxt_string("'abc abc abc'.match(/abc/g)"),
+      nxt_string("abc,abc,abc") },
+
     { nxt_string("'abc ABC aBc'.match(/abc/ig)"),
       nxt_string("abc,ABC,aBc") },
 



More information about the nginx-devel mailing list