[njs] Added handling of the asterisk quantifier in String.prototype.match().

Alexander Borisov alexander.borisov at nginx.com
Wed Apr 24 16:04:56 UTC 2019


details:   https://hg.nginx.org/njs/rev/24d109826d6d
branches:  
changeset: 923:24d109826d6d
user:      Alexander Borisov <alexander.borisov at nginx.com>
date:      Wed Apr 24 19:04:22 2019 +0300
description:
Added handling of the asterisk quantifier in String.prototype.match().

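Previously, a global regexp that can match the empty string (for example,
one using the asterisk quantifier) was not handled and led to an infinite
loop in String.prototype.match(), because the search position did not
advance after an empty match.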

diffstat:

 njs/njs_string.c         |  34 ++++++++++++++++++++++------------
 njs/test/njs_unit_test.c |  27 +++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 12 deletions(-)

diffs (108 lines):

diff -r 1c1732696e98 -r 24d109826d6d njs/njs_string.c
--- a/njs/njs_string.c	Tue Apr 23 18:10:37 2019 +0300
+++ b/njs/njs_string.c	Wed Apr 24 19:04:22 2019 +0300
@@ -2634,11 +2634,11 @@ njs_string_match_multiple(njs_vm_t *vm, 
     njs_regexp_pattern_t *pattern)
 {
     int                *captures;
-    u_char             *start;
     int32_t            size, length;
     njs_ret_t          ret;
     njs_utf8_t         utf8;
     njs_array_t        *array;
+    const u_char       *p, *start, *end;
     njs_regexp_utf8_t  type;
     njs_string_prop_t  string;
 
@@ -2661,10 +2661,12 @@ njs_string_match_multiple(njs_vm_t *vm, 
 
     if (nxt_regex_is_valid(&pattern->regex[type])) {
         array = NULL;
+        p = string.start;
+        end = p + string.size;
 
         do {
-            ret = njs_regexp_match(vm, &pattern->regex[type], string.start,
-                                   string.size, vm->single_match_data);
+            ret = njs_regexp_match(vm, &pattern->regex[type], p, string.size,
+                                   vm->single_match_data);
             if (ret >= 0) {
                 if (array != NULL) {
                     ret = njs_array_expand(vm, array, 0, 1);
@@ -2684,14 +2686,22 @@ njs_string_match_multiple(njs_vm_t *vm, 
                 }
 
                 captures = nxt_regex_captures(vm->single_match_data);
-                start = &string.start[captures[0]];
-
-                string.start += captures[1];
-                string.size -= captures[1];
-
-                size = captures[1] - captures[0];
-
-                length = njs_string_calc_length(utf8, start, size);
+                start = p + captures[0];
+
+                if (captures[1] == 0) {
+                    p = nxt_utf8_next(start, end);
+                    string.size = end - p;
+
+                    size = 0;
+                    length = 0;
+
+                } else {
+                    p += captures[1];
+                    string.size -= captures[1];
+
+                    size = captures[1] - captures[0];
+                    length = njs_string_calc_length(utf8, start, size);
+                }
 
                 ret = njs_string_new(vm, &array->start[array->length],
                                      start, size, length);
@@ -2708,7 +2718,7 @@ njs_string_match_multiple(njs_vm_t *vm, 
                 return NXT_ERROR;
             }
 
-        } while (string.size > 0);
+        } while (p <= end);
     }
 
     return NXT_OK;
diff -r 1c1732696e98 -r 24d109826d6d njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c	Tue Apr 23 18:10:37 2019 +0300
+++ b/njs/test/njs_unit_test.c	Wed Apr 24 19:04:22 2019 +0300
@@ -5375,6 +5375,33 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("('β' + 'α'.repeat(33)+'β').replace(/(α+)(β+)/, function(m, p1) { return p1[32]; })"),
       nxt_string("βα") },
 
+    { nxt_string("'abc'.match(/a*/g)"),
+      nxt_string("a,,,") },
+
+    { nxt_string("'abc'.match(/z*/g)"),
+      nxt_string(",,,") },
+
+    { nxt_string("'abc'.match(/.?/g)"),
+      nxt_string("a,b,c,") },
+
+    { nxt_string("''.match(/a*/g)"),
+      nxt_string("") },
+
+    { nxt_string("''.match(/.?/g)"),
+      nxt_string("") },
+
+    { nxt_string("'абв'.match(/я?/g)"),
+      nxt_string(",,,") },
+
+    { nxt_string("'αβγ'.match(/z*/g)"),
+      nxt_string(",,,") },
+
+    { nxt_string("'囲碁織'.match(/z*/g)"),
+      nxt_string(",,,") },
+
+    { nxt_string("'𝟘𝟙𝟚𝟛'.match(/z*/g)"),
+      nxt_string(",,,,") },
+
     { nxt_string("'abcdefgh'.match()"),
       nxt_string("") },
 


More information about the nginx-devel mailing list