[njs] Fixed processing of the * quantifier in String.prototype.replace().

Alexander Borisov alexander.borisov at nginx.com
Thu May 16 15:59:39 UTC 2019


details:   https://hg.nginx.org/njs/rev/9005991a53d6
branches:  
changeset: 966:9005991a53d6
user:      Alexander Borisov <alexander.borisov at nginx.com>
date:      Wed May 08 17:05:05 2019 +0300
description:
Fixed processing of the * quantifier in String.prototype.replace().

This closes issue #73 on GitHub.

diffstat:

 njs/njs_string.c         |  134 +++++++++++++++++++++++++++++++++++++++++-----
 njs/test/njs_unit_test.c |   33 +++++++++++
 2 files changed, 151 insertions(+), 16 deletions(-)

diffs (243 lines):

diff -r e0fdef4eb478 -r 9005991a53d6 njs/njs_string.c
--- a/njs/njs_string.c	Thu May 16 15:20:31 2019 +0300
+++ b/njs/njs_string.c	Wed May 08 17:05:05 2019 +0300
@@ -3061,11 +3061,16 @@ njs_string_replace_regexp(njs_vm_t *vm, 
     njs_string_replace_t *r)
 {
     int                        *captures;
+    u_char                     *p, *start;
     njs_ret_t                  ret;
+    const u_char               *end;
     njs_regexp_pattern_t       *pattern;
-    njs_string_replace_part_t  *part;
+    njs_string_replace_part_t  replace;
 
     pattern = args[1].data.u.regexp->pattern;
+    end = r->part[0].start + r->part[0].size;
+
+    replace = r->part[1];
 
     do {
         ret = njs_regexp_match(vm, &pattern->regex[r->type],
@@ -3102,25 +3107,50 @@ njs_string_replace_regexp(njs_vm_t *vm, 
                     r->part -= 2;
                 }
 
-                r->part[2].start = r->part[0].start + captures[1];
-                r->part[2].size = r->part[0].size - captures[1];
-                njs_set_invalid(&r->part[2].value);
-
-                if (r->function != NULL) {
-                    return njs_string_replace_regexp_function(vm, args, r,
-                                                              captures, ret);
+                if (captures[1] == 0) {
+
+                    /* Empty match. */
+
+                    start = r->part[0].start;
+
+                    if (start < end) {
+                        p = (u_char *) nxt_utf8_next(start, end);
+
+                        r->part[1].start = start;
+                        r->part[1].size = p - start;
+
+                        r->part[2].start = p;
+                        r->part[2].size = end - p;
+
+                    } else {
+                        r->part[1].size = 0;
+                        r->part[2].size = 0;
+
+                        /* To exit the loop. */
+                        r->part[2].start = start + 1;
+                    }
+
+                    r->part[0] = replace;
+
+                } else {
+                    r->part[2].start = r->part[0].start + captures[1];
+                    r->part[2].size = r->part[0].size - captures[1];
+                    njs_set_invalid(&r->part[2].value);
+
+                    if (r->function != NULL) {
+                        return njs_string_replace_regexp_function(vm, args, r,
+                                                                 captures, ret);
+                    }
+
+                    r->part[0].size = captures[0];
+
+                    r->part[1] = replace;
                 }
 
-                r->part[0].size = captures[0];
-
                 if (!pattern->global) {
                     return njs_string_replace_regexp_join(vm, r);
                 }
 
-                /* A literal replacement is stored in the second part. */
-                part = r->parts.start;
-                r->part[1] = part[1];
-
                 r->part += 2;
             }
 
@@ -3131,7 +3161,7 @@ njs_string_replace_regexp(njs_vm_t *vm, 
             return NXT_ERROR;
         }
 
-    } while (r->part[0].size > 0);
+    } while (r->part[0].start <= end);
 
     if (r->part != r->parts.start) {
         return njs_string_replace_regexp_join(vm, r);
@@ -3447,11 +3477,15 @@ static njs_ret_t
 njs_string_replace_substitute(njs_vm_t *vm, njs_string_replace_t *r,
     int *captures)
 {
-    uint32_t                   i, n, last;
+    uint32_t                   i, n, last, index;
+    const u_char               *end;
     njs_string_subst_t         *s;
     njs_string_replace_part_t  *part, *subject;
 
+    index = 0;
+
     last = r->substitutions->items;
+    end = r->part[0].start + r->part[0].size;
 
     part = nxt_array_add_multiple(&r->parts, &njs_array_mem_proto, vm->mem_pool,
                                   last + 1);
@@ -3462,6 +3496,22 @@ njs_string_replace_substitute(njs_vm_t *
     r->part = &part[-1];
 
     part[last].start = r->part[0].start + captures[1];
+
+    if (captures[1] == 0) {
+
+        /* Empty match. */
+
+        if (r->part[0].start < end) {
+            captures[1] = nxt_utf8_next(r->part[0].start, end)
+                          - r->part[0].start;
+            part[last].start = r->part[0].start + captures[1];
+
+        } else {
+            /* To exit the loop. */
+            part[last].start = r->part[0].start + 1;
+        }
+    }
+
     part[last].size = r->part[0].size - captures[1];
     njs_set_invalid(&part[last].value);
 
@@ -3498,6 +3548,44 @@ njs_string_replace_substitute(njs_vm_t *
          * "$&" is the same as "$0", the "$0" however is not supported.
          */
         default:
+            if (captures[n] == captures[n + 1]) {
+
+                /* Empty match. */
+
+                if (captures[n - 1] == captures[n]) {
+
+                    /*
+                     * Consecutive empty matches as in
+                     * 'ab'.replace(/(z*)(h*)/g, 'x')
+                     */
+
+                    part->size = 0;
+                    break;
+                }
+
+                index = n;
+                continue;
+            }
+
+            if (index != 0) {
+
+                /*
+                 * Inserting a single character after a series of
+                 * (possibly several) empty matches.
+                 */
+
+                if (part->start < end) {
+                    part->start = r->part[0].start + captures[index];
+                    part->size = nxt_utf8_next(part->start, end) - part->start;
+
+                } else {
+                    part->size = 0;
+                }
+
+                index = 0;
+                break;
+            }
+
             part->start = r->part[0].start + captures[n];
             part->size = captures[n + 1] - captures[n];
             break;
@@ -3507,6 +3595,20 @@ njs_string_replace_substitute(njs_vm_t *
         part++;
     }
 
+    if (index != 0) {
+        part->start = r->part[0].start + captures[index];
+
+        if (part->start < end) {
+            part->size = nxt_utf8_next(part->start, end) - part->start;
+
+        } else {
+            part->size = 0;
+        }
+
+        njs_set_invalid(&part->value);
+        part++;
+    }
+
     r->part = part;
 
     return NXT_OK;
diff -r e0fdef4eb478 -r 9005991a53d6 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c	Thu May 16 15:20:31 2019 +0300
+++ b/njs/test/njs_unit_test.c	Wed May 08 17:05:05 2019 +0300
@@ -5451,6 +5451,39 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("('β' + 'α'.repeat(33)+'β').replace(/(α+)(β+)/, function(m, p1) { return p1[32]; })"),
       nxt_string("βα") },
 
+    { nxt_string("'abc'.replace(/(h*)(z*)(g*)/g, '$1nn$2zz$3')"),
+      nxt_string("nnzzannzzbnnzzcnnzz") },
+
+    { nxt_string("'abc'.replace(/(h*)(z*)/g, '$1nn$2zz$3yy')"),
+      nxt_string("nnzz$3yyannzz$3yybnnzz$3yycnnzz$3yy") },
+
+    { nxt_string("'ъ'.replace(/(h*)/g, '$1ЮЙ')"),
+      nxt_string("ЮЙъЮЙ") },
+
+    { nxt_string("'ъg'.replace(/(h*)/g, '$1ЮЙ')"),
+      nxt_string("ЮЙъЮЙgЮЙ") },
+
+    { nxt_string("'ъg'.replace(/(ъ*)/g, '$1ЮЙ')"),
+      nxt_string("ъЮЙЮЙgЮЙ") },
+
+    { nxt_string("'ъg'.replace(/(h*)/g, 'fg$1ЮЙ')"),
+      nxt_string("fgЮЙъfgЮЙgfgЮЙ") },
+
+    { nxt_string("'юgёfя'.replace(/(gё)/g, 'n$1i')"),
+      nxt_string("юngёifя") },
+
+    { nxt_string("'aabbccaa'.replace(/a*/g, '')"),
+      nxt_string("bbcc") },
+
+    { nxt_string("'aabbccaab'.replace(/z*/g, '')"),
+      nxt_string("aabbccaab") },
+
+    { nxt_string("'αβγ'.replace(/z*/g, '|')"),
+      nxt_string("|α|β|γ|") },
+
+    { nxt_string("''.replace(/a*/g, '')"),
+      nxt_string("") },
+
     { nxt_string("'abc'.match(/a*/g)"),
       nxt_string("a,,,") },
 


More information about the nginx-devel mailing list