[njs] Fixed processing of the * quantifier in String.prototype.replace().
Alexander Borisov
alexander.borisov at nginx.com
Thu May 16 15:59:39 UTC 2019
details: https://hg.nginx.org/njs/rev/9005991a53d6
branches:
changeset: 966:9005991a53d6
user: Alexander Borisov <alexander.borisov at nginx.com>
date: Wed May 08 17:05:05 2019 +0300
description:
Fixed processing of the * quantifier in String.prototype.replace().
This closes issue #73 on GitHub.
diffstat:
njs/njs_string.c | 134 +++++++++++++++++++++++++++++++++++++++++-----
njs/test/njs_unit_test.c | 33 +++++++++++
2 files changed, 151 insertions(+), 16 deletions(-)
diffs (243 lines):
diff -r e0fdef4eb478 -r 9005991a53d6 njs/njs_string.c
--- a/njs/njs_string.c Thu May 16 15:20:31 2019 +0300
+++ b/njs/njs_string.c Wed May 08 17:05:05 2019 +0300
@@ -3061,11 +3061,16 @@ njs_string_replace_regexp(njs_vm_t *vm,
njs_string_replace_t *r)
{
int *captures;
+ u_char *p, *start;
njs_ret_t ret;
+ const u_char *end;
njs_regexp_pattern_t *pattern;
- njs_string_replace_part_t *part;
+ njs_string_replace_part_t replace;
pattern = args[1].data.u.regexp->pattern;
+ end = r->part[0].start + r->part[0].size;
+
+ replace = r->part[1];
do {
ret = njs_regexp_match(vm, &pattern->regex[r->type],
@@ -3102,25 +3107,50 @@ njs_string_replace_regexp(njs_vm_t *vm,
r->part -= 2;
}
- r->part[2].start = r->part[0].start + captures[1];
- r->part[2].size = r->part[0].size - captures[1];
- njs_set_invalid(&r->part[2].value);
-
- if (r->function != NULL) {
- return njs_string_replace_regexp_function(vm, args, r,
- captures, ret);
+ if (captures[1] == 0) {
+
+ /* Empty match. */
+
+ start = r->part[0].start;
+
+ if (start < end) {
+ p = (u_char *) nxt_utf8_next(start, end);
+
+ r->part[1].start = start;
+ r->part[1].size = p - start;
+
+ r->part[2].start = p;
+ r->part[2].size = end - p;
+
+ } else {
+ r->part[1].size = 0;
+ r->part[2].size = 0;
+
+ /* To exit the loop. */
+ r->part[2].start = start + 1;
+ }
+
+ r->part[0] = replace;
+
+ } else {
+ r->part[2].start = r->part[0].start + captures[1];
+ r->part[2].size = r->part[0].size - captures[1];
+ njs_set_invalid(&r->part[2].value);
+
+ if (r->function != NULL) {
+ return njs_string_replace_regexp_function(vm, args, r,
+ captures, ret);
+ }
+
+ r->part[0].size = captures[0];
+
+ r->part[1] = replace;
}
- r->part[0].size = captures[0];
-
if (!pattern->global) {
return njs_string_replace_regexp_join(vm, r);
}
- /* A literal replacement is stored in the second part. */
- part = r->parts.start;
- r->part[1] = part[1];
-
r->part += 2;
}
@@ -3131,7 +3161,7 @@ njs_string_replace_regexp(njs_vm_t *vm,
return NXT_ERROR;
}
- } while (r->part[0].size > 0);
+ } while (r->part[0].start <= end);
if (r->part != r->parts.start) {
return njs_string_replace_regexp_join(vm, r);
@@ -3447,11 +3477,15 @@ static njs_ret_t
njs_string_replace_substitute(njs_vm_t *vm, njs_string_replace_t *r,
int *captures)
{
- uint32_t i, n, last;
+ uint32_t i, n, last, index;
+ const u_char *end;
njs_string_subst_t *s;
njs_string_replace_part_t *part, *subject;
+ index = 0;
+
last = r->substitutions->items;
+ end = r->part[0].start + r->part[0].size;
part = nxt_array_add_multiple(&r->parts, &njs_array_mem_proto, vm->mem_pool,
last + 1);
@@ -3462,6 +3496,22 @@ njs_string_replace_substitute(njs_vm_t *
r->part = &part[-1];
part[last].start = r->part[0].start + captures[1];
+
+ if (captures[1] == 0) {
+
+ /* Empty match. */
+
+ if (r->part[0].start < end) {
+ captures[1] = nxt_utf8_next(r->part[0].start, end)
+ - r->part[0].start;
+ part[last].start = r->part[0].start + captures[1];
+
+ } else {
+ /* To exit the loop. */
+ part[last].start = r->part[0].start + 1;
+ }
+ }
+
part[last].size = r->part[0].size - captures[1];
njs_set_invalid(&part[last].value);
@@ -3498,6 +3548,44 @@ njs_string_replace_substitute(njs_vm_t *
* "$&" is the same as "$0", the "$0" however is not supported.
*/
default:
+ if (captures[n] == captures[n + 1]) {
+
+ /* Empty match. */
+
+ if (captures[n - 1] == captures[n]) {
+
+ /*
+ * Consecutive empty matches as in
+ * 'ab'.replace(/(z*)(h*)/g, 'x')
+ */
+
+ part->size = 0;
+ break;
+ }
+
+ index = n;
+ continue;
+ }
+
+ if (index != 0) {
+
+ /*
+ * Inserting a single character after a series of
+ * (possibly several) empty matches.
+ */
+
+ if (part->start < end) {
+ part->start = r->part[0].start + captures[index];
+ part->size = nxt_utf8_next(part->start, end) - part->start;
+
+ } else {
+ part->size = 0;
+ }
+
+ index = 0;
+ break;
+ }
+
part->start = r->part[0].start + captures[n];
part->size = captures[n + 1] - captures[n];
break;
@@ -3507,6 +3595,20 @@ njs_string_replace_substitute(njs_vm_t *
part++;
}
+ if (index != 0) {
+ part->start = r->part[0].start + captures[index];
+
+ if (part->start < end) {
+ part->size = nxt_utf8_next(part->start, end) - part->start;
+
+ } else {
+ part->size = 0;
+ }
+
+ njs_set_invalid(&part->value);
+ part++;
+ }
+
r->part = part;
return NXT_OK;
diff -r e0fdef4eb478 -r 9005991a53d6 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c Thu May 16 15:20:31 2019 +0300
+++ b/njs/test/njs_unit_test.c Wed May 08 17:05:05 2019 +0300
@@ -5451,6 +5451,39 @@ static njs_unit_test_t njs_test[] =
{ nxt_string("('β' + 'α'.repeat(33)+'β').replace(/(α+)(β+)/, function(m, p1) { return p1[32]; })"),
nxt_string("βα") },
+ { nxt_string("'abc'.replace(/(h*)(z*)(g*)/g, '$1nn$2zz$3')"),
+ nxt_string("nnzzannzzbnnzzcnnzz") },
+
+ { nxt_string("'abc'.replace(/(h*)(z*)/g, '$1nn$2zz$3yy')"),
+ nxt_string("nnzz$3yyannzz$3yybnnzz$3yycnnzz$3yy") },
+
+ { nxt_string("'ъ'.replace(/(h*)/g, '$1ЮЙ')"),
+ nxt_string("ЮЙъЮЙ") },
+
+ { nxt_string("'ъg'.replace(/(h*)/g, '$1ЮЙ')"),
+ nxt_string("ЮЙъЮЙgЮЙ") },
+
+ { nxt_string("'ъg'.replace(/(ъ*)/g, '$1ЮЙ')"),
+ nxt_string("ъЮЙЮЙgЮЙ") },
+
+ { nxt_string("'ъg'.replace(/(h*)/g, 'fg$1ЮЙ')"),
+ nxt_string("fgЮЙъfgЮЙgfgЮЙ") },
+
+ { nxt_string("'юgёfя'.replace(/(gё)/g, 'n$1i')"),
+ nxt_string("юngёifя") },
+
+ { nxt_string("'aabbccaa'.replace(/a*/g, '')"),
+ nxt_string("bbcc") },
+
+ { nxt_string("'aabbccaab'.replace(/z*/g, '')"),
+ nxt_string("aabbccaab") },
+
+ { nxt_string("'αβγ'.replace(/z*/g, '|')"),
+ nxt_string("|α|β|γ|") },
+
+ { nxt_string("''.replace(/a*/g, '')"),
+ nxt_string("") },
+
{ nxt_string("'abc'.match(/a*/g)"),
nxt_string("a,,,") },
More information about the nginx-devel mailing list