[njs] Escaping lone closing square brackets in RegExp() constructor.
Dmitry Volyntsev
xeioex at nginx.com
Tue Jun 4 09:45:46 UTC 2019
details: https://hg.nginx.org/njs/rev/2054b8410a28
branches:
changeset: 997:2054b8410a28
user: Dmitry Volyntsev <xeioex at nginx.com>
date: Thu May 30 20:05:14 2019 +0300
description:
Escaping lone closing square brackets in RegExp() constructor.
This correctly fixes #157. Previously, in 88263426432d, the escaping was
done only for regexp literals.
diffstat:
njs/njs_regexp.c | 134 +++++++++++++++++++++++++++-------------------
njs/test/njs_unit_test.c | 15 +++++
2 files changed, 92 insertions(+), 57 deletions(-)
diffs (252 lines):
diff -r 1041e3241457 -r 2054b8410a28 njs/njs_regexp.c
--- a/njs/njs_regexp.c Fri May 31 15:11:39 2019 +0300
+++ b/njs/njs_regexp.c Thu May 30 20:05:14 2019 +0300
@@ -206,56 +206,93 @@ njs_regexp_create(njs_vm_t *vm, njs_valu
}
-nxt_inline njs_ret_t
-njs_regexp_escape_bracket(njs_vm_t *vm, nxt_str_t *text, size_t count)
-{
- size_t length, diff;
- u_char *p, *dst, *start, *end;
+/*
+ * PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
+ * lone closing square brackets as invalid. Whereas according
+ * to ES6: 11.8.5 it is a valid regexp expression.
+ *
+ * Escaping it here as a workaround.
+ */
- length = text->length + count;
-
- dst = nxt_mp_alloc(vm->mem_pool, length);
- if (nxt_slow_path(dst == NULL)) {
- njs_memory_error(vm);
- return NJS_ERROR;
- }
+nxt_inline njs_ret_t
+njs_regexp_escape(njs_vm_t *vm, nxt_str_t *text)
+{
+ size_t brackets;
+ u_char *p, *dst, *start, *end;
+ nxt_bool_t in;
start = text->start;
end = text->start + text->length;
+ in = 0;
+ brackets = 0;
+
for (p = start; p < end; p++) {
switch (*p) {
case '[':
- while (++p < end && *p != ']') {
- if (*p == '\\') {
- p++;
- }
- }
-
+ in = 1;
break;
case ']':
- diff = p - start;
- dst = nxt_cpymem(dst, start, diff);
- dst = nxt_cpymem(dst, "\\]", 2);
+ if (!in) {
+ brackets++;
+ }
- start = p + 1;
+ in = 0;
break;
case '\\':
p++;
- break;
}
}
- diff = p - start;
- memcpy(dst, start, diff);
+ if (!brackets) {
+ return NXT_OK;
+ }
+
+ text->length = text->length + brackets;
+
+ text->start = nxt_mp_alloc(vm->mem_pool, text->length);
+ if (nxt_slow_path(text->start == NULL)) {
+ njs_memory_error(vm);
+ return NXT_ERROR;
+ }
+
+ in = 0;
+ dst = text->start;
+
+ for (p = start; p < end; p++) {
+
+ switch (*p) {
+ case '[':
+ in = 1;
+ break;
- text->start = dst - (length - diff);
- text->length = length;
+ case ']':
+ if (!in) {
+ *dst++ = '\\';
+ }
+
+ in = 0;
+ break;
+
+ case '\\':
+ *dst++ = *p++;
- return NJS_OK;
+ if (p == end) {
+ goto done;
+ }
+ }
+
+ *dst++ = *p;
+ }
+
+done:
+
+ text->length = dst - text->start;
+
+ return NXT_OK;
}
@@ -263,14 +300,11 @@ njs_token_t
njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, njs_value_t *value)
{
u_char *p;
- size_t closing_brackets;
nxt_str_t text;
- njs_ret_t ret;
njs_lexer_t *lexer;
njs_regexp_flags_t flags;
njs_regexp_pattern_t *pattern;
- closing_brackets = 0;
lexer = parser->lexer;
for (p = lexer->start; p < lexer->end; p++) {
@@ -298,10 +332,6 @@ njs_regexp_literal(njs_vm_t *vm, njs_par
break;
- case ']':
- closing_brackets++;
- break;
-
case '\\':
if (++p < lexer->end && (*p == '\n' || *p == '\r')) {
goto failed;
@@ -327,28 +357,9 @@ njs_regexp_literal(njs_vm_t *vm, njs_par
lexer->start = p;
- if (closing_brackets != 0) {
- /*
- * PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
- * lone closing square brackets as invalid. Whereas according
- * to ES6: 11.8.5 it is a valid regexp expression.
- *
- * Escaping it here as a workaround.
- */
-
- ret = njs_regexp_escape_bracket(vm, &text, closing_brackets);
- if (nxt_slow_path(ret != NXT_OK)) {
- return NJS_TOKEN_ILLEGAL;
- }
- }
-
pattern = njs_regexp_pattern_create(vm, text.start, text.length,
flags);
- if (closing_brackets != 0) {
- nxt_mp_free(vm->mem_pool, text.start);
- }
-
if (nxt_slow_path(pattern == NULL)) {
return NJS_TOKEN_ILLEGAL;
}
@@ -440,6 +451,7 @@ njs_regexp_pattern_create(njs_vm_t *vm,
int options, ret;
u_char *p, *end;
size_t size;
+ nxt_str_t text;
nxt_uint_t n;
nxt_regex_t *regex;
njs_regexp_group_t *group;
@@ -450,8 +462,16 @@ njs_regexp_pattern_create(njs_vm_t *vm,
size += ((flags & NJS_REGEXP_IGNORE_CASE) != 0);
size += ((flags & NJS_REGEXP_MULTILINE) != 0);
+ text.start = start;
+ text.length = length;
+
+ ret = njs_regexp_escape(vm, &text);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ return NULL;
+ }
+
pattern = nxt_mp_zalloc(vm->mem_pool, sizeof(njs_regexp_pattern_t) + 1
- + length + size + 1);
+ + text.length + size + 1);
if (nxt_slow_path(pattern == NULL)) {
njs_memory_error(vm);
return NULL;
@@ -463,8 +483,8 @@ njs_regexp_pattern_create(njs_vm_t *vm,
pattern->source = p;
*p++ = '/';
- p = memcpy(p, start, length);
- p += length;
+ p = memcpy(p, text.start, text.length);
+ p += text.length;
end = p;
*p++ = '\0';
diff -r 1041e3241457 -r 2054b8410a28 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c Fri May 31 15:11:39 2019 +0300
+++ b/njs/test/njs_unit_test.c Thu May 30 20:05:14 2019 +0300
@@ -5540,9 +5540,21 @@ static njs_unit_test_t njs_test[] =
{ nxt_string("/]/"),
nxt_string("/\\]/") },
+ { nxt_string("RegExp(']')"),
+ nxt_string("/\\]/") },
+
+ { nxt_string("RegExp('[\\\\\\\\]]')"),
+ nxt_string("/[\\\\]\\]/") },
+
+ { nxt_string("/[\\\\]]/"),
+ nxt_string("/[\\\\]\\]/") },
+
{ nxt_string("/\\]/"),
nxt_string("/\\]/") },
+ { nxt_string("RegExp('\\]')"),
+ nxt_string("/\\]/") },
+
{ nxt_string("/ab]cd/"),
nxt_string("/ab\\]cd/") },
@@ -7441,6 +7453,9 @@ static njs_unit_test_t njs_test[] =
{ nxt_string("new RegExp('[')"),
nxt_string("SyntaxError: pcre_compile(\"[\") failed: missing terminating ] for character class") },
+ { nxt_string("new RegExp('\\\\')"),
+ nxt_string("SyntaxError: pcre_compile(\"\\\") failed: \\ at end of pattern") },
+
{ nxt_string("[0].map(RegExp().toString)"),
nxt_string("TypeError: \"this\" argument is not a regexp") },
More information about the nginx-devel
mailing list