[njs] Escaping lone closing square brackets in RegExp() constructor.

Dmitry Volyntsev xeioex at nginx.com
Tue Jun 4 09:45:46 UTC 2019


details:   https://hg.nginx.org/njs/rev/2054b8410a28
branches:  
changeset: 997:2054b8410a28
user:      Dmitry Volyntsev <xeioex at nginx.com>
date:      Thu May 30 20:05:14 2019 +0300
description:
Escaping lone closing square brackets in RegExp() constructor.

This correctly fixes #157.  Previously, in 88263426432d, the escaping
was done only for regexp literals, not for the RegExp() constructor.

diffstat:

 njs/njs_regexp.c         |  134 +++++++++++++++++++++++++++-------------------
 njs/test/njs_unit_test.c |   15 +++++
 2 files changed, 92 insertions(+), 57 deletions(-)

diffs (252 lines):

diff -r 1041e3241457 -r 2054b8410a28 njs/njs_regexp.c
--- a/njs/njs_regexp.c	Fri May 31 15:11:39 2019 +0300
+++ b/njs/njs_regexp.c	Thu May 30 20:05:14 2019 +0300
@@ -206,56 +206,93 @@ njs_regexp_create(njs_vm_t *vm, njs_valu
 }
 
 
-nxt_inline njs_ret_t
-njs_regexp_escape_bracket(njs_vm_t *vm, nxt_str_t *text, size_t count)
-{
-    size_t  length, diff;
-    u_char  *p, *dst, *start, *end;
+/*
+ * PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
+ * lone closing square brackets as invalid.  Whereas according
+ * to ES6: 11.8.5 it is a valid regexp expression.
+ *
+ * Escaping it here as a workaround.
+ */
 
-    length = text->length + count;
-
-    dst = nxt_mp_alloc(vm->mem_pool, length);
-    if (nxt_slow_path(dst == NULL)) {
-        njs_memory_error(vm);
-        return NJS_ERROR;
-    }
+nxt_inline njs_ret_t
+njs_regexp_escape(njs_vm_t *vm, nxt_str_t *text)
+{
+    size_t      brackets;
+    u_char      *p, *dst, *start, *end;
+    nxt_bool_t  in;
 
     start = text->start;
     end = text->start + text->length;
 
+    in = 0;
+    brackets = 0;
+
     for (p = start; p < end; p++) {
 
         switch (*p) {
         case '[':
-            while (++p < end && *p != ']') {
-                if (*p == '\\') {
-                    p++;
-                }
-            }
-
+            in = 1;
             break;
 
         case ']':
-            diff = p - start;
-            dst = nxt_cpymem(dst, start, diff);
-            dst = nxt_cpymem(dst, "\\]", 2);
+            if (!in) {
+                brackets++;
+            }
 
-            start = p + 1;
+            in = 0;
             break;
 
         case '\\':
             p++;
-            break;
         }
     }
 
-    diff = p - start;
-    memcpy(dst, start, diff);
+    if (!brackets) {
+        return NXT_OK;
+    }
+
+    text->length = text->length + brackets;
+
+    text->start = nxt_mp_alloc(vm->mem_pool, text->length);
+    if (nxt_slow_path(text->start == NULL)) {
+        njs_memory_error(vm);
+        return NXT_ERROR;
+    }
+
+    in = 0;
+    dst = text->start;
+
+    for (p = start; p < end; p++) {
+
+        switch (*p) {
+        case '[':
+            in = 1;
+            break;
 
-    text->start = dst - (length - diff);
-    text->length = length;
+        case ']':
+            if (!in) {
+                *dst++ = '\\';
+            }
+
+            in = 0;
+            break;
+
+        case '\\':
+            *dst++ = *p++;
 
-    return NJS_OK;
+            if (p == end) {
+                goto done;
+            }
+        }
+
+        *dst++ = *p;
+    }
+
+done:
+
+    text->length = dst - text->start;
+
+    return NXT_OK;
 }
 
 
@@ -263,14 +300,11 @@ njs_token_t
 njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, njs_value_t *value)
 {
     u_char                *p;
-    size_t                closing_brackets;
     nxt_str_t             text;
-    njs_ret_t             ret;
     njs_lexer_t           *lexer;
     njs_regexp_flags_t    flags;
     njs_regexp_pattern_t  *pattern;
 
-    closing_brackets = 0;
     lexer = parser->lexer;
 
     for (p = lexer->start; p < lexer->end; p++) {
@@ -298,10 +332,6 @@ njs_regexp_literal(njs_vm_t *vm, njs_par
 
             break;
 
-        case ']':
-            closing_brackets++;
-            break;
-
         case '\\':
             if (++p < lexer->end && (*p == '\n' || *p == '\r')) {
                 goto failed;
@@ -327,28 +357,9 @@ njs_regexp_literal(njs_vm_t *vm, njs_par
 
             lexer->start = p;
 
-            if (closing_brackets != 0) {
-                /*
-                 * PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
-                 * lone closing square brackets as invalid.  Whereas according
-                 * to ES6: 11.8.5 it is a valid regexp expression.
-                 *
-                 * Escaping it here as a workaround.
-                 */
-
-                ret = njs_regexp_escape_bracket(vm, &text, closing_brackets);
-                if (nxt_slow_path(ret != NXT_OK)) {
-                    return NJS_TOKEN_ILLEGAL;
-                }
-            }
-
             pattern = njs_regexp_pattern_create(vm, text.start, text.length,
                                                 flags);
 
-            if (closing_brackets != 0) {
-                nxt_mp_free(vm->mem_pool, text.start);
-            }
-
             if (nxt_slow_path(pattern == NULL)) {
                 return NJS_TOKEN_ILLEGAL;
             }
@@ -440,6 +451,7 @@ njs_regexp_pattern_create(njs_vm_t *vm, 
     int                   options, ret;
     u_char                *p, *end;
     size_t                size;
+    nxt_str_t             text;
     nxt_uint_t            n;
     nxt_regex_t           *regex;
     njs_regexp_group_t    *group;
@@ -450,8 +462,16 @@ njs_regexp_pattern_create(njs_vm_t *vm, 
     size += ((flags & NJS_REGEXP_IGNORE_CASE) != 0);
     size += ((flags & NJS_REGEXP_MULTILINE) != 0);
 
+    text.start = start;
+    text.length = length;
+
+    ret = njs_regexp_escape(vm, &text);
+    if (nxt_slow_path(ret != NXT_OK)) {
+        return NULL;
+    }
+
     pattern = nxt_mp_zalloc(vm->mem_pool, sizeof(njs_regexp_pattern_t) + 1
-                                          + length + size + 1);
+                                          + text.length + size + 1);
     if (nxt_slow_path(pattern == NULL)) {
         njs_memory_error(vm);
         return NULL;
@@ -463,8 +483,8 @@ njs_regexp_pattern_create(njs_vm_t *vm, 
     pattern->source = p;
 
     *p++ = '/';
-    p = memcpy(p, start, length);
-    p += length;
+    p = memcpy(p, text.start, text.length);
+    p += text.length;
     end = p;
     *p++ = '\0';
 
diff -r 1041e3241457 -r 2054b8410a28 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c	Fri May 31 15:11:39 2019 +0300
+++ b/njs/test/njs_unit_test.c	Thu May 30 20:05:14 2019 +0300
@@ -5540,9 +5540,21 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("/]/"),
       nxt_string("/\\]/") },
 
+    { nxt_string("RegExp(']')"),
+      nxt_string("/\\]/") },
+
+    { nxt_string("RegExp('[\\\\\\\\]]')"),
+      nxt_string("/[\\\\]\\]/") },
+
+    { nxt_string("/[\\\\]]/"),
+      nxt_string("/[\\\\]\\]/") },
+
     { nxt_string("/\\]/"),
       nxt_string("/\\]/") },
 
+    { nxt_string("RegExp('\\]')"),
+      nxt_string("/\\]/") },
+
     { nxt_string("/ab]cd/"),
       nxt_string("/ab\\]cd/") },
 
@@ -7441,6 +7453,9 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("new RegExp('[')"),
       nxt_string("SyntaxError: pcre_compile(\"[\") failed: missing terminating ] for character class") },
 
+    { nxt_string("new RegExp('\\\\')"),
+      nxt_string("SyntaxError: pcre_compile(\"\\\") failed: \\ at end of pattern") },
+
     { nxt_string("[0].map(RegExp().toString)"),
       nxt_string("TypeError: \"this\" argument is not a regexp") },
 


More information about the nginx-devel mailing list