[njs] Added escaping to lone closing square brackets in a regexp.

Alexander Borisov alexander.borisov at nginx.com
Fri May 17 18:57:12 UTC 2019


details:   https://hg.nginx.org/njs/rev/88263426432d
branches:  
changeset: 970:88263426432d
user:      Alexander Borisov <alexander.borisov at nginx.com>
date:      Fri May 17 17:01:10 2019 +0300
description:
Added escaping to lone closing square brackets in a regexp.

PCRE with the PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
lone closing square brackets as invalid, whereas according
to ES6 (11.8.5) such a regexp is valid.

As a workaround, those lone brackets are escaped.

This closes #157 issue on GitHub.

diffstat:

 njs/njs_regexp.c         |  80 ++++++++++++++++++++++++++++++++++++++++++++++++
 njs/test/njs_unit_test.c |  30 ++++++++++++++++++
 2 files changed, 110 insertions(+), 0 deletions(-)

diffs (155 lines):

diff -r e22397fd709a -r 88263426432d njs/njs_regexp.c
--- a/njs/njs_regexp.c	Fri May 17 21:16:31 2019 +0300
+++ b/njs/njs_regexp.c	Fri May 17 17:01:10 2019 +0300
@@ -206,15 +206,71 @@ njs_regexp_create(njs_vm_t *vm, njs_valu
 }
 
 
+/*
+ * Rewrites *text into a newly allocated pool buffer in which every ']'
+ * that is neither escaped nor part of a "[...]" class is prefixed with '\'.
+ * "count" must be the number of such brackets; the buffer is not freed here.
+ * Returns NJS_OK, or NJS_ERROR after njs_memory_error() if allocation fails.
+ */
+nxt_inline njs_ret_t
+njs_regexp_escape_bracket(njs_vm_t *vm, nxt_str_t *text, size_t count)
+{
+    size_t  length, diff;
+    u_char  *p, *dst, *start, *end;
+
+    length = text->length + count;
+    dst = nxt_mp_alloc(vm->mem_pool, length);
+    if (nxt_slow_path(dst == NULL)) {
+        njs_memory_error(vm);
+        return NJS_ERROR;
+    }
+
+    start = text->start;
+    end = text->start + text->length;
+
+    for (p = start; p < end; p++) {
+        switch (*p) {
+        case '[':
+            /* Skip the class body; a ']' closing it needs no escaping. */
+            while (++p < end && *p != ']') {
+                if (*p == '\\') {
+                    p++;
+                }
+            }
+            break;
+        case ']':
+            /* Flush the pending run, then emit the escaped bracket. */
+            diff = p - start;
+            dst = nxt_cpymem(dst, start, diff);
+            dst = nxt_cpymem(dst, "\\]", 2);
+            start = p + 1;
+            break;
+        case '\\':
+            p++;    /* Already escaped: skip the next character. */
+            break;
+        }
+    }
+
+    /* p may be end + 1 after a trailing '\' or an open class; use end. */
+    diff = end - start;
+    memcpy(dst, start, diff);
+    text->start = dst - (length - diff);
+    text->length = length;
+    return NJS_OK;
+}
+
+
 njs_token_t
 njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, njs_value_t *value)
 {
     u_char                *p;
+    size_t                closing_brackets;
     nxt_str_t             text;
+    njs_ret_t             ret;
     njs_lexer_t           *lexer;
     njs_regexp_flags_t    flags;
     njs_regexp_pattern_t  *pattern;
 
+    closing_brackets = 0;
     lexer = parser->lexer;
 
     for (p = lexer->start; p < lexer->end; p++) {
@@ -242,6 +298,10 @@ njs_regexp_literal(njs_vm_t *vm, njs_par
 
             break;
 
+        case ']':
+            closing_brackets++;
+            break;
+
         case '\\':
             if (++p < lexer->end && (*p == '\n' || *p == '\r')) {
                 goto failed;
@@ -267,8 +327,28 @@ njs_regexp_literal(njs_vm_t *vm, njs_par
 
             lexer->start = p;
 
+            if (closing_brackets != 0) {
+                /*
+                 * PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
+                 * lone closing square brackets as invalid.  Whereas according
+                 * to ES6: 11.8.5 it is a valid regexp expression.
+                 *
+                 * Escaping it here as a workaround.
+                 */
+
+                ret = njs_regexp_escape_bracket(vm, &text, closing_brackets);
+                if (nxt_slow_path(ret != NXT_OK)) {
+                    return NJS_TOKEN_ILLEGAL;
+                }
+            }
+
             pattern = njs_regexp_pattern_create(vm, text.start, text.length,
                                                 flags);
+
+            if (closing_brackets != 0) {
+                nxt_mp_free(vm->mem_pool, text.start);
+            }
+
             if (nxt_slow_path(pattern == NULL)) {
                 return NJS_TOKEN_ILLEGAL;
             }
diff -r e22397fd709a -r 88263426432d njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c	Fri May 17 21:16:31 2019 +0300
+++ b/njs/test/njs_unit_test.c	Fri May 17 17:01:10 2019 +0300
@@ -5496,6 +5496,36 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("'12345'.replace(3, () => ({toString: () => 'aaaa'}))"),
       nxt_string("12aaaa45") },
 
+    { nxt_string("/]/"),
+      nxt_string("/\\]/") },
+
+    { nxt_string("/\\]/"),
+      nxt_string("/\\]/") },
+
+    { nxt_string("/ab]cd/"),
+      nxt_string("/ab\\]cd/") },
+
+    { nxt_string("/ab]/"),
+      nxt_string("/ab\\]/") },
+
+    { nxt_string("/]cd/"),
+      nxt_string("/\\]cd/") },
+
+    { nxt_string("']'.match(/]/)"),
+      nxt_string("]") },
+
+    { nxt_string("'ab]cd'.match(/]/)"),
+      nxt_string("]") },
+
+    { nxt_string("'ab]'.match(/]/)"),
+      nxt_string("]") },
+
+    { nxt_string("']cd'.match(/]/)"),
+      nxt_string("]") },
+
+    { nxt_string("'ab]cd'.match(/\\]/)"),
+      nxt_string("]") },
+
     { nxt_string("'abc'.match(/a*/g)"),
       nxt_string("a,,,") },
 


More information about the nginx-devel mailing list