[njs] Fixed regexp literals parsing.

Alexander Borisov alexander.borisov at nginx.com
Wed May 15 09:52:55 UTC 2019


details:   https://hg.nginx.org/njs/rev/56c75545da25
branches:  
changeset: 964:56c75545da25
user:      Alexander Borisov <alexander.borisov at nginx.com>
date:      Wed May 15 12:51:31 2019 +0300
description:
Fixed regexp literals parsing.

The following problems were observed:
1. Escaping of symbols: /\\\\/
2. Solidus symbol ('/') inside square brackets: /[/]/

This closes issue #149 on GitHub.

diffstat:

 njs/njs_regexp.c         |  36 +++++++++++++++++++++++++++++++-----
 njs/test/njs_unit_test.c |  31 +++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 5 deletions(-)

diffs (104 lines):

diff -r 895f4887702d -r 56c75545da25 njs/njs_regexp.c
--- a/njs/njs_regexp.c	Tue May 14 19:13:53 2019 +0300
+++ b/njs/njs_regexp.c	Wed May 15 12:51:31 2019 +0300
@@ -209,7 +209,7 @@ njs_regexp_create(njs_vm_t *vm, njs_valu
 njs_token_t
 njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, njs_value_t *value)
 {
-    u_char                *p, c;
+    u_char                *p;
     nxt_str_t             text;
     njs_lexer_t           *lexer;
     njs_regexp_flags_t    flags;
@@ -219,13 +219,37 @@ njs_regexp_literal(njs_vm_t *vm, njs_par
 
     for (p = lexer->start; p < lexer->end; p++) {
 
-        c = *p;
+        switch (*p) {
+        case '\n':
+        case '\r':
+            goto failed;
 
-        if (c == '\n' || c == '\r') {
+        case '[':
+            while (++p < lexer->end && *p != ']') {
+                switch (*p) {
+                case '\n':
+                case '\r':
+                    goto failed;
+
+                case '\\':
+                    if (++p < lexer->end && (*p == '\n' || *p == '\r')) {
+                        goto failed;
+                    }
+
+                    break;
+                }
+            }
+
             break;
-        }
 
-        if (c == '/' && !(p > lexer->start && p[-1] == '\\')) {
+        case '\\':
+            if (++p < lexer->end && (*p == '\n' || *p == '\r')) {
+                goto failed;
+            }
+
+            break;
+
+        case '/':
             text.start = lexer->start;
             text.length = p - text.start;
             p++;
@@ -255,6 +279,8 @@ njs_regexp_literal(njs_vm_t *vm, njs_par
         }
     }
 
+failed:
+
     njs_parser_syntax_error(vm, parser, "Unterminated RegExp \"%*s\"",
                             p - (lexer->start - 1), lexer->start - 1);
 
diff -r 895f4887702d -r 56c75545da25 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c	Tue May 14 19:13:53 2019 +0300
+++ b/njs/test/njs_unit_test.c	Wed May 15 12:51:31 2019 +0300
@@ -7073,6 +7073,37 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("/a\\q/"),
       nxt_string("/a\\q/") },
 
+    { nxt_string("/\\\\/"),
+      nxt_string("/\\\\/") },
+
+    { nxt_string("/\\\\\\/"),
+      nxt_string("SyntaxError: Unterminated RegExp \"/\\\\\\/\" in 1") },
+
+    { nxt_string("/\\\\\\\\/"),
+      nxt_string("/\\\\\\\\/") },
+
+    { nxt_string("/\\\\\\//"),
+      nxt_string("/\\\\\\//") },
+
+    { nxt_string("/[A-Z/]/"),
+      nxt_string("/[A-Z/]/") },
+
+    { nxt_string("/[A-Z\n]/"),
+      nxt_string("SyntaxError: Unterminated RegExp \"/[A-Z\" in 1") },
+
+    { nxt_string("/[A-Z\\\n]/"),
+      nxt_string("SyntaxError: Unterminated RegExp \"/[A-Z\\\" in 1") },
+
+    { nxt_string("/\\\n/"),
+      nxt_string("SyntaxError: Unterminated RegExp \"/\\\" in 1") },
+
+    { nxt_string("/^[A-Za-z0-9+/]{4}$/.test('////')"),
+      nxt_string("true") },
+
+    { nxt_string("'[]!\"#$%&\\'()*+,.\\/:;<=>?@\\^_`{|}-'.split('')"
+                 ".every(ch=>/[\\]\\[!\"#$%&'()*+,.\\/:;<=>?@\\^_`{|}-]/.test(ch))"),
+      nxt_string("true") },
+
     { nxt_string("/a\\q/.test('a\\q')"),
       nxt_string("true") },
 


More information about the nginx-devel mailing list