[njs] Fixed handling of unicode only regexp expressions.

Dmitry Volyntsev xeioex at nginx.com
Wed Apr 17 15:50:40 UTC 2019


details:   https://hg.nginx.org/njs/rev/39790a9d9b58
branches:  
changeset: 899:39790a9d9b58
user:      Dmitry Volyntsev <xeioex at nginx.com>
date:      Wed Apr 17 18:43:13 2019 +0300
description:
Fixed handling of unicode only regexp expressions.

This fixes issue #125 on GitHub.

diffstat:

 njs/njs_regexp.c         |   31 +++++++--
 njs/test/njs_unit_test.c |  144 +++++++++++++++++++++++++++++++++++++---------
 nxt/nxt_pcre.c           |    2 +
 3 files changed, 139 insertions(+), 38 deletions(-)

diffs (259 lines):

diff -r a88bf03264b4 -r 39790a9d9b58 njs/njs_regexp.c
--- a/njs/njs_regexp.c	Tue Apr 16 18:34:57 2019 +0300
+++ b/njs/njs_regexp.c	Wed Apr 17 18:43:13 2019 +0300
@@ -315,30 +315,43 @@ njs_regexp_pattern_create(njs_vm_t *vm, 
 
     ret = njs_regexp_pattern_compile(vm, &pattern->regex[0],
                                      &pattern->source[1], options);
-    if (nxt_slow_path(ret < 0)) {
-        return NULL;
+
+    if (nxt_fast_path(ret >= 0)) {
+        pattern->ncaptures = ret;
+
+    } else if (ret < 0 && ret != NXT_DECLINED) {
+        goto fail;
     }
 
-    pattern->ncaptures = ret;
-
     ret = njs_regexp_pattern_compile(vm, &pattern->regex[1],
                                      &pattern->source[1], options | PCRE_UTF8);
     if (nxt_fast_path(ret >= 0)) {
 
-        if (nxt_slow_path((u_int) ret != pattern->ncaptures)) {
+        if (nxt_slow_path(nxt_regex_is_valid(&pattern->regex[0])
+                          && (u_int) ret != pattern->ncaptures))
+        {
             njs_internal_error(vm, "regexp pattern compile failed");
-            nxt_mp_free(vm->mem_pool, pattern);
-            return NULL;
+            goto fail;
         }
 
     } else if (ret != NXT_DECLINED) {
-        nxt_mp_free(vm->mem_pool, pattern);
-        return NULL;
+        goto fail;
+    }
+
+    if (!nxt_regex_is_valid(&pattern->regex[0])
+        && !nxt_regex_is_valid(&pattern->regex[1]))
+    {
+        goto fail;
     }
 
     *end = '/';
 
     return pattern;
+
+fail:
+
+    nxt_mp_free(vm->mem_pool, pattern);
+    return NULL;
 }
 
 
diff -r a88bf03264b4 -r 39790a9d9b58 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c	Tue Apr 16 18:34:57 2019 +0300
+++ b/njs/test/njs_unit_test.c	Wed Apr 17 18:43:13 2019 +0300
@@ -7,6 +7,7 @@
 #include <njs_core.h>
 #include <nxt_lvlhsh.h>
 #include <nxt_djb_hash.h>
+#include <nxt_pcre.h>
 #include <string.h>
 #include <stdlib.h>
 #include <sys/resource.h>
@@ -4451,6 +4452,12 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("'α'.toUTF8()[0]"),
       nxt_string("\xCE") },
 
+    { nxt_string("var r = /^\\x80$/; r.source + r.source.length"),
+      nxt_string("^\\x80$6") },
+
+    { nxt_string("var r = /^\\\\x80$/; r.source + r.source.length"),
+      nxt_string("^\\\\x80$7") },
+
     { nxt_string("/^\\x80$/.test('\\x80'.toBytes())"),
       nxt_string("true") },
 
@@ -11957,6 +11964,25 @@ static njs_unit_test_t  njs_tz_test[] =
 };
 
 
+static njs_unit_test_t  njs_regexp_test[] =
+{
+    { nxt_string("/[\\\\u02E0-\\\\u02E4]/"),
+      nxt_string("/[\\\\u02E0-\\\\u02E4]/") },
+
+    { nxt_string("/[\\u02E0-\\u02E4]/"),
+      nxt_string("/[\\u02E0-\\u02E4]/") },
+
+    { nxt_string("RegExp('[\\\\u02E0-\\\\u02E4]')"),
+      nxt_string("/[\\u02E0-\\u02E4]/") },
+
+    { nxt_string("/[\\u0430-\\u044f]+/.test('тест')"),
+      nxt_string("true") },
+
+    { nxt_string("RegExp('[\\\\u0430-\\\\u044f]+').test('тест')"),
+      nxt_string("true") },
+};
+
+
 typedef struct {
     nxt_lvlhsh_t          hash;
     const njs_extern_t    *proto;
@@ -12715,6 +12741,85 @@ done:
 
 
 static nxt_int_t
+njs_timezone_optional_test(nxt_bool_t disassemble, nxt_bool_t verbose)
+{
+    size_t      size;
+    u_char      buf[16];
+    time_t      clock;
+    struct tm   tm;
+    nxt_int_t   ret;
+
+    /*
+     * Chatham Islands NZ-CHAT time zone.
+     * Standard time: UTC+12:45, Daylight Saving time: UTC+13:45.
+     */
+    (void) putenv((char *) "TZ=Pacific/Chatham");
+    tzset();
+
+    clock = 0;
+    localtime_r(&clock, &tm);
+
+    size = strftime((char *) buf, sizeof(buf), "%z", &tm);
+
+    if (memcmp(buf, "+1245", size) == 0) {
+        ret = njs_unit_test(njs_tz_test, nxt_nitems(njs_tz_test), disassemble,
+                            verbose);
+        if (ret != NXT_OK) {
+            return ret;
+        }
+
+        nxt_printf("njs timezone tests passed\n");
+
+    } else {
+        nxt_printf("njs timezone tests skipped, timezone is unavailable\n");
+    }
+
+    return NXT_OK;
+}
+
+static nxt_int_t
+njs_regexp_optional_test(nxt_bool_t disassemble, nxt_bool_t verbose)
+{
+    int         erroff;
+    pcre        *re1, *re2;
+    njs_ret_t   ret;
+    const char  *errstr;
+
+    /*
+     * pcre-8.21 crashes when it compiles unicode escape codes inside
+     * square brackets when PCRE_UTF8 option is provided.
+     * Catching it in runtime by compiling it without PCRE_UTF8. Normally it
+     * should return NULL and "character value in \u.... sequence is too large"
+     * error string.
+     */
+    re1 = pcre_compile("/[\\u0410]/", PCRE_JAVASCRIPT_COMPAT, &errstr, &erroff,
+                      NULL);
+
+    /*
+     * pcre-7.8 fails to compile unicode escape codes inside square brackets
+     * even when PCRE_UTF8 option is provided.
+     */
+    re2 = pcre_compile("/[\\u0410]/", PCRE_JAVASCRIPT_COMPAT | PCRE_UTF8,
+                       &errstr, &erroff, NULL);
+
+    if (re1 == NULL && re2 != NULL) {
+        ret = njs_unit_test(njs_regexp_test, nxt_nitems(njs_regexp_test),
+                            disassemble, verbose);
+        if (ret != NXT_OK) {
+            return ret;
+        }
+
+        nxt_printf("njs unicode regexp tests passed\n");
+
+    } else {
+        nxt_printf("njs unicode regexp tests skipped, libpcre fails\n");
+    }
+
+    return NXT_OK;
+}
+
+
+static nxt_int_t
 njs_vm_json_test(nxt_bool_t disassemble, nxt_bool_t verbose)
 {
     njs_vm_t           *vm;
@@ -13025,10 +13130,6 @@ done:
 int nxt_cdecl
 main(int argc, char **argv)
 {
-    size_t      size;
-    u_char      buf[16];
-    time_t      clock;
-    struct tm   tm;
     nxt_int_t   ret;
     nxt_bool_t  disassemble, verbose;
 
@@ -13059,33 +13160,18 @@ main(int argc, char **argv)
         return ret;
     }
 
+    ret = njs_timezone_optional_test(disassemble, verbose);
+    if (ret != NXT_OK) {
+        return ret;
+    }
+
+    ret = njs_regexp_optional_test(disassemble, verbose);
+    if (ret != NXT_OK) {
+        return ret;
+    }
+
     nxt_printf("njs unit tests passed\n");
 
-    /*
-     * Chatham Islands NZ-CHAT time zone.
-     * Standard time: UTC+12:45, Daylight Saving time: UTC+13:45.
-     */
-    (void) putenv((char *) "TZ=Pacific/Chatham");
-    tzset();
-
-    clock = 0;
-    localtime_r(&clock, &tm);
-
-    size = strftime((char *) buf, sizeof(buf), "%z", &tm);
-
-    if (memcmp(buf, "+1245", size) == 0) {
-        ret = njs_unit_test(njs_tz_test, nxt_nitems(njs_tz_test), disassemble,
-                            verbose);
-        if (ret != NXT_OK) {
-            return ret;
-        }
-
-        nxt_printf("njs timezone tests passed\n");
-
-    } else {
-        nxt_printf("njs timezone tests skipped, timezone is unavailable\n");
-    }
-
     ret = njs_vm_json_test(disassemble, verbose);
     if (ret != NXT_OK) {
         return ret;
diff -r a88bf03264b4 -r 39790a9d9b58 nxt/nxt_pcre.c
--- a/nxt/nxt_pcre.c	Tue Apr 16 18:34:57 2019 +0300
+++ b/nxt/nxt_pcre.c	Wed Apr 17 18:43:13 2019 +0300
@@ -92,6 +92,8 @@ nxt_regex_compile(nxt_regex_t *regex, u_
                       "pcre_compile(\"%s\") failed: %s", pattern, errstr);
         }
 
+        ret = NXT_DECLINED;
+
         goto done;
     }
 


More information about the nginx-devel mailing list