[njs] RegExp: encapsulating PCRE API.

Dmitry Volyntsev xeioex at nginx.com
Thu Nov 11 14:31:00 UTC 2021


details:   https://hg.nginx.org/njs/rev/67ee2e4907a8
branches:  
changeset: 1743:67ee2e4907a8
user:      Dmitry Volyntsev <xeioex at nginx.com>
date:      Thu Nov 11 14:26:30 2021 +0000
description:
RegExp: encapsulating PCRE API.

diffstat:

 src/njs_main.h           |    1 -
 src/njs_parser.c         |    2 +-
 src/njs_pcre.c           |  214 +++++++++++++++++++++++++++++++------
 src/njs_pcre.h           |   40 -------
 src/njs_regex.h          |   65 +++++++++--
 src/njs_regexp.c         |  262 +++++++++++++---------------------------------
 src/njs_regexp.h         |   16 +--
 src/njs_string.c         |   31 ++--
 src/njs_vm.h             |    3 +-
 src/test/njs_unit_test.c |    2 +
 10 files changed, 326 insertions(+), 310 deletions(-)

diffs (truncated from 1159 to 1000 lines):

diff -r dedadba0ee87 -r 67ee2e4907a8 src/njs_main.h
--- a/src/njs_main.h	Wed Nov 10 14:50:16 2021 +0000
+++ b/src/njs_main.h	Thu Nov 11 14:26:30 2021 +0000
@@ -38,7 +38,6 @@
 #include <njs_sprintf.h>
 #include <njs_assert.h>
 
-#include <njs_pcre.h>
 #include <njs_regex.h>
 
 #include <njs_md5.h>
diff -r dedadba0ee87 -r 67ee2e4907a8 src/njs_parser.c
--- a/src/njs_parser.c	Wed Nov 10 14:50:16 2021 +0000
+++ b/src/njs_parser.c	Thu Nov 11 14:26:30 2021 +0000
@@ -1198,7 +1198,7 @@ njs_parser_regexp_literal(njs_parser_t *
     njs_int_t             ret;
     njs_lexer_t           *lexer;
     njs_value_t           *value, retval;
-    njs_regexp_flags_t    flags;
+    njs_regex_flags_t     flags;
     njs_regexp_pattern_t  *pattern;
 
     static const njs_value_t  string_message = njs_string("message");
diff -r dedadba0ee87 -r 67ee2e4907a8 src/njs_pcre.c
--- a/src/njs_pcre.c	Wed Nov 10 14:50:16 2021 +0000
+++ b/src/njs_pcre.c	Thu Nov 11 14:26:30 2021 +0000
@@ -7,21 +7,23 @@
 
 #include <njs_main.h>
 
+#include <pcre.h>
+
 
 static void *njs_pcre_malloc(size_t size);
 static void njs_pcre_free(void *p);
 
 
-static njs_regex_context_t  *regex_context;
+static njs_regex_generic_ctx_t  *regex_context;
 
 
-njs_regex_context_t *
-njs_regex_context_create(njs_pcre_malloc_t private_malloc,
+njs_regex_generic_ctx_t *
+njs_regex_generic_ctx_create(njs_pcre_malloc_t private_malloc,
     njs_pcre_free_t private_free, void *memory_data)
 {
-    njs_regex_context_t  *ctx;
+    njs_regex_generic_ctx_t  *ctx;
 
-    ctx = private_malloc(sizeof(njs_regex_context_t), memory_data);
+    ctx = private_malloc(sizeof(njs_regex_generic_ctx_t), memory_data);
 
     if (njs_fast_path(ctx != NULL)) {
         ctx->private_malloc = private_malloc;
@@ -33,15 +35,138 @@ njs_regex_context_create(njs_pcre_malloc
 }
 
 
+njs_regex_compile_ctx_t *
+njs_regex_compile_ctx_create(njs_regex_generic_ctx_t *ctx)
+{
+    return ctx;
+}
+
+
+/*
+ * 1) PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
+ * lone closing square brackets as invalid.  Whereas according
+ * to ES6: 11.8.5 it is a valid regexp expression.
+ *
+ * 2) escaping zero byte characters as "\u0000".
+ *
+ * Escaping it here as a workaround.
+ */
+
+njs_int_t
+njs_regex_escape(njs_mp_t *mp, njs_str_t *text)
+{
+    size_t      brackets, zeros;
+    u_char      *p, *dst, *start, *end;
+    njs_bool_t  in;
+
+    start = text->start;
+    end = text->start + text->length;
+
+    in = 0;
+    zeros = 0;
+    brackets = 0;
+
+    for (p = start; p < end; p++) {
+
+        switch (*p) {
+        case '[':
+            in = 1;
+            break;
+
+        case ']':
+            if (!in) {
+                brackets++;
+            }
+
+            in = 0;
+            break;
+
+        case '\\':
+            p++;
+
+            if (p == end || *p != '\0') {
+                break;
+            }
+
+            /* Fall through. */
+
+        case '\0':
+            zeros++;
+            break;
+        }
+    }
+
+    if (!brackets && !zeros) {
+        return NJS_OK;
+    }
+
+    text->length = text->length + brackets + zeros * njs_length("\\u0000");
+
+    text->start = njs_mp_alloc(mp, text->length);
+    if (njs_slow_path(text->start == NULL)) {
+        return NJS_ERROR;
+    }
+
+    in = 0;
+    dst = text->start;
+
+    for (p = start; p < end; p++) {
+
+        switch (*p) {
+        case '[':
+            in = 1;
+            break;
+
+        case ']':
+            if (!in) {
+                *dst++ = '\\';
+            }
+
+            in = 0;
+            break;
+
+        case '\\':
+            *dst++ = *p++;
+
+            if (p == end) {
+                goto done;
+            }
+
+            if (*p != '\0') {
+                break;
+            }
+
+            /* Fall through. */
+
+        case '\0':
+            dst = njs_cpymem(dst, "\\u0000", 6);
+            continue;
+        }
+
+        *dst++ = *p;
+    }
+
+done:
+
+    text->length = dst - text->start;
+
+    return NJS_OK;
+}
+
+
 njs_int_t
 njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len,
-    njs_uint_t options, njs_regex_context_t *ctx)
+    njs_regex_flags_t flags, njs_regex_compile_ctx_t *cctx, njs_trace_t *trace)
 {
-    int         ret, err, erroff;
-    char        *pattern, *error;
-    void        *(*saved_malloc)(size_t size);
-    void        (*saved_free)(void *p);
-    const char  *errstr;
+    int                      ret, err, erroff;
+    char                     *pattern, *error;
+    void                     *(*saved_malloc)(size_t size);
+    void                     (*saved_free)(void *p);
+    njs_uint_t               options;
+    const char               *errstr;
+    njs_regex_generic_ctx_t  *ctx;
+
+    ctx = cctx;
 
     ret = NJS_ERROR;
 
@@ -51,31 +176,43 @@ njs_regex_compile(njs_regex_t *regex, u_
     pcre_free = njs_pcre_free;
     regex_context = ctx;
 
-    if (len == 0) {
-        pattern = (char *) source;
+#ifdef PCRE_JAVASCRIPT_COMPAT
+    /* JavaScript compatibility has been introduced in PCRE-7.7. */
+    options = PCRE_JAVASCRIPT_COMPAT;
+#else
+    options = 0;
+#endif
+
+    if ((flags & NJS_REGEX_IGNORE_CASE)) {
+         options |= PCRE_CASELESS;
+    }
 
-    } else {
-        pattern = ctx->private_malloc(len + 1, ctx->memory_data);
-        if (njs_slow_path(pattern == NULL)) {
-            goto done;
-        }
+    if ((flags & NJS_REGEX_MULTILINE)) {
+         options |= PCRE_MULTILINE;
+    }
 
-        memcpy(pattern, source, len);
-        pattern[len] = '\0';
+    if ((flags & NJS_REGEX_STICKY)) {
+         options |= PCRE_ANCHORED;
     }
 
+    if ((flags & NJS_REGEX_UTF8)) {
+         options |= PCRE_UTF8;
+    }
+
+    pattern = (char *) source;
+
     regex->code = pcre_compile(pattern, options, &errstr, &erroff, NULL);
 
     if (njs_slow_path(regex->code == NULL)) {
         error = pattern + erroff;
 
         if (*error != '\0') {
-            njs_alert(ctx->trace, NJS_LEVEL_ERROR,
+            njs_alert(trace, NJS_LEVEL_ERROR,
                       "pcre_compile(\"%s\") failed: %s at \"%s\"",
                       pattern, errstr, error);
 
         } else {
-            njs_alert(ctx->trace, NJS_LEVEL_ERROR,
+            njs_alert(trace, NJS_LEVEL_ERROR,
                       "pcre_compile(\"%s\") failed: %s", pattern, errstr);
         }
 
@@ -87,7 +224,7 @@ njs_regex_compile(njs_regex_t *regex, u_
     regex->extra = pcre_study(regex->code, 0, &errstr);
 
     if (njs_slow_path(errstr != NULL)) {
-        njs_alert(ctx->trace, NJS_LEVEL_WARN,
+        njs_alert(trace, NJS_LEVEL_WARN,
                   "pcre_study(\"%s\") failed: %s", pattern, errstr);
     }
 
@@ -95,7 +232,7 @@ njs_regex_compile(njs_regex_t *regex, u_
                         &regex->ncaptures);
 
     if (njs_slow_path(err < 0)) {
-        njs_alert(ctx->trace, NJS_LEVEL_ERROR,
+        njs_alert(trace, NJS_LEVEL_ERROR,
                   "pcre_fullinfo(\"%s\", PCRE_INFO_CAPTURECOUNT) failed: %d",
                   pattern, err);
 
@@ -106,7 +243,7 @@ njs_regex_compile(njs_regex_t *regex, u_
                         &regex->backrefmax);
 
     if (njs_slow_path(err < 0)) {
-        njs_alert(ctx->trace, NJS_LEVEL_ERROR,
+        njs_alert(trace, NJS_LEVEL_ERROR,
                   "pcre_fullinfo(\"%s\", PCRE_INFO_BACKREFMAX) failed: %d",
                   pattern, err);
 
@@ -121,7 +258,7 @@ njs_regex_compile(njs_regex_t *regex, u_
                             &regex->nentries);
 
         if (njs_slow_path(err < 0)) {
-            njs_alert(ctx->trace, NJS_LEVEL_ERROR,
+            njs_alert(trace, NJS_LEVEL_ERROR,
                       "pcre_fullinfo(\"%s\", PCRE_INFO_NAMECOUNT) failed: %d",
                       pattern, err);
 
@@ -133,7 +270,7 @@ njs_regex_compile(njs_regex_t *regex, u_
                                 &regex->entry_size);
 
             if (njs_slow_path(err < 0)) {
-                njs_alert(ctx->trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
+                njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
                           "PCRE_INFO_NAMEENTRYSIZE) failed: %d", pattern, err);
 
                 goto done;
@@ -143,7 +280,7 @@ njs_regex_compile(njs_regex_t *regex, u_
                                 &regex->entries);
 
             if (njs_slow_path(err < 0)) {
-                njs_alert(ctx->trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
+                njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
                           "PCRE_INFO_NAMETABLE) failed: %d", pattern, err);
 
                 goto done;
@@ -193,7 +330,7 @@ njs_regex_named_captures(njs_regex_t *re
 
 
 njs_regex_match_data_t *
-njs_regex_match_data(njs_regex_t *regex, njs_regex_context_t *ctx)
+njs_regex_match_data(njs_regex_t *regex, njs_regex_generic_ctx_t *ctx)
 {
     size_t                  size;
     njs_uint_t              ncaptures;
@@ -222,7 +359,7 @@ njs_regex_match_data(njs_regex_t *regex,
 
 void
 njs_regex_match_data_free(njs_regex_match_data_t *match_data,
-    njs_regex_context_t *ctx)
+    njs_regex_generic_ctx_t *ctx)
 {
     ctx->private_free(match_data, ctx->memory_data);
 }
@@ -244,25 +381,28 @@ njs_pcre_free(void *p)
 
 njs_int_t
 njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t off,
-    size_t len, njs_regex_match_data_t *match_data, njs_regex_context_t *ctx)
+    size_t len, njs_regex_match_data_t *match_data, njs_trace_t *trace)
 {
     int  ret;
 
     ret = pcre_exec(regex->code, regex->extra, (const char *) subject, len,
                     off, 0, match_data->captures, match_data->ncaptures);
 
-    /* PCRE_ERROR_NOMATCH is -1. */
+    if (ret <= PCRE_ERROR_NOMATCH) {
+        if (ret == PCRE_ERROR_NOMATCH) {
+            return NJS_DECLINED;
+        }
 
-    if (njs_slow_path(ret < PCRE_ERROR_NOMATCH)) {
-        njs_alert(ctx->trace, NJS_LEVEL_ERROR, "pcre_exec() failed: %d", ret);
+        njs_alert(trace, NJS_LEVEL_ERROR, "pcre_exec() failed: %d", ret);
+        return NJS_ERROR;
     }
 
     return ret;
 }
 
 
-int *
-njs_regex_captures(njs_regex_match_data_t *match_data)
+size_t
+njs_regex_capture(njs_regex_match_data_t *match_data, njs_uint_t n)
 {
-    return match_data->captures;
+    return match_data->captures[n];
 }
diff -r dedadba0ee87 -r 67ee2e4907a8 src/njs_pcre.h
--- a/src/njs_pcre.h	Wed Nov 10 14:50:16 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-
-/*
- * Copyright (C) Igor Sysoev
- * Copyright (C) NGINX, Inc.
- */
-
-#ifndef _NJS_PCRE_H_INCLUDED_
-#define _NJS_PCRE_H_INCLUDED_
-
-
-#include <pcre.h>
-
-
-#define NJS_REGEX_NOMATCH  PCRE_ERROR_NOMATCH
-
-
-struct njs_regex_s {
-    pcre        *code;
-    pcre_extra  *extra;
-    int         ncaptures;
-    int         backrefmax;
-    int         nentries;
-    int         entry_size;
-    char        *entries;
-};
-
-
-struct njs_regex_match_data_s {
-    int         ncaptures;
-    /*
-     * Each capture is stored in 3 "int" vector elements.
-     * The N capture positions are stored in [n * 2] and [n * 2 + 1] elements.
-     * The 3rd bookkeeping elements are at the end of the vector.
-     * The first vector is for the "$0" capture and it is always allocated.
-     */
-    int         captures[3];
-};
-
-
-#endif /* _NJS_PCRE_H_INCLUDED_ */
diff -r dedadba0ee87 -r 67ee2e4907a8 src/njs_regex.h
--- a/src/njs_regex.h	Wed Nov 10 14:50:16 2021 +0000
+++ b/src/njs_regex.h	Thu Nov 11 14:26:30 2021 +0000
@@ -7,39 +7,78 @@
 #ifndef _NJS_REGEX_H_INCLUDED_
 #define _NJS_REGEX_H_INCLUDED_
 
+#define NJS_REGEX_UNSET      (size_t) (-1)
+
+
+typedef enum {
+    NJS_REGEX_INVALID_FLAG = -1,
+    NJS_REGEX_NO_FLAGS     =  0,
+    NJS_REGEX_GLOBAL       =  1,
+    NJS_REGEX_IGNORE_CASE  =  2,
+    NJS_REGEX_MULTILINE    =  4,
+    NJS_REGEX_STICKY       =  8,
+    NJS_REGEX_UTF8         = 16,
+} njs_regex_flags_t;
+
 
 typedef void *(*njs_pcre_malloc_t)(size_t size, void *memory_data);
 typedef void (*njs_pcre_free_t)(void *p, void *memory_data);
 
 
-typedef struct njs_regex_s             njs_regex_t;
-typedef struct njs_regex_match_data_s  njs_regex_match_data_t;
-
-
 typedef struct {
     njs_pcre_malloc_t  private_malloc;
     njs_pcre_free_t    private_free;
     void               *memory_data;
-    njs_trace_t        *trace;
-} njs_regex_context_t;
+} njs_regex_generic_ctx_t;
+
+
+#define njs_regex_compile_ctx_t  void
+
+
+typedef struct {
+    void        *code;
+    void        *extra;
+    int         ncaptures;
+    int         backrefmax;
+    int         nentries;
+    int         entry_size;
+    char        *entries;
+} njs_regex_t;
 
 
-NJS_EXPORT njs_regex_context_t *
-    njs_regex_context_create(njs_pcre_malloc_t private_malloc,
+typedef struct {
+    int         ncaptures;
+    /*
+     * Each capture is stored in 3 "int" vector elements.
+     * The N capture positions are stored in [n * 2] and [n * 2 + 1] elements.
+     * The 3rd bookkeeping elements are at the end of the vector.
+     * The first vector is for the "$0" capture and it is always allocated.
+     */
+    int         captures[3];
+} njs_regex_match_data_t;
+
+
+NJS_EXPORT njs_regex_generic_ctx_t *
+    njs_regex_generic_ctx_create(njs_pcre_malloc_t private_malloc,
     njs_pcre_free_t private_free, void *memory_data);
+NJS_EXPORT njs_regex_compile_ctx_t *njs_regex_compile_ctx_create(
+    njs_regex_generic_ctx_t *ctx);
+NJS_EXPORT njs_int_t njs_regex_escape(njs_mp_t *mp, njs_str_t *text);
 NJS_EXPORT njs_int_t njs_regex_compile(njs_regex_t *regex, u_char *source,
-    size_t len, njs_uint_t options, njs_regex_context_t *ctx);
+    size_t len, njs_regex_flags_t flags, njs_regex_compile_ctx_t *ctx,
+    njs_trace_t *trace);
 NJS_EXPORT njs_bool_t njs_regex_is_valid(njs_regex_t *regex);
 NJS_EXPORT njs_int_t njs_regex_named_captures(njs_regex_t *regex,
     njs_str_t *name, int n);
 NJS_EXPORT njs_regex_match_data_t *njs_regex_match_data(njs_regex_t *regex,
-    njs_regex_context_t *ctx);
+    njs_regex_generic_ctx_t *ctx);
 NJS_EXPORT void njs_regex_match_data_free(njs_regex_match_data_t *match_data,
-    njs_regex_context_t *ctx);
+    njs_regex_generic_ctx_t *ctx);
 NJS_EXPORT njs_int_t njs_regex_match(njs_regex_t *regex, const u_char *subject,
     size_t off, size_t len, njs_regex_match_data_t *match_data,
-    njs_regex_context_t *ctx);
-NJS_EXPORT int *njs_regex_captures(njs_regex_match_data_t *match_data);
+    njs_trace_t *trace);
+NJS_EXPORT size_t njs_regex_capture(njs_regex_match_data_t *match_data,
+    njs_uint_t n);
 
 
 #endif /* _NJS_REGEX_H_INCLUDED_ */
diff -r dedadba0ee87 -r 67ee2e4907a8 src/njs_regexp.c
--- a/src/njs_regexp.c	Wed Nov 10 14:50:16 2021 +0000
+++ b/src/njs_regexp.c	Thu Nov 11 14:26:30 2021 +0000
@@ -20,7 +20,7 @@ static void njs_regexp_free(void *p, voi
 static njs_int_t njs_regexp_prototype_source(njs_vm_t *vm, njs_value_t *args,
     njs_uint_t nargs, njs_index_t unused);
 static int njs_regexp_pattern_compile(njs_vm_t *vm, njs_regex_t *regex,
-    u_char *source, int options);
+    u_char *source, size_t len, njs_regex_flags_t flags);
 static u_char *njs_regexp_compile_trace_handler(njs_trace_t *trace,
     njs_trace_data_t *td, u_char *start);
 static u_char *njs_regexp_match_trace_handler(njs_trace_t *trace,
@@ -37,21 +37,26 @@ const njs_value_t  njs_string_lindex = n
 njs_int_t
 njs_regexp_init(njs_vm_t *vm)
 {
-    vm->regex_context = njs_regex_context_create(njs_regexp_malloc,
-                                          njs_regexp_free, vm->mem_pool);
-    if (njs_slow_path(vm->regex_context == NULL)) {
+    vm->regex_generic_ctx = njs_regex_generic_ctx_create(njs_regexp_malloc,
+                                                         njs_regexp_free,
+                                                         vm->mem_pool);
+    if (njs_slow_path(vm->regex_generic_ctx == NULL)) {
         njs_memory_error(vm);
         return NJS_ERROR;
     }
 
-    vm->single_match_data = njs_regex_match_data(NULL, vm->regex_context);
+    vm->regex_compile_ctx = njs_regex_compile_ctx_create(vm->regex_generic_ctx);
+    if (njs_slow_path(vm->regex_compile_ctx == NULL)) {
+        njs_memory_error(vm);
+        return NJS_ERROR;
+    }
+
+    vm->single_match_data = njs_regex_match_data(NULL, vm->regex_generic_ctx);
     if (njs_slow_path(vm->single_match_data == NULL)) {
         njs_memory_error(vm);
         return NJS_ERROR;
     }
 
-    vm->regex_context->trace = &vm->trace;
-
     return NJS_OK;
 }
 
@@ -70,10 +75,10 @@ njs_regexp_free(void *p, void *memory_da
 }
 
 
-static njs_regexp_flags_t
+static njs_regex_flags_t
 njs_regexp_value_flags(njs_vm_t *vm, const njs_value_t *regexp)
 {
-    njs_regexp_flags_t    flags;
+    njs_regex_flags_t     flags;
     njs_regexp_pattern_t  *pattern;
 
     flags = 0;
@@ -81,19 +86,19 @@ njs_regexp_value_flags(njs_vm_t *vm, con
     pattern = njs_regexp_pattern(regexp);
 
     if (pattern->global) {
-        flags |= NJS_REGEXP_GLOBAL;
+        flags |= NJS_REGEX_GLOBAL;
     }
 
     if (pattern->ignore_case) {
-        flags |= NJS_REGEXP_IGNORE_CASE;
+        flags |= NJS_REGEX_IGNORE_CASE;
     }
 
     if (pattern->multiline) {
-        flags |= NJS_REGEXP_MULTILINE;
+        flags |= NJS_REGEX_MULTILINE;
     }
 
     if (pattern->sticky) {
-        flags |= NJS_REGEXP_STICKY;
+        flags |= NJS_REGEX_STICKY;
     }
 
     return flags;
@@ -108,7 +113,7 @@ njs_regexp_constructor(njs_vm_t *vm, njs
     njs_int_t           ret;
     njs_str_t           string;
     njs_value_t         source, *pattern, *flags;
-    njs_regexp_flags_t  re_flags;
+    njs_regex_flags_t   re_flags;
 
     pattern = njs_arg(args, nargs, 1);
 
@@ -168,7 +173,7 @@ njs_regexp_constructor(njs_vm_t *vm, njs
 
 njs_int_t
 njs_regexp_create(njs_vm_t *vm, njs_value_t *value, u_char *start,
-    size_t length, njs_regexp_flags_t flags)
+    size_t length, njs_regex_flags_t flags)
 {
     njs_regexp_t          *regexp;
     njs_regexp_pattern_t  *pattern;
@@ -200,143 +205,30 @@ njs_regexp_create(njs_vm_t *vm, njs_valu
 }
 
 
-/*
- * 1) PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
- * lone closing square brackets as invalid.  Whereas according
- * to ES6: 11.8.5 it is a valid regexp expression.
- *
- * 2) escaping zero byte characters as "\u0000".
- *
- * Escaping it here as a workaround.
- */
-
-njs_inline njs_int_t
-njs_regexp_escape(njs_vm_t *vm, njs_str_t *text)
-{
-    size_t      brackets, zeros;
-    u_char      *p, *dst, *start, *end;
-    njs_bool_t  in;
-
-    start = text->start;
-    end = text->start + text->length;
-
-    in = 0;
-    zeros = 0;
-    brackets = 0;
-
-    for (p = start; p < end; p++) {
-
-        switch (*p) {
-        case '[':
-            in = 1;
-            break;
-
-        case ']':
-            if (!in) {
-                brackets++;
-            }
-
-            in = 0;
-            break;
-
-        case '\\':
-            p++;
-
-            if (p == end || *p != '\0') {
-                break;
-            }
-
-            /* Fall through. */
-
-        case '\0':
-            zeros++;
-            break;
-        }
-    }
-
-    if (!brackets && !zeros) {
-        return NJS_OK;
-    }
-
-    text->length = text->length + brackets + zeros * njs_length("\\u0000");
-
-    text->start = njs_mp_alloc(vm->mem_pool, text->length);
-    if (njs_slow_path(text->start == NULL)) {
-        njs_memory_error(vm);
-        return NJS_ERROR;
-    }
-
-    in = 0;
-    dst = text->start;
-
-    for (p = start; p < end; p++) {
-
-        switch (*p) {
-        case '[':
-            in = 1;
-            break;
-
-        case ']':
-            if (!in) {
-                *dst++ = '\\';
-            }
-
-            in = 0;
-            break;
-
-        case '\\':
-            *dst++ = *p++;
-
-            if (p == end) {
-                goto done;
-            }
-
-            if (*p != '\0') {
-                break;
-            }
-
-            /* Fall through. */
-
-        case '\0':
-            dst = njs_cpymem(dst, "\\u0000", 6);
-            continue;
-        }
-
-        *dst++ = *p;
-    }
-
-done:
-
-    text->length = dst - text->start;
-
-    return NJS_OK;
-}
-
-
-njs_regexp_flags_t
+njs_regex_flags_t
 njs_regexp_flags(u_char **start, u_char *end)
 {
-    u_char              *p;
-    njs_regexp_flags_t  flags, flag;
+    u_char             *p;
+    njs_regex_flags_t  flags, flag;
 
-    flags = NJS_REGEXP_NO_FLAGS;
+    flags = NJS_REGEX_NO_FLAGS;
 
     for (p = *start; p < end; p++) {
         switch (*p) {
         case 'g':
-            flag = NJS_REGEXP_GLOBAL;
+            flag = NJS_REGEX_GLOBAL;
             break;
 
         case 'i':
-            flag = NJS_REGEXP_IGNORE_CASE;
+            flag = NJS_REGEX_IGNORE_CASE;
             break;
 
         case 'm':
-            flag = NJS_REGEXP_MULTILINE;
+            flag = NJS_REGEX_MULTILINE;
             break;
 
         case 'y':
-            flag = NJS_REGEXP_STICKY;
+            flag = NJS_REGEX_STICKY;
             break;
 
         default:
@@ -364,15 +256,15 @@ invalid:
 
     *start = p + 1;
 
-    return NJS_REGEXP_INVALID_FLAG;
+    return NJS_REGEX_INVALID_FLAG;
 }
 
 
 njs_regexp_pattern_t *
 njs_regexp_pattern_create(njs_vm_t *vm, u_char *start, size_t length,
-    njs_regexp_flags_t flags)
+    njs_regex_flags_t flags)
 {
-    int                   options, ret;
+    int                   ret;
     u_char                *p, *end;
     size_t                size;
     njs_str_t             text;
@@ -382,15 +274,16 @@ njs_regexp_pattern_create(njs_vm_t *vm, 
     njs_regexp_pattern_t  *pattern;
 
     size = 1;  /* A trailing "/". */
-    size += ((flags & NJS_REGEXP_GLOBAL) != 0);
-    size += ((flags & NJS_REGEXP_IGNORE_CASE) != 0);
-    size += ((flags & NJS_REGEXP_MULTILINE) != 0);
+    size += ((flags & NJS_REGEX_GLOBAL) != 0);
+    size += ((flags & NJS_REGEX_IGNORE_CASE) != 0);
+    size += ((flags & NJS_REGEX_MULTILINE) != 0);
 
     text.start = start;
     text.length = length;
 
-    ret = njs_regexp_escape(vm, &text);
+    ret = njs_regex_escape(vm->mem_pool, &text);
     if (njs_slow_path(ret != NJS_OK)) {
+        njs_memory_error(vm);
         return NULL;
     }
 
@@ -412,39 +305,27 @@ njs_regexp_pattern_create(njs_vm_t *vm, 
     end = p;
     *p++ = '\0';
 
-    pattern->global = ((flags & NJS_REGEXP_GLOBAL) != 0);
+    pattern->global = ((flags & NJS_REGEX_GLOBAL) != 0);
     if (pattern->global) {
         *p++ = 'g';
     }
 
-#ifdef PCRE_JAVASCRIPT_COMPAT
-    /* JavaScript compatibility has been introduced in PCRE-7.7. */
-    options = PCRE_JAVASCRIPT_COMPAT;
-#else
-    options = 0;
-#endif
-
-    pattern->ignore_case = ((flags & NJS_REGEXP_IGNORE_CASE) != 0);
+    pattern->ignore_case = ((flags & NJS_REGEX_IGNORE_CASE) != 0);
     if (pattern->ignore_case) {
         *p++ = 'i';
-         options |= PCRE_CASELESS;
     }
 
-    pattern->multiline = ((flags & NJS_REGEXP_MULTILINE) != 0);
+    pattern->multiline = ((flags & NJS_REGEX_MULTILINE) != 0);
     if (pattern->multiline) {
         *p++ = 'm';
-         options |= PCRE_MULTILINE;
     }
 
-    pattern->sticky = ((flags & NJS_REGEXP_STICKY) != 0);
-    if (pattern->sticky) {
-        options |= PCRE_ANCHORED;
-    }
+    pattern->sticky = ((flags & NJS_REGEX_STICKY) != 0);
 
     *p++ = '\0';
 
     ret = njs_regexp_pattern_compile(vm, &pattern->regex[0],
-                                     &pattern->source[1], options);
+                                     &pattern->source[1], text.length, flags);
 
     if (njs_fast_path(ret >= 0)) {
         pattern->ncaptures = ret;
@@ -454,7 +335,8 @@ njs_regexp_pattern_create(njs_vm_t *vm, 
     }
 
     ret = njs_regexp_pattern_compile(vm, &pattern->regex[1],
-                                     &pattern->source[1], options | PCRE_UTF8);
+                                  &pattern->source[1], text.length,
+                                  flags | NJS_REGEX_UTF8);
     if (njs_fast_path(ret >= 0)) {
 
         if (njs_slow_path(njs_regex_is_valid(&pattern->regex[0])
@@ -519,7 +401,7 @@ fail:
 
 static int
 njs_regexp_pattern_compile(njs_vm_t *vm, njs_regex_t *regex, u_char *source,
-    int options)
+    size_t len, njs_regex_flags_t flags)
 {
     njs_int_t            ret;
     njs_trace_handler_t  handler;
@@ -527,8 +409,8 @@ njs_regexp_pattern_compile(njs_vm_t *vm,
     handler = vm->trace.handler;
     vm->trace.handler = njs_regexp_compile_trace_handler;
 
-    /* Zero length means a zero-terminated string. */
-    ret = njs_regex_compile(regex, source, 0, options, vm->regex_context);
+    ret = njs_regex_compile(regex, source, len, flags, vm->regex_compile_ctx,
+                            &vm->trace);
 
     vm->trace.handler = handler;
 
@@ -568,8 +450,7 @@ njs_regexp_match(njs_vm_t *vm, njs_regex
     handler = vm->trace.handler;
     vm->trace.handler = njs_regexp_match_trace_handler;
 
-    ret = njs_regex_match(regex, subject, off, len, match_data,
-                          vm->regex_context);
+    ret = njs_regex_match(regex, subject, off, len, match_data, &vm->trace);
 
     vm->trace.handler = handler;
 
@@ -742,19 +623,19 @@ njs_regexp_prototype_flag(njs_vm_t *vm, 
     pattern = njs_regexp_pattern(this);
 
     switch (flag) {
-    case NJS_REGEXP_GLOBAL:
+    case NJS_REGEX_GLOBAL:
         yn = pattern->global;
         break;
 
-    case NJS_REGEXP_IGNORE_CASE:
+    case NJS_REGEX_IGNORE_CASE:
         yn = pattern->ignore_case;
         break;
 
-    case NJS_REGEXP_MULTILINE:
+    case NJS_REGEX_MULTILINE:
         yn = pattern->multiline;
         break;
 
-    case NJS_REGEXP_STICKY:
+    case NJS_REGEX_STICKY:
     default:
         yn = pattern->sticky;
         break;
@@ -996,7 +877,8 @@ njs_regexp_builtin_exec(njs_vm_t *vm, nj
         goto not_found;
     }
 
-    match_data = njs_regex_match_data(&pattern->regex[type], vm->regex_context);
+    match_data = njs_regex_match_data(&pattern->regex[type],
+                                      vm->regex_generic_ctx);
     if (njs_slow_path(match_data == NULL)) {
         njs_memory_error(vm);
         return NJS_ERROR;
@@ -1023,9 +905,8 @@ njs_regexp_builtin_exec(njs_vm_t *vm, nj
         return NJS_OK;
     }
 
-    if (njs_slow_path(ret != NJS_REGEX_NOMATCH)) {
-        njs_regex_match_data_free(match_data, vm->regex_context);
-
+    if (njs_slow_path(ret == NJS_ERROR)) {
+        njs_regex_match_data_free(match_data, vm->regex_generic_ctx);
         return NJS_ERROR;
     }
 
@@ -1050,8 +931,8 @@ static njs_array_t *
 njs_regexp_exec_result(njs_vm_t *vm, njs_value_t *r, njs_utf8_t utf8,
     njs_string_prop_t *string, njs_regex_match_data_t *match_data)
 {
-    int                   *captures;
     u_char                *start;
+    size_t                c;
     int32_t               size, length;
     uint32_t              index;
     njs_int_t             ret;
@@ -1076,14 +957,13 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
         goto fail;
     }
 
-    captures = njs_regex_captures(match_data);
-
     for (i = 0; i < pattern->ncaptures; i++) {
         n = 2 * i;
+        c = njs_regex_capture(match_data, n);
 
-        if (captures[n] != -1) {
-            start = &string->start[captures[n]];
-            size = captures[n + 1] - captures[n];
+        if (c != NJS_REGEX_UNSET) {
+            start = &string->start[c];
+            size = njs_regex_capture(match_data, n + 1) - c;
 
             if (utf8 == NJS_STRING_UTF8) {
                 length = njs_max(njs_utf8_length(start, size), 0);
@@ -1109,21 +989,25 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
         goto fail;
     }
 
+    c = njs_regex_capture(match_data, 0);
+
     if (utf8 == NJS_STRING_UTF8) {
-        index = njs_string_index(string, captures[0]);
+        index = njs_string_index(string, c);
 
     } else {
-        index = captures[0];
+        index = c;
     }
 
     njs_set_number(&prop->value, index);
 
     if (pattern->global || pattern->sticky) {
+        c = njs_regex_capture(match_data, 1);
+
         if (utf8 == NJS_STRING_UTF8) {
-            index = njs_string_index(string, captures[1]);
+            index = njs_string_index(string, c);
 
         } else {
-            index = captures[1];
+            index = c;
         }
 
         njs_set_number(&value, index);
@@ -1226,7 +1110,7 @@ fail:
 
 done:
 
-    njs_regex_match_data_free(match_data, vm->regex_context);
+    njs_regex_match_data_free(match_data, vm->regex_generic_ctx);
 
     return (ret == NJS_OK) ? array : NULL;
 }
@@ -1919,7 +1803,7 @@ static const njs_object_prop_t  njs_rege
         .name = njs_string("global"),
         .value = njs_value(NJS_INVALID, 1, NAN),
         .getter = njs_native_function2(njs_regexp_prototype_flag, 0,
-                                       NJS_REGEXP_GLOBAL),
+                                       NJS_REGEX_GLOBAL),
         .setter = njs_value(NJS_UNDEFINED, 0, NAN),


More information about the nginx-devel mailing list