[njs] Added support for RegExp "groups" object (ES9).

Valentin Bartenev vbart at nginx.com
Mon May 6 16:59:18 UTC 2019


details:   https://hg.nginx.org/njs/rev/e9de8a5d423c
branches:  
changeset: 950:e9de8a5d423c
user:      Valentin Bartenev <vbart at nginx.com>
date:      Mon May 06 19:42:06 2019 +0300
description:
Added support for RegExp "groups" object (ES9).

diffstat:

 njs/njs_object_hash.h    |   10 +++
 njs/njs_regexp.c         |  121 ++++++++++++++++++++++++++++++++++++++++++----
 njs/njs_regexp_pattern.h |   18 +++---
 njs/test/njs_unit_test.c |   12 ++++
 nxt/nxt_pcre.c           |   58 ++++++++++++++++++++++
 nxt/nxt_pcre.h           |    3 +
 nxt/nxt_regex.h          |    2 +
 7 files changed, 203 insertions(+), 21 deletions(-)

diffs (368 lines):

diff -r d94c836632c7 -r e9de8a5d423c njs/njs_object_hash.h
--- a/njs/njs_object_hash.h	Mon May 06 11:33:23 2019 +0300
+++ b/njs/njs_object_hash.h	Mon May 06 19:42:06 2019 +0300
@@ -100,6 +100,16 @@
         'i'), 'n'), 'p'), 'u'), 't')
 
 
+#define NJS_GROUPS_HASH                                                       \
+    nxt_djb_hash_add(                                                         \
+    nxt_djb_hash_add(                                                         \
+    nxt_djb_hash_add(                                                         \
+    nxt_djb_hash_add(                                                         \
+    nxt_djb_hash_add(                                                         \
+    nxt_djb_hash_add(NXT_DJB_HASH_INIT,                                       \
+        'g'), 'r'), 'o'), 'u'), 'p'), 's')    /* DJB hash of "groups". */
+
+
 #define NJS_JOIN_HASH                                                         \
     nxt_djb_hash_add(                                                         \
     nxt_djb_hash_add(                                                         \
diff -r d94c836632c7 -r e9de8a5d423c njs/njs_regexp.c
--- a/njs/njs_regexp.c	Mon May 06 11:33:23 2019 +0300
+++ b/njs/njs_regexp.c	Mon May 06 19:42:06 2019 +0300
@@ -11,6 +11,13 @@
 #include <string.h>
 
 
+struct njs_regexp_group_s {
+    nxt_str_t  name;     /* Name filled in by nxt_regex_named_captures(). */
+    uint32_t   hash;     /* nxt_djb_hash() of name; lvlhsh key hash. */
+    uint32_t   capture;  /* Capture number; index into the result array. */
+};
+
+
 static void *njs_regexp_malloc(size_t size, void *memory_data);
 static void njs_regexp_free(void *p, void *memory_data);
 static njs_regexp_flags_t njs_regexp_flags(u_char **start, u_char *end,
@@ -327,6 +334,9 @@ njs_regexp_pattern_create(njs_vm_t *vm, 
     int                   options, ret;
     u_char                *p, *end;
     size_t                size;
+    nxt_uint_t            n;
+    nxt_regex_t           *regex;
+    njs_regexp_group_t    *group;
     njs_regexp_pattern_t  *pattern;
 
     size = 1;  /* A trailing "/". */
@@ -405,14 +415,42 @@ njs_regexp_pattern_create(njs_vm_t *vm, 
         goto fail;
     }
 
-    if (!nxt_regex_is_valid(&pattern->regex[0])
-        && !nxt_regex_is_valid(&pattern->regex[1]))
-    {
+    if (nxt_regex_is_valid(&pattern->regex[0])) {
+        regex = &pattern->regex[0];
+
+    } else if (nxt_regex_is_valid(&pattern->regex[1])) {
+        regex = &pattern->regex[1];
+
+    } else {
         goto fail;
     }
 
     *end = '/';
 
+    pattern->ngroups = nxt_regex_named_captures(regex, NULL, 0);
+
+    if (pattern->ngroups != 0) {
+        size = sizeof(njs_regexp_group_t) * pattern->ngroups;
+
+        pattern->groups = nxt_mp_alloc(vm->mem_pool, size);
+        if (nxt_slow_path(pattern->groups == NULL)) {
+            njs_memory_error(vm);
+            return NULL;
+        }
+
+        n = 0;
+
+        do {
+            group = &pattern->groups[n];
+
+            group->capture = nxt_regex_named_captures(regex, &group->name, n);
+            group->hash = nxt_djb_hash(group->name.start, group->name.length);
+
+            n++;
+
+        } while (n != pattern->ngroups);
+    }
+
     return pattern;
 
 fail:
@@ -777,11 +815,15 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
     njs_ret_t           ret;
     nxt_uint_t          i, n;
     njs_array_t         *array;
+    njs_value_t         name;
+    njs_object_t        *groups;
     njs_object_prop_t   *prop;
+    njs_regexp_group_t  *group;
     nxt_lvlhsh_query_t  lhq;
 
     static const njs_value_t  string_index = njs_string("index");
     static const njs_value_t  string_input = njs_string("input");
+    static const njs_value_t  string_groups = njs_string("groups");
 
     array = njs_array_alloc(vm, regexp->pattern->ncaptures, 0);
     if (nxt_slow_path(array == NULL)) {
@@ -832,8 +874,7 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
 
     ret = nxt_lvlhsh_insert(&array->object.hash, &lhq);
     if (nxt_slow_path(ret != NXT_OK)) {
-        njs_internal_error(vm, "lvlhsh insert failed");
-        goto fail;
+        goto insert_fail;
     }
 
     prop = njs_object_prop_alloc(vm, &string_input, &regexp->string, 1);
@@ -846,16 +887,74 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
     lhq.value = prop;
 
     ret = nxt_lvlhsh_insert(&array->object.hash, &lhq);
+    if (nxt_slow_path(ret != NXT_OK)) {
+        goto insert_fail;
+    }
 
-    if (nxt_fast_path(ret == NXT_OK)) {
-        vm->retval.data.u.array = array;
-        vm->retval.type = NJS_ARRAY;
-        vm->retval.data.truth = 1;
+    prop = njs_object_prop_alloc(vm, &string_groups, &njs_value_undefined, 1);
+    if (nxt_slow_path(prop == NULL)) {
+        goto fail;
+    }
+
+    lhq.key_hash = NJS_GROUPS_HASH;
+    lhq.key = nxt_string_value("groups");
+    lhq.value = prop;
+
+    ret = nxt_lvlhsh_insert(&array->object.hash, &lhq);
+    if (nxt_slow_path(ret != NXT_OK)) {
+        goto insert_fail;
+    }
+
+    if (regexp->pattern->ngroups != 0) {
+        groups = njs_object_alloc(vm);
+        if (nxt_slow_path(groups == NULL)) {
+            goto fail;
+        }
+
+        prop->value.data.u.object = groups;
+        prop->value.type = NJS_OBJECT;
+        prop->value.data.truth = 1;
+
+        i = 0;
+
+        do {
+            group = &regexp->pattern->groups[i];
 
-        ret = NXT_OK;
-        goto done;
+            ret = njs_string_set(vm, &name, group->name.start,
+                                 group->name.length);
+            if (nxt_slow_path(ret != NXT_OK)) {
+                goto fail;
+            }
+
+            prop = njs_object_prop_alloc(vm, &name,
+                                         &array->start[group->capture], 1);
+            if (nxt_slow_path(prop == NULL)) {
+                goto fail;
+            }
+
+            lhq.key_hash = group->hash;
+            lhq.key = group->name;
+            lhq.value = prop;
+
+            ret = nxt_lvlhsh_insert(&groups->hash, &lhq);
+            if (nxt_slow_path(ret != NXT_OK)) {
+                goto insert_fail;
+            }
+
+            i++;
+
+        } while (i < regexp->pattern->ngroups);
     }
 
+    vm->retval.data.u.array = array;
+    vm->retval.type = NJS_ARRAY;
+    vm->retval.data.truth = 1;
+
+    ret = NXT_OK;
+    goto done;
+
+insert_fail:
+
     njs_internal_error(vm, "lvlhsh insert failed");
 
 fail:
diff -r d94c836632c7 -r e9de8a5d423c njs/njs_regexp_pattern.h
--- a/njs/njs_regexp_pattern.h	Mon May 06 11:33:23 2019 +0300
+++ b/njs/njs_regexp_pattern.h	Mon May 06 19:42:06 2019 +0300
@@ -17,6 +17,9 @@ typedef enum {
 } njs_regexp_utf8_t;
 
 
+typedef struct njs_regexp_group_s  njs_regexp_group_t;
+
+
 struct njs_regexp_pattern_s {
     nxt_regex_t           regex[2];
 
@@ -30,20 +33,15 @@ struct njs_regexp_pattern_s {
      */
     u_char                *source;
 
-#if (NXT_64BIT)
-    uint32_t              ncaptures;
+    uint16_t              ncaptures;
+    uint16_t              ngroups;
+
     uint8_t               flags;        /* 2 bits */
-
     uint8_t               global;       /* 1 bit */
     uint8_t               ignore_case;  /* 1 bit */
     uint8_t               multiline;    /* 1 bit */
-#else
-    uint16_t              ncaptures;
-    uint8_t               flags;        /* 2 bits */
-    uint8_t               global:1;
-    uint8_t               ignore_case:1;
-    uint8_t               multiline:1;
-#endif
+
+    njs_regexp_group_t    *groups;
 };
 
 
diff -r d94c836632c7 -r e9de8a5d423c njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c	Mon May 06 11:33:23 2019 +0300
+++ b/njs/test/njs_unit_test.c	Mon May 06 19:42:06 2019 +0300
@@ -7118,6 +7118,18 @@ static njs_unit_test_t  njs_test[] =
       nxt_string("Quick Brown Fox Jumps Brown Jumps undefined "
                  "4 25 The Quick Brown Fox Jumps Over The Lazy Dog") },
 
+    { nxt_string("var r = /a/.exec('a'); ['groups' in r, typeof r.groups]"),
+      nxt_string("true,undefined") },
+
+    { nxt_string("var r = /(?<m>[0-9]{2})\\/(?<d>[0-9]{2})\\/(?<y>[0-9]{4})/;"
+                 "var g = r.exec('12/31/1986').groups;"
+                 "g.d + '.' + g.m + '.' + g.y"),
+      nxt_string("31.12.1986") },
+
+    { nxt_string("var g = /(?<r>(?<no>no)?(?<yes>yes)?)/.exec('yes').groups;"
+                 "[Object.keys(g).length,'no' in g, typeof g.no, g.yes, g.r]"),
+      nxt_string("3,true,undefined,yes,yes") },
+
     { nxt_string("var s; var r = /./g; while (s = r.exec('abc')); s"),
       nxt_string("null") },
 
diff -r d94c836632c7 -r e9de8a5d423c nxt/nxt_pcre.c
--- a/nxt/nxt_pcre.c	Mon May 06 11:33:23 2019 +0300
+++ b/nxt/nxt_pcre.c	Mon May 06 19:42:06 2019 +0300
@@ -9,6 +9,7 @@
 #include <nxt_clang.h>
 #include <nxt_stub.h>
 #include <nxt_trace.h>
+#include <nxt_string.h>
 #include <nxt_regex.h>
 #include <nxt_pcre.h>
 #include <string.h>
@@ -120,6 +121,41 @@ nxt_regex_compile(nxt_regex_t *regex, u_
     /* Reserve additional elements for the first "$0" capture. */
     regex->ncaptures++;
 
+    if (regex->ncaptures > 1) {
+        err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMECOUNT,
+                            &regex->nentries);
+
+        if (nxt_slow_path(err < 0)) {
+            nxt_alert(ctx->trace, NXT_LEVEL_ERROR,
+                      "pcre_fullinfo(\"%s\", PCRE_INFO_NAMECOUNT) failed: %d",
+                      pattern, err);
+
+            goto done;
+        }
+
+        if (regex->nentries != 0) {
+            err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMEENTRYSIZE,
+                                &regex->entry_size);
+
+            if (nxt_slow_path(err < 0)) {
+                nxt_alert(ctx->trace, NXT_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
+                          "PCRE_INFO_NAMEENTRYSIZE) failed: %d", pattern, err);
+
+                goto done;
+            }
+
+            err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMETABLE,
+                                &regex->entries);
+
+            if (nxt_slow_path(err < 0)) {
+                nxt_alert(ctx->trace, NXT_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
+                          "PCRE_INFO_NAMETABLE) failed: %d", pattern, err);
+
+                goto done;
+            }
+        }
+    }
+
     ret = NXT_OK;
 
 done:
@@ -146,6 +182,28 @@ nxt_regex_ncaptures(nxt_regex_t *regex)
 }
 
 
+/* name == NULL: returns the number of named groups.  Otherwise sets *name to
+ * the n-th table entry's name and returns its capture index, or NXT_ERROR. */
+nxt_int_t
+nxt_regex_named_captures(nxt_regex_t *regex, nxt_str_t *name, int n)
+{
+    u_char  *entry;
+
+    if (name == NULL) {
+        return regex->nentries;
+    }
+    if (n < 0 || n >= regex->nentries) {
+        return NXT_ERROR;
+    }
+
+    /* Index bytes are read unsigned: plain char may sign-extend >= 0x80. */
+    entry = (u_char *) regex->entries + regex->entry_size * n;
+    name->start = entry + 2;
+    name->length = nxt_strlen(name->start);
+    return (entry[0] << 8) + entry[1];
+}
+
+
 nxt_regex_match_data_t *
 nxt_regex_match_data(nxt_regex_t *regex, nxt_regex_context_t *ctx)
 {
diff -r d94c836632c7 -r e9de8a5d423c nxt/nxt_pcre.h
--- a/nxt/nxt_pcre.h	Mon May 06 11:33:23 2019 +0300
+++ b/nxt/nxt_pcre.h	Mon May 06 19:42:06 2019 +0300
@@ -18,6 +18,9 @@ struct nxt_regex_s {
     pcre        *code;
     pcre_extra  *extra;
     int         ncaptures;
+    int         nentries;
+    int         entry_size;
+    char        *entries;
 };
 
 
diff -r d94c836632c7 -r e9de8a5d423c nxt/nxt_regex.h
--- a/nxt/nxt_regex.h	Mon May 06 11:33:23 2019 +0300
+++ b/nxt/nxt_regex.h	Mon May 06 19:42:06 2019 +0300
@@ -31,6 +31,8 @@ NXT_EXPORT nxt_int_t nxt_regex_compile(n
     size_t len, nxt_uint_t options, nxt_regex_context_t *ctx);
 NXT_EXPORT nxt_bool_t nxt_regex_is_valid(nxt_regex_t *regex);
 NXT_EXPORT nxt_uint_t nxt_regex_ncaptures(nxt_regex_t *regex);
+NXT_EXPORT nxt_int_t nxt_regex_named_captures(nxt_regex_t *regex,
+    nxt_str_t *name, int n);
 NXT_EXPORT nxt_regex_match_data_t *nxt_regex_match_data(nxt_regex_t *regex,
     nxt_regex_context_t *ctx);
 NXT_EXPORT void nxt_regex_match_data_free(nxt_regex_match_data_t *match_data,


More information about the nginx-devel mailing list