[njs] Added support for RegExp "groups" object (ES9).
Valentin Bartenev
vbart at nginx.com
Mon May 6 16:59:18 UTC 2019
details: https://hg.nginx.org/njs/rev/e9de8a5d423c
branches:
changeset: 950:e9de8a5d423c
user: Valentin Bartenev <vbart at nginx.com>
date: Mon May 06 19:42:06 2019 +0300
description:
Added support for RegExp "groups" object (ES9).
diffstat:
njs/njs_object_hash.h | 10 +++
njs/njs_regexp.c | 121 ++++++++++++++++++++++++++++++++++++++++++----
njs/njs_regexp_pattern.h | 18 +++---
njs/test/njs_unit_test.c | 12 ++++
nxt/nxt_pcre.c | 58 ++++++++++++++++++++++
nxt/nxt_pcre.h | 3 +
nxt/nxt_regex.h | 2 +
7 files changed, 203 insertions(+), 21 deletions(-)
diffs (368 lines):
diff -r d94c836632c7 -r e9de8a5d423c njs/njs_object_hash.h
--- a/njs/njs_object_hash.h Mon May 06 11:33:23 2019 +0300
+++ b/njs/njs_object_hash.h Mon May 06 19:42:06 2019 +0300
@@ -100,6 +100,16 @@
'i'), 'n'), 'p'), 'u'), 't')
+#define NJS_GROUPS_HASH \
+ nxt_djb_hash_add( \
+ nxt_djb_hash_add( \
+ nxt_djb_hash_add( \
+ nxt_djb_hash_add( \
+ nxt_djb_hash_add( \
+ nxt_djb_hash_add(NXT_DJB_HASH_INIT, \
+ 'g'), 'r'), 'o'), 'u'), 'p'), 's')
+
+
#define NJS_JOIN_HASH \
nxt_djb_hash_add( \
nxt_djb_hash_add( \
diff -r d94c836632c7 -r e9de8a5d423c njs/njs_regexp.c
--- a/njs/njs_regexp.c Mon May 06 11:33:23 2019 +0300
+++ b/njs/njs_regexp.c Mon May 06 19:42:06 2019 +0300
@@ -11,6 +11,13 @@
#include <string.h>
+struct njs_regexp_group_s {
+ nxt_str_t name;
+ uint32_t hash;
+ uint32_t capture;
+};
+
+
static void *njs_regexp_malloc(size_t size, void *memory_data);
static void njs_regexp_free(void *p, void *memory_data);
static njs_regexp_flags_t njs_regexp_flags(u_char **start, u_char *end,
@@ -327,6 +334,9 @@ njs_regexp_pattern_create(njs_vm_t *vm,
int options, ret;
u_char *p, *end;
size_t size;
+ nxt_uint_t n;
+ nxt_regex_t *regex;
+ njs_regexp_group_t *group;
njs_regexp_pattern_t *pattern;
size = 1; /* A trailing "/". */
@@ -405,14 +415,42 @@ njs_regexp_pattern_create(njs_vm_t *vm,
goto fail;
}
- if (!nxt_regex_is_valid(&pattern->regex[0])
- && !nxt_regex_is_valid(&pattern->regex[1]))
- {
+ if (nxt_regex_is_valid(&pattern->regex[0])) {
+ regex = &pattern->regex[0];
+
+ } else if (nxt_regex_is_valid(&pattern->regex[1])) {
+ regex = &pattern->regex[1];
+
+ } else {
goto fail;
}
*end = '/';
+ pattern->ngroups = nxt_regex_named_captures(regex, NULL, 0);
+
+ if (pattern->ngroups != 0) {
+ size = sizeof(njs_regexp_group_t) * pattern->ngroups;
+
+ pattern->groups = nxt_mp_alloc(vm->mem_pool, size);
+ if (nxt_slow_path(pattern->groups == NULL)) {
+ njs_memory_error(vm);
+ return NULL;
+ }
+
+ n = 0;
+
+ do {
+ group = &pattern->groups[n];
+
+ group->capture = nxt_regex_named_captures(regex, &group->name, n);
+ group->hash = nxt_djb_hash(group->name.start, group->name.length);
+
+ n++;
+
+ } while (n != pattern->ngroups);
+ }
+
return pattern;
fail:
@@ -777,11 +815,15 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
njs_ret_t ret;
nxt_uint_t i, n;
njs_array_t *array;
+ njs_value_t name;
+ njs_object_t *groups;
njs_object_prop_t *prop;
+ njs_regexp_group_t *group;
nxt_lvlhsh_query_t lhq;
static const njs_value_t string_index = njs_string("index");
static const njs_value_t string_input = njs_string("input");
+ static const njs_value_t string_groups = njs_string("groups");
array = njs_array_alloc(vm, regexp->pattern->ncaptures, 0);
if (nxt_slow_path(array == NULL)) {
@@ -832,8 +874,7 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
ret = nxt_lvlhsh_insert(&array->object.hash, &lhq);
if (nxt_slow_path(ret != NXT_OK)) {
- njs_internal_error(vm, "lvlhsh insert failed");
- goto fail;
+ goto insert_fail;
}
prop = njs_object_prop_alloc(vm, &string_input, &regexp->string, 1);
@@ -846,16 +887,74 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
lhq.value = prop;
ret = nxt_lvlhsh_insert(&array->object.hash, &lhq);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ goto insert_fail;
+ }
- if (nxt_fast_path(ret == NXT_OK)) {
- vm->retval.data.u.array = array;
- vm->retval.type = NJS_ARRAY;
- vm->retval.data.truth = 1;
+ prop = njs_object_prop_alloc(vm, &string_groups, &njs_value_undefined, 1);
+ if (nxt_slow_path(prop == NULL)) {
+ goto fail;
+ }
+
+ lhq.key_hash = NJS_GROUPS_HASH;
+ lhq.key = nxt_string_value("groups");
+ lhq.value = prop;
+
+ ret = nxt_lvlhsh_insert(&array->object.hash, &lhq);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ goto insert_fail;
+ }
+
+ if (regexp->pattern->ngroups != 0) {
+ groups = njs_object_alloc(vm);
+ if (nxt_slow_path(groups == NULL)) {
+ goto fail;
+ }
+
+ prop->value.data.u.object = groups;
+ prop->value.type = NJS_OBJECT;
+ prop->value.data.truth = 1;
+
+ i = 0;
+
+ do {
+ group = &regexp->pattern->groups[i];
- ret = NXT_OK;
- goto done;
+ ret = njs_string_set(vm, &name, group->name.start,
+ group->name.length);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ goto fail;
+ }
+
+ prop = njs_object_prop_alloc(vm, &name,
+ &array->start[group->capture], 1);
+ if (nxt_slow_path(prop == NULL)) {
+ goto fail;
+ }
+
+ lhq.key_hash = group->hash;
+ lhq.key = group->name;
+ lhq.value = prop;
+
+ ret = nxt_lvlhsh_insert(&groups->hash, &lhq);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ goto insert_fail;
+ }
+
+ i++;
+
+ } while (i < regexp->pattern->ngroups);
}
+ vm->retval.data.u.array = array;
+ vm->retval.type = NJS_ARRAY;
+ vm->retval.data.truth = 1;
+
+ ret = NXT_OK;
+ goto done;
+
+insert_fail:
+
njs_internal_error(vm, "lvlhsh insert failed");
fail:
diff -r d94c836632c7 -r e9de8a5d423c njs/njs_regexp_pattern.h
--- a/njs/njs_regexp_pattern.h Mon May 06 11:33:23 2019 +0300
+++ b/njs/njs_regexp_pattern.h Mon May 06 19:42:06 2019 +0300
@@ -17,6 +17,9 @@ typedef enum {
} njs_regexp_utf8_t;
+typedef struct njs_regexp_group_s njs_regexp_group_t;
+
+
struct njs_regexp_pattern_s {
nxt_regex_t regex[2];
@@ -30,20 +33,15 @@ struct njs_regexp_pattern_s {
*/
u_char *source;
-#if (NXT_64BIT)
- uint32_t ncaptures;
+ uint16_t ncaptures;
+ uint16_t ngroups;
+
uint8_t flags; /* 2 bits */
-
uint8_t global; /* 1 bit */
uint8_t ignore_case; /* 1 bit */
uint8_t multiline; /* 1 bit */
-#else
- uint16_t ncaptures;
- uint8_t flags; /* 2 bits */
- uint8_t global:1;
- uint8_t ignore_case:1;
- uint8_t multiline:1;
-#endif
+
+ njs_regexp_group_t *groups;
};
diff -r d94c836632c7 -r e9de8a5d423c njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c Mon May 06 11:33:23 2019 +0300
+++ b/njs/test/njs_unit_test.c Mon May 06 19:42:06 2019 +0300
@@ -7118,6 +7118,18 @@ static njs_unit_test_t njs_test[] =
nxt_string("Quick Brown Fox Jumps Brown Jumps undefined "
"4 25 The Quick Brown Fox Jumps Over The Lazy Dog") },
+ { nxt_string("var r = /a/.exec('a'); ['groups' in r, typeof r.groups]"),
+ nxt_string("true,undefined") },
+
+ { nxt_string("var r = /(?<m>[0-9]{2})\\/(?<d>[0-9]{2})\\/(?<y>[0-9]{4})/;"
+ "var g = r.exec('12/31/1986').groups;"
+ "g.d + '.' + g.m + '.' + g.y"),
+ nxt_string("31.12.1986") },
+
+ { nxt_string("var g = /(?<r>(?<no>no)?(?<yes>yes)?)/.exec('yes').groups;"
+ "[Object.keys(g).length,'no' in g, typeof g.no, g.yes, g.r]"),
+ nxt_string("3,true,undefined,yes,yes") },
+
{ nxt_string("var s; var r = /./g; while (s = r.exec('abc')); s"),
nxt_string("null") },
diff -r d94c836632c7 -r e9de8a5d423c nxt/nxt_pcre.c
--- a/nxt/nxt_pcre.c Mon May 06 11:33:23 2019 +0300
+++ b/nxt/nxt_pcre.c Mon May 06 19:42:06 2019 +0300
@@ -9,6 +9,7 @@
#include <nxt_clang.h>
#include <nxt_stub.h>
#include <nxt_trace.h>
+#include <nxt_string.h>
#include <nxt_regex.h>
#include <nxt_pcre.h>
#include <string.h>
@@ -120,6 +121,41 @@ nxt_regex_compile(nxt_regex_t *regex, u_
/* Reserve additional elements for the first "$0" capture. */
regex->ncaptures++;
+ if (regex->ncaptures > 1) {
+ err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMECOUNT,
+ &regex->nentries);
+
+ if (nxt_slow_path(err < 0)) {
+ nxt_alert(ctx->trace, NXT_LEVEL_ERROR,
+ "pcre_fullinfo(\"%s\", PCRE_INFO_NAMECOUNT) failed: %d",
+ pattern, err);
+
+ goto done;
+ }
+
+ if (regex->nentries != 0) {
+ err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMEENTRYSIZE,
+ &regex->entry_size);
+
+ if (nxt_slow_path(err < 0)) {
+ nxt_alert(ctx->trace, NXT_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
+ "PCRE_INFO_NAMEENTRYSIZE) failed: %d", pattern, err);
+
+ goto done;
+ }
+
+ err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMETABLE,
+ &regex->entries);
+
+ if (nxt_slow_path(err < 0)) {
+ nxt_alert(ctx->trace, NXT_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
+ "PCRE_INFO_NAMETABLE) failed: %d", pattern, err);
+
+ goto done;
+ }
+ }
+ }
+
ret = NXT_OK;
done:
@@ -146,6 +182,28 @@ nxt_regex_ncaptures(nxt_regex_t *regex)
}
+nxt_int_t
+nxt_regex_named_captures(nxt_regex_t *regex, nxt_str_t *name, int n)
+{
+ char *entry;
+
+ if (name == NULL) {
+ return regex->nentries;
+ }
+
+ if (n >= regex->nentries) {
+ return NXT_ERROR;
+ }
+
+ entry = regex->entries + regex->entry_size * n;
+
+ name->start = (u_char *) entry + 2;
+ name->length = nxt_strlen(name->start);
+
+ return (entry[0] << 8) + entry[1];
+}
+
+
nxt_regex_match_data_t *
nxt_regex_match_data(nxt_regex_t *regex, nxt_regex_context_t *ctx)
{
diff -r d94c836632c7 -r e9de8a5d423c nxt/nxt_pcre.h
--- a/nxt/nxt_pcre.h Mon May 06 11:33:23 2019 +0300
+++ b/nxt/nxt_pcre.h Mon May 06 19:42:06 2019 +0300
@@ -18,6 +18,9 @@ struct nxt_regex_s {
pcre *code;
pcre_extra *extra;
int ncaptures;
+ int nentries;
+ int entry_size;
+ char *entries;
};
diff -r d94c836632c7 -r e9de8a5d423c nxt/nxt_regex.h
--- a/nxt/nxt_regex.h Mon May 06 11:33:23 2019 +0300
+++ b/nxt/nxt_regex.h Mon May 06 19:42:06 2019 +0300
@@ -31,6 +31,8 @@ NXT_EXPORT nxt_int_t nxt_regex_compile(n
size_t len, nxt_uint_t options, nxt_regex_context_t *ctx);
NXT_EXPORT nxt_bool_t nxt_regex_is_valid(nxt_regex_t *regex);
NXT_EXPORT nxt_uint_t nxt_regex_ncaptures(nxt_regex_t *regex);
+NXT_EXPORT nxt_int_t nxt_regex_named_captures(nxt_regex_t *regex,
+ nxt_str_t *name, int n);
NXT_EXPORT nxt_regex_match_data_t *nxt_regex_match_data(nxt_regex_t *regex,
nxt_regex_context_t *ctx);
NXT_EXPORT void nxt_regex_match_data_free(nxt_regex_match_data_t *match_data,
More information about the nginx-devel
mailing list