[njs] Introduced RegExpBuiltinExec().
Dmitry Volyntsev
xeioex at nginx.com
Thu Jul 2 14:01:41 UTC 2020
details: https://hg.nginx.org/njs/rev/f9082cd59ba6
branches:
changeset: 1447:f9082cd59ba6
user: Dmitry Volyntsev <xeioex at nginx.com>
date: Thu Jul 02 13:59:33 2020 +0000
description:
Introduced RegExpBuiltinExec().
diffstat:
src/njs_pcre.c | 6 +-
src/njs_regex.h | 3 +-
src/njs_regexp.c | 277 +++++++++++++++++++++++++++-------------------
src/njs_regexp.h | 4 +-
src/njs_string.c | 8 +-
src/test/njs_unit_test.c | 3 +
6 files changed, 180 insertions(+), 121 deletions(-)
diffs (523 lines):
diff -r c30a2a805014 -r f9082cd59ba6 src/njs_pcre.c
--- a/src/njs_pcre.c Thu Jul 02 12:59:54 2020 +0000
+++ b/src/njs_pcre.c Thu Jul 02 13:59:33 2020 +0000
@@ -280,13 +280,13 @@ njs_pcre_default_free(void *p, void *mem
njs_int_t
-njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t len,
- njs_regex_match_data_t *match_data, njs_regex_context_t *ctx)
+njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t off,
+ size_t len, njs_regex_match_data_t *match_data, njs_regex_context_t *ctx)
{
int ret;
ret = pcre_exec(regex->code, regex->extra, (const char *) subject, len,
- 0, 0, match_data->captures, match_data->ncaptures);
+ off, 0, match_data->captures, match_data->ncaptures);
/* PCRE_ERROR_NOMATCH is -1. */
diff -r c30a2a805014 -r f9082cd59ba6 src/njs_regex.h
--- a/src/njs_regex.h Thu Jul 02 12:59:54 2020 +0000
+++ b/src/njs_regex.h Thu Jul 02 13:59:33 2020 +0000
@@ -39,7 +39,8 @@ NJS_EXPORT njs_regex_match_data_t *njs_r
NJS_EXPORT void njs_regex_match_data_free(njs_regex_match_data_t *match_data,
njs_regex_context_t *ctx);
NJS_EXPORT njs_int_t njs_regex_match(njs_regex_t *regex, const u_char *subject,
- size_t len, njs_regex_match_data_t *match_data, njs_regex_context_t *ctx);
+ size_t off, size_t len, njs_regex_match_data_t *match_data,
+ njs_regex_context_t *ctx);
NJS_EXPORT int *njs_regex_captures(njs_regex_match_data_t *match_data);
diff -r c30a2a805014 -r f9082cd59ba6 src/njs_regexp.c
--- a/src/njs_regexp.c Thu Jul 02 12:59:54 2020 +0000
+++ b/src/njs_regexp.c Thu Jul 02 13:59:33 2020 +0000
@@ -26,9 +26,9 @@ static u_char *njs_regexp_compile_trace_
njs_trace_data_t *td, u_char *start);
static u_char *njs_regexp_match_trace_handler(njs_trace_t *trace,
njs_trace_data_t *td, u_char *start);
-static njs_int_t njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp,
- njs_utf8_t utf8, u_char *string, njs_regex_match_data_t *match_data,
- uint32_t last_index);
+static njs_array_t *njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp,
+ njs_regexp_utf8_t type, njs_string_prop_t *string,
+ njs_regex_match_data_t *data);
static njs_int_t njs_regexp_string_create(njs_vm_t *vm, njs_value_t *value,
u_char *start, uint32_t size, int32_t length);
@@ -550,7 +550,7 @@ njs_regexp_compile_trace_handler(njs_tra
njs_int_t
njs_regexp_match(njs_vm_t *vm, njs_regex_t *regex, const u_char *subject,
- size_t len, njs_regex_match_data_t *match_data)
+ size_t off, size_t len, njs_regex_match_data_t *match_data)
{
njs_int_t ret;
njs_trace_handler_t handler;
@@ -558,7 +558,8 @@ njs_regexp_match(njs_vm_t *vm, njs_regex
handler = vm->trace.handler;
vm->trace.handler = njs_regexp_match_trace_handler;
- ret = njs_regex_match(regex, subject, len, match_data, vm->regex_context);
+ ret = njs_regex_match(regex, subject, off, len, match_data,
+ vm->regex_context);
vm->trace.handler = handler;
@@ -617,9 +618,7 @@ static njs_int_t
njs_regexp_prototype_last_index(njs_vm_t *vm, njs_object_prop_t *unused,
njs_value_t *value, njs_value_t *setval, njs_value_t *retval)
{
- uint32_t index, last_index;
- njs_regexp_t *regexp;
- njs_string_prop_t string;
+ njs_regexp_t *regexp;
regexp = njs_object_proto_lookup(njs_object(value), NJS_REGEXP,
njs_regexp_t);
@@ -635,23 +634,7 @@ njs_regexp_prototype_last_index(njs_vm_t
return NJS_OK;
}
- if (njs_slow_path(!njs_is_number(&regexp->last_index))) {
- *retval = regexp->last_index;
- return NJS_OK;
- }
-
- (void) njs_string_prop(&string, &regexp->string);
-
- last_index = njs_number(&regexp->last_index);
-
- if (njs_slow_path(string.size < last_index)) {
- *retval = regexp->last_index;
- return NJS_OK;
- }
-
- index = njs_string_index(&string, last_index);
- njs_set_number(retval, index);
-
+ *retval = regexp->last_index;
return NJS_OK;
}
@@ -802,8 +785,8 @@ njs_regexp_prototype_test(njs_vm_t *vm,
}
}
- match = njs_regexp_match(vm, regex, string.start, string.size,
- match_data);
+ match = njs_regexp_match(vm, regex, string.start, 0, string.size,
+ match_data);
if (match >= 0) {
retval = &njs_value_true;
@@ -844,36 +827,25 @@ done:
}
-njs_int_t
-njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
- njs_index_t unused)
+/**
+ * TODO: sticky, unicode flags.
+ */
+static njs_int_t
+njs_regexp_builtin_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s,
+ njs_value_t *retval)
{
+ size_t length, offset;
int64_t last_index;
njs_int_t ret;
- njs_utf8_t utf8;
- njs_value_t *value, lvalue;
+ njs_array_t *result;
njs_regexp_t *regexp;
njs_string_prop_t string;
njs_regexp_utf8_t type;
njs_regexp_pattern_t *pattern;
njs_regex_match_data_t *match_data;
- if (!njs_is_regexp(njs_arg(args, nargs, 0))) {
- njs_type_error(vm, "\"this\" argument is not a regexp");
- return NJS_ERROR;
- }
-
- value = njs_lvalue_arg(&lvalue, args, nargs, 1);
-
- if (!njs_is_string(value)) {
- ret = njs_value_to_string(vm, value, value);
- if (njs_slow_path(ret != NJS_OK)) {
- return ret;
- }
- }
-
- regexp = njs_regexp(&args[0]);
- regexp->string = *value;
+ regexp = njs_regexp(r);
+ regexp->string = *s;
pattern = regexp->pattern;
ret = njs_value_to_length(vm, &regexp->last_index, &last_index);
@@ -885,94 +857,113 @@ njs_regexp_prototype_exec(njs_vm_t *vm,
last_index = 0;
}
- (void) njs_string_prop(&string, value);
+ length = njs_string_prop(&string, s);
+
+ if (njs_slow_path((size_t) last_index > length)) {
+ goto not_found;
+ }
+
+ type = NJS_REGEXP_BYTE;
- if (string.size >= (size_t) last_index) {
- utf8 = NJS_STRING_BYTE;
- type = NJS_REGEXP_BYTE;
+ if (length != string.size) {
+ /* UTF-8 string. */
+ type = NJS_REGEXP_UTF8;
+ }
+
+ pattern = regexp->pattern;
+
+ if (njs_slow_path(!njs_regex_is_valid(&pattern->regex[type]))) {
+ goto not_found;
+ }
- if (string.length != 0) {
- utf8 = NJS_STRING_ASCII;
- type = NJS_REGEXP_UTF8;
+ match_data = njs_regex_match_data(&pattern->regex[type], vm->regex_context);
+ if (njs_slow_path(match_data == NULL)) {
+ njs_memory_error(vm);
+ return NJS_ERROR;
+ }
+
+ if (type != NJS_REGEXP_UTF8) {
+ offset = last_index;
- if (string.length != string.size) {
- utf8 = NJS_STRING_UTF8;
- }
+ } else {
+ /* UTF-8 string. */
+ offset = njs_string_offset(string.start, string.start + string.size,
+ last_index) - string.start;
+ }
+
+ ret = njs_regexp_match(vm, &pattern->regex[type], string.start, offset,
+ string.size, match_data);
+ if (ret >= 0) {
+ result = njs_regexp_exec_result(vm, regexp, type, &string, match_data);
+ if (njs_slow_path(result == NULL)) {
+ return NJS_ERROR;
}
- pattern = regexp->pattern;
-
- if (njs_regex_is_valid(&pattern->regex[type])) {
- string.start += last_index;
- string.size -= last_index;
-
- match_data = njs_regex_match_data(&pattern->regex[type],
- vm->regex_context);
- if (njs_slow_path(match_data == NULL)) {
- njs_memory_error(vm);
- return NJS_ERROR;
- }
+ njs_set_array(retval, result);
+ return NJS_OK;
+ }
- ret = njs_regexp_match(vm, &pattern->regex[type], string.start,
- string.size, match_data);
- if (ret >= 0) {
- return njs_regexp_exec_result(vm, regexp, utf8, string.start,
- match_data, last_index);
- }
+ if (njs_slow_path(ret != NJS_REGEX_NOMATCH)) {
+ njs_regex_match_data_free(match_data, vm->regex_context);
- if (njs_slow_path(ret != NJS_REGEX_NOMATCH)) {
- njs_regex_match_data_free(match_data, vm->regex_context);
+ return NJS_ERROR;
+ }
- return NJS_ERROR;
- }
- }
- }
+not_found:
if (pattern->global) {
njs_set_number(&regexp->last_index, 0);
}
- vm->retval = njs_value_null;
+ njs_set_null(retval);
return NJS_OK;
}
-static njs_int_t
-njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, njs_utf8_t utf8,
- u_char *string, njs_regex_match_data_t *match_data, uint32_t last_index)
+static njs_array_t *
+njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp,
+ njs_regexp_utf8_t type, njs_string_prop_t *string,
+ njs_regex_match_data_t *match_data)
{
- int *captures;
- u_char *start;
- int32_t size, length;
- njs_int_t ret;
- njs_uint_t i, n;
- njs_array_t *array;
- njs_value_t name;
- njs_object_t *groups;
- njs_object_prop_t *prop;
- njs_regexp_group_t *group;
- njs_lvlhsh_query_t lhq;
+ int *captures;
+ u_char *start;
+ int32_t size, length;
+ njs_int_t ret;
+ njs_uint_t i, n;
+ njs_array_t *array;
+ njs_value_t name;
+ njs_object_t *groups;
+ njs_object_prop_t *prop;
+ njs_regexp_group_t *group;
+ njs_lvlhsh_query_t lhq;
+ njs_regexp_pattern_t *pattern;
static const njs_value_t string_index = njs_string("index");
static const njs_value_t string_input = njs_string("input");
static const njs_value_t string_groups = njs_string("groups");
- array = njs_array_alloc(vm, 0, regexp->pattern->ncaptures, 0);
+ pattern = regexp->pattern;
+ array = njs_array_alloc(vm, 0, pattern->ncaptures, 0);
if (njs_slow_path(array == NULL)) {
goto fail;
}
captures = njs_regex_captures(match_data);
- for (i = 0; i < regexp->pattern->ncaptures; i++) {
+ for (i = 0; i < pattern->ncaptures; i++) {
n = 2 * i;
if (captures[n] != -1) {
- start = &string[captures[n]];
+ start = &string->start[captures[n]];
size = captures[n + 1] - captures[n];
- length = njs_string_calc_length(utf8, start, size);
+ if (type == NJS_REGEXP_UTF8) {
+ length = njs_max(njs_utf8_length(start, size), 0);
+
+ } else {
+ length = size;
+ }
ret = njs_regexp_string_create(vm, &array->start[i], start, size,
length);
@@ -985,17 +976,17 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
}
}
+ /* FIXME: implement fast CreateDataPropertyOrThrow(). */
prop = njs_object_prop_alloc(vm, &string_index, &njs_value_undefined, 1);
if (njs_slow_path(prop == NULL)) {
goto fail;
}
- /* TODO: Non UTF-8 position */
+ njs_set_number(&prop->value, njs_string_index(string, captures[0]));
- njs_set_number(&prop->value, last_index + captures[0]);
-
- if (regexp->pattern->global) {
- njs_set_number(&regexp->last_index, last_index + captures[1]);
+ if (pattern->global) {
+ njs_set_number(&regexp->last_index,
+ njs_string_index(string, captures[1]));
}
lhq.key_hash = NJS_INDEX_HASH;
@@ -1038,7 +1029,7 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
goto insert_fail;
}
- if (regexp->pattern->ngroups != 0) {
+ if (pattern->ngroups != 0) {
groups = njs_object_alloc(vm);
if (njs_slow_path(groups == NULL)) {
goto fail;
@@ -1049,7 +1040,7 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
i = 0;
do {
- group = &regexp->pattern->groups[i];
+ group = &pattern->groups[i];
ret = njs_string_set(vm, &name, group->name.start,
group->name.length);
@@ -1074,11 +1065,9 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
i++;
- } while (i < regexp->pattern->ngroups);
+ } while (i < pattern->ngroups);
}
- njs_set_array(&vm->retval, array);
-
ret = NJS_OK;
goto done;
@@ -1094,7 +1083,71 @@ done:
njs_regex_match_data_free(match_data, vm->regex_context);
- return ret;
+ return (ret == NJS_OK) ? array : NULL;
+}
+
+
+njs_int_t
+njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
+ njs_index_t unused)
+{
+ njs_int_t ret;
+ njs_value_t *r, *s;
+ njs_value_t string_lvalue;
+
+ r = njs_argument(args, 0);
+
+ if (njs_slow_path(!njs_is_regexp(r))) {
+ njs_type_error(vm, "\"this\" argument is not a regexp");
+ return NJS_ERROR;
+ }
+
+ s = njs_lvalue_arg(&string_lvalue, args, nargs, 1);
+
+ ret = njs_value_to_string(vm, s, s);
+ if (njs_slow_path(ret != NJS_OK)) {
+ return ret;
+ }
+
+ return njs_regexp_builtin_exec(vm, r, s, &vm->retval);
+}
+
+
+njs_int_t
+njs_regexp_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s,
+ njs_value_t *retval)
+{
+ njs_int_t ret;
+ njs_value_t exec;
+
+ static const njs_value_t string_exec = njs_string("exec");
+
+ ret = njs_value_property(vm, r, njs_value_arg(&string_exec), &exec);
+ if (njs_slow_path(ret == NJS_ERROR)) {
+ return NJS_ERROR;
+ }
+
+ if (njs_is_function(&exec)) {
+ ret = njs_function_call(vm, njs_function(&exec), r, s, 1, retval);
+ if (njs_slow_path(ret == NJS_ERROR)) {
+ return NJS_ERROR;
+ }
+
+ if (njs_slow_path(!njs_is_object(retval) && !njs_is_null(retval))) {
+ njs_type_error(vm, "unexpected \"%s\" retval in njs_regexp_exec()",
+ njs_type_string(retval->type));
+ return NJS_ERROR;
+ }
+
+ return NJS_OK;
+ }
+
+ if (njs_slow_path(!njs_is_regexp(r))) {
+ njs_type_error(vm, "receiver argument is not a regexp");
+ return NJS_ERROR;
+ }
+
+ return njs_regexp_builtin_exec(vm, r, s, retval);
}
diff -r c30a2a805014 -r f9082cd59ba6 src/njs_regexp.h
--- a/src/njs_regexp.h Thu Jul 02 12:59:54 2020 +0000
+++ b/src/njs_regexp.h Thu Jul 02 13:59:33 2020 +0000
@@ -24,8 +24,10 @@ njs_regexp_flags_t njs_regexp_flags(u_ch
njs_regexp_pattern_t *njs_regexp_pattern_create(njs_vm_t *vm,
u_char *string, size_t length, njs_regexp_flags_t flags);
njs_int_t njs_regexp_match(njs_vm_t *vm, njs_regex_t *regex,
- const u_char *subject, size_t len, njs_regex_match_data_t *match_data);
+ const u_char *subject, size_t off, size_t len, njs_regex_match_data_t *d);
njs_regexp_t *njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern);
+njs_int_t njs_regexp_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s,
+ njs_value_t *retval);
njs_int_t njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args,
njs_uint_t nargs, njs_index_t unused);
diff -r c30a2a805014 -r f9082cd59ba6 src/njs_string.c
--- a/src/njs_string.c Thu Jul 02 12:59:54 2020 +0000
+++ b/src/njs_string.c Thu Jul 02 13:59:33 2020 +0000
@@ -3022,7 +3022,7 @@ njs_string_prototype_search(njs_vm_t *vm
if (njs_regex_is_valid(&pattern->regex[n])) {
ret = njs_regexp_match(vm, &pattern->regex[n], string.start,
- string.size, vm->single_match_data);
+ 0, string.size, vm->single_match_data);
if (ret >= 0) {
captures = njs_regex_captures(vm->single_match_data);
index = njs_string_index(&string, captures[0]);
@@ -3147,7 +3147,7 @@ njs_string_match_multiple(njs_vm_t *vm,
end = p + string.size;
do {
- ret = njs_regexp_match(vm, &pattern->regex[type], p, string.size,
+ ret = njs_regexp_match(vm, &pattern->regex[type], p, 0, string.size,
vm->single_match_data);
if (ret < 0) {
if (njs_fast_path(ret == NJS_REGEX_NOMATCH)) {
@@ -3286,7 +3286,7 @@ njs_string_prototype_split(njs_vm_t *vm,
end = string.start + string.size;
do {
- ret = njs_regexp_match(vm, &pattern->regex[type], start,
+ ret = njs_regexp_match(vm, &pattern->regex[type], start, 0,
end - start, vm->single_match_data);
if (ret >= 0) {
captures = njs_regex_captures(vm->single_match_data);
@@ -3564,7 +3564,7 @@ njs_string_replace_regexp(njs_vm_t *vm,
do {
ret = njs_regexp_match(vm, &pattern->regex[r->type],
- r->part[0].start, r->part[0].size,
+ r->part[0].start, 0, r->part[0].size,
r->match_data);
if (ret < 0) {
diff -r c30a2a805014 -r f9082cd59ba6 src/test/njs_unit_test.c
--- a/src/test/njs_unit_test.c Thu Jul 02 12:59:54 2020 +0000
+++ b/src/test/njs_unit_test.c Thu Jul 02 13:59:33 2020 +0000
@@ -9711,6 +9711,9 @@ static njs_unit_test_t njs_test[] =
{ njs_str("var s; var r = /./g; while (s = r.exec('abc')); s"),
njs_str("null") },
+ { njs_str("(/α/).exec('γαβγ').index"),
+ njs_str("1") },
+
{ njs_str("var r = /LS/i.exec(false); r[0]"),
njs_str("ls") },
More information about the nginx-devel
mailing list