[njs] String processing unification using njs_string_length(), njs_utf8_t, and njs_regexp_utf8_t.
Igor Sysoev
igor at sysoev.ru
Mon Sep 26 15:42:55 UTC 2016
details: http://hg.nginx.org/njs/rev/fec0d8dfa38c
branches:
changeset: 183:fec0d8dfa38c
user: Igor Sysoev <igor at sysoev.ru>
date: Mon Sep 26 14:01:45 2016 +0300
description:
String processing unification using njs_string_length(),
njs_utf8_t, and njs_regexp_utf8_t.
diffstat:
njs/njs_regexp.c | 62 ++++++++++++++-----------------------
njs/njs_string.c | 92 ++++++++++++++++---------------------------------------
njs/njs_string.h | 2 +-
njs/njs_vm.c | 3 +-
4 files changed, 54 insertions(+), 105 deletions(-)
diffs (383 lines):
diff -r eed097d72d5e -r fec0d8dfa38c njs/njs_regexp.c
--- a/njs/njs_regexp.c Mon Sep 26 14:01:39 2016 +0300
+++ b/njs/njs_regexp.c Mon Sep 26 14:01:45 2016 +0300
@@ -44,7 +44,7 @@ static u_char *njs_regexp_compile_trace_
static u_char *njs_regexp_match_trace_handler(nxt_trace_t *trace,
nxt_trace_data_t *td, u_char *start);
static njs_ret_t njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp,
- u_char *string, nxt_regex_match_data_t *match_data, nxt_uint_t utf8);
+ njs_utf8_t utf8, u_char *string, nxt_regex_match_data_t *match_data);
static njs_ret_t njs_regexp_string_create(njs_vm_t *vm, njs_value_t *value,
u_char *start, uint32_t size, int32_t length);
@@ -539,7 +539,8 @@ static njs_ret_t
njs_regexp_prototype_source(njs_vm_t *vm, njs_value_t *value)
{
u_char *source;
- size_t length, size;
+ int32_t length;
+ uint32_t size;
njs_regexp_pattern_t *pattern;
pattern = value->data.u.regexp->pattern;
@@ -558,7 +559,8 @@ njs_regexp_prototype_to_string(njs_vm_t
nxt_uint_t nargs, njs_index_t unused)
{
u_char *source;
- size_t length, size;
+ int32_t length;
+ uint32_t size;
njs_regexp_pattern_t *pattern;
pattern = args[0].data.u.regexp->pattern;
@@ -624,10 +626,11 @@ njs_regexp_prototype_exec(njs_vm_t *vm,
njs_index_t unused)
{
njs_ret_t ret;
- nxt_uint_t n, utf8;
+ njs_utf8_t utf8;
njs_value_t *value;
njs_regexp_t *regexp;
njs_string_prop_t string;
+ njs_regexp_utf8_t type;
njs_regexp_pattern_t *pattern;
nxt_regex_match_data_t *match_data;
@@ -648,38 +651,35 @@ njs_regexp_prototype_exec(njs_vm_t *vm,
(void) njs_string_prop(&string, value);
- /* Byte string. */
- utf8 = 0;
- n = 0;
+ utf8 = NJS_STRING_BYTE;
+ type = NJS_REGEXP_BYTE;
if (string.length != 0) {
- /* ASCII string. */
- utf8 = 1;
- n = 1;
+ utf8 = NJS_STRING_ASCII;
+ type = NJS_REGEXP_UTF8;
if (string.length != string.size) {
- /* UTF-8 string. */
- utf8 = 2;
+ utf8 = NJS_STRING_UTF8;
}
}
pattern = regexp->pattern;
- if (nxt_regex_is_valid(&pattern->regex[n])) {
+ if (nxt_regex_is_valid(&pattern->regex[type])) {
string.start += regexp->last_index;
string.size -= regexp->last_index;
- match_data = nxt_regex_match_data(&pattern->regex[n],
+ match_data = nxt_regex_match_data(&pattern->regex[type],
vm->regex_context);
if (nxt_slow_path(match_data == NULL)) {
return NXT_ERROR;
}
- ret = njs_regexp_match(vm, &pattern->regex[n], string.start,
+ ret = njs_regexp_match(vm, &pattern->regex[type], string.start,
string.size, match_data);
if (ret >= 0) {
- return njs_regexp_exec_result(vm, regexp, string.start, match_data,
- utf8);
+ return njs_regexp_exec_result(vm, regexp, utf8, string.start,
+ match_data);
}
if (nxt_slow_path(ret != NXT_REGEX_NOMATCH)) {
@@ -697,8 +697,8 @@ njs_regexp_prototype_exec(njs_vm_t *vm,
static njs_ret_t
-njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, u_char *string,
- nxt_regex_match_data_t *match_data, nxt_uint_t utf8)
+njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, njs_utf8_t utf8,
+ u_char *string, nxt_regex_match_data_t *match_data)
{
int *captures;
u_char *start;
@@ -726,20 +726,10 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
start = &string[captures[n]];
size = captures[n + 1] - captures[n];
- switch (utf8) {
- case 0:
- length = 0;
- break;
- case 1:
- length = size;
- break;
- default:
- length = nxt_utf8_length(start, size);
- break;
- }
+ length = njs_string_length(utf8, start, size);
- ret = njs_regexp_string_create(vm, &array->start[i],
- start, size, length);
+ ret = njs_regexp_string_create(vm, &array->start[i], start, size,
+ length);
if (nxt_slow_path(ret != NXT_OK)) {
goto fail;
}
@@ -812,13 +802,9 @@ static njs_ret_t
njs_regexp_string_create(njs_vm_t *vm, njs_value_t *value, u_char *start,
uint32_t size, int32_t length)
{
- if (nxt_fast_path(length >= 0)) {
- return njs_string_create(vm, value, start, size, length);
- }
+ length = (length >= 0) ? length : 0;
- vm->exception = &njs_exception_internal_error;
-
- return NXT_ERROR;
+ return njs_string_create(vm, value, start, size, length);
}
diff -r eed097d72d5e -r fec0d8dfa38c njs/njs_string.c
--- a/njs/njs_string.c Mon Sep 26 14:01:39 2016 +0300
+++ b/njs/njs_string.c Mon Sep 26 14:01:45 2016 +0300
@@ -88,7 +88,7 @@ static nxt_noinline ssize_t njs_string_i
static njs_ret_t njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args,
njs_regexp_pattern_t *pattern);
static njs_ret_t njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array,
- u_char *start, size_t size, nxt_uint_t utf8);
+ njs_utf8_t utf8, u_char *start, size_t size);
static njs_ret_t njs_string_replace_regexp(njs_vm_t *vm, njs_value_t *args,
njs_string_replace_t *r);
static njs_ret_t njs_string_replace_regexp_function(njs_vm_t *vm,
@@ -1609,8 +1609,9 @@ njs_string_match_multiple(njs_vm_t *vm,
u_char *start;
int32_t size, length;
njs_ret_t ret;
- nxt_uint_t n, utf8;
+ njs_utf8_t utf8;
njs_array_t *array;
+ njs_regexp_utf8_t type;
njs_string_prop_t string;
args[1].data.u.regexp->last_index = 0;
@@ -1618,26 +1619,23 @@ njs_string_match_multiple(njs_vm_t *vm,
(void) njs_string_prop(&string, &args[0]);
- /* Byte string. */
- utf8 = 0;
- n = 0;
+ utf8 = NJS_STRING_BYTE;
+ type = NJS_REGEXP_BYTE;
if (string.length != 0) {
- /* ASCII string. */
- utf8 = 1;
- n = 1;
+ utf8 = NJS_STRING_ASCII;
+ type = NJS_REGEXP_UTF8;
if (string.length != string.size) {
- /* UTF-8 string. */
- utf8 = 2;
+ utf8 = NJS_STRING_UTF8;
}
}
- if (nxt_regex_is_valid(&pattern->regex[n])) {
+ if (nxt_regex_is_valid(&pattern->regex[type])) {
array = NULL;
do {
- ret = njs_regexp_match(vm, &pattern->regex[n], string.start,
+ ret = njs_regexp_match(vm, &pattern->regex[type], string.start,
string.size, vm->single_match_data);
if (ret >= 0) {
if (array != NULL) {
@@ -1667,25 +1665,7 @@ njs_string_match_multiple(njs_vm_t *vm,
size = captures[1] - captures[0];
- switch (utf8) {
- case 0:
- length = 0;
- break;
-
- case 1:
- length = size;
- break;
-
- default:
- length = nxt_utf8_length(start, size);
-
- if (nxt_slow_path(length < 0)) {
- vm->exception = &njs_exception_internal_error;
- return NXT_ERROR;
- }
-
- break;
- }
+ length = njs_string_length(utf8, start, size);
ret = njs_string_create(vm, &array->start[array->length],
start, size, length);
@@ -1721,9 +1701,10 @@ njs_string_prototype_split(njs_vm_t *vm,
u_char *p, *start, *next;
size_t size;
uint32_t limit;
- nxt_uint_t n, utf8;
+ njs_utf8_t utf8;
njs_array_t *array;
const u_char *end;
+ njs_regexp_utf8_t type;
njs_string_prop_t string, split;
njs_regexp_pattern_t *pattern;
@@ -1751,18 +1732,15 @@ njs_string_prototype_split(njs_vm_t *vm,
goto single;
}
- /* Byte string. */
- utf8 = 0;
- n = 0;
+ utf8 = NJS_STRING_BYTE;
+ type = NJS_REGEXP_BYTE;
if (string.length != 0) {
- /* ASCII string. */
- utf8 = 1;
+ utf8 = NJS_STRING_ASCII;
+ type = NJS_REGEXP_UTF8;
if (string.length != string.size) {
- /* UTF-8 string. */
- utf8 = 2;
- n = 1;
+ utf8 = NJS_STRING_UTF8;
}
}
@@ -1795,7 +1773,7 @@ njs_string_prototype_split(njs_vm_t *vm,
size = p - start;
- ret = njs_string_split_part_add(vm, array, start, size, utf8);
+ ret = njs_string_split_part_add(vm, array, utf8, start, size);
if (nxt_slow_path(ret != NXT_OK)) {
return ret;
}
@@ -1810,7 +1788,7 @@ njs_string_prototype_split(njs_vm_t *vm,
case NJS_REGEXP:
pattern = args[1].data.u.regexp->pattern;
- if (!nxt_regex_is_valid(&pattern->regex[n])) {
+ if (!nxt_regex_is_valid(&pattern->regex[type])) {
goto single;
}
@@ -1818,7 +1796,7 @@ njs_string_prototype_split(njs_vm_t *vm,
end = string.start + string.size;
do {
- ret = njs_regexp_match(vm, &pattern->regex[n], start,
+ ret = njs_regexp_match(vm, &pattern->regex[type], start,
end - start, vm->single_match_data);
if (ret >= 0) {
captures = nxt_regex_captures(vm->single_match_data);
@@ -1842,7 +1820,7 @@ njs_string_prototype_split(njs_vm_t *vm,
size = p - start;
- ret = njs_string_split_part_add(vm, array, start, size, utf8);
+ ret = njs_string_split_part_add(vm, array, utf8, start, size);
if (nxt_slow_path(ret != NXT_OK)) {
return ret;
}
@@ -1876,28 +1854,12 @@ done:
static njs_ret_t
-njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array, u_char *start,
- size_t size, nxt_uint_t utf8)
+njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array, njs_utf8_t utf8,
+ u_char *start, size_t size)
{
ssize_t length;
- switch (utf8) {
- case 0:
- length = 0;
- break;
-
- case 1:
- length = size;
- break;
-
- default:
- length = nxt_utf8_length(start, size);
-
- if (nxt_slow_path(length < 0)) {
- vm->exception = &njs_exception_internal_error;
- return NXT_ERROR;
- }
- }
+ length = njs_string_length(utf8, start, size);
return njs_array_string_add(vm, array, start, size, length);
}
@@ -2141,7 +2103,7 @@ njs_string_replace_regexp_function(njs_v
size = captures[k + 1] - captures[k];
k += 2;
- length = njs_string_length(start, size, r->utf8);
+ length = njs_string_length(r->utf8, start, size);
ret = njs_string_create(vm, &arguments[i], start, size, length);
if (nxt_slow_path(ret != NXT_OK)) {
@@ -2153,7 +2115,7 @@ njs_string_replace_regexp_function(njs_v
njs_number_set(&arguments[n + 1], captures[0]);
/* The whole string being examined. */
- length = njs_string_length(r->part[0].start, r->part[0].size, r->utf8);
+ length = njs_string_length(r->utf8, r->part[0].start, r->part[0].size);
ret = njs_string_create(vm, &arguments[n + 2], r->part[0].start,
r->part[0].size, length);
diff -r eed097d72d5e -r fec0d8dfa38c njs/njs_string.h
--- a/njs/njs_string.h Mon Sep 26 14:01:39 2016 +0300
+++ b/njs/njs_string.h Mon Sep 26 14:01:45 2016 +0300
@@ -89,7 +89,7 @@ typedef enum {
nxt_inline uint32_t
-njs_string_length(u_char *start, size_t size, njs_utf8_t utf8)
+njs_string_length(njs_utf8_t utf8, u_char *start, size_t size)
{
ssize_t length;
diff -r eed097d72d5e -r fec0d8dfa38c njs/njs_vm.c
--- a/njs/njs_vm.c Mon Sep 26 14:01:39 2016 +0300
+++ b/njs/njs_vm.c Mon Sep 26 14:01:45 2016 +0300
@@ -3334,7 +3334,7 @@ njs_value_string_copy(njs_vm_t *vm, nxt_
void
njs_vm_throw_exception(njs_vm_t *vm, u_char *buf, uint32_t size)
{
- uint32_t length;
+ int32_t length;
njs_value_t *value;
value = nxt_mem_cache_alloc(vm->mem_cache_pool, sizeof(njs_value_t));
@@ -3343,6 +3343,7 @@ njs_vm_throw_exception(njs_vm_t *vm, u_c
vm->exception = value;
length = nxt_utf8_length(buf, size);
+ length = (length >= 0) ? length : 0;
(void) njs_string_new(vm, value, buf, size, length);
}
More information about the nginx-devel mailing list