[njs] String processing unification using njs_string_length(), njs_utf8_t, and njs_regexp_utf8_t.

Igor Sysoev igor at sysoev.ru
Mon Sep 26 15:42:55 UTC 2016


details:   http://hg.nginx.org/njs/rev/fec0d8dfa38c
branches:  
changeset: 183:fec0d8dfa38c
user:      Igor Sysoev <igor at sysoev.ru>
date:      Mon Sep 26 14:01:45 2016 +0300
description:
String processing unification using njs_string_length(),
njs_utf8_t, and njs_regexp_utf8_t.

diffstat:

 njs/njs_regexp.c |  62 ++++++++++++++-----------------------
 njs/njs_string.c |  92 ++++++++++++++++---------------------------------------
 njs/njs_string.h |   2 +-
 njs/njs_vm.c     |   3 +-
 4 files changed, 54 insertions(+), 105 deletions(-)

diffs (383 lines):

diff -r eed097d72d5e -r fec0d8dfa38c njs/njs_regexp.c
--- a/njs/njs_regexp.c	Mon Sep 26 14:01:39 2016 +0300
+++ b/njs/njs_regexp.c	Mon Sep 26 14:01:45 2016 +0300
@@ -44,7 +44,7 @@ static u_char *njs_regexp_compile_trace_
 static u_char *njs_regexp_match_trace_handler(nxt_trace_t *trace,
     nxt_trace_data_t *td, u_char *start);
 static njs_ret_t njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp,
-    u_char *string, nxt_regex_match_data_t *match_data, nxt_uint_t utf8);
+    njs_utf8_t utf8, u_char *string, nxt_regex_match_data_t *match_data);
 static njs_ret_t njs_regexp_string_create(njs_vm_t *vm, njs_value_t *value,
     u_char *start, uint32_t size, int32_t length);
 
@@ -539,7 +539,8 @@ static njs_ret_t
 njs_regexp_prototype_source(njs_vm_t *vm, njs_value_t *value)
 {
     u_char                *source;
-    size_t                length, size;
+    int32_t               length;
+    uint32_t              size;
     njs_regexp_pattern_t  *pattern;
 
     pattern = value->data.u.regexp->pattern;
@@ -558,7 +559,8 @@ njs_regexp_prototype_to_string(njs_vm_t 
     nxt_uint_t nargs, njs_index_t unused)
 {
     u_char                *source;
-    size_t                length, size;
+    int32_t               length;
+    uint32_t              size;
     njs_regexp_pattern_t  *pattern;
 
     pattern = args[0].data.u.regexp->pattern;
@@ -624,10 +626,11 @@ njs_regexp_prototype_exec(njs_vm_t *vm, 
     njs_index_t unused)
 {
     njs_ret_t               ret;
-    nxt_uint_t              n, utf8;
+    njs_utf8_t              utf8;
     njs_value_t             *value;
     njs_regexp_t            *regexp;
     njs_string_prop_t       string;
+    njs_regexp_utf8_t       type;
     njs_regexp_pattern_t    *pattern;
     nxt_regex_match_data_t  *match_data;
 
@@ -648,38 +651,35 @@ njs_regexp_prototype_exec(njs_vm_t *vm, 
 
     (void) njs_string_prop(&string, value);
 
-    /* Byte string. */
-    utf8 = 0;
-    n = 0;
+    utf8 = NJS_STRING_BYTE;
+    type = NJS_REGEXP_BYTE;
 
     if (string.length != 0) {
-        /* ASCII string. */
-        utf8 = 1;
-        n = 1;
+        utf8 = NJS_STRING_ASCII;
+        type = NJS_REGEXP_UTF8;
 
         if (string.length != string.size) {
-            /* UTF-8 string. */
-            utf8 = 2;
+            utf8 = NJS_STRING_UTF8;
         }
     }
 
     pattern = regexp->pattern;
 
-    if (nxt_regex_is_valid(&pattern->regex[n])) {
+    if (nxt_regex_is_valid(&pattern->regex[type])) {
         string.start += regexp->last_index;
         string.size -= regexp->last_index;
 
-        match_data = nxt_regex_match_data(&pattern->regex[n],
+        match_data = nxt_regex_match_data(&pattern->regex[type],
                                           vm->regex_context);
         if (nxt_slow_path(match_data == NULL)) {
             return NXT_ERROR;
         }
 
-        ret = njs_regexp_match(vm, &pattern->regex[n], string.start,
+        ret = njs_regexp_match(vm, &pattern->regex[type], string.start,
                                string.size, match_data);
         if (ret >= 0) {
-            return njs_regexp_exec_result(vm, regexp, string.start, match_data,
-                                          utf8);
+            return njs_regexp_exec_result(vm, regexp, utf8, string.start,
+                                          match_data);
         }
 
         if (nxt_slow_path(ret != NXT_REGEX_NOMATCH)) {
@@ -697,8 +697,8 @@ njs_regexp_prototype_exec(njs_vm_t *vm, 
 
 
 static njs_ret_t
-njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, u_char *string,
-    nxt_regex_match_data_t *match_data, nxt_uint_t utf8)
+njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, njs_utf8_t utf8,
+    u_char *string, nxt_regex_match_data_t *match_data)
 {
     int                 *captures;
     u_char              *start;
@@ -726,20 +726,10 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
             start = &string[captures[n]];
             size = captures[n + 1] - captures[n];
 
-            switch (utf8) {
-            case 0:
-                length = 0;
-                break;
-            case 1:
-                length = size;
-                break;
-            default:
-                length = nxt_utf8_length(start, size);
-                break;
-            }
+            length = njs_string_length(utf8, start, size);
 
-            ret = njs_regexp_string_create(vm, &array->start[i],
-                                           start, size, length);
+            ret = njs_regexp_string_create(vm, &array->start[i], start, size,
+                                           length);
             if (nxt_slow_path(ret != NXT_OK)) {
                 goto fail;
             }
@@ -812,13 +802,9 @@ static njs_ret_t
 njs_regexp_string_create(njs_vm_t *vm, njs_value_t *value, u_char *start,
     uint32_t size, int32_t length)
 {
-    if (nxt_fast_path(length >= 0)) {
-        return njs_string_create(vm, value, start, size, length);
-    }
+    length = (length >= 0) ? length : 0;
 
-    vm->exception = &njs_exception_internal_error;
-
-    return NXT_ERROR;
+    return njs_string_create(vm, value, start, size, length);
 }
 
 
diff -r eed097d72d5e -r fec0d8dfa38c njs/njs_string.c
--- a/njs/njs_string.c	Mon Sep 26 14:01:39 2016 +0300
+++ b/njs/njs_string.c	Mon Sep 26 14:01:45 2016 +0300
@@ -88,7 +88,7 @@ static nxt_noinline ssize_t njs_string_i
 static njs_ret_t njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args,
     njs_regexp_pattern_t *pattern);
 static njs_ret_t njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array,
-    u_char *start, size_t size, nxt_uint_t utf8);
+    njs_utf8_t utf8, u_char *start, size_t size);
 static njs_ret_t njs_string_replace_regexp(njs_vm_t *vm, njs_value_t *args,
     njs_string_replace_t *r);
 static njs_ret_t njs_string_replace_regexp_function(njs_vm_t *vm,
@@ -1609,8 +1609,9 @@ njs_string_match_multiple(njs_vm_t *vm, 
     u_char             *start;
     int32_t            size, length;
     njs_ret_t          ret;
-    nxt_uint_t         n, utf8;
+    njs_utf8_t         utf8;
     njs_array_t        *array;
+    njs_regexp_utf8_t  type;
     njs_string_prop_t  string;
 
     args[1].data.u.regexp->last_index = 0;
@@ -1618,26 +1619,23 @@ njs_string_match_multiple(njs_vm_t *vm, 
 
     (void) njs_string_prop(&string, &args[0]);
 
-    /* Byte string. */
-    utf8 = 0;
-    n = 0;
+    utf8 = NJS_STRING_BYTE;
+    type = NJS_REGEXP_BYTE;
 
     if (string.length != 0) {
-        /* ASCII string. */
-        utf8 = 1;
-        n = 1;
+        utf8 = NJS_STRING_ASCII;
+        type = NJS_REGEXP_UTF8;
 
         if (string.length != string.size) {
-            /* UTF-8 string. */
-            utf8 = 2;
+            utf8 = NJS_STRING_UTF8;
         }
     }
 
-    if (nxt_regex_is_valid(&pattern->regex[n])) {
+    if (nxt_regex_is_valid(&pattern->regex[type])) {
         array = NULL;
 
         do {
-            ret = njs_regexp_match(vm, &pattern->regex[n], string.start,
+            ret = njs_regexp_match(vm, &pattern->regex[type], string.start,
                                    string.size, vm->single_match_data);
             if (ret >= 0) {
                 if (array != NULL) {
@@ -1667,25 +1665,7 @@ njs_string_match_multiple(njs_vm_t *vm, 
 
                 size = captures[1] - captures[0];
 
-                switch (utf8) {
-                case 0:
-                    length = 0;
-                    break;
-
-                case 1:
-                    length = size;
-                    break;
-
-                default:
-                    length = nxt_utf8_length(start, size);
-
-                    if (nxt_slow_path(length < 0)) {
-                        vm->exception = &njs_exception_internal_error;
-                        return NXT_ERROR;
-                    }
-
-                    break;
-                }
+                length = njs_string_length(utf8, start, size);
 
                 ret = njs_string_create(vm, &array->start[array->length],
                                         start, size, length);
@@ -1721,9 +1701,10 @@ njs_string_prototype_split(njs_vm_t *vm,
     u_char                *p, *start, *next;
     size_t                size;
     uint32_t              limit;
-    nxt_uint_t            n, utf8;
+    njs_utf8_t            utf8;
     njs_array_t           *array;
     const u_char          *end;
+    njs_regexp_utf8_t     type;
     njs_string_prop_t     string, split;
     njs_regexp_pattern_t  *pattern;
 
@@ -1751,18 +1732,15 @@ njs_string_prototype_split(njs_vm_t *vm,
             goto single;
         }
 
-        /* Byte string. */
-        utf8 = 0;
-        n = 0;
+        utf8 = NJS_STRING_BYTE;
+        type = NJS_REGEXP_BYTE;
 
         if (string.length != 0) {
-            /* ASCII string. */
-            utf8 = 1;
+            utf8 = NJS_STRING_ASCII;
+            type = NJS_REGEXP_UTF8;
 
             if (string.length != string.size) {
-                /* UTF-8 string. */
-                utf8 = 2;
-                n = 1;
+                utf8 = NJS_STRING_UTF8;
             }
         }
 
@@ -1795,7 +1773,7 @@ njs_string_prototype_split(njs_vm_t *vm,
 
                 size = p - start;
 
-                ret = njs_string_split_part_add(vm, array, start, size, utf8);
+                ret = njs_string_split_part_add(vm, array, utf8, start, size);
                 if (nxt_slow_path(ret != NXT_OK)) {
                     return ret;
                 }
@@ -1810,7 +1788,7 @@ njs_string_prototype_split(njs_vm_t *vm,
         case NJS_REGEXP:
             pattern = args[1].data.u.regexp->pattern;
 
-            if (!nxt_regex_is_valid(&pattern->regex[n])) {
+            if (!nxt_regex_is_valid(&pattern->regex[type])) {
                 goto single;
             }
 
@@ -1818,7 +1796,7 @@ njs_string_prototype_split(njs_vm_t *vm,
             end = string.start + string.size;
 
             do {
-                ret = njs_regexp_match(vm, &pattern->regex[n], start,
+                ret = njs_regexp_match(vm, &pattern->regex[type], start,
                                        end - start, vm->single_match_data);
                 if (ret >= 0) {
                     captures = nxt_regex_captures(vm->single_match_data);
@@ -1842,7 +1820,7 @@ njs_string_prototype_split(njs_vm_t *vm,
 
                 size = p - start;
 
-                ret = njs_string_split_part_add(vm, array, start, size, utf8);
+                ret = njs_string_split_part_add(vm, array, utf8, start, size);
                 if (nxt_slow_path(ret != NXT_OK)) {
                     return ret;
                 }
@@ -1876,28 +1854,12 @@ done:
 
 
 static njs_ret_t
-njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array, u_char *start,
-    size_t size, nxt_uint_t utf8)
+njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array, njs_utf8_t utf8,
+    u_char *start, size_t size)
 {
     ssize_t  length;
 
-    switch (utf8) {
-    case 0:
-        length = 0;
-        break;
-
-    case 1:
-        length = size;
-        break;
-
-    default:
-        length = nxt_utf8_length(start, size);
-
-        if (nxt_slow_path(length < 0)) {
-            vm->exception = &njs_exception_internal_error;
-            return NXT_ERROR;
-        }
-    }
+    length = njs_string_length(utf8, start, size);
 
     return njs_array_string_add(vm, array, start, size, length);
 }
@@ -2141,7 +2103,7 @@ njs_string_replace_regexp_function(njs_v
         size = captures[k + 1] - captures[k];
         k += 2;
 
-        length = njs_string_length(start, size, r->utf8);
+        length = njs_string_length(r->utf8, start, size);
 
         ret = njs_string_create(vm, &arguments[i], start, size, length);
         if (nxt_slow_path(ret != NXT_OK)) {
@@ -2153,7 +2115,7 @@ njs_string_replace_regexp_function(njs_v
     njs_number_set(&arguments[n + 1], captures[0]);
 
     /* The whole string being examined. */
-    length = njs_string_length(r->part[0].start, r->part[0].size, r->utf8);
+    length = njs_string_length(r->utf8, r->part[0].start, r->part[0].size);
 
     ret = njs_string_create(vm, &arguments[n + 2], r->part[0].start,
                             r->part[0].size, length);
diff -r eed097d72d5e -r fec0d8dfa38c njs/njs_string.h
--- a/njs/njs_string.h	Mon Sep 26 14:01:39 2016 +0300
+++ b/njs/njs_string.h	Mon Sep 26 14:01:45 2016 +0300
@@ -89,7 +89,7 @@ typedef enum {
 
 
 nxt_inline uint32_t
-njs_string_length(u_char *start, size_t size, njs_utf8_t utf8)
+njs_string_length(njs_utf8_t utf8, u_char *start, size_t size)
 {
     ssize_t  length;
 
diff -r eed097d72d5e -r fec0d8dfa38c njs/njs_vm.c
--- a/njs/njs_vm.c	Mon Sep 26 14:01:39 2016 +0300
+++ b/njs/njs_vm.c	Mon Sep 26 14:01:45 2016 +0300
@@ -3334,7 +3334,7 @@ njs_value_string_copy(njs_vm_t *vm, nxt_
 void
 njs_vm_throw_exception(njs_vm_t *vm, u_char *buf, uint32_t size)
 {
-    uint32_t     length;
+    int32_t      length;
     njs_value_t  *value;
 
     value = nxt_mem_cache_alloc(vm->mem_cache_pool, sizeof(njs_value_t));
@@ -3343,6 +3343,7 @@ njs_vm_throw_exception(njs_vm_t *vm, u_c
         vm->exception = value;
 
         length = nxt_utf8_length(buf, size);
+        length = (length >= 0) ? length : 0;
 
         (void) njs_string_new(vm, value, buf, size, length);
     }



More information about the nginx-devel mailing list