[njs] String.split() method.

Igor Sysoev igor at sysoev.ru
Tue Jul 5 13:12:41 UTC 2016


details:   http://hg.nginx.org/njs/rev/41404eee5063
branches:  
changeset: 114:41404eee5063
user:      Igor Sysoev <igor at sysoev.ru>
date:      Tue Jun 28 19:28:00 2016 +0300
description:
String.split() method.

diffstat:

 njs/njs_array.c          |   62 +++++------------
 njs/njs_array.h          |    2 +
 njs/njs_string.c         |  161 +++++++++++++++++++++++++++++++++++++++++++++++
 njs/njscript.h           |    3 -
 njs/test/njs_unit_test.c |   42 ++++++++++++
 5 files changed, 223 insertions(+), 47 deletions(-)

diffs (333 lines):

diff -r c72fd0d1fabc -r 41404eee5063 njs/njs_array.c
--- a/njs/njs_array.c	Wed Jun 01 15:31:34 2016 +0300
+++ b/njs/njs_array.c	Tue Jun 28 19:28:00 2016 +0300
@@ -63,50 +63,6 @@ static nxt_noinline njs_ret_t njs_array_
     njs_array_next_t *next, njs_value_t *args, nxt_uint_t nargs);
 
 
-njs_value_t *
-njs_array_add(njs_vm_t *vm, njs_value_t *value, u_char *start, size_t size)
-{
-    njs_ret_t    ret;
-    njs_array_t  *array;
-
-    if (value != NULL) {
-        array = value->data.u.array;
-
-        if (array->size == array->length) {
-            ret = njs_array_realloc(vm, array, 0, array->size + 1);
-            if (nxt_slow_path(ret != NXT_OK)) {
-                return NULL;
-            }
-        }
-
-    } else {
-        value = nxt_mem_cache_align(vm->mem_cache_pool, sizeof(njs_value_t),
-                                    sizeof(njs_value_t));
-
-        if (nxt_slow_path(value == NULL)) {
-            return NULL;
-        }
-
-        array = njs_array_alloc(vm, 0, NJS_ARRAY_SPARE);
-        if (nxt_slow_path(array == NULL)) {
-            return NULL;
-        }
-
-        value->data.u.array = array;
-        value->type = NJS_ARRAY;
-        value->data.truth = 1;
-    }
-
-    ret = njs_string_create(vm, &array->start[array->length++], start, size, 0);
-
-    if (nxt_fast_path(ret == NXT_OK)) {
-        return value;
-    }
-
-    return NULL;
-}
-
-
 nxt_noinline njs_array_t *
 njs_array_alloc(njs_vm_t *vm, uint32_t length, uint32_t spare)
 {
@@ -139,6 +95,24 @@ njs_array_alloc(njs_vm_t *vm, uint32_t l
 
 
 njs_ret_t
+njs_array_string_add(njs_vm_t *vm, njs_array_t *array, u_char *start,
+    size_t size, size_t length)
+{
+    njs_ret_t  ret;
+    /* Adds a string made from start/size/length as the next array element. */
+    if (array->size == array->length) {    /* length has reached size. */
+        ret = njs_array_realloc(vm, array, 0, array->size + 1);
+        if (nxt_slow_path(ret != NXT_OK)) {
+            return ret;
+        }
+    }
+    /* NOTE(review): array->length is bumped even if the create call fails. */
+    return njs_string_create(vm, &array->start[array->length++],
+                            start, size, length);
+}
+
+
+njs_ret_t
 njs_array_realloc(njs_vm_t *vm, njs_array_t *array, uint32_t prepend,
     uint32_t size)
 {
diff -r c72fd0d1fabc -r 41404eee5063 njs/njs_array.h
--- a/njs/njs_array.h	Wed Jun 01 15:31:34 2016 +0300
+++ b/njs/njs_array.h	Tue Jun 28 19:28:00 2016 +0300
@@ -21,6 +21,8 @@ struct njs_array_s {
 
 
 njs_array_t *njs_array_alloc(njs_vm_t *vm, uint32_t length, uint32_t spare);
+njs_ret_t njs_array_string_add(njs_vm_t *vm, njs_array_t *array, u_char *start,
+    size_t size, size_t length);
 njs_ret_t njs_array_realloc(njs_vm_t *vm, njs_array_t *array, uint32_t prepend,
     uint32_t size);
 njs_ret_t njs_array_constructor(njs_vm_t *vm, njs_value_t *args,
diff -r c72fd0d1fabc -r 41404eee5063 njs/njs_string.c
--- a/njs/njs_string.c	Wed Jun 01 15:31:34 2016 +0300
+++ b/njs/njs_string.c	Tue Jun 28 19:28:00 2016 +0300
@@ -1645,6 +1645,160 @@ empty:
 }
 
 
+/* String.split([string|regexp[, limit]]): sets vm->retval to an array of
+ * the pieces of args[0] cut at each args[1] match, at most "limit" pieces.
+ * Without a usable separator the array holds args[0] alone. */
+
+static njs_ret_t
+njs_string_prototype_split(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs,
+    njs_index_t unused)
+{
+    int                   ret, *captures;
+    u_char                *p, *start, *next;
+    size_t                size, length;
+    uint32_t              limit;
+    nxt_uint_t            n;
+    njs_array_t           *array;
+    const u_char          *end;
+    njs_string_prop_t     string, split;
+    njs_regexp_pattern_t  *pattern;
+
+    array = njs_array_alloc(vm, 0, NJS_ARRAY_SPARE);
+    if (nxt_slow_path(array == NULL)) {
+        return NXT_ERROR;
+    }
+
+    if (nargs > 1) {
+        /* args[1] is the separator, args[2] the optional limit. */
+        if (nargs > 2) {
+            limit = args[2].data.u.number;
+            /* A zero limit returns the freshly allocated array as is. */
+            if (limit == 0) {
+                goto done;
+            }
+
+        } else {
+            limit = (uint32_t) -1;    /* No limit argument. */
+        }
+
+        switch (args[1].type) {
+
+        case NJS_STRING:
+            (void) njs_string_prop(&split, &args[1]);
+
+            length = njs_string_prop(&string, &args[0]);
+            /* A separator longer than the string cannot match. */
+            if (string.size < split.size) {
+                goto single;
+            }
+
+            start = string.start;
+            end = string.start + string.size;
+            /* Each pass emits the piece [start, p) and resumes at next. */
+            do {
+                for (p = start; p < end; p++) {
+                    if (memcmp(p, split.start, split.size) == 0) {
+                        break;
+                    }
+                }
+                /* NOTE(review): memcmp() may read past end if split.size > 1. */
+                next = p + split.size;
+
+                /* Empty split string: advance by one byte. */
+                if (p == next) {
+                    p++;    /* NOTE(review): byte step, may cut a UTF-8 char. */
+                    next++;
+                }
+                /* NOTE(review): if start == end here, size below is 1. */
+                size = p - start;
+                length = nxt_utf8_length(start, size);
+
+                ret = njs_array_string_add(vm, array, start, size, length);
+                if (nxt_slow_path(ret != NXT_OK)) {
+                    return ret;
+                }
+
+                start = next;
+                limit--;
+
+            } while (limit != 0 && p < end);
+
+            goto done;
+
+        case NJS_REGEXP:
+            pattern = args[1].data.u.regexp->pattern;
+
+            (void) njs_string_prop(&string, &args[0]);
+            /* n = 1 if length != size (presumably non-ASCII UTF-8): confirm. */
+            n = (string.length != 0 && string.length != string.size);
+
+            if (!nxt_regex_is_valid(&pattern->regex[n])) {
+                goto single;
+            }
+
+            start = string.start;
+            end = string.start + string.size;
+
+            do {
+                ret = nxt_regex_match(&pattern->regex[n], start, end - start,
+                                      vm->single_match_data, vm->regex_context);
+                if (ret >= 0) {
+                    captures = nxt_regex_captures(vm->single_match_data);
+                    /* captures[0] and [1] are applied as offsets from start. */
+                    p = start + captures[0];
+                    next = start + captures[1];
+                    /* NOTE(review): NGX_ prefix below, NXT_ elsewhere: confirm. */
+                } else if (ret == NGX_REGEX_NOMATCH) {
+                    p = (u_char *) end;    /* Tail is the last piece. */
+                    next = (u_char *) end + 1;
+
+                } else {
+                    return njs_string_exception(vm, NJS_INTERNAL_ERROR,
+                                                vm->regex_context->error);
+                }
+
+                /* Empty split regexp: advance by one byte. */
+                if (p == next) {
+                    p++;    /* NOTE(review): byte step, may cut a UTF-8 char. */
+                    next++;
+                }
+                /* NOTE(review): if start == end here, size below is 1. */
+                size = p - start;
+                length = nxt_utf8_length(start, size);
+
+                ret = njs_array_string_add(vm, array, start, size, length);
+                if (nxt_slow_path(ret != NXT_OK)) {
+                    return ret;
+                }
+
+                start = next;
+                limit--;
+
+            } while (limit != 0 && p < end);
+
+            goto done;
+
+        default: /* NJS_VOID */
+            break;
+        }
+    }
+
+single:
+    /* Not split: the array holds only the original value args[0]. */
+    /* GC: retain. */
+    array->start[0] = args[0];
+    array->length = 1;
+
+done:
+    /* Hand the array back as the call result. */
+    vm->retval.data.u.array = array;
+    vm->retval.type = NJS_ARRAY;
+    vm->retval.data.truth = 1;
+
+    return NXT_OK;
+}
+
+
 njs_ret_t
 njs_primitive_value_to_string(njs_vm_t *vm, njs_value_t *dst,
     const njs_value_t *src)
@@ -1901,6 +2055,13 @@ static const njs_object_prop_t  njs_stri
         .value = njs_native_function(njs_string_prototype_match, 0,
                      NJS_STRING_ARG, NJS_REGEXP_ARG),
     },
+
+    {
+        .type = NJS_METHOD,
+        .name = njs_string("split"),
+        .value = njs_native_function(njs_string_prototype_split, 0,
+                     NJS_STRING_OBJECT_ARG, NJS_REGEXP_ARG, NJS_INTEGER_ARG),
+    },
 };
 
 
diff -r c72fd0d1fabc -r 41404eee5063 njs/njscript.h
--- a/njs/njscript.h	Wed Jun 01 15:31:34 2016 +0300
+++ b/njs/njscript.h	Tue Jun 28 19:28:00 2016 +0300
@@ -106,8 +106,5 @@ NXT_EXPORT void *njs_value_data(njs_valu
 NXT_EXPORT nxt_int_t njs_value_string_copy(njs_vm_t *vm, nxt_str_t *retval,
     njs_value_t *value, uintptr_t *next);
 
-NXT_EXPORT njs_value_t *njs_array_add(njs_vm_t *vm, njs_value_t *array,
-    u_char *start, size_t size);
-
 
 #endif /* _NJSCRIPT_H_INCLUDED_ */
diff -r c72fd0d1fabc -r 41404eee5063 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c	Wed Jun 01 15:31:34 2016 +0300
+++ b/njs/test/njs_unit_test.c	Tue Jun 28 19:28:00 2016 +0300
@@ -2924,6 +2924,48 @@ static njs_unit_test_t  njs_test[] =
                  "a +' '+ a.length"),
       nxt_string("αα 4") },
 
+    { nxt_string("'abc'.split()"),
+      nxt_string("abc") },
+
+    { nxt_string("'abc'.split(undefined)"),
+      nxt_string("abc") },
+
+    { nxt_string("''.split('').length"),
+      nxt_string("1") },
+
+    { nxt_string("'abc'.split('')"),
+      nxt_string("a,b,c") },
+
+    { nxt_string("'a bc def'.split(' ')"),
+      nxt_string("a,bc,def") },
+
+    { nxt_string("'a bc  def'.split(' ')"),
+      nxt_string("a,bc,,def") },
+
+    { nxt_string("'a bc  def'.split(' ', 3)"),
+      nxt_string("a,bc,") },
+
+    { nxt_string("'abc'.split('abc')"),
+      nxt_string(",") },
+
+    { nxt_string("'ab'.split('123')"),
+      nxt_string("ab") },
+
+    { nxt_string("''.split(/0/).length"),
+      nxt_string("1") },
+
+    { nxt_string("'abc'.split(/(?:)/)"),
+      nxt_string("a,b,c") },
+
+    { nxt_string("'a bc def'.split(/ /)"),
+      nxt_string("a,bc,def") },
+
+    { nxt_string("'a bc  def'.split(/ /)"),
+      nxt_string("a,bc,,def") },
+
+    { nxt_string("'abc'.split(/abc/)"),
+      nxt_string(",") },
+
     /* Functions. */
 
     { nxt_string("function f() { } f()"),


More information about the nginx-devel mailing list