[njs] Fixed RegExp() instance properties.

Dmitry Volyntsev xeioex at nginx.com
Fri Dec 6 12:17:14 UTC 2019


details:   https://hg.nginx.org/njs/rev/990a4b7a4053
branches:  
changeset: 1278:990a4b7a4053
user:      Dmitry Volyntsev <xeioex at nginx.com>
date:      Fri Dec 06 14:44:11 2019 +0300
description:
Fixed RegExp() instance properties.

1) "lastIndex" is the only own property descriptor.
2) "lastIndex" property descriptor is writable.
3) "lastIndex" is coerced to integer in RegExpBuiltinExec() and
   can be any value (not only positive integers).

diffstat:

 src/njs_array.c          |   13 +----
 src/njs_builtin.c        |    6 ++
 src/njs_object.h         |   19 +++++++
 src/njs_regexp.c         |  122 ++++++++++++++++++++++++++++++++++++----------
 src/njs_regexp.h         |    1 +
 src/njs_string.c         |    2 +-
 src/njs_value.h          |    2 +-
 src/njs_vm.h             |    2 +
 src/test/njs_unit_test.c |   42 ++++++++++++++++
 9 files changed, 170 insertions(+), 39 deletions(-)

diffs (433 lines):

diff -r ef64784b77f6 -r 990a4b7a4053 src/njs_array.c
--- a/src/njs_array.c	Tue Dec 03 18:02:40 2019 +0300
+++ b/src/njs_array.c	Fri Dec 06 14:44:11 2019 +0300
@@ -342,21 +342,12 @@ njs_array_length(njs_vm_t *vm,njs_object
     proto = njs_object(value);
 
     if (njs_fast_path(setval == NULL)) {
-        do {
-            if (njs_fast_path(proto->type == NJS_ARRAY)) {
-                break;
-            }
-
-            proto = proto->__proto__;
-        } while (proto != NULL);
-
-        if (njs_slow_path(proto == NULL)) {
+        array = njs_object_proto_lookup(proto, NJS_ARRAY, njs_array_t);
+        if (njs_slow_path(array == NULL)) {
             njs_set_undefined(retval);
             return NJS_DECLINED;
         }
 
-        array = (njs_array_t *) proto;
-
         njs_set_number(retval, array->length);
         return NJS_OK;
     }
diff -r ef64784b77f6 -r 990a4b7a4053 src/njs_builtin.c
--- a/src/njs_builtin.c	Tue Dec 03 18:02:40 2019 +0300
+++ b/src/njs_builtin.c	Fri Dec 06 14:44:11 2019 +0300
@@ -166,6 +166,12 @@ njs_builtin_objects_create(njs_vm_t *vm)
         return NJS_ERROR;
     }
 
+    ret = njs_object_hash_init(vm, &shared->regexp_instance_hash,
+                               &njs_regexp_instance_init);
+    if (njs_slow_path(ret != NJS_OK)) {
+        return NJS_ERROR;
+    }
+
     object = shared->objects;
 
     for (p = njs_object_init; *p != NULL; p++) {
diff -r ef64784b77f6 -r 990a4b7a4053 src/njs_object.h
--- a/src/njs_object.h	Tue Dec 03 18:02:40 2019 +0300
+++ b/src/njs_object.h	Fri Dec 06 14:44:11 2019 +0300
@@ -259,6 +259,25 @@ njs_object_string_tag(njs_vm_t *vm, njs_
 }
 
 
+njs_inline njs_object_t *
+_njs_object_proto_lookup(njs_object_t *proto, njs_value_type_t type)
+{
+    do {
+        if (njs_fast_path(proto->type == type)) {
+            break;
+        }
+
+        proto = proto->__proto__;
+    } while (proto != NULL);
+
+    return proto;
+}
+
+
+#define njs_object_proto_lookup(proto, vtype, ctype)                         \
+    (ctype *) _njs_object_proto_lookup(proto, vtype)
+
+
 extern const njs_object_type_init_t  njs_obj_type_init;
 
 
diff -r ef64784b77f6 -r 990a4b7a4053 src/njs_regexp.c
--- a/src/njs_regexp.c	Tue Dec 03 18:02:40 2019 +0300
+++ b/src/njs_regexp.c	Fri Dec 06 14:44:11 2019 +0300
@@ -29,7 +29,8 @@ static u_char *njs_regexp_compile_trace_
 static u_char *njs_regexp_match_trace_handler(njs_trace_t *trace,
     njs_trace_data_t *td, u_char *start);
 static njs_int_t njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp,
-    njs_utf8_t utf8, u_char *string, njs_regex_match_data_t *match_data);
+    njs_utf8_t utf8, u_char *string, njs_regex_match_data_t *match_data,
+    uint32_t last_index);
 static njs_int_t njs_regexp_string_create(njs_vm_t *vm, njs_value_t *value,
     u_char *start, uint32_t size, int32_t length);
 
@@ -707,13 +708,14 @@ njs_regexp_alloc(njs_vm_t *vm, njs_regex
 
     if (njs_fast_path(regexp != NULL)) {
         njs_lvlhsh_init(&regexp->object.hash);
-        njs_lvlhsh_init(&regexp->object.shared_hash);
+        regexp->object.shared_hash = vm->shared->regexp_instance_hash;
         regexp->object.__proto__ = &vm->prototypes[NJS_OBJ_TYPE_REGEXP].object;
         regexp->object.type = NJS_REGEXP;
         regexp->object.shared = 0;
         regexp->object.extensible = 1;
-        regexp->last_index = 0;
+        njs_set_number(&regexp->last_index, 0);
         regexp->pattern = pattern;
+        njs_string_short_set(&regexp->string, 0, 0);
         return regexp;
     }
 
@@ -724,20 +726,42 @@ njs_regexp_alloc(njs_vm_t *vm, njs_regex
 
 
 static njs_int_t
-njs_regexp_prototype_last_index(njs_vm_t *vm, njs_object_prop_t *prop,
+njs_regexp_prototype_last_index(njs_vm_t *vm, njs_object_prop_t *unused,
     njs_value_t *value, njs_value_t *setval, njs_value_t *retval)
 {
-    uint32_t           index;
+    uint32_t           index, last_index;
     njs_regexp_t       *regexp;
     njs_string_prop_t  string;
 
-    njs_release(vm, value);
+    regexp = njs_object_proto_lookup(njs_object(value), NJS_REGEXP,
+                                     njs_regexp_t);
+    if (njs_slow_path(regexp == NULL)) {
+        njs_set_undefined(retval);
+        return NJS_DECLINED;
+    }
 
-    regexp = njs_regexp(value);
+    if (setval != NULL) {
+        regexp->last_index = *setval;
+        *retval  = *setval;
+
+        return NJS_OK;
+    }
+
+    if (njs_slow_path(!njs_is_number(&regexp->last_index))) {
+        *retval = regexp->last_index;
+        return NJS_OK;
+    }
 
     (void) njs_string_prop(&string, &regexp->string);
 
-    index = njs_string_index(&string, regexp->last_index);
+    last_index = njs_number(&regexp->last_index);
+
+    if (njs_slow_path(string.size < last_index)) {
+        *retval = regexp->last_index;
+        return NJS_OK;
+    }
+
+    index = njs_string_index(&string, last_index);
     njs_set_number(retval, index);
 
     return NJS_OK;
@@ -843,9 +867,12 @@ static njs_int_t
 njs_regexp_prototype_test(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
     njs_index_t unused)
 {
-    njs_int_t               ret;
+    int                     *captures;
+    uint32_t                last_index;
+    njs_int_t               ret, match;
     njs_uint_t              n;
     njs_regex_t             *regex;
+    njs_regexp_t            *regexp;
     njs_value_t             *value, lvalue;
     const njs_value_t       *retval;
     njs_string_prop_t       string;
@@ -872,6 +899,7 @@ njs_regexp_prototype_test(njs_vm_t *vm, 
 
     n = (string.length != 0);
 
+    regexp = njs_regexp(njs_argument(args, 0));
     pattern = njs_regexp_pattern(&args[0]);
 
     regex = &pattern->regex[n];
@@ -886,15 +914,32 @@ njs_regexp_prototype_test(njs_vm_t *vm, 
             }
         }
 
-        ret = njs_regexp_match(vm, regex, string.start, string.size,
+        match = njs_regexp_match(vm, regex, string.start, string.size,
                                match_data);
-        if (ret >= 0) {
+        if (match >= 0) {
             retval = &njs_value_true;
 
-        } else if (ret != NJS_REGEX_NOMATCH) {
+        } else if (match != NJS_REGEX_NOMATCH) {
             ret = NJS_ERROR;
             goto done;
         }
+
+        if (pattern->global) {
+            ret = njs_value_to_length(vm, &regexp->last_index, &last_index);
+            if (njs_slow_path(ret != NJS_OK)) {
+                return NJS_ERROR;
+            }
+
+            if (match >= 0) {
+                captures = njs_regex_captures(match_data);
+                last_index += captures[1];
+
+            } else {
+                last_index = 0;
+            }
+
+            njs_set_number(&regexp->last_index, last_index);
+        }
     }
 
     ret = NJS_OK;
@@ -915,6 +960,7 @@ njs_int_t
 njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
     njs_index_t unused)
 {
+    uint32_t                last_index;
     njs_int_t               ret;
     njs_utf8_t              utf8;
     njs_value_t             *value, lvalue;
@@ -940,10 +986,20 @@ njs_regexp_prototype_exec(njs_vm_t *vm, 
 
     regexp = njs_regexp(&args[0]);
     regexp->string = *value;
+    pattern = regexp->pattern;
+
+    ret = njs_value_to_length(vm, &regexp->last_index, &last_index);
+    if (njs_slow_path(ret != NJS_OK)) {
+        return NJS_ERROR;
+    }
+
+    if (!pattern->global) {
+        last_index = 0;
+    }
 
     (void) njs_string_prop(&string, value);
 
-    if (string.size >= regexp->last_index) {
+    if (string.size >= last_index) {
         utf8 = NJS_STRING_BYTE;
         type = NJS_REGEXP_BYTE;
 
@@ -959,8 +1015,8 @@ njs_regexp_prototype_exec(njs_vm_t *vm, 
         pattern = regexp->pattern;
 
         if (njs_regex_is_valid(&pattern->regex[type])) {
-            string.start += regexp->last_index;
-            string.size -= regexp->last_index;
+            string.start += last_index;
+            string.size -= last_index;
 
             match_data = njs_regex_match_data(&pattern->regex[type],
                                               vm->regex_context);
@@ -973,7 +1029,7 @@ njs_regexp_prototype_exec(njs_vm_t *vm, 
                                    string.size, match_data);
             if (ret >= 0) {
                 return njs_regexp_exec_result(vm, regexp, utf8, string.start,
-                                              match_data);
+                                              match_data, last_index);
             }
 
             if (njs_slow_path(ret != NJS_REGEX_NOMATCH)) {
@@ -984,7 +1040,10 @@ njs_regexp_prototype_exec(njs_vm_t *vm, 
         }
     }
 
-    regexp->last_index = 0;
+    if (pattern->global) {
+        njs_set_number(&regexp->last_index, 0);
+    }
+
     vm->retval = njs_value_null;
 
     return NJS_OK;
@@ -993,7 +1052,7 @@ njs_regexp_prototype_exec(njs_vm_t *vm, 
 
 static njs_int_t
 njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, njs_utf8_t utf8,
-    u_char *string, njs_regex_match_data_t *match_data)
+    u_char *string, njs_regex_match_data_t *match_data, uint32_t last_index)
 {
     int                 *captures;
     u_char              *start;
@@ -1045,10 +1104,10 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
 
     /* TODO: Non UTF-8 position */
 
-    njs_set_number(&prop->value, regexp->last_index + captures[0]);
+    njs_set_number(&prop->value, last_index + captures[0]);
 
     if (regexp->pattern->global) {
-        regexp->last_index += captures[1];
+        njs_set_number(&regexp->last_index, last_index + captures[1]);
     }
 
     lhq.key_hash = NJS_INDEX_HASH;
@@ -1206,12 +1265,6 @@ static const njs_object_prop_t  njs_rege
 
     {
         .type = NJS_PROPERTY_HANDLER,
-        .name = njs_string("lastIndex"),
-        .value = njs_prop_handler(njs_regexp_prototype_last_index),
-    },
-
-    {
-        .type = NJS_PROPERTY_HANDLER,
         .name = njs_string("global"),
         .value = njs_prop_handler(njs_regexp_prototype_global),
         .configurable = 1,
@@ -1264,6 +1317,23 @@ static const njs_object_prop_t  njs_rege
 };
 
 
+const njs_object_prop_t  njs_regexp_instance_properties[] =
+{
+    {
+        .type = NJS_PROPERTY_HANDLER,
+        .name = njs_string("lastIndex"),
+        .value = njs_prop_handler(njs_regexp_prototype_last_index),
+        .writable = 1,
+    },
+};
+
+
+const njs_object_init_t  njs_regexp_instance_init = {
+    njs_regexp_instance_properties,
+    njs_nitems(njs_regexp_instance_properties),
+};
+
+
 const njs_object_init_t  njs_regexp_prototype_init = {
     njs_regexp_prototype_properties,
     njs_nitems(njs_regexp_prototype_properties),
diff -r ef64784b77f6 -r 990a4b7a4053 src/njs_regexp.h
--- a/src/njs_regexp.h	Tue Dec 03 18:02:40 2019 +0300
+++ b/src/njs_regexp.h	Fri Dec 06 14:44:11 2019 +0300
@@ -33,6 +33,7 @@ njs_int_t njs_regexp_to_string(njs_vm_t 
     const njs_value_t *regexp);
 
 
+extern const njs_object_init_t  njs_regexp_instance_init;
 extern const njs_object_type_init_t  njs_regexp_type_init;
 
 
diff -r ef64784b77f6 -r 990a4b7a4053 src/njs_string.c
--- a/src/njs_string.c	Tue Dec 03 18:02:40 2019 +0300
+++ b/src/njs_string.c	Fri Dec 06 14:44:11 2019 +0300
@@ -3166,7 +3166,7 @@ njs_string_match_multiple(njs_vm_t *vm, 
     njs_regexp_utf8_t  type;
     njs_string_prop_t  string;
 
-    args[1].data.u.regexp->last_index = 0;
+    njs_set_number(&args[1].data.u.regexp->last_index, 0);
     vm->retval = njs_value_null;
 
     (void) njs_string_prop(&string, &args[0]);
diff -r ef64784b77f6 -r 990a4b7a4053 src/njs_value.h
--- a/src/njs_value.h	Tue Dec 03 18:02:40 2019 +0300
+++ b/src/njs_value.h	Fri Dec 06 14:44:11 2019 +0300
@@ -262,7 +262,7 @@ struct njs_function_s {
 
 struct njs_regexp_s {
     njs_object_t                      object;
-    uint32_t                          last_index;
+    njs_value_t                       last_index;
     njs_regexp_pattern_t              *pattern;
     /*
      * This string value can be unaligned since
diff -r ef64784b77f6 -r 990a4b7a4053 src/njs_vm.h
--- a/src/njs_vm.h	Tue Dec 03 18:02:40 2019 +0300
+++ b/src/njs_vm.h	Fri Dec 06 14:44:11 2019 +0300
@@ -247,11 +247,13 @@ typedef struct {
 struct njs_vm_shared_s {
     njs_lvlhsh_t             keywords_hash;
     njs_lvlhsh_t             values_hash;
+
     njs_lvlhsh_t             array_instance_hash;
     njs_lvlhsh_t             string_instance_hash;
     njs_lvlhsh_t             function_instance_hash;
     njs_lvlhsh_t             arrow_instance_hash;
     njs_lvlhsh_t             arguments_object_instance_hash;
+    njs_lvlhsh_t             regexp_instance_hash;
 
     njs_lvlhsh_t             env_hash;
 
diff -r ef64784b77f6 -r 990a4b7a4053 src/test/njs_unit_test.c
--- a/src/test/njs_unit_test.c	Tue Dec 03 18:02:40 2019 +0300
+++ b/src/test/njs_unit_test.c	Fri Dec 06 14:44:11 2019 +0300
@@ -8591,6 +8591,48 @@ static njs_unit_test_t  njs_test[] =
                  "r.lastIndex +' '+ r.source +' '+ r.source.length +' '+ r"),
       njs_str("1 \\x80 4 /\\x80/g") },
 
+    { njs_str("var descs = Object.getOwnPropertyDescriptors(RegExp('a'));"
+              "Object.keys(descs)"),
+      njs_str("lastIndex") },
+
+    { njs_str("var props = Object.getOwnPropertyDescriptor(RegExp('a'), 'lastIndex');"
+              "props.writable && !props.enumerable && !props.configurable"),
+      njs_str("true") },
+
+    { njs_str("var re = /a/; re.lastIndex"),
+      njs_str("0") },
+
+    { njs_str("var re = /aα/g; re.exec('aα'.repeat(32)); re.lastIndex"),
+      njs_str("2") },
+
+    { njs_str("var re = new RegExp('α'.repeat(33), 'g'); re.exec('α'.repeat(33)); re.lastIndex"),
+      njs_str("33") },
+
+    { njs_str("var re = new RegExp('α'.repeat(33), 'g'); re.exec('α'.repeat(33)); "
+              "re.lastIndex = 67; re.lastIndex"),
+      njs_str("67") },
+
+    { njs_str("var re = /a/; re.lastIndex = 4; Object.create(re).lastIndex"),
+      njs_str("4") },
+
+    { njs_str("var re = /a/g; re.lastIndex = {valueOf(){throw 'Oops'}}; typeof re.lastIndex"),
+      njs_str("object") },
+
+    { njs_str("var re = /a/g; re.lastIndex = {valueOf(){throw 'Oops'}}; re.exec('a')"),
+      njs_str("Oops") },
+
+    { njs_str("var re = /a/; Object.defineProperty(re, 'lastIndex', {value:'qq'}); re.lastIndex"),
+      njs_str("qq") },
+
+    { njs_str("var re = /a/; re.lastIndex = 'qq'; Object.create(re).lastIndex"),
+      njs_str("qq") },
+
+    { njs_str("var re = /(?:ab|cd)\\d?/g; re.lastIndex=-1; re.test('aacd22 '); re.lastIndex"),
+      njs_str("5") },
+
+    { njs_str("var re = /(?:ab|cd)\\d?/g; re.lastIndex=-1; re.test('@@'); re.lastIndex"),
+      njs_str("0") },
+
     /*
      * It seems that "/стоп/ig" fails on early PCRE versions.
      * It fails at least in 8.1 and works at least in 8.31.


More information about the nginx-devel mailing list