[njs] Added support for numeric separators (ES12).

Valentin Bartenev vbart at nginx.com
Mon Aug 17 16:56:38 UTC 2020


details:   https://hg.nginx.org/njs/rev/4818a450f4e6
branches:  
changeset: 1495:4818a450f4e6
user:      Valentin Bartenev <vbart at nginx.com>
date:      Mon Aug 17 19:55:46 2020 +0300
description:
Added support for numeric separators (ES12).

diffstat:

 src/njs_json.c           |    2 +-
 src/njs_lexer.c          |   10 +++-
 src/njs_number.c         |   42 +++++++++++++-----
 src/njs_number.h         |    6 +-
 src/njs_parser.c         |    4 +-
 src/njs_string.c         |    6 +-
 src/njs_strtod.c         |   36 +++++++++++++++-
 src/njs_strtod.h         |    3 +-
 src/test/njs_unit_test.c |  101 ++++++++++++++++++++++++++++++++++++++++++----
 9 files changed, 173 insertions(+), 37 deletions(-)

diffs (518 lines):

diff -r 10a2c35d53e7 -r 4818a450f4e6 src/njs_json.c
--- a/src/njs_json.c	Mon Aug 17 14:44:29 2020 +0300
+++ b/src/njs_json.c	Mon Aug 17 19:55:46 2020 +0300
@@ -796,7 +796,7 @@ njs_json_parse_number(njs_json_parse_ctx
     }
 
     start = p;
-    num = njs_number_dec_parse(&p, ctx->end);
+    num = njs_number_dec_parse(&p, ctx->end, 0);
     if (p != start) {
         njs_set_number(value, sign * num);
         return p;
diff -r 10a2c35d53e7 -r 4818a450f4e6 src/njs_lexer.c
--- a/src/njs_lexer.c	Mon Aug 17 14:44:29 2020 +0300
+++ b/src/njs_lexer.c	Mon Aug 17 19:55:46 2020 +0300
@@ -787,7 +787,7 @@ njs_lexer_number(njs_lexer_t *lexer, njs
                 goto illegal_token;
             }
 
-            token->number = njs_number_hex_parse(&p, lexer->end);
+            token->number = njs_number_hex_parse(&p, lexer->end, 1);
 
             goto done;
         }
@@ -830,16 +830,20 @@ njs_lexer_number(njs_lexer_t *lexer, njs
 
         /* Legacy Octal literals are deprecated. */
 
-        if (*p >= '0' && *p <= '9') {
+        if ((*p >= '0' && *p <= '9') || *p == '_') {
             goto illegal_trailer;
         }
     }
 
     p--;
-    token->number = njs_number_dec_parse(&p, lexer->end);
+    token->number = njs_number_dec_parse(&p, lexer->end, 1);
 
 done:
 
+    if (p[-1] == '_') {
+        p--;
+    }
+
     lexer->start = (u_char *) p;
     token->text.length = p - token->text.start;
 
diff -r 10a2c35d53e7 -r 4818a450f4e6 src/njs_number.c
--- a/src/njs_number.c	Mon Aug 17 14:44:29 2020 +0300
+++ b/src/njs_number.c	Mon Aug 17 19:55:46 2020 +0300
@@ -54,9 +54,10 @@ njs_key_to_index(const njs_value_t *valu
 
 
 double
-njs_number_dec_parse(const u_char **start, const u_char *end)
+njs_number_dec_parse(const u_char **start, const u_char *end,
+    njs_bool_t literal)
 {
-    return njs_strtod(start, end);
+    return njs_strtod(start, end, literal);
 }
 
 
@@ -65,22 +66,27 @@ njs_number_oct_parse(const u_char **star
 {
     u_char        c;
     uint64_t      num;
-    const u_char  *p;
+    const u_char  *p, *_;
 
     p = *start;
 
     num = 0;
+    _ = p - 1;
 
-    while (p < end) {
+    for (; p < end; p++) {
         /* Values less than '0' become >= 208. */
         c = *p - '0';
 
         if (njs_slow_path(c > 7)) {
+            if (*p == '_' && (p - _) > 1) {
+                _ = p;
+                continue;
+            }
+
             break;
         }
 
         num = num * 8 + c;
-        p++;
     }
 
     *start = p;
@@ -94,22 +100,27 @@ njs_number_bin_parse(const u_char **star
 {
     u_char        c;
     uint64_t      num;
-    const u_char  *p;
+    const u_char  *p, *_;
 
     p = *start;
 
     num = 0;
+    _ = p - 1;
 
-    while (p < end) {
+    for (; p < end; p++) {
         /* Values less than '0' become >= 208. */
         c = *p - '0';
 
         if (njs_slow_path(c > 1)) {
+            if (*p == '_' && (p - _) > 1) {
+                _ = p;
+                continue;
+            }
+
             break;
         }
 
         num = num * 2 + c;
-        p++;
     }
 
     *start = p;
@@ -119,24 +130,31 @@ njs_number_bin_parse(const u_char **star
 
 
 uint64_t
-njs_number_hex_parse(const u_char **start, const u_char *end)
+njs_number_hex_parse(const u_char **start, const u_char *end,
+    njs_bool_t literal)
 {
     uint64_t      num;
     njs_int_t     n;
-    const u_char  *p;
+    const u_char  *p, *_;
 
     p = *start;
 
     num = 0;
+    _ = p - 1;
 
-    while (p < end) {
+    for (; p < end; p++) {
         n = njs_char_to_hex(*p);
+
         if (njs_slow_path(n < 0)) {
+            if (literal && *p == '_' && (p - _) > 1) {
+                _ = p;
+                continue;
+            }
+
             break;
         }
 
         num = num * 16 + n;
-        p++;
     }
 
     *start = p;
diff -r 10a2c35d53e7 -r 4818a450f4e6 src/njs_number.h
--- a/src/njs_number.h	Mon Aug 17 14:44:29 2020 +0300
+++ b/src/njs_number.h	Mon Aug 17 19:55:46 2020 +0300
@@ -12,10 +12,12 @@
 
 
 double njs_key_to_index(const njs_value_t *value);
-double njs_number_dec_parse(const u_char **start, const u_char *end);
+double njs_number_dec_parse(const u_char **start, const u_char *end,
+    njs_bool_t literal);
 uint64_t njs_number_oct_parse(const u_char **start, const u_char *end);
 uint64_t njs_number_bin_parse(const u_char **start, const u_char *end);
-uint64_t njs_number_hex_parse(const u_char **start, const u_char *end);
+uint64_t njs_number_hex_parse(const u_char **start, const u_char *end,
+    njs_bool_t literal);
 int64_t njs_number_radix_parse(const u_char **start, const u_char *end,
     uint8_t radix);
 njs_int_t njs_number_to_string(njs_vm_t *vm, njs_value_t *string,
diff -r 10a2c35d53e7 -r 4818a450f4e6 src/njs_parser.c
--- a/src/njs_parser.c	Mon Aug 17 14:44:29 2020 +0300
+++ b/src/njs_parser.c	Mon Aug 17 19:55:46 2020 +0300
@@ -8078,7 +8078,7 @@ njs_parser_escape_string_create(njs_pars
         hex_end = src + hex_length;
 
     hex:
-        cp = njs_number_hex_parse(&src, hex_end);
+        cp = njs_number_hex_parse(&src, hex_end, 0);
 
         /* Skip '}' character. */
 
@@ -8219,7 +8219,7 @@ njs_parser_escape_string_calc_length(njs
     hex:
 
         ptr = src;
-        cp = njs_number_hex_parse(&src, hex_end);
+        cp = njs_number_hex_parse(&src, hex_end, 0);
 
         if (hex_length != 0) {
             if (src != hex_end) {
diff -r 10a2c35d53e7 -r 4818a450f4e6 src/njs_string.c
--- a/src/njs_string.c	Mon Aug 17 14:44:29 2020 +0300
+++ b/src/njs_string.c	Mon Aug 17 19:55:46 2020 +0300
@@ -3718,11 +3718,11 @@ njs_string_to_number(const njs_value_t *
         && p + 2 < end && p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
     {
         p += 2;
-        num = njs_number_hex_parse(&p, end);
+        num = njs_number_hex_parse(&p, end, 0);
 
     } else {
         start = p;
-        num = njs_number_dec_parse(&p, end);
+        num = njs_number_dec_parse(&p, end, 0);
 
         if (p == start) {
             if (p + infinity > end || memcmp(p, "Infinity", infinity) != 0) {
@@ -3821,7 +3821,7 @@ njs_string_to_index(const njs_value_t *v
         }
     }
 
-    num = njs_strtod(&p, end);
+    num = njs_strtod(&p, end, 0);
     if (p != end) {
         return NAN;
     }
diff -r 10a2c35d53e7 -r 4818a450f4e6 src/njs_strtod.c
--- a/src/njs_strtod.c	Mon Aug 17 14:44:29 2020 +0300
+++ b/src/njs_strtod.c	Mon Aug 17 19:55:46 2020 +0300
@@ -307,12 +307,12 @@ njs_strtod_internal(const u_char *start,
 
 
 double
-njs_strtod(const u_char **start, const u_char *end)
+njs_strtod(const u_char **start, const u_char *end, njs_bool_t literal)
 {
     int           exponent, exp, insignf;
     u_char        c, *pos;
     njs_bool_t    minus;
-    const u_char  *e, *p, *last;
+    const u_char  *e, *p, *last, *_;
     u_char        data[128];
 
     exponent = 0;
@@ -321,11 +321,25 @@ njs_strtod(const u_char **start, const u
     pos = data;
     last = data + sizeof(data);
 
-    for (p = *start; p < end; p++) {
+    p = *start;
+    _ = p - 2;
+
+    for (; p < end; p++) {
         /* Values less than '0' become >= 208. */
         c = *p - '0';
 
         if (njs_slow_path(c > 9)) {
+            if (literal) {
+                if ((p - _) == 1) {
+                    goto done;
+                }
+
+                if (*p == '_') {
+                    _ = p;
+                    continue;
+                }
+            }
+
             break;
         }
 
@@ -339,12 +353,18 @@ njs_strtod(const u_char **start, const u
 
     /* Do not emit a '.', but adjust the exponent instead. */
     if (p < end && *p == '.') {
+        _ = p;
 
         for (p++; p < end; p++) {
             /* Values less than '0' become >= 208. */
             c = *p - '0';
 
             if (njs_slow_path(c > 9)) {
+                if (literal && *p == '_' && (p - _) > 1) {
+                    _ = p;
+                    continue;
+                }
+
                 break;
             }
 
@@ -388,6 +408,11 @@ njs_strtod(const u_char **start, const u
                 c = *p - '0';
 
                 if (njs_slow_path(c > 9)) {
+                    if (literal && *p == '_' && (p - _) > 1) {
+                        _ = p;
+                        continue;
+                    }
+
                     break;
                 }
 
@@ -397,9 +422,14 @@ njs_strtod(const u_char **start, const u
             }
 
             exponent += minus ? -exp : exp;
+
+        } else if (literal && *e == '_') {
+            p = e;
         }
     }
 
+done:
+
     *start = p;
 
     exponent += insignf;
diff -r 10a2c35d53e7 -r 4818a450f4e6 src/njs_strtod.h
--- a/src/njs_strtod.h	Mon Aug 17 14:44:29 2020 +0300
+++ b/src/njs_strtod.h	Mon Aug 17 19:55:46 2020 +0300
@@ -7,6 +7,7 @@
 #ifndef _NJS_STRTOD_H_INCLUDED_
 #define _NJS_STRTOD_H_INCLUDED_
 
-NJS_EXPORT double njs_strtod(const u_char **start, const u_char *end);
+NJS_EXPORT double njs_strtod(const u_char **start, const u_char *end,
+    njs_bool_t literal);
 
 #endif /* _NJS_STRTOD_H_INCLUDED_ */
diff -r 10a2c35d53e7 -r 4818a450f4e6 src/test/njs_unit_test.c
--- a/src/test/njs_unit_test.c	Mon Aug 17 14:44:29 2020 +0300
+++ b/src/test/njs_unit_test.c	Mon Aug 17 19:55:46 2020 +0300
@@ -171,13 +171,13 @@ static njs_unit_test_t  njs_test[] =
     { njs_str(".9"),
       njs_str("0.9") },
 
-    { njs_str("-.01"),
+    { njs_str("-.0_1"),
       njs_str("-0.01") },
 
-    { njs_str("0.000001"),
+    { njs_str("0.000_001"),
       njs_str("0.000001") },
 
-    { njs_str("0.00000123456"),
+    { njs_str("0.00000_123456"),
       njs_str("0.00000123456") },
 
     { njs_str("0.0000001"),
@@ -186,10 +186,13 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("1.1000000"),
       njs_str("1.1") },
 
-    { njs_str("99999999999999999999"),
+    { njs_str("1_0"),
+      njs_str("10") },
+
+    { njs_str("99_999_999_999_999_999_999"),
       njs_str("100000000000000000000") },
 
-    { njs_str("99999999999999999999.111"),
+    { njs_str("9999999999999999999_9.1_1_1"),
       njs_str("100000000000000000000") },
 
     { njs_str("999999999999999999999"),
@@ -201,7 +204,7 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("18446744073709551616"),
       njs_str("18446744073709552000") },
 
-    { njs_str("1.7976931348623157E+308"),
+    { njs_str("1.79769313_48623157E+3_0_8"),
       njs_str("1.7976931348623157e+308") },
 
     { njs_str("+1"),
@@ -213,6 +216,48 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("."),
       njs_str("SyntaxError: Unexpected token \".\" in 1") },
 
+    { njs_str("0_1"),
+      njs_str("SyntaxError: Unexpected token \"0_\" in 1") },
+
+    { njs_str("1_"),
+      njs_str("SyntaxError: Unexpected token \"_\" in 1") },
+
+    { njs_str("1__0"),
+      njs_str("SyntaxError: Unexpected token \"__0\" in 1") },
+
+    { njs_str("._1"),
+      njs_str("SyntaxError: Unexpected token \".\" in 1") },
+
+    { njs_str(".1_"),
+      njs_str("SyntaxError: Unexpected token \"_\" in 1") },
+
+    { njs_str("1_.1"),
+      njs_str("SyntaxError: Unexpected token \"_\" in 1") },
+
+    { njs_str(".0__1"),
+      njs_str("SyntaxError: Unexpected token \"__1\" in 1") },
+
+    { njs_str("1e_1"),
+      njs_str("SyntaxError: Unexpected token \"_1\" in 1") },
+
+    { njs_str("1e-_1"),
+      njs_str("SyntaxError: Unexpected token \"_1\" in 1") },
+
+    { njs_str("1E1__0"),
+      njs_str("SyntaxError: Unexpected token \"__0\" in 1") },
+
+    { njs_str("1_e1"),
+      njs_str("SyntaxError: Unexpected token \"_e1\" in 1") },
+
+    { njs_str("1e1_"),
+      njs_str("SyntaxError: Unexpected token \"_\" in 1") },
+
+    { njs_str("-_1"),
+      njs_str("ReferenceError: \"_1\" is not defined in 1") },
+
+    { njs_str("_1"),
+      njs_str("ReferenceError: \"_1\" is not defined in 1") },
+
     /* Octal Numbers. */
 
     { njs_str("0o0"),
@@ -224,7 +269,7 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("0o011"),
       njs_str("9") },
 
-    { njs_str("-0O777"),
+    { njs_str("-0O7_7_7"),
       njs_str("-511") },
 
     { njs_str("0o"),
@@ -233,6 +278,15 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("0O778"),
       njs_str("SyntaxError: Unexpected token \"0O778\" in 1") },
 
+    { njs_str("0O_7"),
+      njs_str("SyntaxError: Unexpected token \"_7\" in 1") },
+
+    { njs_str("0o7_"),
+      njs_str("SyntaxError: Unexpected token \"_\" in 1") },
+
+    { njs_str("0o7__7"),
+      njs_str("SyntaxError: Unexpected token \"__7\" in 1") },
+
     /* Legacy Octal Numbers are deprecated. */
 
     { njs_str("00"),
@@ -247,6 +301,15 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("0011"),
       njs_str("SyntaxError: Unexpected token \"00\" in 1") },
 
+    { njs_str("0_"),
+      njs_str("SyntaxError: Unexpected token \"0_\" in 1") },
+
+    { njs_str("0_1"),
+      njs_str("SyntaxError: Unexpected token \"0_\" in 1") },
+
+    { njs_str("00_1"),
+      njs_str("SyntaxError: Unexpected token \"00\" in 1") },
+
     /* Binary Numbers. */
 
     { njs_str("0b0"),
@@ -255,10 +318,10 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("0B10"),
       njs_str("2") },
 
-    { njs_str("0b0101"),
+    { njs_str("0b0_1_0_1"),
       njs_str("5") },
 
-    { njs_str("-0B11111111"),
+    { njs_str("-0B1111_1111"),
       njs_str("-255") },
 
     { njs_str("0b"),
@@ -267,6 +330,15 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("0B12"),
       njs_str("SyntaxError: Unexpected token \"0B12\" in 1") },
 
+    { njs_str("0b_11"),
+      njs_str("SyntaxError: Unexpected token \"_11\" in 1") },
+
+    { njs_str("0B1__1"),
+      njs_str("SyntaxError: Unexpected token \"__1\" in 1") },
+
+    { njs_str("0b11_"),
+      njs_str("SyntaxError: Unexpected token \"_\" in 1") },
+
     /* Hex Numbers. */
 
     { njs_str("0x0"),
@@ -278,7 +350,7 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("0xffFF"),
       njs_str("65535") },
 
-    { njs_str("0X0000BEEF"),
+    { njs_str("0X00_00_BE_EF"),
       njs_str("48879") },
 
     { njs_str("0x"),
@@ -290,6 +362,15 @@ static njs_unit_test_t  njs_test[] =
     { njs_str("0x12g"),
       njs_str("SyntaxError: Unexpected token \"g\" in 1") },
 
+    { njs_str("0X_ff"),
+      njs_str("SyntaxError: Unexpected token \"_ff\" in 1") },
+
+    { njs_str("0xff_"),
+      njs_str("SyntaxError: Unexpected token \"_\" in 1") },
+
+    { njs_str("0Xf__f"),
+      njs_str("SyntaxError: Unexpected token \"__f\" in 1") },
+
     { njs_str(""),
       njs_str("undefined") },
 


More information about the nginx-devel mailing list