[njs] Lexer refactoring.

Alexander Borisov alexander.borisov at nginx.com
Wed Feb 26 13:22:48 UTC 2020


details:   https://hg.nginx.org/njs/rev/87d05fb35ff9
branches:  
changeset: 1336:87d05fb35ff9
user:      Alexander Borisov <alexander.borisov at nginx.com>
date:      Wed Feb 26 16:22:10 2020 +0300
description:
Lexer refactoring.

diffstat:

 src/njs_builtin.c         |   54 +--
 src/njs_function.c        |    2 +-
 src/njs_generator.c       |   64 ++-
 src/njs_lexer.c           |  632 ++++++++++++++++++++++++++-------------------
 src/njs_lexer.h           |   80 ++++-
 src/njs_lexer_keyword.c   |  180 +++---------
 src/njs_lexer_tables.h    |  146 ++++++++++
 src/njs_module.c          |    2 +-
 src/njs_parser.c          |   76 +++--
 src/njs_parser.h          |   33 +-
 src/njs_parser_terminal.c |   40 +-
 src/njs_shell.c           |   43 +-
 src/njs_variable.c        |  358 ++++++++++---------------
 src/njs_variable.h        |   52 ++-
 src/njs_vm.c              |    2 +-
 src/njs_vm.h              |    2 +-
 utils/lexer_keyword.py    |  245 +++++++++++++++++
 17 files changed, 1226 insertions(+), 785 deletions(-)

diffs (truncated from 3036 to 1000 lines):

diff -r 079d4d4556f0 -r 87d05fb35ff9 src/njs_builtin.c
--- a/src/njs_builtin.c	Wed Feb 26 12:41:51 2020 +0300
+++ b/src/njs_builtin.c	Wed Feb 26 16:22:10 2020 +0300
@@ -134,12 +134,6 @@ njs_builtin_objects_create(njs_vm_t *vm)
     }
 
     njs_lvlhsh_init(&shared->keywords_hash);
-
-    ret = njs_lexer_keywords_init(vm->mem_pool, &shared->keywords_hash);
-    if (njs_slow_path(ret != NJS_OK)) {
-        return NJS_ERROR;
-    }
-
     njs_lvlhsh_init(&shared->values_hash);
 
     pattern = njs_regexp_pattern_create(vm, (u_char *) "(?:)",
@@ -495,7 +489,6 @@ njs_builtin_completions(njs_vm_t *vm)
     njs_arr_t                *array;
     njs_str_t                *completion;
     njs_int_t                ret;
-    njs_keyword_t            *keyword;
     njs_lvlhsh_each_t        lhe;
     njs_builtin_traverse_t   ctx;
     const njs_object_prop_t  *prop;
@@ -505,23 +498,9 @@ njs_builtin_completions(njs_vm_t *vm)
         return NULL;
     }
 
-    /* Keywords completions. */
-
-    njs_lvlhsh_each_init(&lhe, &njs_keyword_hash_proto);
-
-    for ( ;; ) {
-        keyword = njs_lvlhsh_each(&vm->shared->keywords_hash, &lhe);
-
-        if (keyword == NULL) {
-            break;
-        }
-
-        completion = njs_arr_add(array);
-        if (njs_slow_path(completion == NULL)) {
-            return NULL;
-        }
-
-        *completion = keyword->name;
+    ret = njs_lexer_keywords(array);
+    if (njs_slow_path(ret != NJS_OK)) {
+        return NULL;
     }
 
     /* Global object completions. */
@@ -570,12 +549,14 @@ njs_vm_completions(njs_vm_t *vm, njs_str
 static njs_arr_t *
 njs_vm_expression_completions(njs_vm_t *vm, njs_str_t *expression)
 {
-    u_char              *p, *end;
-    njs_int_t           ret;
-    njs_value_t         *value;
-    njs_variable_t      *var;
-    njs_object_prop_t   *prop;
-    njs_lvlhsh_query_t  lhq;
+    u_char               *p, *end;
+    njs_int_t            ret;
+    njs_value_t          *value;
+    njs_variable_t       *var;
+    njs_rbtree_node_t    *node;
+    njs_object_prop_t    *prop;
+    njs_lvlhsh_query_t   lhq;
+    njs_variable_node_t  var_node;
 
     if (njs_slow_path(vm->parser == NULL)) {
         return NULL;
@@ -588,16 +569,23 @@ njs_vm_expression_completions(njs_vm_t *
 
     while (p < end && *p != '.') { p++; }
 
-    lhq.proto = &njs_variables_hash_proto;
+    lhq.proto = &njs_lexer_hash_proto;
     lhq.key.length = p - lhq.key.start;
     lhq.key_hash = njs_djb_hash(lhq.key.start, lhq.key.length);
 
-    ret = njs_lvlhsh_find(&vm->parser->scope->variables, &lhq);
+    ret = njs_lvlhsh_find(&vm->shared->keywords_hash, &lhq);
     if (njs_slow_path(ret != NJS_OK)) {
         return NULL;
     }
 
-    var = lhq.value;
+    var_node.key = (uintptr_t) lhq.value;
+
+    node = njs_rbtree_find(&vm->parser->scope->variables, &var_node.node);
+    if (njs_slow_path(node == NULL)) {
+        return NULL;
+    }
+
+    var = ((njs_variable_node_t *) node)->variable;
     value = njs_vmcode_operand(vm, var->index);
 
     if (!njs_is_object(value)) {
diff -r 079d4d4556f0 -r 87d05fb35ff9 src/njs_function.c
--- a/src/njs_function.c	Wed Feb 26 12:41:51 2020 +0300
+++ b/src/njs_function.c	Wed Feb 26 16:22:10 2020 +0300
@@ -932,7 +932,7 @@ njs_function_constructor(njs_vm_t *vm, n
 
     scope = parser->scope;
 
-    ret = njs_variables_copy(vm, &scope->variables, &vm->variables_hash);
+    ret = njs_variables_copy(vm, &scope->variables, vm->variables_hash);
     if (njs_slow_path(ret != NJS_OK)) {
         return ret;
     }
diff -r 079d4d4556f0 -r 87d05fb35ff9 src/njs_generator.c
--- a/src/njs_generator.c	Wed Feb 26 12:41:51 2020 +0300
+++ b/src/njs_generator.c	Wed Feb 26 16:22:10 2020 +0300
@@ -384,7 +384,8 @@ njs_generate(njs_vm_t *vm, njs_generator
         return njs_generate_inc_dec_operation(vm, generator, node, 1);
 
     case NJS_TOKEN_NULL:
-    case NJS_TOKEN_BOOLEAN:
+    case NJS_TOKEN_TRUE:
+    case NJS_TOKEN_FALSE:
     case NJS_TOKEN_NUMBER:
     case NJS_TOKEN_STRING:
         node->index = njs_value_index(vm, &node->u.value, generator->runtime);
@@ -2321,9 +2322,10 @@ static njs_int_t
 njs_generate_function_declaration(njs_vm_t *vm, njs_generator_t *generator,
     njs_parser_node_t *node)
 {
-    njs_int_t              ret;
-    njs_variable_t         *var;
-    njs_function_lambda_t  *lambda;
+    njs_int_t                ret;
+    njs_variable_t           *var;
+    njs_function_lambda_t    *lambda;
+    const njs_lexer_entry_t  *lex_entry;
 
     var = njs_variable_resolve(vm, node);
     if (njs_slow_path(var == NULL)) {
@@ -2337,14 +2339,18 @@ njs_generate_function_declaration(njs_vm
 
     lambda = njs_function_lambda(&var->value);
 
-    ret = njs_generate_function_scope(vm, lambda, node,
-                                      &node->u.reference.name);
+    lex_entry = njs_lexer_entry(node->u.reference.unique_id);
+    if (njs_slow_path(lex_entry == NULL)) {
+        return NJS_ERROR;
+    }
+
+    ret = njs_generate_function_scope(vm, lambda, node, &lex_entry->name);
     if (njs_slow_path(ret != NJS_OK)) {
         return ret;
     }
 
     if (vm->debug != NULL) {
-        ret = njs_generate_function_debug(vm, &var->name, lambda, node);
+        ret = njs_generate_function_debug(vm, &lex_entry->name, lambda, node);
     }
 
     return ret;
@@ -2473,15 +2479,17 @@ njs_generate_lambda_variables(njs_vm_t *
 {
     njs_index_t             index;
     njs_variable_t          *var;
+    njs_rbtree_node_t       *rb_node;
     njs_vmcode_move_t       *move;
-    njs_lvlhsh_each_t       lhe;
     njs_vmcode_this_t       *this;
+    njs_variable_node_t     *var_node;
     njs_vmcode_arguments_t  *arguments;
 
-    njs_lvlhsh_each_init(&lhe, &njs_variables_hash_proto);
-
-    for ( ;; ) {
-        var = njs_lvlhsh_each(&node->scope->variables, &lhe);
+    rb_node = njs_rbtree_min(&node->scope->variables);
+
+    while (njs_rbtree_is_there_successor(&node->scope->variables, rb_node)) {
+        var_node = (njs_variable_node_t *) rb_node;
+        var = var_node->variable;
 
         if (var == NULL) {
             break;
@@ -2504,6 +2512,8 @@ njs_generate_lambda_variables(njs_vm_t *
                               NJS_VMCODE_ARGUMENTS, 1);
             arguments->dst = var->index;
         }
+
+        rb_node = njs_rbtree_node_successor(&node->scope->variables, rb_node);
     }
 
     return NJS_OK;
@@ -3297,11 +3307,11 @@ static njs_int_t
 njs_generate_global_reference(njs_vm_t *vm, njs_generator_t *generator,
     njs_parser_node_t *node, njs_bool_t exception)
 {
-    njs_str_t              *name;
-    njs_int_t              ret;
-    njs_index_t            index;
-    njs_value_t            property;
-    njs_vmcode_prop_get_t  *prop_get;
+    njs_int_t                ret;
+    njs_index_t              index;
+    njs_value_t              property;
+    njs_vmcode_prop_get_t    *prop_get;
+    const njs_lexer_entry_t  *lex_entry;
 
     index = njs_generate_dest_index(vm, generator, node);
     if (njs_slow_path(index == NJS_INDEX_ERROR)) {
@@ -3314,11 +3324,13 @@ njs_generate_global_reference(njs_vm_t *
     prop_get->value = index;
     prop_get->object = NJS_INDEX_GLOBAL_OBJECT;
 
-    /* FIXME: cache keys in a hash. */
-
-    name = &node->u.reference.name;
-
-    ret = njs_string_set(vm, &property, name->start, name->length);
+    lex_entry = njs_lexer_entry(node->u.reference.unique_id);
+    if (njs_slow_path(lex_entry == NULL)) {
+        return NJS_ERROR;
+    }
+
+    ret = njs_string_set(vm, &property, lex_entry->name.start,
+                         lex_entry->name.length);
     if (njs_slow_path(ret != NJS_OK)) {
         return NJS_ERROR;
     }
@@ -3343,6 +3355,7 @@ njs_generate_reference_error(njs_vm_t *v
     njs_parser_node_t *node)
 {
     njs_jump_off_t                ret;
+    const njs_lexer_entry_t       *lex_entry;
     njs_vmcode_reference_error_t  *ref_err;
 
     if (njs_slow_path(!node->u.reference.not_defined)) {
@@ -3365,7 +3378,12 @@ njs_generate_reference_error(njs_vm_t *v
         }
     }
 
-    return njs_name_copy(vm, &ref_err->name, &node->u.reference.name);
+    lex_entry = njs_lexer_entry(node->u.reference.unique_id);
+    if (njs_slow_path(lex_entry == NULL)) {
+        return NJS_ERROR;
+    }
+
+    return njs_name_copy(vm, &ref_err->name, &lex_entry->name);
 }
 
 
diff -r 079d4d4556f0 -r 87d05fb35ff9 src/njs_lexer.c
--- a/src/njs_lexer.c	Wed Feb 26 12:41:51 2020 +0300
+++ b/src/njs_lexer.c	Wed Feb 26 16:22:10 2020 +0300
@@ -18,23 +18,28 @@ struct njs_lexer_multi_s {
 };
 
 
+static njs_int_t njs_lexer_hash_test(njs_lvlhsh_query_t *lhq, void *data);
+static njs_int_t njs_lexer_word(njs_lexer_t *lexer, njs_lexer_token_t *token);
+static void njs_lexer_string(njs_lexer_t *lexer, njs_lexer_token_t *token,
+    u_char quote);
+static void njs_lexer_number(njs_lexer_t *lexer, njs_lexer_token_t *token);
+static void njs_lexer_multi(njs_lexer_t *lexer, njs_lexer_token_t *token,
+    const njs_lexer_multi_t *multi, size_t length);
+static void njs_lexer_division(njs_lexer_t *lexer, njs_lexer_token_t *token);
+
 static njs_lexer_token_t *njs_lexer_token_push(njs_vm_t *vm,
     njs_lexer_t *lexer);
 static njs_lexer_token_t *njs_lexer_token_pop(njs_lexer_t *lexer);
-static njs_token_t njs_lexer_token_name_resolve(njs_lexer_t *lexer,
-    njs_lexer_token_t *lt);
-static njs_token_t njs_lexer_next_token(njs_lexer_t *lexer,
-    njs_lexer_token_t *lt);
-static njs_token_t njs_lexer_word(njs_lexer_t *lexer, njs_lexer_token_t *lt,
-    u_char c);
-static njs_token_t njs_lexer_string(njs_lexer_t *lexer, njs_lexer_token_t *lt,
-    u_char quote);
-static njs_token_t njs_lexer_number(njs_lexer_t *lexer, njs_lexer_token_t *lt,
-    u_char c);
-static njs_token_t njs_lexer_multi(njs_lexer_t *lexer, njs_lexer_token_t *lt,
-    njs_token_t token, njs_uint_t n, const njs_lexer_multi_t *multi);
-static njs_token_t njs_lexer_division(njs_lexer_t *lexer,
-    njs_token_t token);
+
+
+const njs_lvlhsh_proto_t  njs_lexer_hash_proto
+    njs_aligned(64) =
+{
+    NJS_LVLHSH_DEFAULT,
+    njs_lexer_hash_test,
+    njs_lvlhsh_alloc,
+    njs_lvlhsh_free,
+};
 
 
 static const uint8_t  njs_tokens[256]  njs_aligned(64) = {
@@ -297,7 +302,8 @@ njs_lexer_init(njs_vm_t *vm, njs_lexer_t
     lexer->start = start;
     lexer->end = end;
     lexer->line = 1;
-    lexer->keywords_hash = vm->shared->keywords_hash;
+    lexer->keywords_hash = &vm->shared->keywords_hash;
+    lexer->mem_pool = vm->mem_pool;
 
     njs_queue_init(&lexer->preread);
 
@@ -312,9 +318,9 @@ njs_lexer_token(njs_vm_t *vm, njs_lexer_
 
     lexer->prev_start = lexer->start;
 
-    if (lexer->lexer_token != NULL) {
-        lexer->prev_token = lexer->lexer_token->token;
-        njs_mp_free(vm->mem_pool, lexer->lexer_token);
+    if (lexer->token != NULL) {
+        lexer->prev_token = lexer->token->type;
+        njs_mp_free(vm->mem_pool, lexer->token);
     }
 
     if (njs_queue_is_empty(&lexer->preread)) {
@@ -324,9 +330,9 @@ njs_lexer_token(njs_vm_t *vm, njs_lexer_
         }
     }
 
-    lexer->lexer_token = njs_lexer_token_pop(lexer);
+    lexer->token = njs_lexer_token_pop(lexer);
 
-    return njs_lexer_token_name_resolve(lexer, lexer->lexer_token);
+    return lexer->token->type;
 }
 
 
@@ -350,9 +356,7 @@ njs_lexer_peek_token(njs_vm_t *vm, njs_l
 
             /* NJS_TOKEN_DIVISION stands for regexp literal. */
 
-            if (lt->token == NJS_TOKEN_DIVISION
-                || lt->token == NJS_TOKEN_END)
-            {
+            if (lt->type == NJS_TOKEN_DIVISION || lt->type == NJS_TOKEN_END) {
                 break;
             }
 
@@ -368,25 +372,50 @@ njs_lexer_peek_token(njs_vm_t *vm, njs_l
         }
     }
 
-    return njs_lexer_token_name_resolve(lexer, lt);
+    return lt->type;
+}
+
+
+njs_int_t
+njs_lexer_rollback(njs_vm_t *vm, njs_lexer_t *lexer)
+{
+    njs_lexer_token_t  *lt;
+
+    lt = njs_mp_zalloc(vm->mem_pool, sizeof(njs_lexer_token_t));
+    if (njs_slow_path(lt == NULL)) {
+        return NJS_ERROR;
+    }
+
+    *lt = *lexer->token;
+
+    njs_queue_insert_head(&lexer->preread, &lt->link);
+
+    return NJS_OK;
 }
 
 
 static njs_lexer_token_t *
 njs_lexer_token_push(njs_vm_t *vm, njs_lexer_t *lexer)
 {
-    njs_lexer_token_t  *lt;
+    njs_int_t          ret;
+    njs_lexer_token_t  *token;
 
-    lt = njs_mp_zalloc(vm->mem_pool, sizeof(njs_lexer_token_t));
-    if (njs_slow_path(lt == NULL)) {
+    token = njs_mp_zalloc(vm->mem_pool, sizeof(njs_lexer_token_t));
+    if (njs_slow_path(token == NULL)) {
         return NULL;
     }
 
-    lt->token = njs_lexer_next_token(lexer, lt);
+    do {
+        ret = njs_lexer_next_token(lexer, token);
+        if (njs_slow_path(ret != NJS_OK)) {
+            return NULL;
+        }
 
-    njs_queue_insert_tail(&lexer->preread, &lt->link);
+    } while (token->type == NJS_TOKEN_COMMENT);
 
-    return lt;
+    njs_queue_insert_tail(&lexer->preread, &token->link);
+
+    return token;
 }
 
 
@@ -403,197 +432,223 @@ njs_lexer_token_pop(njs_lexer_t *lexer)
 
 
 njs_int_t
-njs_lexer_rollback(njs_vm_t *vm, njs_lexer_t *lexer)
+njs_lexer_next_token(njs_lexer_t *lexer, njs_lexer_token_t *token)
 {
-    njs_lexer_token_t  *lt;
+    u_char  c, *p;
+
+    c = ' ';
 
-    lt = njs_mp_zalloc(vm->mem_pool, sizeof(njs_lexer_token_t));
-    if (njs_slow_path(lt == NULL)) {
-        return NJS_ERROR;
+    while (lexer->start < lexer->end) {
+        c = *lexer->start++;
+
+        if (njs_tokens[c] != NJS_TOKEN_SPACE) {
+            break;
+        }
     }
 
-    *lt = *lexer->lexer_token;
+    lexer->keyword = 0;
+    token->type = njs_tokens[c];
+
+    switch (token->type) {
+
+    case NJS_TOKEN_LETTER:
+        return njs_lexer_word(lexer, token);
+
+    case NJS_TOKEN_DOUBLE_QUOTE:
+    case NJS_TOKEN_SINGLE_QUOTE:
+        njs_lexer_string(lexer, token, c);
+        break;
+
+    case NJS_TOKEN_DOT:
+        p = lexer->start;
+
+        if (p + 1 < lexer->end
+            && njs_tokens[p[0]] == NJS_TOKEN_DOT
+            && njs_tokens[p[1]] == NJS_TOKEN_DOT)
+        {
+            token->text.start = lexer->start - 1;
+            token->text.length = (p - token->text.start) + 2;
+
+            token->type = NJS_TOKEN_ELLIPSIS;
+
+            lexer->start += 2;
+
+            return NJS_OK;
+        }
+
+        if (p == lexer->end || njs_tokens[*p] != NJS_TOKEN_DIGIT) {
+            token->text.start = lexer->start - 1;
+            token->text.length = p - token->text.start;
+
+            token->type = NJS_TOKEN_DOT;
+
+            return NJS_OK;
+        }
+
+        /* Fall through. */
+
+    case NJS_TOKEN_DIGIT:
+        njs_lexer_number(lexer, token);
+        break;
+
+    case NJS_TOKEN_DIVISION:
+        njs_lexer_division(lexer, token);
+        break;
+
+    case NJS_TOKEN_ASSIGNMENT:
+        njs_lexer_multi(lexer, token, njs_assignment_token,
+                        njs_nitems(njs_assignment_token));
+        break;
+
+    case NJS_TOKEN_ADDITION:
+        njs_lexer_multi(lexer, token, njs_addition_token,
+                        njs_nitems(njs_addition_token));
+        break;
 
-    njs_queue_insert_head(&lexer->preread, &lt->link);
+    case NJS_TOKEN_SUBSTRACTION:
+        njs_lexer_multi(lexer, token, njs_substraction_token,
+                        njs_nitems(njs_substraction_token));
+        break;
+
+    case NJS_TOKEN_MULTIPLICATION:
+        njs_lexer_multi(lexer, token, njs_multiplication_token,
+                        njs_nitems(njs_multiplication_token));
+        break;
+
+    case NJS_TOKEN_REMAINDER:
+        njs_lexer_multi(lexer, token, njs_remainder_token,
+                        njs_nitems(njs_remainder_token));
+        break;
+
+    case NJS_TOKEN_BITWISE_AND:
+        njs_lexer_multi(lexer, token, njs_bitwise_and_token,
+                        njs_nitems(njs_bitwise_and_token));
+        break;
+
+    case NJS_TOKEN_BITWISE_XOR:
+        njs_lexer_multi(lexer, token, njs_bitwise_xor_token,
+                        njs_nitems(njs_bitwise_xor_token));
+        break;
+
+    case NJS_TOKEN_BITWISE_OR:
+        njs_lexer_multi(lexer, token, njs_bitwise_or_token,
+                        njs_nitems(njs_bitwise_or_token));
+        break;
+
+    case NJS_TOKEN_LOGICAL_NOT:
+        njs_lexer_multi(lexer, token, njs_logical_not_token,
+                        njs_nitems(njs_logical_not_token));
+        break;
+
+    case NJS_TOKEN_LESS:
+        njs_lexer_multi(lexer, token, njs_less_token,
+                        njs_nitems(njs_less_token));
+        break;
+
+    case NJS_TOKEN_GREATER:
+        njs_lexer_multi(lexer, token, njs_greater_token,
+                        njs_nitems(njs_greater_token));
+        break;
+
+    case NJS_TOKEN_CONDITIONAL:
+        njs_lexer_multi(lexer, token, njs_conditional_token,
+                        njs_nitems(njs_conditional_token));
+        break;
+
+    case NJS_TOKEN_SPACE:
+        token->type = NJS_TOKEN_END;
+        return NJS_OK;
+
+    case NJS_TOKEN_LINE_END:
+        lexer->line++;
+
+        /* Fall through. */
+
+    default:
+        token->text.start = lexer->start - 1;
+        token->text.length = lexer->start - token->text.start;
+
+        break;
+    }
 
     return NJS_OK;
 }
 
 
-static njs_token_t
-njs_lexer_token_name_resolve(njs_lexer_t *lexer, njs_lexer_token_t *lt)
+static njs_int_t
+njs_lexer_hash_test(njs_lvlhsh_query_t *lhq, void *data)
 {
-    if (lt->token == NJS_TOKEN_NAME) {
-        njs_lexer_keyword(lexer, lt);
+    njs_lexer_entry_t  *entry;
+
+    entry = data;
+
+    if (entry->name.length == lhq->key.length
+        && memcmp(entry->name.start, lhq->key.start, lhq->key.length) == 0)
+    {
+        return NJS_OK;
     }
 
-    return lt->token;
+    return NJS_DECLINED;
 }
 
 
-static njs_token_t
-njs_lexer_next_token(njs_lexer_t *lexer, njs_lexer_token_t *lt)
+static njs_lexer_entry_t *
+njs_lexer_keyword_find(njs_lexer_t *lexer, u_char *key, size_t length,
+    uint32_t hash)
 {
-    u_char                   c, *p;
-    njs_uint_t               n;
-    njs_token_t              token;
-    const njs_lexer_multi_t  *multi;
-
-    lt->text.start = lexer->start;
-
-    while (lexer->start < lexer->end) {
-        c = *lexer->start++;
-
-        token = njs_tokens[c];
-
-        switch (token) {
-
-        case NJS_TOKEN_SPACE:
-            lt->text.start = lexer->start;
-            continue;
-
-        case NJS_TOKEN_LETTER:
-            return njs_lexer_word(lexer, lt, c);
-
-        case NJS_TOKEN_DOUBLE_QUOTE:
-        case NJS_TOKEN_SINGLE_QUOTE:
-            return njs_lexer_string(lexer, lt, c);
-
-        case NJS_TOKEN_DOT:
-            p = lexer->start;
-
-            if (p + 1 < lexer->end
-                && njs_tokens[p[0]] == NJS_TOKEN_DOT
-                && njs_tokens[p[1]] == NJS_TOKEN_DOT)
-            {
-                lt->text.length = (p - lt->text.start) + 2;
-                lexer->start += 2;
-                return NJS_TOKEN_ELLIPSIS;
-            }
-
-            if (p == lexer->end || njs_tokens[*p] != NJS_TOKEN_DIGIT) {
-                lt->text.length = p - lt->text.start;
-                return NJS_TOKEN_DOT;
-            }
-
-            /* Fall through. */
-
-        case NJS_TOKEN_DIGIT:
-            return njs_lexer_number(lexer, lt, c);
-
-        case NJS_TOKEN_ASSIGNMENT:
-            n = njs_nitems(njs_assignment_token),
-            multi = njs_assignment_token;
-
-            goto multi;
-
-        case NJS_TOKEN_ADDITION:
-            n = njs_nitems(njs_addition_token),
-            multi = njs_addition_token;
-
-            goto multi;
-
-        case NJS_TOKEN_SUBSTRACTION:
-            n = njs_nitems(njs_substraction_token),
-            multi = njs_substraction_token;
-
-            goto multi;
-
-        case NJS_TOKEN_MULTIPLICATION:
-            n = njs_nitems(njs_multiplication_token),
-            multi = njs_multiplication_token;
+    njs_int_t           ret;
+    njs_lexer_entry_t   *entry;
+    njs_lvlhsh_query_t  lhq;
 
-            goto multi;
-
-        case NJS_TOKEN_DIVISION:
-            token = njs_lexer_division(lexer, token);
-
-            if (token != NJS_TOKEN_AGAIN) {
-                goto done;
-            }
-
-            continue;
-
-        case NJS_TOKEN_REMAINDER:
-            n = njs_nitems(njs_remainder_token),
-            multi = njs_remainder_token;
-
-            goto multi;
-
-        case NJS_TOKEN_BITWISE_AND:
-            n = njs_nitems(njs_bitwise_and_token),
-            multi = njs_bitwise_and_token;
-
-            goto multi;
-
-        case NJS_TOKEN_BITWISE_XOR:
-            n = njs_nitems(njs_bitwise_xor_token),
-            multi = njs_bitwise_xor_token;
-
-            goto multi;
-
-        case NJS_TOKEN_BITWISE_OR:
-            n = njs_nitems(njs_bitwise_or_token),
-            multi = njs_bitwise_or_token;
-
-            goto multi;
+    lhq.key.start = key;
+    lhq.key.length = length;
 
-        case NJS_TOKEN_LOGICAL_NOT:
-            n = njs_nitems(njs_logical_not_token),
-            multi = njs_logical_not_token;
-
-            goto multi;
-
-        case NJS_TOKEN_LESS:
-            n = njs_nitems(njs_less_token),
-            multi = njs_less_token;
-
-            goto multi;
-
-        case NJS_TOKEN_GREATER:
-            n = njs_nitems(njs_greater_token),
-            multi = njs_greater_token;
-
-            goto multi;
+    lhq.key_hash = hash;
+    lhq.proto = &njs_lexer_hash_proto;
 
-        case NJS_TOKEN_CONDITIONAL:
-            n = njs_nitems(njs_conditional_token),
-            multi = njs_conditional_token;
-
-            goto multi;
-
-        case NJS_TOKEN_LINE_END:
-            lexer->line++;
-
-            /* Fall through. */
-
-        default:
-            goto done;
-        }
-
-    multi:
-
-        return njs_lexer_multi(lexer, lt, token, n, multi);
+    ret = njs_lvlhsh_find(lexer->keywords_hash, &lhq);
+    if (ret == NJS_OK) {
+        return lhq.value;
     }
 
-    token = NJS_TOKEN_END;
+    entry = njs_mp_alloc(lexer->mem_pool, sizeof(njs_lexer_entry_t));
+    if (njs_slow_path(entry == NULL)) {
+        return NULL;
+    }
 
-done:
+    entry->name.start = njs_mp_alloc(lexer->mem_pool, length + 1);
+    if (njs_slow_path(entry->name.start == NULL)) {
+        return NULL;
+    }
+
+    memcpy(entry->name.start, key, length);
 
-    lt->text.length = lexer->start - lt->text.start;
+    entry->name.start[length] = '\0';
+    entry->name.length = length;
+
+    lhq.value = entry;
+    lhq.pool = lexer->mem_pool;
 
-    return token;
+    ret = njs_lvlhsh_insert(lexer->keywords_hash, &lhq);
+    if (njs_slow_path(ret != NJS_OK)) {
+        return NULL;
+    }
+
+    return entry;
 }
 
 
-static njs_token_t
-njs_lexer_word(njs_lexer_t *lexer, njs_lexer_token_t *lt, u_char c)
+static njs_int_t
+njs_lexer_word(njs_lexer_t *lexer, njs_lexer_token_t *token)
 {
-    u_char  *p;
+    u_char                           *p, c;
+    uint32_t                         hash_id;
+    const njs_lexer_entry_t          *entry;
+    const njs_lexer_keyword_entry_t  *key_entry;
 
     /* TODO: UTF-8 */
 
-    static const uint8_t  letter_digit[32]  njs_aligned(32) = {
+    static const uint8_t letter_digit[32]  njs_aligned(32) = {
         0x00, 0x00, 0x00, 0x00, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
 
                                 /* '&%$ #"!  /.-, |*)(  7654 3210 ?>=< ;:98 */
@@ -611,9 +666,10 @@ njs_lexer_word(njs_lexer_t *lexer, njs_l
         0x00, 0x00, 0x00, 0x00, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
     };
 
-    lt->token_line = lexer->line;
-    lt->key_hash = njs_djb_hash_add(NJS_DJB_HASH_INIT, c);
-    lt->text.start = lexer->start - 1;
+    token->line = lexer->line;
+    token->text.start = lexer->start - 1;
+
+    hash_id = njs_djb_hash_add(NJS_DJB_HASH_INIT, *token->text.start);
 
     for (p = lexer->start; p < lexer->end; p++) {
         c = *p;
@@ -622,25 +678,46 @@ njs_lexer_word(njs_lexer_t *lexer, njs_l
             break;
         }
 
-        lt->key_hash = njs_djb_hash_add(lt->key_hash, c);
+        hash_id = njs_djb_hash_add(hash_id, c);
     }
 
+    token->text.length = p - token->text.start;
     lexer->start = p;
-    lt->text.length = p - lt->text.start;
+
+    key_entry = njs_lexer_keyword(token->text.start, token->text.length);
+
+    if (key_entry == NULL) {
+        entry = njs_lexer_keyword_find(lexer, token->text.start,
+                                       token->text.length, hash_id);
+        if (njs_slow_path(entry == NULL)) {
+            return NJS_ERROR;
+        }
 
-    return NJS_TOKEN_NAME;
+        token->type = NJS_TOKEN_NAME;
+
+    } else {
+        entry = &key_entry->value->entry;
+        token->type = key_entry->value->type;
+
+        lexer->keyword = 1;
+    }
+
+    token->unique_id = (uintptr_t) entry;
+
+    return NJS_OK;
 }
 
 
-static njs_token_t
-njs_lexer_string(njs_lexer_t *lexer, njs_lexer_token_t *lt, u_char quote)
+static void
+njs_lexer_string(njs_lexer_t *lexer, njs_lexer_token_t *token, u_char quote)
 {
     u_char      *p, c;
     njs_bool_t  escape;
 
     escape = 0;
-    lt->text.start = lexer->start;
+
     p = lexer->start;
+    token->text.start = p;
 
     while (p < lexer->end) {
 
@@ -670,31 +747,31 @@ njs_lexer_string(njs_lexer_t *lexer, njs
 
         if (c == quote) {
             lexer->start = p;
-            lt->text.length = (p - 1) - lt->text.start;
+            token->text.length = (p - 1) - token->text.start;
 
-            if (escape == 0) {
-                return NJS_TOKEN_STRING;
-            }
-
-            return NJS_TOKEN_ESCAPE_STRING;
+            token->type = (escape == 0) ? NJS_TOKEN_STRING
+                                        : NJS_TOKEN_ESCAPE_STRING;
+            return;
         }
     }
 
-    lt->text.start--;
-    lt->text.length = p - lt->text.start;
+    token->text.start--;
+    token->text.length = p - token->text.start;
 
-    return NJS_TOKEN_UNTERMINATED_STRING;
+    token->type = NJS_TOKEN_UNTERMINATED_STRING;
 }
 
 
-static njs_token_t
-njs_lexer_number(njs_lexer_t *lexer, njs_lexer_token_t *lt, u_char c)
+static void
+njs_lexer_number(njs_lexer_t *lexer, njs_lexer_token_t *token)
 {
+    u_char        c;
     const u_char  *p;
 
-    lt->text.start = lexer->start - 1;
+    c = lexer->start[-1];
+    p = lexer->start;
 
-    p = lexer->start;
+    token->text.start = lexer->start - 1;
 
     if (c == '0' && p != lexer->end) {
 
@@ -707,7 +784,7 @@ njs_lexer_number(njs_lexer_t *lexer, njs
                 goto illegal_token;
             }
 
-            lt->number = njs_number_hex_parse(&p, lexer->end);
+            token->number = njs_number_hex_parse(&p, lexer->end);
 
             goto done;
         }
@@ -721,7 +798,7 @@ njs_lexer_number(njs_lexer_t *lexer, njs
                 goto illegal_token;
             }
 
-            lt->number = njs_number_oct_parse(&p, lexer->end);
+            token->number = njs_number_oct_parse(&p, lexer->end);
 
             if (p < lexer->end && (*p == '8' || *p == '9')) {
                 goto illegal_trailer;
@@ -739,7 +816,7 @@ njs_lexer_number(njs_lexer_t *lexer, njs
                 goto illegal_token;
             }
 
-            lt->number = njs_number_bin_parse(&p, lexer->end);
+            token->number = njs_number_bin_parse(&p, lexer->end);
 
             if (p < lexer->end && (*p >= '2' && *p <= '9')) {
                 goto illegal_trailer;
@@ -756,14 +833,16 @@ njs_lexer_number(njs_lexer_t *lexer, njs
     }
 
     p--;
-    lt->number = njs_number_dec_parse(&p, lexer->end);
+    token->number = njs_number_dec_parse(&p, lexer->end);
 
 done:
 
     lexer->start = (u_char *) p;
-    lt->text.length = p - lt->text.start;
+    token->text.length = p - token->text.start;
 
-    return NJS_TOKEN_NUMBER;
+    token->type = NJS_TOKEN_NUMBER;
+
+    return;
 
 illegal_trailer:
 
@@ -771,92 +850,105 @@ illegal_trailer:
 
 illegal_token:
 
-    lt->text.length = p - lt->text.start;
+    token->text.length = p - token->text.start;
 
-    return NJS_TOKEN_ILLEGAL;
+    token->type = NJS_TOKEN_ILLEGAL;
 }
 
 
-static njs_token_t
-njs_lexer_multi(njs_lexer_t *lexer, njs_lexer_token_t *lt, njs_token_t token,
-    njs_uint_t n, const njs_lexer_multi_t *multi)
+static void
+njs_lexer_multi(njs_lexer_t *lexer, njs_lexer_token_t *token,
+    const njs_lexer_multi_t *multi, size_t length)
 {
     u_char  c;
 
-    if (lexer->start < lexer->end) {
+    token->text.start = lexer->start - 1;
+
+    while (length != 0 && multi != NULL) {
         c = lexer->start[0];
 
-        do {
-            if (c == multi->symbol) {
-                lexer->start++;
+        if (c == multi->symbol) {
+            lexer->start++;
 
-                if (multi->count == 0) {
-                    token = multi->token;
-                    break;
-                }
+            token->type = multi->token;
 
-                return njs_lexer_multi(lexer, lt, multi->token, multi->count,
-                                       multi->next);
+            if (multi->count == 0) {
+                break;
             }
 
+            length = multi->count;
+            multi = multi->next;
+
+        } else {
+            length--;
             multi++;
-            n--;
-
-        } while (n != 0);
+        }
     }
 
-    lt->text.length = lexer->start - lt->text.start;


More information about the nginx-devel mailing list