Welcome! Log In Create A New Profile

Advanced

[njs] Lexer refactoring.

Alexander Borisov
February 26, 2020 08:24AM
details: https://hg.nginx.org/njs/rev/87d05fb35ff9
branches:
changeset: 1336:87d05fb35ff9
user: Alexander Borisov <alexander.borisov@nginx.com>
date: Wed Feb 26 16:22:10 2020 +0300
description:
Lexer refactoring.

diffstat:

src/njs_builtin.c | 54 +--
src/njs_function.c | 2 +-
src/njs_generator.c | 64 ++-
src/njs_lexer.c | 632 ++++++++++++++++++++++++++-------------------
src/njs_lexer.h | 80 ++++-
src/njs_lexer_keyword.c | 180 +++---------
src/njs_lexer_tables.h | 146 ++++++++++
src/njs_module.c | 2 +-
src/njs_parser.c | 76 +++--
src/njs_parser.h | 33 +-
src/njs_parser_terminal.c | 40 +-
src/njs_shell.c | 43 +-
src/njs_variable.c | 358 ++++++++++---------------
src/njs_variable.h | 52 ++-
src/njs_vm.c | 2 +-
src/njs_vm.h | 2 +-
utils/lexer_keyword.py | 245 +++++++++++++++++
17 files changed, 1226 insertions(+), 785 deletions(-)

diffs (truncated from 3036 to 1000 lines):

diff -r 079d4d4556f0 -r 87d05fb35ff9 src/njs_builtin.c
--- a/src/njs_builtin.c Wed Feb 26 12:41:51 2020 +0300
+++ b/src/njs_builtin.c Wed Feb 26 16:22:10 2020 +0300
@@ -134,12 +134,6 @@ njs_builtin_objects_create(njs_vm_t *vm)
}

njs_lvlhsh_init(&shared->keywords_hash);
-
- ret = njs_lexer_keywords_init(vm->mem_pool, &shared->keywords_hash);
- if (njs_slow_path(ret != NJS_OK)) {
- return NJS_ERROR;
- }
-
njs_lvlhsh_init(&shared->values_hash);

pattern = njs_regexp_pattern_create(vm, (u_char *) "(?:)",
@@ -495,7 +489,6 @@ njs_builtin_completions(njs_vm_t *vm)
njs_arr_t *array;
njs_str_t *completion;
njs_int_t ret;
- njs_keyword_t *keyword;
njs_lvlhsh_each_t lhe;
njs_builtin_traverse_t ctx;
const njs_object_prop_t *prop;
@@ -505,23 +498,9 @@ njs_builtin_completions(njs_vm_t *vm)
return NULL;
}

- /* Keywords completions. */
-
- njs_lvlhsh_each_init(&lhe, &njs_keyword_hash_proto);
-
- for ( ;; ) {
- keyword = njs_lvlhsh_each(&vm->shared->keywords_hash, &lhe);
-
- if (keyword == NULL) {
- break;
- }
-
- completion = njs_arr_add(array);
- if (njs_slow_path(completion == NULL)) {
- return NULL;
- }
-
- *completion = keyword->name;
+ ret = njs_lexer_keywords(array);
+ if (njs_slow_path(ret != NJS_OK)) {
+ return NULL;
}

/* Global object completions. */
@@ -570,12 +549,14 @@ njs_vm_completions(njs_vm_t *vm, njs_str
static njs_arr_t *
njs_vm_expression_completions(njs_vm_t *vm, njs_str_t *expression)
{
- u_char *p, *end;
- njs_int_t ret;
- njs_value_t *value;
- njs_variable_t *var;
- njs_object_prop_t *prop;
- njs_lvlhsh_query_t lhq;
+ u_char *p, *end;
+ njs_int_t ret;
+ njs_value_t *value;
+ njs_variable_t *var;
+ njs_rbtree_node_t *node;
+ njs_object_prop_t *prop;
+ njs_lvlhsh_query_t lhq;
+ njs_variable_node_t var_node;

if (njs_slow_path(vm->parser == NULL)) {
return NULL;
@@ -588,16 +569,23 @@ njs_vm_expression_completions(njs_vm_t *

while (p < end && *p != '.') { p++; }

- lhq.proto = &njs_variables_hash_proto;
+ lhq.proto = &njs_lexer_hash_proto;
lhq.key.length = p - lhq.key.start;
lhq.key_hash = njs_djb_hash(lhq.key.start, lhq.key.length);

- ret = njs_lvlhsh_find(&vm->parser->scope->variables, &lhq);
+ ret = njs_lvlhsh_find(&vm->shared->keywords_hash, &lhq);
if (njs_slow_path(ret != NJS_OK)) {
return NULL;
}

- var = lhq.value;
+ var_node.key = (uintptr_t) lhq.value;
+
+ node = njs_rbtree_find(&vm->parser->scope->variables, &var_node.node);
+ if (njs_slow_path(node == NULL)) {
+ return NULL;
+ }
+
+ var = ((njs_variable_node_t *) node)->variable;
value = njs_vmcode_operand(vm, var->index);

if (!njs_is_object(value)) {
diff -r 079d4d4556f0 -r 87d05fb35ff9 src/njs_function.c
--- a/src/njs_function.c Wed Feb 26 12:41:51 2020 +0300
+++ b/src/njs_function.c Wed Feb 26 16:22:10 2020 +0300
@@ -932,7 +932,7 @@ njs_function_constructor(njs_vm_t *vm, n

scope = parser->scope;

- ret = njs_variables_copy(vm, &scope->variables, &vm->variables_hash);
+ ret = njs_variables_copy(vm, &scope->variables, vm->variables_hash);
if (njs_slow_path(ret != NJS_OK)) {
return ret;
}
diff -r 079d4d4556f0 -r 87d05fb35ff9 src/njs_generator.c
--- a/src/njs_generator.c Wed Feb 26 12:41:51 2020 +0300
+++ b/src/njs_generator.c Wed Feb 26 16:22:10 2020 +0300
@@ -384,7 +384,8 @@ njs_generate(njs_vm_t *vm, njs_generator
return njs_generate_inc_dec_operation(vm, generator, node, 1);

case NJS_TOKEN_NULL:
- case NJS_TOKEN_BOOLEAN:
+ case NJS_TOKEN_TRUE:
+ case NJS_TOKEN_FALSE:
case NJS_TOKEN_NUMBER:
case NJS_TOKEN_STRING:
node->index = njs_value_index(vm, &node->u.value, generator->runtime);
@@ -2321,9 +2322,10 @@ static njs_int_t
njs_generate_function_declaration(njs_vm_t *vm, njs_generator_t *generator,
njs_parser_node_t *node)
{
- njs_int_t ret;
- njs_variable_t *var;
- njs_function_lambda_t *lambda;
+ njs_int_t ret;
+ njs_variable_t *var;
+ njs_function_lambda_t *lambda;
+ const njs_lexer_entry_t *lex_entry;

var = njs_variable_resolve(vm, node);
if (njs_slow_path(var == NULL)) {
@@ -2337,14 +2339,18 @@ njs_generate_function_declaration(njs_vm

lambda = njs_function_lambda(&var->value);

- ret = njs_generate_function_scope(vm, lambda, node,
- &node->u.reference.name);
+ lex_entry = njs_lexer_entry(node->u.reference.unique_id);
+ if (njs_slow_path(lex_entry == NULL)) {
+ return NJS_ERROR;
+ }
+
+ ret = njs_generate_function_scope(vm, lambda, node, &lex_entry->name);
if (njs_slow_path(ret != NJS_OK)) {
return ret;
}

if (vm->debug != NULL) {
- ret = njs_generate_function_debug(vm, &var->name, lambda, node);
+ ret = njs_generate_function_debug(vm, &lex_entry->name, lambda, node);
}

return ret;
@@ -2473,15 +2479,17 @@ njs_generate_lambda_variables(njs_vm_t *
{
njs_index_t index;
njs_variable_t *var;
+ njs_rbtree_node_t *rb_node;
njs_vmcode_move_t *move;
- njs_lvlhsh_each_t lhe;
njs_vmcode_this_t *this;
+ njs_variable_node_t *var_node;
njs_vmcode_arguments_t *arguments;

- njs_lvlhsh_each_init(&lhe, &njs_variables_hash_proto);
-
- for ( ;; ) {
- var = njs_lvlhsh_each(&node->scope->variables, &lhe);
+ rb_node = njs_rbtree_min(&node->scope->variables);
+
+ while (njs_rbtree_is_there_successor(&node->scope->variables, rb_node)) {
+ var_node = (njs_variable_node_t *) rb_node;
+ var = var_node->variable;

if (var == NULL) {
break;
@@ -2504,6 +2512,8 @@ njs_generate_lambda_variables(njs_vm_t *
NJS_VMCODE_ARGUMENTS, 1);
arguments->dst = var->index;
}
+
+ rb_node = njs_rbtree_node_successor(&node->scope->variables, rb_node);
}

return NJS_OK;
@@ -3297,11 +3307,11 @@ static njs_int_t
njs_generate_global_reference(njs_vm_t *vm, njs_generator_t *generator,
njs_parser_node_t *node, njs_bool_t exception)
{
- njs_str_t *name;
- njs_int_t ret;
- njs_index_t index;
- njs_value_t property;
- njs_vmcode_prop_get_t *prop_get;
+ njs_int_t ret;
+ njs_index_t index;
+ njs_value_t property;
+ njs_vmcode_prop_get_t *prop_get;
+ const njs_lexer_entry_t *lex_entry;

index = njs_generate_dest_index(vm, generator, node);
if (njs_slow_path(index == NJS_INDEX_ERROR)) {
@@ -3314,11 +3324,13 @@ njs_generate_global_reference(njs_vm_t *
prop_get->value = index;
prop_get->object = NJS_INDEX_GLOBAL_OBJECT;

- /* FIXME: cache keys in a hash. */
-
- name = &node->u.reference.name;
-
- ret = njs_string_set(vm, &property, name->start, name->length);
+ lex_entry = njs_lexer_entry(node->u.reference.unique_id);
+ if (njs_slow_path(lex_entry == NULL)) {
+ return NJS_ERROR;
+ }
+
+ ret = njs_string_set(vm, &property, lex_entry->name.start,
+ lex_entry->name.length);
if (njs_slow_path(ret != NJS_OK)) {
return NJS_ERROR;
}
@@ -3343,6 +3355,7 @@ njs_generate_reference_error(njs_vm_t *v
njs_parser_node_t *node)
{
njs_jump_off_t ret;
+ const njs_lexer_entry_t *lex_entry;
njs_vmcode_reference_error_t *ref_err;

if (njs_slow_path(!node->u.reference.not_defined)) {
@@ -3365,7 +3378,12 @@ njs_generate_reference_error(njs_vm_t *v
}
}

- return njs_name_copy(vm, &ref_err->name, &node->u.reference.name);
+ lex_entry = njs_lexer_entry(node->u.reference.unique_id);
+ if (njs_slow_path(lex_entry == NULL)) {
+ return NJS_ERROR;
+ }
+
+ return njs_name_copy(vm, &ref_err->name, &lex_entry->name);
}


diff -r 079d4d4556f0 -r 87d05fb35ff9 src/njs_lexer.c
--- a/src/njs_lexer.c Wed Feb 26 12:41:51 2020 +0300
+++ b/src/njs_lexer.c Wed Feb 26 16:22:10 2020 +0300
@@ -18,23 +18,28 @@ struct njs_lexer_multi_s {
};


+static njs_int_t njs_lexer_hash_test(njs_lvlhsh_query_t *lhq, void *data);
+static njs_int_t njs_lexer_word(njs_lexer_t *lexer, njs_lexer_token_t *token);
+static void njs_lexer_string(njs_lexer_t *lexer, njs_lexer_token_t *token,
+ u_char quote);
+static void njs_lexer_number(njs_lexer_t *lexer, njs_lexer_token_t *token);
+static void njs_lexer_multi(njs_lexer_t *lexer, njs_lexer_token_t *token,
+ const njs_lexer_multi_t *multi, size_t length);
+static void njs_lexer_division(njs_lexer_t *lexer, njs_lexer_token_t *token);
+
static njs_lexer_token_t *njs_lexer_token_push(njs_vm_t *vm,
njs_lexer_t *lexer);
static njs_lexer_token_t *njs_lexer_token_pop(njs_lexer_t *lexer);
-static njs_token_t njs_lexer_token_name_resolve(njs_lexer_t *lexer,
- njs_lexer_token_t *lt);
-static njs_token_t njs_lexer_next_token(njs_lexer_t *lexer,
- njs_lexer_token_t *lt);
-static njs_token_t njs_lexer_word(njs_lexer_t *lexer, njs_lexer_token_t *lt,
- u_char c);
-static njs_token_t njs_lexer_string(njs_lexer_t *lexer, njs_lexer_token_t *lt,
- u_char quote);
-static njs_token_t njs_lexer_number(njs_lexer_t *lexer, njs_lexer_token_t *lt,
- u_char c);
-static njs_token_t njs_lexer_multi(njs_lexer_t *lexer, njs_lexer_token_t *lt,
- njs_token_t token, njs_uint_t n, const njs_lexer_multi_t *multi);
-static njs_token_t njs_lexer_division(njs_lexer_t *lexer,
- njs_token_t token);
+
+
+const njs_lvlhsh_proto_t njs_lexer_hash_proto
+ njs_aligned(64) =
+{
+ NJS_LVLHSH_DEFAULT,
+ njs_lexer_hash_test,
+ njs_lvlhsh_alloc,
+ njs_lvlhsh_free,
+};


static const uint8_t njs_tokens[256] njs_aligned(64) = {
@@ -297,7 +302,8 @@ njs_lexer_init(njs_vm_t *vm, njs_lexer_t
lexer->start = start;
lexer->end = end;
lexer->line = 1;
- lexer->keywords_hash = vm->shared->keywords_hash;
+ lexer->keywords_hash = &vm->shared->keywords_hash;
+ lexer->mem_pool = vm->mem_pool;

njs_queue_init(&lexer->preread);

@@ -312,9 +318,9 @@ njs_lexer_token(njs_vm_t *vm, njs_lexer_

lexer->prev_start = lexer->start;

- if (lexer->lexer_token != NULL) {
- lexer->prev_token = lexer->lexer_token->token;
- njs_mp_free(vm->mem_pool, lexer->lexer_token);
+ if (lexer->token != NULL) {
+ lexer->prev_token = lexer->token->type;
+ njs_mp_free(vm->mem_pool, lexer->token);
}

if (njs_queue_is_empty(&lexer->preread)) {
@@ -324,9 +330,9 @@ njs_lexer_token(njs_vm_t *vm, njs_lexer_
}
}

- lexer->lexer_token = njs_lexer_token_pop(lexer);
+ lexer->token = njs_lexer_token_pop(lexer);

- return njs_lexer_token_name_resolve(lexer, lexer->lexer_token);
+ return lexer->token->type;
}


@@ -350,9 +356,7 @@ njs_lexer_peek_token(njs_vm_t *vm, njs_l

/* NJS_TOKEN_DIVISION stands for regexp literal. */

- if (lt->token == NJS_TOKEN_DIVISION
- || lt->token == NJS_TOKEN_END)
- {
+ if (lt->type == NJS_TOKEN_DIVISION || lt->type == NJS_TOKEN_END) {
break;
}

@@ -368,25 +372,50 @@ njs_lexer_peek_token(njs_vm_t *vm, njs_l
}
}

- return njs_lexer_token_name_resolve(lexer, lt);
+ return lt->type;
+}
+
+
+njs_int_t
+njs_lexer_rollback(njs_vm_t *vm, njs_lexer_t *lexer)
+{
+ njs_lexer_token_t *lt;
+
+ lt = njs_mp_zalloc(vm->mem_pool, sizeof(njs_lexer_token_t));
+ if (njs_slow_path(lt == NULL)) {
+ return NJS_ERROR;
+ }
+
+ *lt = *lexer->token;
+
+ njs_queue_insert_head(&lexer->preread, &lt->link);
+
+ return NJS_OK;
}


static njs_lexer_token_t *
njs_lexer_token_push(njs_vm_t *vm, njs_lexer_t *lexer)
{
- njs_lexer_token_t *lt;
+ njs_int_t ret;
+ njs_lexer_token_t *token;

- lt = njs_mp_zalloc(vm->mem_pool, sizeof(njs_lexer_token_t));
- if (njs_slow_path(lt == NULL)) {
+ token = njs_mp_zalloc(vm->mem_pool, sizeof(njs_lexer_token_t));
+ if (njs_slow_path(token == NULL)) {
return NULL;
}

- lt->token = njs_lexer_next_token(lexer, lt);
+ do {
+ ret = njs_lexer_next_token(lexer, token);
+ if (njs_slow_path(ret != NJS_OK)) {
+ return NULL;
+ }

- njs_queue_insert_tail(&lexer->preread, &lt->link);
+ } while (token->type == NJS_TOKEN_COMMENT);

- return lt;
+ njs_queue_insert_tail(&lexer->preread, &token->link);
+
+ return token;
}


@@ -403,197 +432,223 @@ njs_lexer_token_pop(njs_lexer_t *lexer)


njs_int_t
-njs_lexer_rollback(njs_vm_t *vm, njs_lexer_t *lexer)
+njs_lexer_next_token(njs_lexer_t *lexer, njs_lexer_token_t *token)
{
- njs_lexer_token_t *lt;
+ u_char c, *p;
+
+ c = ' ';

- lt = njs_mp_zalloc(vm->mem_pool, sizeof(njs_lexer_token_t));
- if (njs_slow_path(lt == NULL)) {
- return NJS_ERROR;
+ while (lexer->start < lexer->end) {
+ c = *lexer->start++;
+
+ if (njs_tokens[c] != NJS_TOKEN_SPACE) {
+ break;
+ }
}

- *lt = *lexer->lexer_token;
+ lexer->keyword = 0;
+ token->type = njs_tokens[c];
+
+ switch (token->type) {
+
+ case NJS_TOKEN_LETTER:
+ return njs_lexer_word(lexer, token);
+
+ case NJS_TOKEN_DOUBLE_QUOTE:
+ case NJS_TOKEN_SINGLE_QUOTE:
+ njs_lexer_string(lexer, token, c);
+ break;
+
+ case NJS_TOKEN_DOT:
+ p = lexer->start;
+
+ if (p + 1 < lexer->end
+ && njs_tokens[p[0]] == NJS_TOKEN_DOT
+ && njs_tokens[p[1]] == NJS_TOKEN_DOT)
+ {
+ token->text.start = lexer->start - 1;
+ token->text.length = (p - token->text.start) + 2;
+
+ token->type = NJS_TOKEN_ELLIPSIS;
+
+ lexer->start += 2;
+
+ return NJS_OK;
+ }
+
+ if (p == lexer->end || njs_tokens[*p] != NJS_TOKEN_DIGIT) {
+ token->text.start = lexer->start - 1;
+ token->text.length = p - token->text.start;
+
+ token->type = NJS_TOKEN_DOT;
+
+ return NJS_OK;
+ }
+
+ /* Fall through. */
+
+ case NJS_TOKEN_DIGIT:
+ njs_lexer_number(lexer, token);
+ break;
+
+ case NJS_TOKEN_DIVISION:
+ njs_lexer_division(lexer, token);
+ break;
+
+ case NJS_TOKEN_ASSIGNMENT:
+ njs_lexer_multi(lexer, token, njs_assignment_token,
+ njs_nitems(njs_assignment_token));
+ break;
+
+ case NJS_TOKEN_ADDITION:
+ njs_lexer_multi(lexer, token, njs_addition_token,
+ njs_nitems(njs_addition_token));
+ break;

- njs_queue_insert_head(&lexer->preread, &lt->link);
+ case NJS_TOKEN_SUBSTRACTION:
+ njs_lexer_multi(lexer, token, njs_substraction_token,
+ njs_nitems(njs_substraction_token));
+ break;
+
+ case NJS_TOKEN_MULTIPLICATION:
+ njs_lexer_multi(lexer, token, njs_multiplication_token,
+ njs_nitems(njs_multiplication_token));
+ break;
+
+ case NJS_TOKEN_REMAINDER:
+ njs_lexer_multi(lexer, token, njs_remainder_token,
+ njs_nitems(njs_remainder_token));
+ break;
+
+ case NJS_TOKEN_BITWISE_AND:
+ njs_lexer_multi(lexer, token, njs_bitwise_and_token,
+ njs_nitems(njs_bitwise_and_token));
+ break;
+
+ case NJS_TOKEN_BITWISE_XOR:
+ njs_lexer_multi(lexer, token, njs_bitwise_xor_token,
+ njs_nitems(njs_bitwise_xor_token));
+ break;
+
+ case NJS_TOKEN_BITWISE_OR:
+ njs_lexer_multi(lexer, token, njs_bitwise_or_token,
+ njs_nitems(njs_bitwise_or_token));
+ break;
+
+ case NJS_TOKEN_LOGICAL_NOT:
+ njs_lexer_multi(lexer, token, njs_logical_not_token,
+ njs_nitems(njs_logical_not_token));
+ break;
+
+ case NJS_TOKEN_LESS:
+ njs_lexer_multi(lexer, token, njs_less_token,
+ njs_nitems(njs_less_token));
+ break;
+
+ case NJS_TOKEN_GREATER:
+ njs_lexer_multi(lexer, token, njs_greater_token,
+ njs_nitems(njs_greater_token));
+ break;
+
+ case NJS_TOKEN_CONDITIONAL:
+ njs_lexer_multi(lexer, token, njs_conditional_token,
+ njs_nitems(njs_conditional_token));
+ break;
+
+ case NJS_TOKEN_SPACE:
+ token->type = NJS_TOKEN_END;
+ return NJS_OK;
+
+ case NJS_TOKEN_LINE_END:
+ lexer->line++;
+
+ /* Fall through. */
+
+ default:
+ token->text.start = lexer->start - 1;
+ token->text.length = lexer->start - token->text.start;
+
+ break;
+ }

return NJS_OK;
}


-static njs_token_t
-njs_lexer_token_name_resolve(njs_lexer_t *lexer, njs_lexer_token_t *lt)
+static njs_int_t
+njs_lexer_hash_test(njs_lvlhsh_query_t *lhq, void *data)
{
- if (lt->token == NJS_TOKEN_NAME) {
- njs_lexer_keyword(lexer, lt);
+ njs_lexer_entry_t *entry;
+
+ entry = data;
+
+ if (entry->name.length == lhq->key.length
+ && memcmp(entry->name.start, lhq->key.start, lhq->key.length) == 0)
+ {
+ return NJS_OK;
}

- return lt->token;
+ return NJS_DECLINED;
}


-static njs_token_t
-njs_lexer_next_token(njs_lexer_t *lexer, njs_lexer_token_t *lt)
+static njs_lexer_entry_t *
+njs_lexer_keyword_find(njs_lexer_t *lexer, u_char *key, size_t length,
+ uint32_t hash)
{
- u_char c, *p;
- njs_uint_t n;
- njs_token_t token;
- const njs_lexer_multi_t *multi;
-
- lt->text.start = lexer->start;
-
- while (lexer->start < lexer->end) {
- c = *lexer->start++;
-
- token = njs_tokens[c];
-
- switch (token) {
-
- case NJS_TOKEN_SPACE:
- lt->text.start = lexer->start;
- continue;
-
- case NJS_TOKEN_LETTER:
- return njs_lexer_word(lexer, lt, c);
-
- case NJS_TOKEN_DOUBLE_QUOTE:
- case NJS_TOKEN_SINGLE_QUOTE:
- return njs_lexer_string(lexer, lt, c);
-
- case NJS_TOKEN_DOT:
- p = lexer->start;
-
- if (p + 1 < lexer->end
- && njs_tokens[p[0]] == NJS_TOKEN_DOT
- && njs_tokens[p[1]] == NJS_TOKEN_DOT)
- {
- lt->text.length = (p - lt->text.start) + 2;
- lexer->start += 2;
- return NJS_TOKEN_ELLIPSIS;
- }
-
- if (p == lexer->end || njs_tokens[*p] != NJS_TOKEN_DIGIT) {
- lt->text.length = p - lt->text.start;
- return NJS_TOKEN_DOT;
- }
-
- /* Fall through. */
-
- case NJS_TOKEN_DIGIT:
- return njs_lexer_number(lexer, lt, c);
-
- case NJS_TOKEN_ASSIGNMENT:
- n = njs_nitems(njs_assignment_token),
- multi = njs_assignment_token;
-
- goto multi;
-
- case NJS_TOKEN_ADDITION:
- n = njs_nitems(njs_addition_token),
- multi = njs_addition_token;
-
- goto multi;
-
- case NJS_TOKEN_SUBSTRACTION:
- n = njs_nitems(njs_substraction_token),
- multi = njs_substraction_token;
-
- goto multi;
-
- case NJS_TOKEN_MULTIPLICATION:
- n = njs_nitems(njs_multiplication_token),
- multi = njs_multiplication_token;
+ njs_int_t ret;
+ njs_lexer_entry_t *entry;
+ njs_lvlhsh_query_t lhq;

- goto multi;
-
- case NJS_TOKEN_DIVISION:
- token = njs_lexer_division(lexer, token);
-
- if (token != NJS_TOKEN_AGAIN) {
- goto done;
- }
-
- continue;
-
- case NJS_TOKEN_REMAINDER:
- n = njs_nitems(njs_remainder_token),
- multi = njs_remainder_token;
-
- goto multi;
-
- case NJS_TOKEN_BITWISE_AND:
- n = njs_nitems(njs_bitwise_and_token),
- multi = njs_bitwise_and_token;
-
- goto multi;
-
- case NJS_TOKEN_BITWISE_XOR:
- n = njs_nitems(njs_bitwise_xor_token),
- multi = njs_bitwise_xor_token;
-
- goto multi;
-
- case NJS_TOKEN_BITWISE_OR:
- n = njs_nitems(njs_bitwise_or_token),
- multi = njs_bitwise_or_token;
-
- goto multi;
+ lhq.key.start = key;
+ lhq.key.length = length;

- case NJS_TOKEN_LOGICAL_NOT:
- n = njs_nitems(njs_logical_not_token),
- multi = njs_logical_not_token;
-
- goto multi;
-
- case NJS_TOKEN_LESS:
- n = njs_nitems(njs_less_token),
- multi = njs_less_token;
-
- goto multi;
-
- case NJS_TOKEN_GREATER:
- n = njs_nitems(njs_greater_token),
- multi = njs_greater_token;
-
- goto multi;
+ lhq.key_hash = hash;
+ lhq.proto = &njs_lexer_hash_proto;

- case NJS_TOKEN_CONDITIONAL:
- n = njs_nitems(njs_conditional_token),
- multi = njs_conditional_token;
-
- goto multi;
-
- case NJS_TOKEN_LINE_END:
- lexer->line++;
-
- /* Fall through. */
-
- default:
- goto done;
- }
-
- multi:
-
- return njs_lexer_multi(lexer, lt, token, n, multi);
+ ret = njs_lvlhsh_find(lexer->keywords_hash, &lhq);
+ if (ret == NJS_OK) {
+ return lhq.value;
}

- token = NJS_TOKEN_END;
+ entry = njs_mp_alloc(lexer->mem_pool, sizeof(njs_lexer_entry_t));
+ if (njs_slow_path(entry == NULL)) {
+ return NULL;
+ }

-done:
+ entry->name.start = njs_mp_alloc(lexer->mem_pool, length + 1);
+ if (njs_slow_path(entry->name.start == NULL)) {
+ return NULL;
+ }
+
+ memcpy(entry->name.start, key, length);

- lt->text.length = lexer->start - lt->text.start;
+ entry->name.start[length] = '\0';
+ entry->name.length = length;
+
+ lhq.value = entry;
+ lhq.pool = lexer->mem_pool;

- return token;
+ ret = njs_lvlhsh_insert(lexer->keywords_hash, &lhq);
+ if (njs_slow_path(ret != NJS_OK)) {
+ return NULL;
+ }
+
+ return entry;
}


-static njs_token_t
-njs_lexer_word(njs_lexer_t *lexer, njs_lexer_token_t *lt, u_char c)
+static njs_int_t
+njs_lexer_word(njs_lexer_t *lexer, njs_lexer_token_t *token)
{
- u_char *p;
+ u_char *p, c;
+ uint32_t hash_id;
+ const njs_lexer_entry_t *entry;
+ const njs_lexer_keyword_entry_t *key_entry;

/* TODO: UTF-8 */

- static const uint8_t letter_digit[32] njs_aligned(32) = {
+ static const uint8_t letter_digit[32] njs_aligned(32) = {
0x00, 0x00, 0x00, 0x00, /* 0000 0000 0000 0000 0000 0000 0000 0000 */

/* '&%$ #"! /.-, |*)( 7654 3210 ?>=< ;:98 */
@@ -611,9 +666,10 @@ njs_lexer_word(njs_lexer_t *lexer, njs_l
0x00, 0x00, 0x00, 0x00, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
};

- lt->token_line = lexer->line;
- lt->key_hash = njs_djb_hash_add(NJS_DJB_HASH_INIT, c);
- lt->text.start = lexer->start - 1;
+ token->line = lexer->line;
+ token->text.start = lexer->start - 1;
+
+ hash_id = njs_djb_hash_add(NJS_DJB_HASH_INIT, *token->text.start);

for (p = lexer->start; p < lexer->end; p++) {
c = *p;
@@ -622,25 +678,46 @@ njs_lexer_word(njs_lexer_t *lexer, njs_l
break;
}

- lt->key_hash = njs_djb_hash_add(lt->key_hash, c);
+ hash_id = njs_djb_hash_add(hash_id, c);
}

+ token->text.length = p - token->text.start;
lexer->start = p;
- lt->text.length = p - lt->text.start;
+
+ key_entry = njs_lexer_keyword(token->text.start, token->text.length);
+
+ if (key_entry == NULL) {
+ entry = njs_lexer_keyword_find(lexer, token->text.start,
+ token->text.length, hash_id);
+ if (njs_slow_path(entry == NULL)) {
+ return NJS_ERROR;
+ }

- return NJS_TOKEN_NAME;
+ token->type = NJS_TOKEN_NAME;
+
+ } else {
+ entry = &key_entry->value->entry;
+ token->type = key_entry->value->type;
+
+ lexer->keyword = 1;
+ }
+
+ token->unique_id = (uintptr_t) entry;
+
+ return NJS_OK;
}


-static njs_token_t
-njs_lexer_string(njs_lexer_t *lexer, njs_lexer_token_t *lt, u_char quote)
+static void
+njs_lexer_string(njs_lexer_t *lexer, njs_lexer_token_t *token, u_char quote)
{
u_char *p, c;
njs_bool_t escape;

escape = 0;
- lt->text.start = lexer->start;
+
p = lexer->start;
+ token->text.start = p;

while (p < lexer->end) {

@@ -670,31 +747,31 @@ njs_lexer_string(njs_lexer_t *lexer, njs

if (c == quote) {
lexer->start = p;
- lt->text.length = (p - 1) - lt->text.start;
+ token->text.length = (p - 1) - token->text.start;

- if (escape == 0) {
- return NJS_TOKEN_STRING;
- }
-
- return NJS_TOKEN_ESCAPE_STRING;
+ token->type = (escape == 0) ? NJS_TOKEN_STRING
+ : NJS_TOKEN_ESCAPE_STRING;
+ return;
}
}

- lt->text.start--;
- lt->text.length = p - lt->text.start;
+ token->text.start--;
+ token->text.length = p - token->text.start;

- return NJS_TOKEN_UNTERMINATED_STRING;
+ token->type = NJS_TOKEN_UNTERMINATED_STRING;
}


-static njs_token_t
-njs_lexer_number(njs_lexer_t *lexer, njs_lexer_token_t *lt, u_char c)
+static void
+njs_lexer_number(njs_lexer_t *lexer, njs_lexer_token_t *token)
{
+ u_char c;
const u_char *p;

- lt->text.start = lexer->start - 1;
+ c = lexer->start[-1];
+ p = lexer->start;

- p = lexer->start;
+ token->text.start = lexer->start - 1;

if (c == '0' && p != lexer->end) {

@@ -707,7 +784,7 @@ njs_lexer_number(njs_lexer_t *lexer, njs
goto illegal_token;
}

- lt->number = njs_number_hex_parse(&p, lexer->end);
+ token->number = njs_number_hex_parse(&p, lexer->end);

goto done;
}
@@ -721,7 +798,7 @@ njs_lexer_number(njs_lexer_t *lexer, njs
goto illegal_token;
}

- lt->number = njs_number_oct_parse(&p, lexer->end);
+ token->number = njs_number_oct_parse(&p, lexer->end);

if (p < lexer->end && (*p == '8' || *p == '9')) {
goto illegal_trailer;
@@ -739,7 +816,7 @@ njs_lexer_number(njs_lexer_t *lexer, njs
goto illegal_token;
}

- lt->number = njs_number_bin_parse(&p, lexer->end);
+ token->number = njs_number_bin_parse(&p, lexer->end);

if (p < lexer->end && (*p >= '2' && *p <= '9')) {
goto illegal_trailer;
@@ -756,14 +833,16 @@ njs_lexer_number(njs_lexer_t *lexer, njs
}

p--;
- lt->number = njs_number_dec_parse(&p, lexer->end);
+ token->number = njs_number_dec_parse(&p, lexer->end);

done:

lexer->start = (u_char *) p;
- lt->text.length = p - lt->text.start;
+ token->text.length = p - token->text.start;

- return NJS_TOKEN_NUMBER;
+ token->type = NJS_TOKEN_NUMBER;
+
+ return;

illegal_trailer:

@@ -771,92 +850,105 @@ illegal_trailer:

illegal_token:

- lt->text.length = p - lt->text.start;
+ token->text.length = p - token->text.start;

- return NJS_TOKEN_ILLEGAL;
+ token->type = NJS_TOKEN_ILLEGAL;
}


-static njs_token_t
-njs_lexer_multi(njs_lexer_t *lexer, njs_lexer_token_t *lt, njs_token_t token,
- njs_uint_t n, const njs_lexer_multi_t *multi)
+static void
+njs_lexer_multi(njs_lexer_t *lexer, njs_lexer_token_t *token,
+ const njs_lexer_multi_t *multi, size_t length)
{
u_char c;

- if (lexer->start < lexer->end) {
+ token->text.start = lexer->start - 1;
+
+ while (length != 0 && multi != NULL) {
c = lexer->start[0];

- do {
- if (c == multi->symbol) {
- lexer->start++;
+ if (c == multi->symbol) {
+ lexer->start++;

- if (multi->count == 0) {
- token = multi->token;
- break;
- }
+ token->type = multi->token;

- return njs_lexer_multi(lexer, lt, multi->token, multi->count,
- multi->next);
+ if (multi->count == 0) {
+ break;
}

+ length = multi->count;
+ multi = multi->next;
+
+ } else {
+ length--;
multi++;
- n--;
-
- } while (n != 0);
+ }
}

- lt->text.length = lexer->start - lt->text.start;
_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject Author Views Posted

[njs] Lexer refactoring.

Alexander Borisov 251 February 26, 2020 08:24AM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 251
Record Number of Users: 8 on April 13, 2023
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready