Welcome! Log In | Create A New Profile

Advanced

[njs] Introduced RegExpBuiltinExec().

Dmitry Volyntsev
July 02, 2020 10:02AM
details: https://hg.nginx.org/njs/rev/f9082cd59ba6
branches:
changeset: 1447:f9082cd59ba6
user: Dmitry Volyntsev <xeioex@nginx.com>
date: Thu Jul 02 13:59:33 2020 +0000
description:
Introduced RegExpBuiltinExec().

diffstat:

src/njs_pcre.c | 6 +-
src/njs_regex.h | 3 +-
src/njs_regexp.c | 277 +++++++++++++++++++++++++++-------------------
src/njs_regexp.h | 4 +-
src/njs_string.c | 8 +-
src/test/njs_unit_test.c | 3 +
6 files changed, 180 insertions(+), 121 deletions(-)

diffs (523 lines):

diff -r c30a2a805014 -r f9082cd59ba6 src/njs_pcre.c
--- a/src/njs_pcre.c Thu Jul 02 12:59:54 2020 +0000
+++ b/src/njs_pcre.c Thu Jul 02 13:59:33 2020 +0000
@@ -280,13 +280,13 @@ njs_pcre_default_free(void *p, void *mem


njs_int_t
-njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t len,
- njs_regex_match_data_t *match_data, njs_regex_context_t *ctx)
+njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t off,
+ size_t len, njs_regex_match_data_t *match_data, njs_regex_context_t *ctx)
{
int ret;

ret = pcre_exec(regex->code, regex->extra, (const char *) subject, len,
- 0, 0, match_data->captures, match_data->ncaptures);
+ off, 0, match_data->captures, match_data->ncaptures);

/* PCRE_ERROR_NOMATCH is -1. */

diff -r c30a2a805014 -r f9082cd59ba6 src/njs_regex.h
--- a/src/njs_regex.h Thu Jul 02 12:59:54 2020 +0000
+++ b/src/njs_regex.h Thu Jul 02 13:59:33 2020 +0000
@@ -39,7 +39,8 @@ NJS_EXPORT njs_regex_match_data_t *njs_r
NJS_EXPORT void njs_regex_match_data_free(njs_regex_match_data_t *match_data,
njs_regex_context_t *ctx);
NJS_EXPORT njs_int_t njs_regex_match(njs_regex_t *regex, const u_char *subject,
- size_t len, njs_regex_match_data_t *match_data, njs_regex_context_t *ctx);
+ size_t off, size_t len, njs_regex_match_data_t *match_data,
+ njs_regex_context_t *ctx);
NJS_EXPORT int *njs_regex_captures(njs_regex_match_data_t *match_data);


diff -r c30a2a805014 -r f9082cd59ba6 src/njs_regexp.c
--- a/src/njs_regexp.c Thu Jul 02 12:59:54 2020 +0000
+++ b/src/njs_regexp.c Thu Jul 02 13:59:33 2020 +0000
@@ -26,9 +26,9 @@ static u_char *njs_regexp_compile_trace_
njs_trace_data_t *td, u_char *start);
static u_char *njs_regexp_match_trace_handler(njs_trace_t *trace,
njs_trace_data_t *td, u_char *start);
-static njs_int_t njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp,
- njs_utf8_t utf8, u_char *string, njs_regex_match_data_t *match_data,
- uint32_t last_index);
+static njs_array_t *njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp,
+ njs_regexp_utf8_t type, njs_string_prop_t *string,
+ njs_regex_match_data_t *data);
static njs_int_t njs_regexp_string_create(njs_vm_t *vm, njs_value_t *value,
u_char *start, uint32_t size, int32_t length);

@@ -550,7 +550,7 @@ njs_regexp_compile_trace_handler(njs_tra

njs_int_t
njs_regexp_match(njs_vm_t *vm, njs_regex_t *regex, const u_char *subject,
- size_t len, njs_regex_match_data_t *match_data)
+ size_t off, size_t len, njs_regex_match_data_t *match_data)
{
njs_int_t ret;
njs_trace_handler_t handler;
@@ -558,7 +558,8 @@ njs_regexp_match(njs_vm_t *vm, njs_regex
handler = vm->trace.handler;
vm->trace.handler = njs_regexp_match_trace_handler;

- ret = njs_regex_match(regex, subject, len, match_data, vm->regex_context);
+ ret = njs_regex_match(regex, subject, off, len, match_data,
+ vm->regex_context);

vm->trace.handler = handler;

@@ -617,9 +618,7 @@ static njs_int_t
njs_regexp_prototype_last_index(njs_vm_t *vm, njs_object_prop_t *unused,
njs_value_t *value, njs_value_t *setval, njs_value_t *retval)
{
- uint32_t index, last_index;
- njs_regexp_t *regexp;
- njs_string_prop_t string;
+ njs_regexp_t *regexp;

regexp = njs_object_proto_lookup(njs_object(value), NJS_REGEXP,
njs_regexp_t);
@@ -635,23 +634,7 @@ njs_regexp_prototype_last_index(njs_vm_t
return NJS_OK;
}

- if (njs_slow_path(!njs_is_number(&regexp->last_index))) {
- *retval = regexp->last_index;
- return NJS_OK;
- }
-
- (void) njs_string_prop(&string, &regexp->string);
-
- last_index = njs_number(&regexp->last_index);
-
- if (njs_slow_path(string.size < last_index)) {
- *retval = regexp->last_index;
- return NJS_OK;
- }
-
- index = njs_string_index(&string, last_index);
- njs_set_number(retval, index);
-
+ *retval = regexp->last_index;
return NJS_OK;
}

@@ -802,8 +785,8 @@ njs_regexp_prototype_test(njs_vm_t *vm,
}
}

- match = njs_regexp_match(vm, regex, string.start, string.size,
- match_data);
+ match = njs_regexp_match(vm, regex, string.start, 0, string.size,
+ match_data);
if (match >= 0) {
retval = &njs_value_true;

@@ -844,36 +827,25 @@ done:
}


-njs_int_t
-njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
- njs_index_t unused)
+/**
+ * TODO: sticky, unicode flags.
+ */
+static njs_int_t
+njs_regexp_builtin_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s,
+ njs_value_t *retval)
{
+ size_t length, offset;
int64_t last_index;
njs_int_t ret;
- njs_utf8_t utf8;
- njs_value_t *value, lvalue;
+ njs_array_t *result;
njs_regexp_t *regexp;
njs_string_prop_t string;
njs_regexp_utf8_t type;
njs_regexp_pattern_t *pattern;
njs_regex_match_data_t *match_data;

- if (!njs_is_regexp(njs_arg(args, nargs, 0))) {
- njs_type_error(vm, "\"this\" argument is not a regexp");
- return NJS_ERROR;
- }
-
- value = njs_lvalue_arg(&lvalue, args, nargs, 1);
-
- if (!njs_is_string(value)) {
- ret = njs_value_to_string(vm, value, value);
- if (njs_slow_path(ret != NJS_OK)) {
- return ret;
- }
- }
-
- regexp = njs_regexp(&args[0]);
- regexp->string = *value;
+ regexp = njs_regexp(r);
+ regexp->string = *s;
pattern = regexp->pattern;

ret = njs_value_to_length(vm, &regexp->last_index, &last_index);
@@ -885,94 +857,113 @@ njs_regexp_prototype_exec(njs_vm_t *vm,
last_index = 0;
}

- (void) njs_string_prop(&string, value);
+ length = njs_string_prop(&string, s);
+
+ if (njs_slow_path((size_t) last_index > length)) {
+ goto not_found;
+ }
+
+ type = NJS_REGEXP_BYTE;

- if (string.size >= (size_t) last_index) {
- utf8 = NJS_STRING_BYTE;
- type = NJS_REGEXP_BYTE;
+ if (length != string.size) {
+ /* UTF-8 string. */
+ type = NJS_REGEXP_UTF8;
+ }
+
+ pattern = regexp->pattern;
+
+ if (njs_slow_path(!njs_regex_is_valid(&pattern->regex[type]))) {
+ goto not_found;
+ }

- if (string.length != 0) {
- utf8 = NJS_STRING_ASCII;
- type = NJS_REGEXP_UTF8;
+ match_data = njs_regex_match_data(&pattern->regex[type], vm->regex_context);
+ if (njs_slow_path(match_data == NULL)) {
+ njs_memory_error(vm);
+ return NJS_ERROR;
+ }
+
+ if (type != NJS_REGEXP_UTF8) {
+ offset = last_index;

- if (string.length != string.size) {
- utf8 = NJS_STRING_UTF8;
- }
+ } else {
+ /* UTF-8 string. */
+ offset = njs_string_offset(string.start, string.start + string.size,
+ last_index) - string.start;
+ }
+
+ ret = njs_regexp_match(vm, &pattern->regex[type], string.start, offset,
+ string.size, match_data);
+ if (ret >= 0) {
+ result = njs_regexp_exec_result(vm, regexp, type, &string, match_data);
+ if (njs_slow_path(result == NULL)) {
+ return NJS_ERROR;
}

- pattern = regexp->pattern;
-
- if (njs_regex_is_valid(&pattern->regex[type])) {
- string.start += last_index;
- string.size -= last_index;
-
- match_data = njs_regex_match_data(&pattern->regex[type],
- vm->regex_context);
- if (njs_slow_path(match_data == NULL)) {
- njs_memory_error(vm);
- return NJS_ERROR;
- }
+ njs_set_array(retval, result);
+ return NJS_OK;
+ }

- ret = njs_regexp_match(vm, &pattern->regex[type], string.start,
- string.size, match_data);
- if (ret >= 0) {
- return njs_regexp_exec_result(vm, regexp, utf8, string.start,
- match_data, last_index);
- }
+ if (njs_slow_path(ret != NJS_REGEX_NOMATCH)) {
+ njs_regex_match_data_free(match_data, vm->regex_context);

- if (njs_slow_path(ret != NJS_REGEX_NOMATCH)) {
- njs_regex_match_data_free(match_data, vm->regex_context);
+ return NJS_ERROR;
+ }

- return NJS_ERROR;
- }
- }
- }
+not_found:

if (pattern->global) {
njs_set_number(&regexp->last_index, 0);
}

- vm->retval = njs_value_null;
+ njs_set_null(retval);

return NJS_OK;
}


-static njs_int_t
-njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, njs_utf8_t utf8,
- u_char *string, njs_regex_match_data_t *match_data, uint32_t last_index)
+static njs_array_t *
+njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp,
+ njs_regexp_utf8_t type, njs_string_prop_t *string,
+ njs_regex_match_data_t *match_data)
{
- int *captures;
- u_char *start;
- int32_t size, length;
- njs_int_t ret;
- njs_uint_t i, n;
- njs_array_t *array;
- njs_value_t name;
- njs_object_t *groups;
- njs_object_prop_t *prop;
- njs_regexp_group_t *group;
- njs_lvlhsh_query_t lhq;
+ int *captures;
+ u_char *start;
+ int32_t size, length;
+ njs_int_t ret;
+ njs_uint_t i, n;
+ njs_array_t *array;
+ njs_value_t name;
+ njs_object_t *groups;
+ njs_object_prop_t *prop;
+ njs_regexp_group_t *group;
+ njs_lvlhsh_query_t lhq;
+ njs_regexp_pattern_t *pattern;

static const njs_value_t string_index = njs_string("index");
static const njs_value_t string_input = njs_string("input");
static const njs_value_t string_groups = njs_string("groups");

- array = njs_array_alloc(vm, 0, regexp->pattern->ncaptures, 0);
+ pattern = regexp->pattern;
+ array = njs_array_alloc(vm, 0, pattern->ncaptures, 0);
if (njs_slow_path(array == NULL)) {
goto fail;
}

captures = njs_regex_captures(match_data);

- for (i = 0; i < regexp->pattern->ncaptures; i++) {
+ for (i = 0; i < pattern->ncaptures; i++) {
n = 2 * i;

if (captures[n] != -1) {
- start = &string[captures[n]];
+ start = &string->start[captures[n]];
size = captures[n + 1] - captures[n];

- length = njs_string_calc_length(utf8, start, size);
+ if (type == NJS_REGEXP_UTF8) {
+ length = njs_max(njs_utf8_length(start, size), 0);
+
+ } else {
+ length = size;
+ }

ret = njs_regexp_string_create(vm, &array->start[i], start, size,
length);
@@ -985,17 +976,17 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
}
}

+ /* FIXME: implement fast CreateDataPropertyOrThrow(). */
prop = njs_object_prop_alloc(vm, &string_index, &njs_value_undefined, 1);
if (njs_slow_path(prop == NULL)) {
goto fail;
}

- /* TODO: Non UTF-8 position */
+ njs_set_number(&prop->value, njs_string_index(string, captures[0]));

- njs_set_number(&prop->value, last_index + captures[0]);
-
- if (regexp->pattern->global) {
- njs_set_number(&regexp->last_index, last_index + captures[1]);
+ if (pattern->global) {
+ njs_set_number(&regexp->last_index,
+ njs_string_index(string, captures[1]));
}

lhq.key_hash = NJS_INDEX_HASH;
@@ -1038,7 +1029,7 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
goto insert_fail;
}

- if (regexp->pattern->ngroups != 0) {
+ if (pattern->ngroups != 0) {
groups = njs_object_alloc(vm);
if (njs_slow_path(groups == NULL)) {
goto fail;
@@ -1049,7 +1040,7 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
i = 0;

do {
- group = &regexp->pattern->groups[i];
+ group = &pattern->groups[i];

ret = njs_string_set(vm, &name, group->name.start,
group->name.length);
@@ -1074,11 +1065,9 @@ njs_regexp_exec_result(njs_vm_t *vm, njs

i++;

- } while (i < regexp->pattern->ngroups);
+ } while (i < pattern->ngroups);
}

- njs_set_array(&vm->retval, array);
-
ret = NJS_OK;
goto done;

@@ -1094,7 +1083,71 @@ done:

njs_regex_match_data_free(match_data, vm->regex_context);

- return ret;
+ return (ret == NJS_OK) ? array : NULL;
+}
+
+
+njs_int_t
+njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
+ njs_index_t unused)
+{
+ njs_int_t ret;
+ njs_value_t *r, *s;
+ njs_value_t string_lvalue;
+
+ r = njs_argument(args, 0);
+
+ if (njs_slow_path(!njs_is_regexp(r))) {
+ njs_type_error(vm, "\"this\" argument is not a regexp");
+ return NJS_ERROR;
+ }
+
+ s = njs_lvalue_arg(&string_lvalue, args, nargs, 1);
+
+ ret = njs_value_to_string(vm, s, s);
+ if (njs_slow_path(ret != NJS_OK)) {
+ return ret;
+ }
+
+ return njs_regexp_builtin_exec(vm, r, s, &vm->retval);
+}
+
+
+njs_int_t
+njs_regexp_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s,
+ njs_value_t *retval)
+{
+ njs_int_t ret;
+ njs_value_t exec;
+
+ static const njs_value_t string_exec = njs_string("exec");
+
+ ret = njs_value_property(vm, r, njs_value_arg(&string_exec), &exec);
+ if (njs_slow_path(ret == NJS_ERROR)) {
+ return NJS_ERROR;
+ }
+
+ if (njs_is_function(&exec)) {
+ ret = njs_function_call(vm, njs_function(&exec), r, s, 1, retval);
+ if (njs_slow_path(ret == NJS_ERROR)) {
+ return NJS_ERROR;
+ }
+
+ if (njs_slow_path(!njs_is_object(retval) && !njs_is_null(retval))) {
+ njs_type_error(vm, "unexpected \"%s\" retval in njs_regexp_exec()",
+ njs_type_string(retval->type));
+ return NJS_ERROR;
+ }
+
+ return NJS_OK;
+ }
+
+ if (njs_slow_path(!njs_is_regexp(r))) {
+ njs_type_error(vm, "receiver argument is not a regexp");
+ return NJS_ERROR;
+ }
+
+ return njs_regexp_builtin_exec(vm, r, s, retval);
}


diff -r c30a2a805014 -r f9082cd59ba6 src/njs_regexp.h
--- a/src/njs_regexp.h Thu Jul 02 12:59:54 2020 +0000
+++ b/src/njs_regexp.h Thu Jul 02 13:59:33 2020 +0000
@@ -24,8 +24,10 @@ njs_regexp_flags_t njs_regexp_flags(u_ch
njs_regexp_pattern_t *njs_regexp_pattern_create(njs_vm_t *vm,
u_char *string, size_t length, njs_regexp_flags_t flags);
njs_int_t njs_regexp_match(njs_vm_t *vm, njs_regex_t *regex,
- const u_char *subject, size_t len, njs_regex_match_data_t *match_data);
+ const u_char *subject, size_t off, size_t len, njs_regex_match_data_t *d);
njs_regexp_t *njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern);
+njs_int_t njs_regexp_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s,
+ njs_value_t *retval);
njs_int_t njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args,
njs_uint_t nargs, njs_index_t unused);

diff -r c30a2a805014 -r f9082cd59ba6 src/njs_string.c
--- a/src/njs_string.c Thu Jul 02 12:59:54 2020 +0000
+++ b/src/njs_string.c Thu Jul 02 13:59:33 2020 +0000
@@ -3022,7 +3022,7 @@ njs_string_prototype_search(njs_vm_t *vm

if (njs_regex_is_valid(&pattern->regex[n])) {
ret = njs_regexp_match(vm, &pattern->regex[n], string.start,
- string.size, vm->single_match_data);
+ 0, string.size, vm->single_match_data);
if (ret >= 0) {
captures = njs_regex_captures(vm->single_match_data);
index = njs_string_index(&string, captures[0]);
@@ -3147,7 +3147,7 @@ njs_string_match_multiple(njs_vm_t *vm,
end = p + string.size;

do {
- ret = njs_regexp_match(vm, &pattern->regex[type], p, string.size,
+ ret = njs_regexp_match(vm, &pattern->regex[type], p, 0, string.size,
vm->single_match_data);
if (ret < 0) {
if (njs_fast_path(ret == NJS_REGEX_NOMATCH)) {
@@ -3286,7 +3286,7 @@ njs_string_prototype_split(njs_vm_t *vm,
end = string.start + string.size;

do {
- ret = njs_regexp_match(vm, &pattern->regex[type], start,
+ ret = njs_regexp_match(vm, &pattern->regex[type], start, 0,
end - start, vm->single_match_data);
if (ret >= 0) {
captures = njs_regex_captures(vm->single_match_data);
@@ -3564,7 +3564,7 @@ njs_string_replace_regexp(njs_vm_t *vm,

do {
ret = njs_regexp_match(vm, &pattern->regex[r->type],
- r->part[0].start, r->part[0].size,
+ r->part[0].start, 0, r->part[0].size,
r->match_data);

if (ret < 0) {
diff -r c30a2a805014 -r f9082cd59ba6 src/test/njs_unit_test.c
--- a/src/test/njs_unit_test.c Thu Jul 02 12:59:54 2020 +0000
+++ b/src/test/njs_unit_test.c Thu Jul 02 13:59:33 2020 +0000
@@ -9711,6 +9711,9 @@ static njs_unit_test_t njs_test[] =
{ njs_str("var s; var r = /./g; while (s = r.exec('abc')); s"),
njs_str("null") },

+ { njs_str("(/α/).exec('γαβγ').index"),
+ njs_str("1") },
+
{ njs_str("var r = /LS/i.exec(false); r[0]"),
njs_str("ls") },

_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject | Author | Views | Posted

[njs] Introduced RegExpBuiltinExec().

Dmitry Volyntsev | 226 | July 02, 2020 10:02AM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 320
Record Number of Users: 8 on April 13, 2023
Record Number of Guests: 421 on December 02, 2018
Powered by nginx | Powered by FreeBSD | PHP Powered | Powered by MariaDB | IPv6 ready