Welcome! Log In Create A New Profile

Advanced

[njs] RegExp: encapsulating PCRE API.

Dmitry Volyntsev
November 11, 2021 09:32AM
details: https://hg.nginx.org/njs/rev/67ee2e4907a8
branches:
changeset: 1743:67ee2e4907a8
user: Dmitry Volyntsev <xeioex@nginx.com>
date: Thu Nov 11 14:26:30 2021 +0000
description:
RegExp: encapsulating PCRE API.

diffstat:

src/njs_main.h | 1 -
src/njs_parser.c | 2 +-
src/njs_pcre.c | 214 +++++++++++++++++++++++++++++++------
src/njs_pcre.h | 40 -------
src/njs_regex.h | 65 +++++++++--
src/njs_regexp.c | 262 +++++++++++++---------------------------------
src/njs_regexp.h | 16 +--
src/njs_string.c | 31 ++--
src/njs_vm.h | 3 +-
src/test/njs_unit_test.c | 2 +
10 files changed, 326 insertions(+), 310 deletions(-)

diffs (truncated from 1159 to 1000 lines):

diff -r dedadba0ee87 -r 67ee2e4907a8 src/njs_main.h
--- a/src/njs_main.h Wed Nov 10 14:50:16 2021 +0000
+++ b/src/njs_main.h Thu Nov 11 14:26:30 2021 +0000
@@ -38,7 +38,6 @@
#include <njs_sprintf.h>
#include <njs_assert.h>

-#include <njs_pcre.h>
#include <njs_regex.h>

#include <njs_md5.h>
diff -r dedadba0ee87 -r 67ee2e4907a8 src/njs_parser.c
--- a/src/njs_parser.c Wed Nov 10 14:50:16 2021 +0000
+++ b/src/njs_parser.c Thu Nov 11 14:26:30 2021 +0000
@@ -1198,7 +1198,7 @@ njs_parser_regexp_literal(njs_parser_t *
njs_int_t ret;
njs_lexer_t *lexer;
njs_value_t *value, retval;
- njs_regexp_flags_t flags;
+ njs_regex_flags_t flags;
njs_regexp_pattern_t *pattern;

static const njs_value_t string_message = njs_string("message");
diff -r dedadba0ee87 -r 67ee2e4907a8 src/njs_pcre.c
--- a/src/njs_pcre.c Wed Nov 10 14:50:16 2021 +0000
+++ b/src/njs_pcre.c Thu Nov 11 14:26:30 2021 +0000
@@ -7,21 +7,23 @@

#include <njs_main.h>

+#include <pcre.h>
+

static void *njs_pcre_malloc(size_t size);
static void njs_pcre_free(void *p);


-static njs_regex_context_t *regex_context;
+static njs_regex_generic_ctx_t *regex_context;


-njs_regex_context_t *
-njs_regex_context_create(njs_pcre_malloc_t private_malloc,
+njs_regex_generic_ctx_t *
+njs_regex_generic_ctx_create(njs_pcre_malloc_t private_malloc,
njs_pcre_free_t private_free, void *memory_data)
{
- njs_regex_context_t *ctx;
+ njs_regex_generic_ctx_t *ctx;

- ctx = private_malloc(sizeof(njs_regex_context_t), memory_data);
+ ctx = private_malloc(sizeof(njs_regex_generic_ctx_t), memory_data);

if (njs_fast_path(ctx != NULL)) {
ctx->private_malloc = private_malloc;
@@ -33,15 +35,138 @@ njs_regex_context_create(njs_pcre_malloc
}


+njs_regex_compile_ctx_t *
+njs_regex_compile_ctx_create(njs_regex_generic_ctx_t *ctx)
+{
+ return ctx;
+}
+
+
+/*
+ * 1) PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
+ * lone closing square brackets as invalid. Whereas according
+ * to ES6: 11.8.5 it is a valid regexp expression.
+ *
+ * 2) escaping zero byte characters as "\u0000".
+ *
+ * Escaping it here as a workaround.
+ */
+
+njs_int_t
+njs_regex_escape(njs_mp_t *mp, njs_str_t *text)
+{
+ size_t brackets, zeros;
+ u_char *p, *dst, *start, *end;
+ njs_bool_t in;
+
+ start = text->start;
+ end = text->start + text->length;
+
+ in = 0;
+ zeros = 0;
+ brackets = 0;
+
+ for (p = start; p < end; p++) {
+
+ switch (*p) {
+ case '[':
+ in = 1;
+ break;
+
+ case ']':
+ if (!in) {
+ brackets++;
+ }
+
+ in = 0;
+ break;
+
+ case '\\':
+ p++;
+
+ if (p == end || *p != '\0') {
+ break;
+ }
+
+ /* Fall through. */
+
+ case '\0':
+ zeros++;
+ break;
+ }
+ }
+
+ if (!brackets && !zeros) {
+ return NJS_OK;
+ }
+
+ text->length = text->length + brackets + zeros * njs_length("\\u0000");
+
+ text->start = njs_mp_alloc(mp, text->length);
+ if (njs_slow_path(text->start == NULL)) {
+ return NJS_ERROR;
+ }
+
+ in = 0;
+ dst = text->start;
+
+ for (p = start; p < end; p++) {
+
+ switch (*p) {
+ case '[':
+ in = 1;
+ break;
+
+ case ']':
+ if (!in) {
+ *dst++ = '\\';
+ }
+
+ in = 0;
+ break;
+
+ case '\\':
+ *dst++ = *p++;
+
+ if (p == end) {
+ goto done;
+ }
+
+ if (*p != '\0') {
+ break;
+ }
+
+ /* Fall through. */
+
+ case '\0':
+ dst = njs_cpymem(dst, "\\u0000", 6);
+ continue;
+ }
+
+ *dst++ = *p;
+ }
+
+done:
+
+ text->length = dst - text->start;
+
+ return NJS_OK;
+}
+
+
njs_int_t
njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len,
- njs_uint_t options, njs_regex_context_t *ctx)
+ njs_regex_flags_t flags, njs_regex_compile_ctx_t *cctx, njs_trace_t *trace)
{
- int ret, err, erroff;
- char *pattern, *error;
- void *(*saved_malloc)(size_t size);
- void (*saved_free)(void *p);
- const char *errstr;
+ int ret, err, erroff;
+ char *pattern, *error;
+ void *(*saved_malloc)(size_t size);
+ void (*saved_free)(void *p);
+ njs_uint_t options;
+ const char *errstr;
+ njs_regex_generic_ctx_t *ctx;
+
+ ctx = cctx;

ret = NJS_ERROR;

@@ -51,31 +176,43 @@ njs_regex_compile(njs_regex_t *regex, u_
pcre_free = njs_pcre_free;
regex_context = ctx;

- if (len == 0) {
- pattern = (char *) source;
+#ifdef PCRE_JAVASCRIPT_COMPAT
+ /* JavaScript compatibility has been introduced in PCRE-7.7. */
+ options = PCRE_JAVASCRIPT_COMPAT;
+#else
+ options = 0;
+#endif
+
+ if ((flags & NJS_REGEX_IGNORE_CASE)) {
+ options |= PCRE_CASELESS;
+ }

- } else {
- pattern = ctx->private_malloc(len + 1, ctx->memory_data);
- if (njs_slow_path(pattern == NULL)) {
- goto done;
- }
+ if ((flags & NJS_REGEX_MULTILINE)) {
+ options |= PCRE_MULTILINE;
+ }

- memcpy(pattern, source, len);
- pattern[len] = '\0';
+ if ((flags & NJS_REGEX_STICKY)) {
+ options |= PCRE_ANCHORED;
}

+ if ((flags & NJS_REGEX_UTF8)) {
+ options |= PCRE_UTF8;
+ }
+
+ pattern = (char *) source;
+
regex->code = pcre_compile(pattern, options, &errstr, &erroff, NULL);

if (njs_slow_path(regex->code == NULL)) {
error = pattern + erroff;

if (*error != '\0') {
- njs_alert(ctx->trace, NJS_LEVEL_ERROR,
+ njs_alert(trace, NJS_LEVEL_ERROR,
"pcre_compile(\"%s\") failed: %s at \"%s\"",
pattern, errstr, error);

} else {
- njs_alert(ctx->trace, NJS_LEVEL_ERROR,
+ njs_alert(trace, NJS_LEVEL_ERROR,
"pcre_compile(\"%s\") failed: %s", pattern, errstr);
}

@@ -87,7 +224,7 @@ njs_regex_compile(njs_regex_t *regex, u_
regex->extra = pcre_study(regex->code, 0, &errstr);

if (njs_slow_path(errstr != NULL)) {
- njs_alert(ctx->trace, NJS_LEVEL_WARN,
+ njs_alert(trace, NJS_LEVEL_WARN,
"pcre_study(\"%s\") failed: %s", pattern, errstr);
}

@@ -95,7 +232,7 @@ njs_regex_compile(njs_regex_t *regex, u_
&regex->ncaptures);

if (njs_slow_path(err < 0)) {
- njs_alert(ctx->trace, NJS_LEVEL_ERROR,
+ njs_alert(trace, NJS_LEVEL_ERROR,
"pcre_fullinfo(\"%s\", PCRE_INFO_CAPTURECOUNT) failed: %d",
pattern, err);

@@ -106,7 +243,7 @@ njs_regex_compile(njs_regex_t *regex, u_
&regex->backrefmax);

if (njs_slow_path(err < 0)) {
- njs_alert(ctx->trace, NJS_LEVEL_ERROR,
+ njs_alert(trace, NJS_LEVEL_ERROR,
"pcre_fullinfo(\"%s\", PCRE_INFO_BACKREFMAX) failed: %d",
pattern, err);

@@ -121,7 +258,7 @@ njs_regex_compile(njs_regex_t *regex, u_
&regex->nentries);

if (njs_slow_path(err < 0)) {
- njs_alert(ctx->trace, NJS_LEVEL_ERROR,
+ njs_alert(trace, NJS_LEVEL_ERROR,
"pcre_fullinfo(\"%s\", PCRE_INFO_NAMECOUNT) failed: %d",
pattern, err);

@@ -133,7 +270,7 @@ njs_regex_compile(njs_regex_t *regex, u_
&regex->entry_size);

if (njs_slow_path(err < 0)) {
- njs_alert(ctx->trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
+ njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
"PCRE_INFO_NAMEENTRYSIZE) failed: %d", pattern, err);

goto done;
@@ -143,7 +280,7 @@ njs_regex_compile(njs_regex_t *regex, u_
&regex->entries);

if (njs_slow_path(err < 0)) {
- njs_alert(ctx->trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
+ njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
"PCRE_INFO_NAMETABLE) failed: %d", pattern, err);

goto done;
@@ -193,7 +330,7 @@ njs_regex_named_captures(njs_regex_t *re


njs_regex_match_data_t *
-njs_regex_match_data(njs_regex_t *regex, njs_regex_context_t *ctx)
+njs_regex_match_data(njs_regex_t *regex, njs_regex_generic_ctx_t *ctx)
{
size_t size;
njs_uint_t ncaptures;
@@ -222,7 +359,7 @@ njs_regex_match_data(njs_regex_t *regex,

void
njs_regex_match_data_free(njs_regex_match_data_t *match_data,
- njs_regex_context_t *ctx)
+ njs_regex_generic_ctx_t *ctx)
{
ctx->private_free(match_data, ctx->memory_data);
}
@@ -244,25 +381,28 @@ njs_pcre_free(void *p)

njs_int_t
njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t off,
- size_t len, njs_regex_match_data_t *match_data, njs_regex_context_t *ctx)
+ size_t len, njs_regex_match_data_t *match_data, njs_trace_t *trace)
{
int ret;

ret = pcre_exec(regex->code, regex->extra, (const char *) subject, len,
off, 0, match_data->captures, match_data->ncaptures);

- /* PCRE_ERROR_NOMATCH is -1. */
+ if (ret <= PCRE_ERROR_NOMATCH) {
+ if (ret == PCRE_ERROR_NOMATCH) {
+ return NJS_DECLINED;
+ }

- if (njs_slow_path(ret < PCRE_ERROR_NOMATCH)) {
- njs_alert(ctx->trace, NJS_LEVEL_ERROR, "pcre_exec() failed: %d", ret);
+ njs_alert(trace, NJS_LEVEL_ERROR, "pcre_exec() failed: %d", ret);
+ return NJS_ERROR;
}

return ret;
}


-int *
-njs_regex_captures(njs_regex_match_data_t *match_data)
+size_t
+njs_regex_capture(njs_regex_match_data_t *match_data, njs_uint_t n)
{
- return match_data->captures;
+ return match_data->captures[n];
}
diff -r dedadba0ee87 -r 67ee2e4907a8 src/njs_pcre.h
--- a/src/njs_pcre.h Wed Nov 10 14:50:16 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-
-/*
- * Copyright (C) Igor Sysoev
- * Copyright (C) NGINX, Inc.
- */
-
-#ifndef _NJS_PCRE_H_INCLUDED_
-#define _NJS_PCRE_H_INCLUDED_
-
-
-#include <pcre.h>
-
-
-#define NJS_REGEX_NOMATCH PCRE_ERROR_NOMATCH
-
-
-struct njs_regex_s {
- pcre *code;
- pcre_extra *extra;
- int ncaptures;
- int backrefmax;
- int nentries;
- int entry_size;
- char *entries;
-};
-
-
-struct njs_regex_match_data_s {
- int ncaptures;
- /*
- * Each capture is stored in 3 "int" vector elements.
- * The N capture positions are stored in [n * 2] and [n * 2 + 1] elements.
- * The 3rd bookkeeping elements are at the end of the vector.
- * The first vector is for the "$0" capture and it is always allocated.
- */
- int captures[3];
-};
-
-
-#endif /* _NJS_PCRE_H_INCLUDED_ */
diff -r dedadba0ee87 -r 67ee2e4907a8 src/njs_regex.h
--- a/src/njs_regex.h Wed Nov 10 14:50:16 2021 +0000
+++ b/src/njs_regex.h Thu Nov 11 14:26:30 2021 +0000
@@ -7,39 +7,78 @@
#ifndef _NJS_REGEX_H_INCLUDED_
#define _NJS_REGEX_H_INCLUDED_

+#define NJS_REGEX_UNSET (size_t) (-1)
+
+
+typedef enum {
+ NJS_REGEX_INVALID_FLAG = -1,
+ NJS_REGEX_NO_FLAGS = 0,
+ NJS_REGEX_GLOBAL = 1,
+ NJS_REGEX_IGNORE_CASE = 2,
+ NJS_REGEX_MULTILINE = 4,
+ NJS_REGEX_STICKY = 8,
+ NJS_REGEX_UTF8 = 16,
+} njs_regex_flags_t;
+

typedef void *(*njs_pcre_malloc_t)(size_t size, void *memory_data);
typedef void (*njs_pcre_free_t)(void *p, void *memory_data);


-typedef struct njs_regex_s njs_regex_t;
-typedef struct njs_regex_match_data_s njs_regex_match_data_t;
-
-
typedef struct {
njs_pcre_malloc_t private_malloc;
njs_pcre_free_t private_free;
void *memory_data;
- njs_trace_t *trace;
-} njs_regex_context_t;
+} njs_regex_generic_ctx_t;
+
+
+#define njs_regex_compile_ctx_t void
+
+
+typedef struct {
+ void *code;
+ void *extra;
+ int ncaptures;
+ int backrefmax;
+ int nentries;
+ int entry_size;
+ char *entries;
+} njs_regex_t;


-NJS_EXPORT njs_regex_context_t *
- njs_regex_context_create(njs_pcre_malloc_t private_malloc,
+typedef struct {
+ int ncaptures;
+ /*
+ * Each capture is stored in 3 "int" vector elements.
+ * The N capture positions are stored in [n * 2] and [n * 2 + 1] elements.
+ * The 3rd bookkeeping elements are at the end of the vector.
+ * The first vector is for the "$0" capture and it is always allocated.
+ */
+ int captures[3];
+} njs_regex_match_data_t;
+
+
+NJS_EXPORT njs_regex_generic_ctx_t *
+ njs_regex_generic_ctx_create(njs_pcre_malloc_t private_malloc,
njs_pcre_free_t private_free, void *memory_data);
+NJS_EXPORT njs_regex_compile_ctx_t *njs_regex_compile_ctx_create(
+ njs_regex_generic_ctx_t *ctx);
+NJS_EXPORT njs_int_t njs_regex_escape(njs_mp_t *mp, njs_str_t *text);
NJS_EXPORT njs_int_t njs_regex_compile(njs_regex_t *regex, u_char *source,
- size_t len, njs_uint_t options, njs_regex_context_t *ctx);
+ size_t len, njs_regex_flags_t flags, njs_regex_compile_ctx_t *ctx,
+ njs_trace_t *trace);
NJS_EXPORT njs_bool_t njs_regex_is_valid(njs_regex_t *regex);
NJS_EXPORT njs_int_t njs_regex_named_captures(njs_regex_t *regex,
njs_str_t *name, int n);
NJS_EXPORT njs_regex_match_data_t *njs_regex_match_data(njs_regex_t *regex,
- njs_regex_context_t *ctx);
+ njs_regex_generic_ctx_t *ctx);
NJS_EXPORT void njs_regex_match_data_free(njs_regex_match_data_t *match_data,
- njs_regex_context_t *ctx);
+ njs_regex_generic_ctx_t *ctx);
NJS_EXPORT njs_int_t njs_regex_match(njs_regex_t *regex, const u_char *subject,
size_t off, size_t len, njs_regex_match_data_t *match_data,
- njs_regex_context_t *ctx);
-NJS_EXPORT int *njs_regex_captures(njs_regex_match_data_t *match_data);
+ njs_trace_t *trace);
+NJS_EXPORT size_t njs_regex_capture(njs_regex_match_data_t *match_data,
+ njs_uint_t n);


#endif /* _NJS_REGEX_H_INCLUDED_ */
diff -r dedadba0ee87 -r 67ee2e4907a8 src/njs_regexp.c
--- a/src/njs_regexp.c Wed Nov 10 14:50:16 2021 +0000
+++ b/src/njs_regexp.c Thu Nov 11 14:26:30 2021 +0000
@@ -20,7 +20,7 @@ static void njs_regexp_free(void *p, voi
static njs_int_t njs_regexp_prototype_source(njs_vm_t *vm, njs_value_t *args,
njs_uint_t nargs, njs_index_t unused);
static int njs_regexp_pattern_compile(njs_vm_t *vm, njs_regex_t *regex,
- u_char *source, int options);
+ u_char *source, size_t len, njs_regex_flags_t flags);
static u_char *njs_regexp_compile_trace_handler(njs_trace_t *trace,
njs_trace_data_t *td, u_char *start);
static u_char *njs_regexp_match_trace_handler(njs_trace_t *trace,
@@ -37,21 +37,26 @@ const njs_value_t njs_string_lindex = n
njs_int_t
njs_regexp_init(njs_vm_t *vm)
{
- vm->regex_context = njs_regex_context_create(njs_regexp_malloc,
- njs_regexp_free, vm->mem_pool);
- if (njs_slow_path(vm->regex_context == NULL)) {
+ vm->regex_generic_ctx = njs_regex_generic_ctx_create(njs_regexp_malloc,
+ njs_regexp_free,
+ vm->mem_pool);
+ if (njs_slow_path(vm->regex_generic_ctx == NULL)) {
njs_memory_error(vm);
return NJS_ERROR;
}

- vm->single_match_data = njs_regex_match_data(NULL, vm->regex_context);
+ vm->regex_compile_ctx = njs_regex_compile_ctx_create(vm->regex_generic_ctx);
+ if (njs_slow_path(vm->regex_compile_ctx == NULL)) {
+ njs_memory_error(vm);
+ return NJS_ERROR;
+ }
+
+ vm->single_match_data = njs_regex_match_data(NULL, vm->regex_generic_ctx);
if (njs_slow_path(vm->single_match_data == NULL)) {
njs_memory_error(vm);
return NJS_ERROR;
}

- vm->regex_context->trace = &vm->trace;
-
return NJS_OK;
}

@@ -70,10 +75,10 @@ njs_regexp_free(void *p, void *memory_da
}


-static njs_regexp_flags_t
+static njs_regex_flags_t
njs_regexp_value_flags(njs_vm_t *vm, const njs_value_t *regexp)
{
- njs_regexp_flags_t flags;
+ njs_regex_flags_t flags;
njs_regexp_pattern_t *pattern;

flags = 0;
@@ -81,19 +86,19 @@ njs_regexp_value_flags(njs_vm_t *vm, con
pattern = njs_regexp_pattern(regexp);

if (pattern->global) {
- flags |= NJS_REGEXP_GLOBAL;
+ flags |= NJS_REGEX_GLOBAL;
}

if (pattern->ignore_case) {
- flags |= NJS_REGEXP_IGNORE_CASE;
+ flags |= NJS_REGEX_IGNORE_CASE;
}

if (pattern->multiline) {
- flags |= NJS_REGEXP_MULTILINE;
+ flags |= NJS_REGEX_MULTILINE;
}

if (pattern->sticky) {
- flags |= NJS_REGEXP_STICKY;
+ flags |= NJS_REGEX_STICKY;
}

return flags;
@@ -108,7 +113,7 @@ njs_regexp_constructor(njs_vm_t *vm, njs
njs_int_t ret;
njs_str_t string;
njs_value_t source, *pattern, *flags;
- njs_regexp_flags_t re_flags;
+ njs_regex_flags_t re_flags;

pattern = njs_arg(args, nargs, 1);

@@ -168,7 +173,7 @@ njs_regexp_constructor(njs_vm_t *vm, njs

njs_int_t
njs_regexp_create(njs_vm_t *vm, njs_value_t *value, u_char *start,
- size_t length, njs_regexp_flags_t flags)
+ size_t length, njs_regex_flags_t flags)
{
njs_regexp_t *regexp;
njs_regexp_pattern_t *pattern;
@@ -200,143 +205,30 @@ njs_regexp_create(njs_vm_t *vm, njs_valu
}


-/*
- * 1) PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
- * lone closing square brackets as invalid. Whereas according
- * to ES6: 11.8.5 it is a valid regexp expression.
- *
- * 2) escaping zero byte characters as "\u0000".
- *
- * Escaping it here as a workaround.
- */
-
-njs_inline njs_int_t
-njs_regexp_escape(njs_vm_t *vm, njs_str_t *text)
-{
- size_t brackets, zeros;
- u_char *p, *dst, *start, *end;
- njs_bool_t in;
-
- start = text->start;
- end = text->start + text->length;
-
- in = 0;
- zeros = 0;
- brackets = 0;
-
- for (p = start; p < end; p++) {
-
- switch (*p) {
- case '[':
- in = 1;
- break;
-
- case ']':
- if (!in) {
- brackets++;
- }
-
- in = 0;
- break;
-
- case '\\':
- p++;
-
- if (p == end || *p != '\0') {
- break;
- }
-
- /* Fall through. */
-
- case '\0':
- zeros++;
- break;
- }
- }
-
- if (!brackets && !zeros) {
- return NJS_OK;
- }
-
- text->length = text->length + brackets + zeros * njs_length("\\u0000");
-
- text->start = njs_mp_alloc(vm->mem_pool, text->length);
- if (njs_slow_path(text->start == NULL)) {
- njs_memory_error(vm);
- return NJS_ERROR;
- }
-
- in = 0;
- dst = text->start;
-
- for (p = start; p < end; p++) {
-
- switch (*p) {
- case '[':
- in = 1;
- break;
-
- case ']':
- if (!in) {
- *dst++ = '\\';
- }
-
- in = 0;
- break;
-
- case '\\':
- *dst++ = *p++;
-
- if (p == end) {
- goto done;
- }
-
- if (*p != '\0') {
- break;
- }
-
- /* Fall through. */
-
- case '\0':
- dst = njs_cpymem(dst, "\\u0000", 6);
- continue;
- }
-
- *dst++ = *p;
- }
-
-done:
-
- text->length = dst - text->start;
-
- return NJS_OK;
-}
-
-
-njs_regexp_flags_t
+njs_regex_flags_t
njs_regexp_flags(u_char **start, u_char *end)
{
- u_char *p;
- njs_regexp_flags_t flags, flag;
+ u_char *p;
+ njs_regex_flags_t flags, flag;

- flags = NJS_REGEXP_NO_FLAGS;
+ flags = NJS_REGEX_NO_FLAGS;

for (p = *start; p < end; p++) {
switch (*p) {
case 'g':
- flag = NJS_REGEXP_GLOBAL;
+ flag = NJS_REGEX_GLOBAL;
break;

case 'i':
- flag = NJS_REGEXP_IGNORE_CASE;
+ flag = NJS_REGEX_IGNORE_CASE;
break;

case 'm':
- flag = NJS_REGEXP_MULTILINE;
+ flag = NJS_REGEX_MULTILINE;
break;

case 'y':
- flag = NJS_REGEXP_STICKY;
+ flag = NJS_REGEX_STICKY;
break;

default:
@@ -364,15 +256,15 @@ invalid:

*start = p + 1;

- return NJS_REGEXP_INVALID_FLAG;
+ return NJS_REGEX_INVALID_FLAG;
}


njs_regexp_pattern_t *
njs_regexp_pattern_create(njs_vm_t *vm, u_char *start, size_t length,
- njs_regexp_flags_t flags)
+ njs_regex_flags_t flags)
{
- int options, ret;
+ int ret;
u_char *p, *end;
size_t size;
njs_str_t text;
@@ -382,15 +274,16 @@ njs_regexp_pattern_create(njs_vm_t *vm,
njs_regexp_pattern_t *pattern;

size = 1; /* A trailing "/". */
- size += ((flags & NJS_REGEXP_GLOBAL) != 0);
- size += ((flags & NJS_REGEXP_IGNORE_CASE) != 0);
- size += ((flags & NJS_REGEXP_MULTILINE) != 0);
+ size += ((flags & NJS_REGEX_GLOBAL) != 0);
+ size += ((flags & NJS_REGEX_IGNORE_CASE) != 0);
+ size += ((flags & NJS_REGEX_MULTILINE) != 0);

text.start = start;
text.length = length;

- ret = njs_regexp_escape(vm, &text);
+ ret = njs_regex_escape(vm->mem_pool, &text);
if (njs_slow_path(ret != NJS_OK)) {
+ njs_memory_error(vm);
return NULL;
}

@@ -412,39 +305,27 @@ njs_regexp_pattern_create(njs_vm_t *vm,
end = p;
*p++ = '\0';

- pattern->global = ((flags & NJS_REGEXP_GLOBAL) != 0);
+ pattern->global = ((flags & NJS_REGEX_GLOBAL) != 0);
if (pattern->global) {
*p++ = 'g';
}

-#ifdef PCRE_JAVASCRIPT_COMPAT
- /* JavaScript compatibility has been introduced in PCRE-7.7. */
- options = PCRE_JAVASCRIPT_COMPAT;
-#else
- options = 0;
-#endif
-
- pattern->ignore_case = ((flags & NJS_REGEXP_IGNORE_CASE) != 0);
+ pattern->ignore_case = ((flags & NJS_REGEX_IGNORE_CASE) != 0);
if (pattern->ignore_case) {
*p++ = 'i';
- options |= PCRE_CASELESS;
}

- pattern->multiline = ((flags & NJS_REGEXP_MULTILINE) != 0);
+ pattern->multiline = ((flags & NJS_REGEX_MULTILINE) != 0);
if (pattern->multiline) {
*p++ = 'm';
- options |= PCRE_MULTILINE;
}

- pattern->sticky = ((flags & NJS_REGEXP_STICKY) != 0);
- if (pattern->sticky) {
- options |= PCRE_ANCHORED;
- }
+ pattern->sticky = ((flags & NJS_REGEX_STICKY) != 0);

*p++ = '\0';

ret = njs_regexp_pattern_compile(vm, &pattern->regex[0],
- &pattern->source[1], options);
+ &pattern->source[1], text.length, flags);

if (njs_fast_path(ret >= 0)) {
pattern->ncaptures = ret;
@@ -454,7 +335,8 @@ njs_regexp_pattern_create(njs_vm_t *vm,
}

ret = njs_regexp_pattern_compile(vm, &pattern->regex[1],
- &pattern->source[1], options | PCRE_UTF8);
+ &pattern->source[1], text.length,
+ flags | NJS_REGEX_UTF8);
if (njs_fast_path(ret >= 0)) {

if (njs_slow_path(njs_regex_is_valid(&pattern->regex[0])
@@ -519,7 +401,7 @@ fail:

static int
njs_regexp_pattern_compile(njs_vm_t *vm, njs_regex_t *regex, u_char *source,
- int options)
+ size_t len, njs_regex_flags_t flags)
{
njs_int_t ret;
njs_trace_handler_t handler;
@@ -527,8 +409,8 @@ njs_regexp_pattern_compile(njs_vm_t *vm,
handler = vm->trace.handler;
vm->trace.handler = njs_regexp_compile_trace_handler;

- /* Zero length means a zero-terminated string. */
- ret = njs_regex_compile(regex, source, 0, options, vm->regex_context);
+ ret = njs_regex_compile(regex, source, len, flags, vm->regex_compile_ctx,
+ &vm->trace);

vm->trace.handler = handler;

@@ -568,8 +450,7 @@ njs_regexp_match(njs_vm_t *vm, njs_regex
handler = vm->trace.handler;
vm->trace.handler = njs_regexp_match_trace_handler;

- ret = njs_regex_match(regex, subject, off, len, match_data,
- vm->regex_context);
+ ret = njs_regex_match(regex, subject, off, len, match_data, &vm->trace);

vm->trace.handler = handler;

@@ -742,19 +623,19 @@ njs_regexp_prototype_flag(njs_vm_t *vm,
pattern = njs_regexp_pattern(this);

switch (flag) {
- case NJS_REGEXP_GLOBAL:
+ case NJS_REGEX_GLOBAL:
yn = pattern->global;
break;

- case NJS_REGEXP_IGNORE_CASE:
+ case NJS_REGEX_IGNORE_CASE:
yn = pattern->ignore_case;
break;

- case NJS_REGEXP_MULTILINE:
+ case NJS_REGEX_MULTILINE:
yn = pattern->multiline;
break;

- case NJS_REGEXP_STICKY:
+ case NJS_REGEX_STICKY:
default:
yn = pattern->sticky;
break;
@@ -996,7 +877,8 @@ njs_regexp_builtin_exec(njs_vm_t *vm, nj
goto not_found;
}

- match_data = njs_regex_match_data(&pattern->regex[type], vm->regex_context);
+ match_data = njs_regex_match_data(&pattern->regex[type],
+ vm->regex_generic_ctx);
if (njs_slow_path(match_data == NULL)) {
njs_memory_error(vm);
return NJS_ERROR;
@@ -1023,9 +905,8 @@ njs_regexp_builtin_exec(njs_vm_t *vm, nj
return NJS_OK;
}

- if (njs_slow_path(ret != NJS_REGEX_NOMATCH)) {
- njs_regex_match_data_free(match_data, vm->regex_context);
-
+ if (njs_slow_path(ret == NJS_ERROR)) {
+ njs_regex_match_data_free(match_data, vm->regex_generic_ctx);
return NJS_ERROR;
}

@@ -1050,8 +931,8 @@ static njs_array_t *
njs_regexp_exec_result(njs_vm_t *vm, njs_value_t *r, njs_utf8_t utf8,
njs_string_prop_t *string, njs_regex_match_data_t *match_data)
{
- int *captures;
u_char *start;
+ size_t c;
int32_t size, length;
uint32_t index;
njs_int_t ret;
@@ -1076,14 +957,13 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
goto fail;
}

- captures = njs_regex_captures(match_data);
-
for (i = 0; i < pattern->ncaptures; i++) {
n = 2 * i;
+ c = njs_regex_capture(match_data, n);

- if (captures[n] != -1) {
- start = &string->start[captures[n]];
- size = captures[n + 1] - captures[n];
+ if (c != NJS_REGEX_UNSET) {
+ start = &string->start[c];
+ size = njs_regex_capture(match_data, n + 1) - c;

if (utf8 == NJS_STRING_UTF8) {
length = njs_max(njs_utf8_length(start, size), 0);
@@ -1109,21 +989,25 @@ njs_regexp_exec_result(njs_vm_t *vm, njs
goto fail;
}

+ c = njs_regex_capture(match_data, 0);
+
if (utf8 == NJS_STRING_UTF8) {
- index = njs_string_index(string, captures[0]);
+ index = njs_string_index(string, c);

} else {
- index = captures[0];
+ index = c;
}

njs_set_number(&prop->value, index);

if (pattern->global || pattern->sticky) {
+ c = njs_regex_capture(match_data, 1);
+
if (utf8 == NJS_STRING_UTF8) {
- index = njs_string_index(string, captures[1]);
+ index = njs_string_index(string, c);

} else {
- index = captures[1];
+ index = c;
}

njs_set_number(&value, index);
@@ -1226,7 +1110,7 @@ fail:

done:

- njs_regex_match_data_free(match_data, vm->regex_context);
+ njs_regex_match_data_free(match_data, vm->regex_generic_ctx);

return (ret == NJS_OK) ? array : NULL;
}
@@ -1919,7 +1803,7 @@ static const njs_object_prop_t njs_rege
.name = njs_string("global"),
.value = njs_value(NJS_INVALID, 1, NAN),
.getter = njs_native_function2(njs_regexp_prototype_flag, 0,
- NJS_REGEXP_GLOBAL),
+ NJS_REGEX_GLOBAL),
.setter = njs_value(NJS_UNDEFINED, 0, NAN),
_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject Author Views Posted

[njs] RegExp: encapsulating PCRE API.

Dmitry Volyntsev 138 November 11, 2021 09:32AM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 127
Record Number of Users: 6 on February 13, 2018
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready