Welcome! Log In Create A New Profile

Advanced

[njs] Building regexp backend as an external.

Dmitry Volyntsev
December 24, 2021 12:00PM
details: https://hg.nginx.org/njs/rev/8ef86212d24b
branches:
changeset: 1784:8ef86212d24b
user: Dmitry Volyntsev <xeioex@nginx.com>
date: Fri Dec 24 15:48:11 2021 +0000
description:
Building regexp backend as an external.

This makes it possible to avoid building PCRE-specific code as part of
libnjs.a, thus supporting nginx builds with flags like --with-pcre=PCRE_DIR.
When the --no-pcre configure option is provided, external code has to
implement the methods declared in njs_regex.h.

This also closes issue #18 on GitHub.

diffstat:

auto/options | 4 +
auto/pcre | 113 ++++----
auto/sources | 14 +-
auto/summary | 5 +-
external/njs_regex.c | 628 +++++++++++++++++++++++++++++++++++++++++++++++++++
nginx/config | 1 +
nginx/config.make | 4 +-
nginx/ngx_js_regex.c | 16 +
src/njs_pcre.c | 408 ---------------------------------
src/njs_pcre2.c | 240 -------------------
10 files changed, 718 insertions(+), 715 deletions(-)

diffs (truncated from 1536 to 1000 lines):

diff -r c72703d60d43 -r 8ef86212d24b auto/options
--- a/auto/options Fri Dec 24 15:48:09 2021 +0000
+++ b/auto/options Fri Dec 24 15:48:11 2021 +0000
@@ -12,6 +12,8 @@ NJS_ADDRESS_SANITIZER=NO
NJS_TEST262=YES

NJS_OPENSSL=YES
+
+NJS_PCRE=YES
NJS_TRY_PCRE2=YES

NJS_CONFIGURE_OPTIONS=
@@ -35,6 +37,8 @@ do
--test262=*) NJS_TEST262="$value" ;;

--no-openssl) NJS_OPENSSL=NO ;;
+
+ --no-pcre) NJS_PCRE=NO ;;
--no-pcre2) NJS_TRY_PCRE2=NO ;;

--help)
diff -r c72703d60d43 -r 8ef86212d24b auto/pcre
--- a/auto/pcre Fri Dec 24 15:48:09 2021 +0000
+++ b/auto/pcre Fri Dec 24 15:48:11 2021 +0000
@@ -2,72 +2,81 @@
# Copyright (C) Igor Sysoev
# Copyright (C) NGINX, Inc.

-njs_found=no
-NJS_HAVE_PCRE2=NO
+NJS_PCRE_CFLAGS=
+NJS_PCRE_LIB=
+
+NJS_HAVE_PCRE=NO
+
+if [ $NJS_PCRE = YES ]; then

-if [ $NJS_TRY_PCRE2 = YES ]; then
- if /bin/sh -c "(pcre2-config --version)" >> $NJS_AUTOCONF_ERR 2>&1; then
+ njs_found=no

- NJS_PCRE_CFLAGS=`pcre2-config --cflags`
- NJS_PCRE_LIB=`pcre2-config --libs8`
+ if [ $NJS_TRY_PCRE2 = YES ]; then
+ if /bin/sh -c "(pcre2-config --version)" >> $NJS_AUTOCONF_ERR 2>&1; then
+
+ NJS_PCRE_CFLAGS=`pcre2-config --cflags`
+ NJS_PCRE_LIB=`pcre2-config --libs8`

- njs_feature="PCRE2 library"
- njs_feature_name=NJS_HAVE_PCRE2
- njs_feature_run=no
- njs_feature_incs="-DPCRE2_CODE_UNIT_WIDTH=8 $NJS_PCRE_CFLAGS"
- njs_feature_libs=$NJS_PCRE_LIB
- njs_feature_test="#include <pcre2.h>
+ njs_feature="PCRE2 library"
+ njs_feature_name=NJS_HAVE_PCRE2
+ njs_feature_run=no
+ njs_feature_incs="-DPCRE2_CODE_UNIT_WIDTH=8 $NJS_PCRE_CFLAGS"
+ njs_feature_libs=$NJS_PCRE_LIB
+ njs_feature_test="#include <pcre2.h>

- int main(void) {
- pcre2_code *re;
+ int main(void) {
+ pcre2_code *re;

- re = pcre2_compile((PCRE2_SPTR)\"\",
- PCRE2_ZERO_TERMINATED, 0,
- NULL, NULL, NULL);
- return (re == NULL);
- }"
+ re = pcre2_compile((PCRE2_SPTR)\"\",
+ PCRE2_ZERO_TERMINATED, 0,
+ NULL, NULL, NULL);
+ return (re == NULL);
+ }"

- . auto/feature
+ . auto/feature

- if [ $njs_found = yes ]; then
- NJS_HAVE_PCRE2=YES
- echo " + PCRE2 version: `pcre2-config --version`"
+ if [ $njs_found = yes ]; then
+ NJS_HAVE_PCRE=YES
+ echo " + PCRE2 version: `pcre2-config --version`"
+ fi
fi
fi
-fi
+
+ if [ $njs_found = no ]; then
+ if /bin/sh -c "(pcre-config --version)" >> $NJS_AUTOCONF_ERR 2>&1; then
+
+ NJS_PCRE_CFLAGS=`pcre-config --cflags`
+ NJS_PCRE_LIB=`pcre-config --libs`

-if [ $njs_found = no ]; then
- if /bin/sh -c "(pcre-config --version)" >> $NJS_AUTOCONF_ERR 2>&1; then
-
- NJS_PCRE_CFLAGS=`pcre-config --cflags`
- NJS_PCRE_LIB=`pcre-config --libs`
+ njs_feature="PCRE library"
+ njs_feature_name=NJS_HAVE_PCRE
+ njs_feature_run=no
+ njs_feature_incs=$NJS_PCRE_CFLAGS
+ njs_feature_libs=$NJS_PCRE_LIB
+ njs_feature_test="#include <pcre.h>

- njs_feature="PCRE library"
- njs_feature_name=NJS_HAVE_PCRE
- njs_feature_run=no
- njs_feature_incs=$NJS_PCRE_CFLAGS
- njs_feature_libs=$NJS_PCRE_LIB
- njs_feature_test="#include <pcre.h>
-
- int main(void) {
- pcre *re;
+ int main(void) {
+ pcre *re;

- re = pcre_compile(NULL, 0, NULL, 0, NULL);
- if (re == NULL)
- return 1;
- return 0;
- }"
- . auto/feature
+ re = pcre_compile(NULL, 0, NULL, 0, NULL);
+ if (re == NULL)
+ return 1;
+ return 0;
+ }"
+ . auto/feature

- if [ $njs_found = yes ]; then
- echo " + PCRE version: `pcre-config --version`"
+ if [ $njs_found = yes ]; then
+ NJS_HAVE_PCRE=YES
+ echo " + PCRE version: `pcre-config --version`"
+ fi
fi
fi
-fi

-if [ $njs_found = no ]; then
- echo
- echo $0: error: no PCRE library found.
- echo
- exit 1;
+ if [ $njs_found = no ]; then
+ echo
+ echo $0: error: no PCRE library found.
+ echo
+ exit 1;
+ fi
+
fi
diff -r c72703d60d43 -r 8ef86212d24b auto/sources
--- a/auto/sources Fri Dec 24 15:48:09 2021 +0000
+++ b/auto/sources Fri Dec 24 15:48:11 2021 +0000
@@ -59,14 +59,6 @@ NJS_LIB_SRCS=" \
src/njs_async.c \
"

-NJS_LIB_PCRE_SRCS=" \
- src/njs_pcre.c \
-"
-
-NJS_LIB_PCRE2_SRCS=" \
- src/njs_pcre2.c \
-"
-
NJS_LIB_TEST_SRCS=" \
src/test/lvlhsh_unit_test.c \
src/test/random_unit_test.c \
@@ -79,10 +71,8 @@ NJS_TEST_SRCS=" \
src/test/njs_benchmark.c \
"

-if [ "$NJS_HAVE_PCRE2" = "YES" ]; then
- NJS_LIB_SRCS="$NJS_LIB_SRCS $NJS_LIB_PCRE2_SRCS"
-else
- NJS_LIB_SRCS="$NJS_LIB_SRCS $NJS_LIB_PCRE_SRCS"
+if [ "$NJS_PCRE" = "YES" ]; then
+ NJS_LIB_SRCS="$NJS_LIB_SRCS external/njs_regex.c"
fi

NJS_TS_SRCS=$(find ts/ -name "*.d.ts" -o -name "*.json")
diff -r c72703d60d43 -r 8ef86212d24b auto/summary
--- a/auto/summary Fri Dec 24 15:48:09 2021 +0000
+++ b/auto/summary Fri Dec 24 15:48:11 2021 +0000
@@ -9,7 +9,10 @@ echo
echo " + using CC: \"$CC\""
echo " + using CFLAGS: \"$NJS_CFLAGS $NJS_CC_OPT $CFLAGS\""
echo
-echo " + using PCRE library: $NJS_PCRE_LIB"
+
+if [ $NJS_HAVE_PCRE = YES ]; then
+ echo " + using PCRE library: $NJS_PCRE_LIB"
+fi

if [ $NJS_HAVE_READLINE = YES ]; then
echo " + using readline library: $NJS_READLINE_LIB"
diff -r c72703d60d43 -r 8ef86212d24b external/njs_regex.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/external/njs_regex.c Fri Dec 24 15:48:11 2021 +0000
@@ -0,0 +1,628 @@
+
+/*
+ * Copyright (C) Igor Sysoev
+ * Copyright (C) Dmitry Volyntsev
+ * Copyright (C) NGINX, Inc.
+ */
+
+
+#include <njs_main.h>
+
+#ifdef NJS_HAVE_PCRE2
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+
+
+static const u_char* njs_regex_pcre2_error(int errcode, u_char buffer[128]);
+
+#else
+
+#include <pcre.h>
+
+
+static void *njs_pcre_malloc(size_t size);
+static void njs_pcre_free(void *p);
+
+
+static njs_regex_generic_ctx_t *regex_context;
+
+#endif
+
+
+njs_regex_generic_ctx_t *
+njs_regex_generic_ctx_create(njs_pcre_malloc_t private_malloc,
+ njs_pcre_free_t private_free, void *memory_data)
+{
+#ifdef NJS_HAVE_PCRE2
+
+ return pcre2_general_context_create(private_malloc, private_free,
+ memory_data);
+#else
+
+ njs_regex_generic_ctx_t *ctx;
+
+ ctx = private_malloc(sizeof(njs_regex_generic_ctx_t), memory_data);
+
+ if (njs_fast_path(ctx != NULL)) {
+ ctx->private_malloc = private_malloc;
+ ctx->private_free = private_free;
+ ctx->memory_data = memory_data;
+ }
+
+ return ctx;
+
+#endif
+}
+
+
+njs_regex_compile_ctx_t *
+njs_regex_compile_ctx_create(njs_regex_generic_ctx_t *ctx)
+{
+#ifdef NJS_HAVE_PCRE2
+
+ return pcre2_compile_context_create(ctx);
+
+#else
+
+ return ctx;
+
+#endif
+}
+
+
+
+njs_int_t
+njs_regex_escape(njs_mp_t *mp, njs_str_t *text)
+{
+#ifdef NJS_HAVE_PCRE2
+
+ return NJS_OK;
+
+#else
+
+ /*
+ * 1) PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
+ * lone closing square brackets as invalid. Whereas according
+ * to ES6: 11.8.5 it is a valid regexp expression.
+ *
+ * 2) escaping zero byte characters as "\u0000".
+ *
+ * Escaping it here as a workaround.
+ */
+
+ size_t brackets, zeros;
+ u_char *p, *dst, *start, *end;
+ njs_bool_t in;
+
+ start = text->start;
+ end = text->start + text->length;
+
+ in = 0;
+ zeros = 0;
+ brackets = 0;
+
+ for (p = start; p < end; p++) {
+
+ switch (*p) {
+ case '[':
+ in = 1;
+ break;
+
+ case ']':
+ if (!in) {
+ brackets++;
+ }
+
+ in = 0;
+ break;
+
+ case '\\':
+ p++;
+
+ if (p == end || *p != '\0') {
+ break;
+ }
+
+ /* Fall through. */
+
+ case '\0':
+ zeros++;
+ break;
+ }
+ }
+
+ if (!brackets && !zeros) {
+ return NJS_OK;
+ }
+
+ text->length = text->length + brackets + zeros * njs_length("\\u0000");
+
+ text->start = njs_mp_alloc(mp, text->length);
+ if (njs_slow_path(text->start == NULL)) {
+ return NJS_ERROR;
+ }
+
+ in = 0;
+ dst = text->start;
+
+ for (p = start; p < end; p++) {
+
+ switch (*p) {
+ case '[':
+ in = 1;
+ break;
+
+ case ']':
+ if (!in) {
+ *dst++ = '\\';
+ }
+
+ in = 0;
+ break;
+
+ case '\\':
+ *dst++ = *p++;
+
+ if (p == end) {
+ goto done;
+ }
+
+ if (*p != '\0') {
+ break;
+ }
+
+ /* Fall through. */
+
+ case '\0':
+ dst = njs_cpymem(dst, "\\u0000", 6);
+ continue;
+ }
+
+ *dst++ = *p;
+ }
+
+done:
+
+ text->length = dst - text->start;
+
+ return NJS_OK;
+
+#endif
+}
+
+
+njs_int_t
+njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len,
+ njs_regex_flags_t flags, njs_regex_compile_ctx_t *cctx, njs_trace_t *trace)
+{
+#ifdef NJS_HAVE_PCRE2
+
+ int ret;
+ u_char *error;
+ size_t erroff;
+ njs_uint_t options;
+ u_char errstr[128];
+
+ options = PCRE2_ALT_BSUX | PCRE2_MATCH_UNSET_BACKREF;
+
+ if ((flags & NJS_REGEX_IGNORE_CASE)) {
+ options |= PCRE2_CASELESS;
+ }
+
+ if ((flags & NJS_REGEX_MULTILINE)) {
+ options |= PCRE2_MULTILINE;
+ }
+
+ if ((flags & NJS_REGEX_STICKY)) {
+ options |= PCRE2_ANCHORED;
+ }
+
+ if ((flags & NJS_REGEX_UTF8)) {
+ options |= PCRE2_UTF;
+ }
+
+ regex->code = pcre2_compile(source, len, options, &ret, &erroff, cctx);
+
+ if (njs_slow_path(regex->code == NULL)) {
+ error = &source[erroff];
+
+ njs_alert(trace, NJS_LEVEL_ERROR,
+ "pcre_compile2(\"%s\") failed: %s at \"%s\"",
+ source, njs_regex_pcre2_error(ret, errstr), error);
+
+ return NJS_DECLINED;
+ }
+
+ ret = pcre2_pattern_info(regex->code, PCRE2_INFO_CAPTURECOUNT,
+ &regex->ncaptures);
+
+ if (njs_slow_path(ret < 0)) {
+ njs_alert(trace, NJS_LEVEL_ERROR,
+ "pcre2_pattern_info(\"%s\", PCRE2_INFO_CAPTURECOUNT) failed: %s",
+ source, njs_regex_pcre2_error(ret, errstr));
+
+ return NJS_ERROR;
+ }
+
+ ret = pcre2_pattern_info(regex->code, PCRE2_INFO_BACKREFMAX,
+ &regex->backrefmax);
+
+ if (njs_slow_path(ret < 0)) {
+ njs_alert(trace, NJS_LEVEL_ERROR,
+ "pcre2_pattern_info(\"%s\", PCRE2_INFO_BACKREFMAX) failed: %s",
+ source, njs_regex_pcre2_error(ret, errstr));
+
+ return NJS_ERROR;
+ }
+
+ /* Reserve additional elements for the first "$0" capture. */
+ regex->ncaptures++;
+
+ if (regex->ncaptures > 1) {
+ ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMECOUNT,
+ &regex->nentries);
+
+ if (njs_slow_path(ret < 0)) {
+ njs_alert(trace, NJS_LEVEL_ERROR,
+ "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMECOUNT) failed: %s",
+ source, njs_regex_pcre2_error(ret, errstr));
+
+ return NJS_ERROR;
+ }
+
+ if (regex->nentries != 0) {
+ ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMEENTRYSIZE,
+ &regex->entry_size);
+
+ if (njs_slow_path(ret < 0)) {
+ njs_alert(trace, NJS_LEVEL_ERROR,
+ "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMEENTRYSIZE)"
+ " failed: %s", source,
+ njs_regex_pcre2_error(ret, errstr));
+
+ return NJS_ERROR;
+ }
+
+ ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMETABLE,
+ &regex->entries);
+
+ if (njs_slow_path(ret < 0)) {
+ njs_alert(trace, NJS_LEVEL_ERROR,
+ "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMETABLE) "
+ "failed: %s", source,
+ njs_regex_pcre2_error(ret, errstr));
+
+ return NJS_ERROR;
+ }
+ }
+ }
+
+ return NJS_OK;
+
+#else
+
+ int ret, err, erroff;
+ char *pattern, *error;
+ void *(*saved_malloc)(size_t size);
+ void (*saved_free)(void *p);
+ njs_uint_t options;
+ const char *errstr;
+ njs_regex_generic_ctx_t *ctx;
+
+ ctx = cctx;
+
+ ret = NJS_ERROR;
+
+ saved_malloc = pcre_malloc;
+ pcre_malloc = njs_pcre_malloc;
+ saved_free = pcre_free;
+ pcre_free = njs_pcre_free;
+ regex_context = ctx;
+
+#ifdef PCRE_JAVASCRIPT_COMPAT
+ /* JavaScript compatibility has been introduced in PCRE-7.7. */
+ options = PCRE_JAVASCRIPT_COMPAT;
+#else
+ options = 0;
+#endif
+
+ if ((flags & NJS_REGEX_IGNORE_CASE)) {
+ options |= PCRE_CASELESS;
+ }
+
+ if ((flags & NJS_REGEX_MULTILINE)) {
+ options |= PCRE_MULTILINE;
+ }
+
+ if ((flags & NJS_REGEX_STICKY)) {
+ options |= PCRE_ANCHORED;
+ }
+
+ if ((flags & NJS_REGEX_UTF8)) {
+ options |= PCRE_UTF8;
+ }
+
+ pattern = (char *) source;
+
+ regex->code = pcre_compile(pattern, options, &errstr, &erroff, NULL);
+
+ if (njs_slow_path(regex->code == NULL)) {
+ error = pattern + erroff;
+
+ if (*error != '\0') {
+ njs_alert(trace, NJS_LEVEL_ERROR,
+ "pcre_compile(\"%s\") failed: %s at \"%s\"",
+ pattern, errstr, error);
+
+ } else {
+ njs_alert(trace, NJS_LEVEL_ERROR,
+ "pcre_compile(\"%s\") failed: %s", pattern, errstr);
+ }
+
+ ret = NJS_DECLINED;
+
+ goto done;
+ }
+
+ regex->extra = pcre_study(regex->code, 0, &errstr);
+
+ if (njs_slow_path(errstr != NULL)) {
+ njs_alert(trace, NJS_LEVEL_WARN,
+ "pcre_study(\"%s\") failed: %s", pattern, errstr);
+ }
+
+ err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_CAPTURECOUNT,
+ &regex->ncaptures);
+
+ if (njs_slow_path(err < 0)) {
+ njs_alert(trace, NJS_LEVEL_ERROR,
+ "pcre_fullinfo(\"%s\", PCRE_INFO_CAPTURECOUNT) failed: %d",
+ pattern, err);
+
+ goto done;
+ }
+
+ err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_BACKREFMAX,
+ &regex->backrefmax);
+
+ if (njs_slow_path(err < 0)) {
+ njs_alert(trace, NJS_LEVEL_ERROR,
+ "pcre_fullinfo(\"%s\", PCRE_INFO_BACKREFMAX) failed: %d",
+ pattern, err);
+
+ goto done;
+ }
+
+ /* Reserve additional elements for the first "$0" capture. */
+ regex->ncaptures++;
+
+ if (regex->ncaptures > 1) {
+ err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMECOUNT,
+ &regex->nentries);
+
+ if (njs_slow_path(err < 0)) {
+ njs_alert(trace, NJS_LEVEL_ERROR,
+ "pcre_fullinfo(\"%s\", PCRE_INFO_NAMECOUNT) failed: %d",
+ pattern, err);
+
+ goto done;
+ }
+
+ if (regex->nentries != 0) {
+ err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMEENTRYSIZE,
+ &regex->entry_size);
+
+ if (njs_slow_path(err < 0)) {
+ njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
+ "PCRE_INFO_NAMEENTRYSIZE) failed: %d", pattern, err);
+
+ goto done;
+ }
+
+ err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMETABLE,
+ &regex->entries);
+
+ if (njs_slow_path(err < 0)) {
+ njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
+ "PCRE_INFO_NAMETABLE) failed: %d", pattern, err);
+
+ goto done;
+ }
+ }
+ }
+
+ ret = NJS_OK;
+
+done:
+
+ pcre_malloc = saved_malloc;
+ pcre_free = saved_free;
+ regex_context = NULL;
+
+ return ret;
+
+#endif
+}
+
+
+njs_bool_t
+njs_regex_is_valid(njs_regex_t *regex)
+{
+ return (regex->code != NULL);
+}
+
+
+njs_int_t
+njs_regex_named_captures(njs_regex_t *regex, njs_str_t *name, int n)
+{
+ char *entry;
+
+ if (name == NULL) {
+ return regex->nentries;
+ }
+
+ if (n >= regex->nentries) {
+ return NJS_ERROR;
+ }
+
+ entry = regex->entries + regex->entry_size * n;
+
+ name->start = (u_char *) entry + 2;
+ name->length = njs_strlen(name->start);
+
+ return (entry[0] << 8) + entry[1];
+}
+
+
+njs_regex_match_data_t *
+njs_regex_match_data(njs_regex_t *regex, njs_regex_generic_ctx_t *ctx)
+{
+#ifdef NJS_HAVE_PCRE2
+
+ if (regex != NULL) {
+ return pcre2_match_data_create_from_pattern(regex->code, ctx);
+ }
+
+ return pcre2_match_data_create(0, ctx);
+
+#else
+
+ size_t size;
+ njs_uint_t ncaptures;
+ njs_regex_match_data_t *match_data;
+
+ if (regex != NULL) {
+ ncaptures = regex->ncaptures - 1;
+
+ } else {
+ ncaptures = 0;
+ }
+
+ /* Each capture is stored in 3 "int" vector elements. */
+ ncaptures *= 3;
+ size = sizeof(njs_regex_match_data_t) + ncaptures * sizeof(int);
+
+ match_data = ctx->private_malloc(size, ctx->memory_data);
+
+ if (njs_fast_path(match_data != NULL)) {
+ match_data->ncaptures = ncaptures + 3;
+ }
+
+ return match_data;
+
+#endif
+}
+
+
+void
+njs_regex_match_data_free(njs_regex_match_data_t *match_data,
+ njs_regex_generic_ctx_t *ctx)
+{
+#ifdef NJS_HAVE_PCRE2
+
+ pcre2_match_data_free(match_data);
+
+#else
+
+ ctx->private_free(match_data, ctx->memory_data);
+
+#endif
+}
+
+
+njs_int_t
+njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t off,
+ size_t len, njs_regex_match_data_t *match_data, njs_trace_t *trace)
+{
+#ifdef NJS_HAVE_PCRE2
+
+ int ret;
+ u_char errstr[128];
+
+ ret = pcre2_match(regex->code, subject, len, off, 0, match_data, NULL);
+
+ if (ret < 0) {
+ if (ret == PCRE2_ERROR_NOMATCH) {
+ return NJS_DECLINED;
+ }
+
+ njs_alert(trace, NJS_LEVEL_ERROR, "pcre2_match() failed: %s",
+ njs_regex_pcre2_error(ret, errstr));
+ return NJS_ERROR;
+ }
+
+ return ret;
+
+#else
+
+ int ret;
+
+ ret = pcre_exec(regex->code, regex->extra, (const char *) subject, len,
+ off, 0, match_data->captures, match_data->ncaptures);
+
+ if (ret <= PCRE_ERROR_NOMATCH) {
+ if (ret == PCRE_ERROR_NOMATCH) {
+ return NJS_DECLINED;
+ }
+
+ njs_alert(trace, NJS_LEVEL_ERROR, "pcre_exec() failed: %d", ret);
+ return NJS_ERROR;
+ }
+
+ return ret;
+
+#endif
+}
+
+
+size_t
+njs_regex_capture(njs_regex_match_data_t *match_data, njs_uint_t n)
+{
+#ifdef NJS_HAVE_PCRE2
+
+ size_t c;
+
+ c = pcre2_get_ovector_pointer(match_data)[n];
+
+ if (c == PCRE2_UNSET) {
+ return NJS_REGEX_UNSET;
+ }
+
+ return c;
+
+#else
+
+ return match_data->captures[n];
+
+#endif
+}
+
+#ifdef NJS_HAVE_PCRE2
+
+static const u_char *
+njs_regex_pcre2_error(int errcode, u_char buffer[128])
+{
+ pcre2_get_error_message(errcode, buffer, 128);
+
+ return buffer;
+}
+
+#else
+
+static void *
+njs_pcre_malloc(size_t size)
+{
+ return regex_context->private_malloc(size, regex_context->memory_data);
+}
+
+
+static void
+njs_pcre_free(void *p)
+{
+ regex_context->private_free(p, regex_context->memory_data);
+}
+
+#endif
+
+
diff -r c72703d60d43 -r 8ef86212d24b nginx/config
--- a/nginx/config Fri Dec 24 15:48:09 2021 +0000
+++ b/nginx/config Fri Dec 24 15:48:11 2021 +0000
@@ -4,6 +4,7 @@ NJS_DEPS="$ngx_addon_dir/ngx_js.h \
$ngx_addon_dir/ngx_js_fetch.h"
NJS_SRCS="$ngx_addon_dir/ngx_js.c \
$ngx_addon_dir/ngx_js_fetch.c \
+ $ngx_addon_dir/ngx_js_regex.c \
$ngx_addon_dir/../external/njs_webcrypto_module.c"

if [ $HTTP != NO ]; then
diff -r c72703d60d43 -r 8ef86212d24b nginx/config.make
--- a/nginx/config.make Fri Dec 24 15:48:09 2021 +0000
+++ b/nginx/config.make Fri Dec 24 15:48:11 2021 +0000
@@ -3,7 +3,7 @@ cat << END
$ngx_addon_dir/../build/libnjs.a: $NGX_MAKEFILE
cd $ngx_addon_dir/.. \\
&& if [ -f build/Makefile ]; then \$(MAKE) clean; fi \\
- && CFLAGS="\$(CFLAGS)" CC="\$(CC)" ./configure --no-openssl --no-pcre2 \\
- && \$(MAKE)
+ && CFLAGS="\$(CFLAGS)" CC="\$(CC)" ./configure --no-openssl --no-pcre \\
+ && \$(MAKE) libnjs

END
diff -r c72703d60d43 -r 8ef86212d24b nginx/ngx_js_regex.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nginx/ngx_js_regex.c Fri Dec 24 15:48:11 2021 +0000
@@ -0,0 +1,16 @@
+
+/*
+ * Copyright (C) Dmitry Volyntsev
+ * Copyright (C) NGINX, Inc.
+ */
+
+
+#include <ngx_config.h>
+
+#if (NGX_PCRE2)
+
+#define NJS_HAVE_PCRE2 1
+
+#endif
+
+#include "../external/njs_regex.c"
diff -r c72703d60d43 -r 8ef86212d24b src/njs_pcre.c
--- a/src/njs_pcre.c Fri Dec 24 15:48:09 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,408 +0,0 @@
-
-/*
- * Copyright (C) Igor Sysoev
- * Copyright (C) NGINX, Inc.
- */
-
-
-#include <njs_main.h>
-
-#include <pcre.h>
-
-
-static void *njs_pcre_malloc(size_t size);
-static void njs_pcre_free(void *p);
-
-
-static njs_regex_generic_ctx_t *regex_context;
-
-
-njs_regex_generic_ctx_t *
-njs_regex_generic_ctx_create(njs_pcre_malloc_t private_malloc,
- njs_pcre_free_t private_free, void *memory_data)
-{
- njs_regex_generic_ctx_t *ctx;
-
- ctx = private_malloc(sizeof(njs_regex_generic_ctx_t), memory_data);
-
- if (njs_fast_path(ctx != NULL)) {
- ctx->private_malloc = private_malloc;
- ctx->private_free = private_free;
- ctx->memory_data = memory_data;
- }
-
- return ctx;
-}
-
-
-njs_regex_compile_ctx_t *
-njs_regex_compile_ctx_create(njs_regex_generic_ctx_t *ctx)
-{
- return ctx;
-}
-
-
-/*
- * 1) PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
- * lone closing square brackets as invalid. Whereas according
- * to ES6: 11.8.5 it is a valid regexp expression.
- *
- * 2) escaping zero byte characters as "\u0000".
- *
- * Escaping it here as a workaround.
- */
-
-njs_int_t
-njs_regex_escape(njs_mp_t *mp, njs_str_t *text)
-{
- size_t brackets, zeros;
- u_char *p, *dst, *start, *end;
- njs_bool_t in;
-
- start = text->start;
- end = text->start + text->length;
-
- in = 0;
- zeros = 0;
- brackets = 0;
-
- for (p = start; p < end; p++) {
-
- switch (*p) {
- case '[':
- in = 1;
- break;
-
- case ']':
- if (!in) {
- brackets++;
- }
-
- in = 0;
- break;
-
- case '\\':
- p++;
-
- if (p == end || *p != '\0') {
- break;
- }
-
- /* Fall through. */
-
- case '\0':
- zeros++;
- break;
- }
- }
-
- if (!brackets && !zeros) {
- return NJS_OK;
- }
-
- text->length = text->length + brackets + zeros * njs_length("\\u0000");
-
- text->start = njs_mp_alloc(mp, text->length);
- if (njs_slow_path(text->start == NULL)) {
- return NJS_ERROR;
- }
-
- in = 0;
- dst = text->start;
-
- for (p = start; p < end; p++) {
-
- switch (*p) {
- case '[':
_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject Author Views Posted

[njs] Building regexp backend as an external.

Dmitry Volyntsev 281 December 24, 2021 12:00PM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 302
Record Number of Users: 8 on April 13, 2023
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready