Dmitry Volyntsev
April 17, 2019 11:52AM
details: https://hg.nginx.org/njs/rev/39790a9d9b58
branches:
changeset: 899:39790a9d9b58
user: Dmitry Volyntsev <xeioex@nginx.com>
date: Wed Apr 17 18:43:13 2019 +0300
description:
Fixed handling of Unicode-only regexp expressions.

This fixes issue #125 on GitHub.

diffstat:

njs/njs_regexp.c | 31 +++++++--
njs/test/njs_unit_test.c | 144 +++++++++++++++++++++++++++++++++++++---------
nxt/nxt_pcre.c | 2 +
3 files changed, 139 insertions(+), 38 deletions(-)

diffs (259 lines):

diff -r a88bf03264b4 -r 39790a9d9b58 njs/njs_regexp.c
--- a/njs/njs_regexp.c Tue Apr 16 18:34:57 2019 +0300
+++ b/njs/njs_regexp.c Wed Apr 17 18:43:13 2019 +0300
@@ -315,30 +315,43 @@ njs_regexp_pattern_create(njs_vm_t *vm,

ret = njs_regexp_pattern_compile(vm, &pattern->regex[0],
&pattern->source[1], options);
- if (nxt_slow_path(ret < 0)) {
- return NULL;
+
+ if (nxt_fast_path(ret >= 0)) {
+ pattern->ncaptures = ret;
+
+ } else if (ret < 0 && ret != NXT_DECLINED) {
+ goto fail;
}

- pattern->ncaptures = ret;
-
ret = njs_regexp_pattern_compile(vm, &pattern->regex[1],
&pattern->source[1], options | PCRE_UTF8);
if (nxt_fast_path(ret >= 0)) {

- if (nxt_slow_path((u_int) ret != pattern->ncaptures)) {
+ if (nxt_slow_path(nxt_regex_is_valid(&pattern->regex[0])
+ && (u_int) ret != pattern->ncaptures))
+ {
njs_internal_error(vm, "regexp pattern compile failed");
- nxt_mp_free(vm->mem_pool, pattern);
- return NULL;
+ goto fail;
}

} else if (ret != NXT_DECLINED) {
- nxt_mp_free(vm->mem_pool, pattern);
- return NULL;
+ goto fail;
+ }
+
+ if (!nxt_regex_is_valid(&pattern->regex[0])
+ && !nxt_regex_is_valid(&pattern->regex[1]))
+ {
+ goto fail;
}

*end = '/';

return pattern;
+
+fail:
+
+ nxt_mp_free(vm->mem_pool, pattern);
+ return NULL;
}


diff -r a88bf03264b4 -r 39790a9d9b58 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c Tue Apr 16 18:34:57 2019 +0300
+++ b/njs/test/njs_unit_test.c Wed Apr 17 18:43:13 2019 +0300
@@ -7,6 +7,7 @@
#include <njs_core.h>
#include <nxt_lvlhsh.h>
#include <nxt_djb_hash.h>
+#include <nxt_pcre.h>
#include <string.h>
#include <stdlib.h>
#include <sys/resource.h>
@@ -4451,6 +4452,12 @@ static njs_unit_test_t njs_test[] =
{ nxt_string("'α'.toUTF8()[0]"),
nxt_string("\xCE") },

+ { nxt_string("var r = /^\\x80$/; r.source + r.source.length"),
+ nxt_string("^\\x80$6") },
+
+ { nxt_string("var r = /^\\\\x80$/; r.source + r.source.length"),
+ nxt_string("^\\\\x80$7") },
+
{ nxt_string("/^\\x80$/.test('\\x80'.toBytes())"),
nxt_string("true") },

@@ -11957,6 +11964,25 @@ static njs_unit_test_t njs_tz_test[] =
};


+static njs_unit_test_t njs_regexp_test[] =
+{
+ { nxt_string("/[\\\\u02E0-\\\\u02E4]/"),
+ nxt_string("/[\\\\u02E0-\\\\u02E4]/") },
+
+ { nxt_string("/[\\u02E0-\\u02E4]/"),
+ nxt_string("/[\\u02E0-\\u02E4]/") },
+
+ { nxt_string("RegExp('[\\\\u02E0-\\\\u02E4]')"),
+ nxt_string("/[\\u02E0-\\u02E4]/") },
+
+ { nxt_string("/[\\u0430-\\u044f]+/.test('тест')"),
+ nxt_string("true") },
+
+ { nxt_string("RegExp('[\\\\u0430-\\\\u044f]+').test('тест')"),
+ nxt_string("true") },
+};
+
+
typedef struct {
nxt_lvlhsh_t hash;
const njs_extern_t *proto;
@@ -12715,6 +12741,85 @@ done:


static nxt_int_t
+njs_timezone_optional_test(nxt_bool_t disassemble, nxt_bool_t verbose)
+{
+ size_t size;
+ u_char buf[16];
+ time_t clock;
+ struct tm tm;
+ nxt_int_t ret;
+
+ /*
+ * Chatham Islands NZ-CHAT time zone.
+ * Standard time: UTC+12:45, Daylight Saving time: UTC+13:45.
+ */
+ (void) putenv((char *) "TZ=Pacific/Chatham");
+ tzset();
+
+ clock = 0;
+ localtime_r(&clock, &tm);
+
+ size = strftime((char *) buf, sizeof(buf), "%z", &tm);
+
+ if (memcmp(buf, "+1245", size) == 0) {
+ ret = njs_unit_test(njs_tz_test, nxt_nitems(njs_tz_test), disassemble,
+ verbose);
+ if (ret != NXT_OK) {
+ return ret;
+ }
+
+ nxt_printf("njs timezone tests passed\n");
+
+ } else {
+ nxt_printf("njs timezone tests skipped, timezone is unavailable\n");
+ }
+
+ return NXT_OK;
+}
+
+static nxt_int_t
+njs_regexp_optional_test(nxt_bool_t disassemble, nxt_bool_t verbose)
+{
+ int erroff;
+ pcre *re1, *re2;
+ njs_ret_t ret;
+ const char *errstr;
+
+ /*
+ * pcre-8.21 crashes when it compiles unicode escape codes inside
+ * square brackets when PCRE_UTF8 option is provided.
+ * Catching it in runtime by compiling it without PCRE_UTF8. Normally it
+ * should return NULL and "character value in \u.... sequence is too large"
+ * error string.
+ */
+ re1 = pcre_compile("/[\\u0410]/", PCRE_JAVASCRIPT_COMPAT, &errstr, &erroff,
+ NULL);
+
+ /*
+ * pcre-7.8 fails to compile unicode escape codes inside square brackets
+ * even when PCRE_UTF8 option is provided.
+ */
+ re2 = pcre_compile("/[\\u0410]/", PCRE_JAVASCRIPT_COMPAT | PCRE_UTF8,
+ &errstr, &erroff, NULL);
+
+ if (re1 == NULL && re2 != NULL) {
+ ret = njs_unit_test(njs_regexp_test, nxt_nitems(njs_regexp_test),
+ disassemble, verbose);
+ if (ret != NXT_OK) {
+ return ret;
+ }
+
+ nxt_printf("njs unicode regexp tests passed\n");
+
+ } else {
+ nxt_printf("njs unicode regexp tests skipped, libpcre fails\n");
+ }
+
+ return NXT_OK;
+}
+
+
+static nxt_int_t
njs_vm_json_test(nxt_bool_t disassemble, nxt_bool_t verbose)
{
njs_vm_t *vm;
@@ -13025,10 +13130,6 @@ done:
int nxt_cdecl
main(int argc, char **argv)
{
- size_t size;
- u_char buf[16];
- time_t clock;
- struct tm tm;
nxt_int_t ret;
nxt_bool_t disassemble, verbose;

@@ -13059,33 +13160,18 @@ main(int argc, char **argv)
return ret;
}

+ ret = njs_timezone_optional_test(disassemble, verbose);
+ if (ret != NXT_OK) {
+ return ret;
+ }
+
+ ret = njs_regexp_optional_test(disassemble, verbose);
+ if (ret != NXT_OK) {
+ return ret;
+ }
+
nxt_printf("njs unit tests passed\n");

- /*
- * Chatham Islands NZ-CHAT time zone.
- * Standard time: UTC+12:45, Daylight Saving time: UTC+13:45.
- */
- (void) putenv((char *) "TZ=Pacific/Chatham");
- tzset();
-
- clock = 0;
- localtime_r(&clock, &tm);
-
- size = strftime((char *) buf, sizeof(buf), "%z", &tm);
-
- if (memcmp(buf, "+1245", size) == 0) {
- ret = njs_unit_test(njs_tz_test, nxt_nitems(njs_tz_test), disassemble,
- verbose);
- if (ret != NXT_OK) {
- return ret;
- }
-
- nxt_printf("njs timezone tests passed\n");
-
- } else {
- nxt_printf("njs timezone tests skipped, timezone is unavailable\n");
- }
-
ret = njs_vm_json_test(disassemble, verbose);
if (ret != NXT_OK) {
return ret;
diff -r a88bf03264b4 -r 39790a9d9b58 nxt/nxt_pcre.c
--- a/nxt/nxt_pcre.c Tue Apr 16 18:34:57 2019 +0300
+++ b/nxt/nxt_pcre.c Wed Apr 17 18:43:13 2019 +0300
@@ -92,6 +92,8 @@ nxt_regex_compile(nxt_regex_t *regex, u_
"pcre_compile(\"%s\") failed: %s", pattern, errstr);
}

+ ret = NXT_DECLINED;
+
goto done;
}

_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject Author Views Posted

[njs] Fixed handling of Unicode-only regexp expressions.

Dmitry Volyntsev 388 April 17, 2019 11:52AM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 298
Record Number of Users: 8 on April 13, 2023
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready