Welcome! Log In Create A New Profile

Advanced

[njs] Escaping lone closing square brackets in RegExp() constructor.

Dmitry Volyntsev
June 04, 2019 05:46AM
details: https://hg.nginx.org/njs/rev/2054b8410a28
branches:
changeset: 997:2054b8410a28
user: Dmitry Volyntsev <xeioex@nginx.com>
date: Thu May 30 20:05:14 2019 +0300
description:
Escaping lone closing square brackets in RegExp() constructor.

This correctly fixes #157. Previously, in 88263426432d, the escaping
was done only for regexp literals.

diffstat:

njs/njs_regexp.c | 134 +++++++++++++++++++++++++++-------------------
njs/test/njs_unit_test.c | 15 +++++
2 files changed, 92 insertions(+), 57 deletions(-)

diffs (252 lines):

diff -r 1041e3241457 -r 2054b8410a28 njs/njs_regexp.c
--- a/njs/njs_regexp.c Fri May 31 15:11:39 2019 +0300
+++ b/njs/njs_regexp.c Thu May 30 20:05:14 2019 +0300
@@ -206,56 +206,93 @@ njs_regexp_create(njs_vm_t *vm, njs_valu
}


-nxt_inline njs_ret_t
-njs_regexp_escape_bracket(njs_vm_t *vm, nxt_str_t *text, size_t count)
-{
- size_t length, diff;
- u_char *p, *dst, *start, *end;
+/*
+ * PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
+ * lone closing square brackets as invalid. Whereas according
+ * to ES6: 11.8.5 it is a valid regexp expression.
+ *
+ * Escaping it here as a workaround.
+ */

- length = text->length + count;
-
- dst = nxt_mp_alloc(vm->mem_pool, length);
- if (nxt_slow_path(dst == NULL)) {
- njs_memory_error(vm);
- return NJS_ERROR;
- }
+nxt_inline njs_ret_t
+njs_regexp_escape(njs_vm_t *vm, nxt_str_t *text)
+{
+ size_t brackets;
+ u_char *p, *dst, *start, *end;
+ nxt_bool_t in;

start = text->start;
end = text->start + text->length;

+ in = 0;
+ brackets = 0;
+
for (p = start; p < end; p++) {

switch (*p) {
case '[':
- while (++p < end && *p != ']') {
- if (*p == '\\') {
- p++;
- }
- }
-
+ in = 1;
break;

case ']':
- diff = p - start;
- dst = nxt_cpymem(dst, start, diff);
- dst = nxt_cpymem(dst, "\\]", 2);
+ if (!in) {
+ brackets++;
+ }

- start = p + 1;
+ in = 0;
break;

case '\\':
p++;
- break;
}
}

- diff = p - start;
- memcpy(dst, start, diff);
+ if (!brackets) {
+ return NXT_OK;
+ }
+
+ text->length = text->length + brackets;
+
+ text->start = nxt_mp_alloc(vm->mem_pool, text->length);
+ if (nxt_slow_path(text->start == NULL)) {
+ njs_memory_error(vm);
+ return NXT_ERROR;
+ }
+
+ in = 0;
+ dst = text->start;
+
+ for (p = start; p < end; p++) {
+
+ switch (*p) {
+ case '[':
+ in = 1;
+ break;

- text->start = dst - (length - diff);
- text->length = length;
+ case ']':
+ if (!in) {
+ *dst++ = '\\';
+ }
+
+ in = 0;
+ break;
+
+ case '\\':
+ *dst++ = *p++;

- return NJS_OK;
+ if (p == end) {
+ goto done;
+ }
+ }
+
+ *dst++ = *p;
+ }
+
+done:
+
+ text->length = dst - text->start;
+
+ return NXT_OK;
}


@@ -263,14 +300,11 @@ njs_token_t
njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, njs_value_t *value)
{
u_char *p;
- size_t closing_brackets;
nxt_str_t text;
- njs_ret_t ret;
njs_lexer_t *lexer;
njs_regexp_flags_t flags;
njs_regexp_pattern_t *pattern;

- closing_brackets = 0;
lexer = parser->lexer;

for (p = lexer->start; p < lexer->end; p++) {
@@ -298,10 +332,6 @@ njs_regexp_literal(njs_vm_t *vm, njs_par

break;

- case ']':
- closing_brackets++;
- break;
-
case '\\':
if (++p < lexer->end && (*p == '\n' || *p == '\r')) {
goto failed;
@@ -327,28 +357,9 @@ njs_regexp_literal(njs_vm_t *vm, njs_par

lexer->start = p;

- if (closing_brackets != 0) {
- /*
- * PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
- * lone closing square brackets as invalid. Whereas according
- * to ES6: 11.8.5 it is a valid regexp expression.
- *
- * Escaping it here as a workaround.
- */
-
- ret = njs_regexp_escape_bracket(vm, &text, closing_brackets);
- if (nxt_slow_path(ret != NXT_OK)) {
- return NJS_TOKEN_ILLEGAL;
- }
- }
-
pattern = njs_regexp_pattern_create(vm, text.start, text.length,
flags);

- if (closing_brackets != 0) {
- nxt_mp_free(vm->mem_pool, text.start);
- }
-
if (nxt_slow_path(pattern == NULL)) {
return NJS_TOKEN_ILLEGAL;
}
@@ -440,6 +451,7 @@ njs_regexp_pattern_create(njs_vm_t *vm,
int options, ret;
u_char *p, *end;
size_t size;
+ nxt_str_t text;
nxt_uint_t n;
nxt_regex_t *regex;
njs_regexp_group_t *group;
@@ -450,8 +462,16 @@ njs_regexp_pattern_create(njs_vm_t *vm,
size += ((flags & NJS_REGEXP_IGNORE_CASE) != 0);
size += ((flags & NJS_REGEXP_MULTILINE) != 0);

+ text.start = start;
+ text.length = length;
+
+ ret = njs_regexp_escape(vm, &text);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ return NULL;
+ }
+
pattern = nxt_mp_zalloc(vm->mem_pool, sizeof(njs_regexp_pattern_t) + 1
- + length + size + 1);
+ + text.length + size + 1);
if (nxt_slow_path(pattern == NULL)) {
njs_memory_error(vm);
return NULL;
@@ -463,8 +483,8 @@ njs_regexp_pattern_create(njs_vm_t *vm,
pattern->source = p;

*p++ = '/';
- p = memcpy(p, start, length);
- p += length;
+ p = memcpy(p, text.start, text.length);
+ p += text.length;
end = p;
*p++ = '\0';

diff -r 1041e3241457 -r 2054b8410a28 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c Fri May 31 15:11:39 2019 +0300
+++ b/njs/test/njs_unit_test.c Thu May 30 20:05:14 2019 +0300
@@ -5540,9 +5540,21 @@ static njs_unit_test_t njs_test[] =
{ nxt_string("/]/"),
nxt_string("/\\]/") },

+ { nxt_string("RegExp(']')"),
+ nxt_string("/\\]/") },
+
+ { nxt_string("RegExp('[\\\\\\\\]]')"),
+ nxt_string("/[\\\\]\\]/") },
+
+ { nxt_string("/[\\\\]]/"),
+ nxt_string("/[\\\\]\\]/") },
+
{ nxt_string("/\\]/"),
nxt_string("/\\]/") },

+ { nxt_string("RegExp('\\]')"),
+ nxt_string("/\\]/") },
+
{ nxt_string("/ab]cd/"),
nxt_string("/ab\\]cd/") },

@@ -7441,6 +7453,9 @@ static njs_unit_test_t njs_test[] =
{ nxt_string("new RegExp('[')"),
nxt_string("SyntaxError: pcre_compile(\"[\") failed: missing terminating ] for character class") },

+ { nxt_string("new RegExp('\\\\')"),
+ nxt_string("SyntaxError: pcre_compile(\"\\\") failed: \\ at end of pattern") },
+
{ nxt_string("[0].map(RegExp().toString)"),
nxt_string("TypeError: \"this\" argument is not a regexp") },

_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject Author Views Posted

[njs] Escaping lone closing square brackets in RegExp() constructor.

Dmitry Volyntsev 312 June 04, 2019 05:46AM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 308
Record Number of Users: 8 on April 13, 2023
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready