Welcome! Log In Create A New Profile

Advanced

[njs] Added escaping to lone closing square brackets in a regexp.

Alexander Borisov
May 17, 2019 02:58PM
details: https://hg.nginx.org/njs/rev/88263426432d
branches:
changeset: 970:88263426432d
user: Alexander Borisov <alexander.borisov@nginx.com>
date: Fri May 17 17:01:10 2019 +0300
description:
Added escaping to lone closing square brackets in a regexp.

PCRE with the PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
lone closing square brackets as invalid, whereas according
to ES6 (11.8.5) such a regexp is valid.

As a workaround, the solution is to escape those lone brackets.

This closes issue #157 on GitHub.

diffstat:

njs/njs_regexp.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++
njs/test/njs_unit_test.c | 30 ++++++++++++++++++
2 files changed, 110 insertions(+), 0 deletions(-)

diffs (155 lines):

diff -r e22397fd709a -r 88263426432d njs/njs_regexp.c
--- a/njs/njs_regexp.c Fri May 17 21:16:31 2019 +0300
+++ b/njs/njs_regexp.c Fri May 17 17:01:10 2019 +0300
@@ -206,15 +206,71 @@ njs_regexp_create(njs_vm_t *vm, njs_valu
}


+/*
+ * Builds a copy of the regexp source in which every closing square bracket
+ * that is outside of a character class and is not already escaped is
+ * preceded by a backslash.
+ *
+ * "count" is the number of such brackets.  It sizes the new buffer, so it
+ * must not be smaller than the number of brackets actually escaped here.
+ *
+ * On success NJS_OK is returned and "text" describes the new buffer, which
+ * is allocated from vm->mem_pool; text->start is exactly the pointer
+ * returned by the allocator and text->length is the number of bytes
+ * written.  On allocation failure njs_memory_error() is called, NJS_ERROR
+ * is returned and "text" is left unchanged.
+ */
+nxt_inline njs_ret_t
+njs_regexp_escape_bracket(njs_vm_t *vm, nxt_str_t *text, size_t count)
+{
+    u_char  c, *p, *dst, *buf, *start, *end;
+
+    buf = nxt_mp_alloc(vm->mem_pool, text->length + count);
+    if (nxt_slow_path(buf == NULL)) {
+        njs_memory_error(vm);
+        return NJS_ERROR;
+    }
+
+    dst = buf;
+    start = text->start;
+    end = text->start + text->length;
+
+    p = start;
+
+    /*
+     * "p" always points to the next unread byte and never moves past "end",
+     * even when the source ends with a backslash or an unclosed class.
+     */
+
+    while (p < end) {
+        c = *p++;
+
+        switch (c) {
+
+        case '\\':
+            /* Skip the escaped character, if there is one. */
+            if (p < end) {
+                p++;
+            }
+
+            break;
+
+        case '[':
+            /* Skip the class body up to and including its closing bracket. */
+            while (p < end) {
+                c = *p++;
+
+                if (c == ']') {
+                    break;
+                }
+
+                if (c == '\\' && p < end) {
+                    p++;
+                }
+            }
+
+            break;
+
+        case ']':
+            /* Flush the pending text, then emit the escaped bracket. */
+            dst = nxt_cpymem(dst, start, (p - 1) - start);
+            dst = nxt_cpymem(dst, "\\]", 2);
+
+            start = p;
+            break;
+
+        default:
+            break;
+        }
+    }
+
+    /* The tail is measured up to "end", not up to the scan position. */
+    dst = nxt_cpymem(dst, start, end - start);
+
+    text->start = buf;
+    text->length = dst - buf;
+
+    return NJS_OK;
+}
+
+
njs_token_t
njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, njs_value_t *value)
{
u_char *p;
+ size_t closing_brackets;
nxt_str_t text;
+ njs_ret_t ret;
njs_lexer_t *lexer;
njs_regexp_flags_t flags;
njs_regexp_pattern_t *pattern;

+ closing_brackets = 0;
lexer = parser->lexer;

for (p = lexer->start; p < lexer->end; p++) {
@@ -242,6 +298,10 @@ njs_regexp_literal(njs_vm_t *vm, njs_par

break;

+ case ']':
+ closing_brackets++;
+ break;
+
case '\\':
if (++p < lexer->end && (*p == '\n' || *p == '\r')) {
goto failed;
@@ -267,8 +327,28 @@ njs_regexp_literal(njs_vm_t *vm, njs_par

lexer->start = p;

+ if (closing_brackets != 0) {
+ /*
+ * PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
+ * lone closing square brackets as invalid. Whereas according
+ * to ES6: 11.8.5 it is a valid regexp expression.
+ *
+ * Escaping it here as a workaround.
+ */
+
+ ret = njs_regexp_escape_bracket(vm, &text, closing_brackets);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ return NJS_TOKEN_ILLEGAL;
+ }
+ }
+
pattern = njs_regexp_pattern_create(vm, text.start, text.length,
flags);
+
+ if (closing_brackets != 0) {
+ nxt_mp_free(vm->mem_pool, text.start);
+ }
+
if (nxt_slow_path(pattern == NULL)) {
return NJS_TOKEN_ILLEGAL;
}
diff -r e22397fd709a -r 88263426432d njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c Fri May 17 21:16:31 2019 +0300
+++ b/njs/test/njs_unit_test.c Fri May 17 17:01:10 2019 +0300
@@ -5496,6 +5496,36 @@ static njs_unit_test_t njs_test[] =
{ nxt_string("'12345'.replace(3, () => ({toString: () => 'aaaa'}))"),
nxt_string("12aaaa45") },

+ { nxt_string("/]/"),
+ nxt_string("/\\]/") },
+
+ { nxt_string("/\\]/"),
+ nxt_string("/\\]/") },
+
+ { nxt_string("/ab]cd/"),
+ nxt_string("/ab\\]cd/") },
+
+ { nxt_string("/ab]/"),
+ nxt_string("/ab\\]/") },
+
+ { nxt_string("/]cd/"),
+ nxt_string("/\\]cd/") },
+
+ { nxt_string("']'.match(/]/)"),
+ nxt_string("]") },
+
+ { nxt_string("'ab]cd'.match(/]/)"),
+ nxt_string("]") },
+
+ { nxt_string("'ab]'.match(/]/)"),
+ nxt_string("]") },
+
+ { nxt_string("']cd'.match(/]/)"),
+ nxt_string("]") },
+
+ { nxt_string("'ab]cd'.match(/\\]/)"),
+ nxt_string("]") },
+
{ nxt_string("'abc'.match(/a*/g)"),
nxt_string("a,,,") },

_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject Author Views Posted

[njs] Added escaping to lone closing square brackets in a regexp.

Alexander Borisov 379 May 17, 2019 02:58PM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 176
Record Number of Users: 8 on April 13, 2023
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready