Welcome! Log In Create A New Profile

Advanced

[njs] Fixed parsing of surrogate pairs presented as UTF-16 escape sequences.

Alexander Borisov
April 23, 2019 10:48AM
details: https://hg.nginx.org/njs/rev/724c31e77d2a
branches:
changeset: 921:724c31e77d2a
user: Alexander Borisov <alexander.borisov@nginx.com>
date: Mon Apr 22 16:23:50 2019 +0300
description:
Fixed parsing of surrogate pairs presented as UTF-16 escape sequences.

This closes issue #96 on GitHub.

diffstat:

njs/njs_parser_terminal.c | 55 ++++++++++++++++++++++++++++++++++++++++------
njs/test/njs_unit_test.c | 17 ++++++++++++++
2 files changed, 64 insertions(+), 8 deletions(-)

diffs (137 lines):

diff -r b3eb60707479 -r 724c31e77d2a njs/njs_parser_terminal.c
--- a/njs/njs_parser_terminal.c Mon Apr 22 16:23:43 2019 +0300
+++ b/njs/njs_parser_terminal.c Mon Apr 22 16:23:50 2019 +0300
@@ -926,7 +926,7 @@ njs_parser_escape_string_create(njs_vm_t
{
u_char c, *start, *dst;
size_t size, length, hex_length;
- uint64_t cp;
+ uint64_t cp, cp_pair;
njs_ret_t ret;
nxt_str_t *string;
const u_char *src, *end, *hex_end;
@@ -942,6 +942,7 @@ njs_parser_escape_string_create(njs_vm_t
}

dst = start;
+ cp_pair = 0;

string = njs_parser_text(parser);
src = string->start;
@@ -1041,6 +1042,23 @@ njs_parser_escape_string_create(njs_vm_t
hex:
cp = njs_number_hex_parse(&src, hex_end);

+ /* Skip '}' character. */
+
+ if (hex_length == 0) {
+ src++;
+ }
+
+ /* Surrogate pair. */
+
+ if (cp_pair != 0) {
+ cp = 0x10000 + ((cp_pair - 0xd800) << 10) + (cp - 0xdc00);
+ cp_pair = 0;
+
+ } else if (cp >= 0xd800 && cp <= 0xdfff) {
+ cp_pair = cp;
+ continue;
+ }
+
dst = nxt_utf8_encode(dst, (uint32_t) cp);
if (nxt_slow_path(dst == NULL)) {
njs_parser_syntax_error(vm, parser,
@@ -1049,12 +1067,6 @@ njs_parser_escape_string_create(njs_vm_t

return NJS_TOKEN_ILLEGAL;
}
-
- /* Skip '}' character */
-
- if (hex_length == 0) {
- src++;
- }
}

if (length > NJS_STRING_MAP_STRIDE && length != size) {
@@ -1070,12 +1082,13 @@ njs_parser_escape_string_calc_length(njs
size_t *out_size, size_t *out_length)
{
size_t size, length, hex_length;
- uint64_t cp;
+ uint64_t cp, cp_pair;
nxt_str_t *string;
const u_char *ptr, *src, *end, *hex_end;

size = 0;
length = 0;
+ cp_pair = 0;

string = njs_parser_text(parser);
src = string->start;
@@ -1171,6 +1184,25 @@ njs_parser_escape_string_calc_length(njs
}
}

+ /* Surrogate pair. */
+
+ if (cp_pair != 0) {
+ if (nxt_slow_path(cp < 0xdc00 || cp > 0xdfff)) {
+ goto invalid_pair;
+ }
+
+ cp = 0x10000 + ((cp_pair - 0xd800) << 10) + (cp - 0xdc00);
+ cp_pair = 0;
+
+ } else if (cp >= 0xd800 && cp <= 0xdfff) {
+ if (nxt_slow_path(cp > 0xdbff || src[0] != '\\' || src[1] != 'u')) {
+ goto invalid_pair;
+ }
+
+ cp_pair = cp;
+ continue;
+ }
+
size += nxt_utf8_size(cp);
length++;
}
@@ -1186,4 +1218,11 @@ invalid:
njs_parser_text(parser));

return NJS_ERROR;
+
+invalid_pair:
+
+ njs_parser_syntax_error(vm, parser, "Invalid surrogate pair \"%V\"",
+ njs_parser_text(parser));
+
+ return NJS_ERROR;
}
diff -r b3eb60707479 -r 724c31e77d2a njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c Mon Apr 22 16:23:43 2019 +0300
+++ b/njs/test/njs_unit_test.c Mon Apr 22 16:23:50 2019 +0300
@@ -4387,6 +4387,23 @@ static njs_unit_test_t njs_test[] =
{ nxt_string("'привет\\n\\u{61}\\u{3B1}\\u{20AC}'.length"),
nxt_string("10") },

+ { nxt_string("'\\ud83d\\udc4d'"),
+ nxt_string("\xf0\x9f\x91\x8d") },
+
+ { nxt_string("'\\ud83d\\udc4d'.length"),
+ nxt_string("1") },
+
+ { nxt_string("'\\ud83d abc \\udc4d'"),
+ nxt_string("SyntaxError: Invalid surrogate pair "
+ "\"\\ud83d abc \\udc4d\" in 1") },
+
+ { nxt_string("'\\ud83d'"),
+ nxt_string("SyntaxError: Invalid surrogate pair \"\\ud83d\" in 1") },
+
+ { nxt_string("'\\ud83d\\uabcd'"),
+ nxt_string("SyntaxError: Invalid surrogate pair "
+ "\"\\ud83d\\uabcd\" in 1") },
+
{ nxt_string("''.hasOwnProperty('length')"),
nxt_string("true") },

_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject Author Views Posted

[njs] Fixed parsing of surrogate pairs presented as UTF-16 escape sequences.

Alexander Borisov 320 April 23, 2019 10:48AM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 227
Record Number of Users: 8 on April 13, 2023
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready