Dmitry Volyntsev
July 27, 2020 10:38AM
details: https://hg.nginx.org/njs/rev/d4c69313ac6c
branches:
changeset: 1482:d4c69313ac6c
user: Dmitry Volyntsev <xeioex@nginx.com>
date: Mon Jul 27 14:34:35 2020 +0000
description:
Improved readability of surrogate pairs handling.

diffstat:

src/njs_json.c | 2 +-
src/njs_parser.c | 4 ++--
src/njs_string.c | 4 ++--
src/njs_string.h | 10 ----------
src/njs_unicode.h | 12 ++++++++++++
src/njs_utf16.c | 11 ++++-------
6 files changed, 21 insertions(+), 22 deletions(-)

diffs (123 lines):

diff -r 6078d0c735b4 -r d4c69313ac6c src/njs_json.c
--- a/src/njs_json.c Mon Jul 27 14:18:15 2020 +0000
+++ b/src/njs_json.c Mon Jul 27 14:34:35 2020 +0000
@@ -738,7 +738,7 @@ njs_json_parse_string(njs_json_parse_ctx
p += 4;

if (njs_fast_path(njs_surrogate_trailing(utf_low))) {
- utf = njs_string_surrogate_pair(utf, utf_low);
+ utf = njs_surrogate_pair(utf, utf_low);

} else if (njs_surrogate_leading(utf_low)) {
utf = NJS_UNICODE_REPLACEMENT;
diff -r 6078d0c735b4 -r d4c69313ac6c src/njs_parser.c
--- a/src/njs_parser.c Mon Jul 27 14:18:15 2020 +0000
+++ b/src/njs_parser.c Mon Jul 27 14:34:35 2020 +0000
@@ -8088,7 +8088,7 @@ njs_parser_escape_string_create(njs_pars

if (cp_pair != 0) {
if (njs_fast_path(njs_surrogate_trailing(cp))) {
- cp = njs_string_surrogate_pair(cp_pair, cp);
+ cp = njs_surrogate_pair(cp_pair, cp);

} else if (njs_slow_path(njs_surrogate_leading(cp))) {
cp = NJS_UNICODE_REPLACEMENT;
@@ -8238,7 +8238,7 @@ njs_parser_escape_string_calc_length(njs

if (cp_pair != 0) {
if (njs_fast_path(njs_surrogate_trailing(cp))) {
- cp = njs_string_surrogate_pair(cp_pair, cp);
+ cp = njs_surrogate_pair(cp_pair, cp);

} else if (njs_slow_path(njs_surrogate_leading(cp))) {
cp = NJS_UNICODE_REPLACEMENT;
diff -r 6078d0c735b4 -r d4c69313ac6c src/njs_string.c
--- a/src/njs_string.c Mon Jul 27 14:18:15 2020 +0000
+++ b/src/njs_string.c Mon Jul 27 14:34:35 2020 +0000
@@ -4272,7 +4272,7 @@ njs_string_encode_uri(njs_vm_t *vm, njs_
goto uri_error;
}

- cp = njs_string_surrogate_pair(cp, cp_low);
+ cp = njs_surrogate_pair(cp, cp_low);
size += njs_utf8_size(cp) * 3;
continue;
}
@@ -4312,7 +4312,7 @@ njs_string_encode_uri(njs_vm_t *vm, njs_

if (njs_slow_path(njs_surrogate_leading(cp))) {
cp_low = njs_utf8_decode(&ctx, &src, end);
- cp = njs_string_surrogate_pair(cp, cp_low);
+ cp = njs_surrogate_pair(cp, cp_low);
}

njs_utf8_encode(encode, cp);
diff -r 6078d0c735b4 -r d4c69313ac6c src/njs_string.h
--- a/src/njs_string.h Mon Jul 27 14:18:15 2020 +0000
+++ b/src/njs_string.h Mon Jul 27 14:34:35 2020 +0000
@@ -26,16 +26,6 @@
/* The maximum signed int32_t. */
#define NJS_STRING_MAX_LENGTH 0x7fffffff

-#define njs_surrogate_leading(cp) ((cp) >= 0xd800 && (cp) <= 0xdbff)
-
-#define njs_surrogate_trailing(cp) ((cp) >= 0xdc00 && (cp) <= 0xdfff)
-
-#define njs_surrogate_any(cp) ((cp) >= 0xd800 && (cp) <= 0xdfff)
-
-/* Converting surrogate pair to code point. */
-#define njs_string_surrogate_pair(high, low) \
- (0x10000 + ((high - 0xd800) << 10) + (low - 0xdc00))
-
/*
* NJS_STRING_MAP_STRIDE should be power of two to use shift and binary
* AND operations instead of division and remainder operations but no
diff -r 6078d0c735b4 -r d4c69313ac6c src/njs_unicode.h
--- a/src/njs_unicode.h Mon Jul 27 14:18:15 2020 +0000
+++ b/src/njs_unicode.h Mon Jul 27 14:34:35 2020 +0000
@@ -23,5 +23,17 @@ typedef struct {
u_char upper;
} njs_unicode_decode_t;

+#define njs_surrogate_leading(cp) \
+ (((unsigned) (cp) - 0xd800) <= 0xdbff - 0xd800)
+
+#define njs_surrogate_trailing(cp) \
+ (((unsigned) (cp) - 0xdc00) <= 0xdfff - 0xdc00)
+
+#define njs_surrogate_any(cp) \
+ (((unsigned) (cp) - 0xd800) <= 0xdfff - 0xd800)
+
+#define njs_surrogate_pair(high, low) \
+ (0x10000 + (((high) - 0xd800) << 10) + ((low) - 0xdc00))
+

#endif /* _NJS_UNICODE_H_INCLUDED_ */
diff -r 6078d0c735b4 -r d4c69313ac6c src/njs_utf16.c
--- a/src/njs_utf16.c Mon Jul 27 14:18:15 2020 +0000
+++ b/src/njs_utf16.c Mon Jul 27 14:34:35 2020 +0000
@@ -79,9 +79,8 @@ lead_state:
#endif

if (ctx->codepoint != 0x00) {
- if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) {
- unit = 0x10000 + ((ctx->codepoint - 0xD800) << 10)
- + (unit - 0xDC00);
+ if (njs_surrogate_trailing(unit)) {
+ unit = njs_surrogate_pair(ctx->codepoint, unit);

ctx->codepoint = 0x00;

@@ -96,10 +95,8 @@ lead_state:
return NJS_UNICODE_ERROR;
}

- /* Surrogate pair. */
-
- if ((unsigned) (unit - 0xD800) <= (0xDFFF - 0xD800)) {
- if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) {
+ if (njs_surrogate_any(unit)) {
+ if (njs_surrogate_trailing(unit)) {
return NJS_UNICODE_ERROR;
}

_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject | Author | Views | Posted

[njs] Improved readability of surrogate pairs handling. | Dmitry Volyntsev | 376 | July 27, 2020 10:38AM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 156
Record Number of Users: 8 on April 13, 2023
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready