Welcome! Log In Create A New Profile

Advanced

[njs] Improved UTF-8 offset map related macros (no functional changes)

Valentin Bartenev
November 10, 2016 10:56AM
details: http://hg.nginx.org/njs/rev/187882f1895a
branches:
changeset: 249:187882f1895a
user: Valentin Bartenev <vbart@nginx.com>
date: Thu Nov 10 18:45:10 2016 +0300
description:
Improved UTF-8 offset map related macros (no functional changes).

diffstat:

njs/njs_parser.c | 4 +-
njs/njs_string.c | 61 ++++++++++++++++++++++++++-----------------------------
njs/njs_string.h | 14 ++++++++++--
njs/njs_vm.c | 2 +-
4 files changed, 43 insertions(+), 38 deletions(-)

diffs (250 lines):

diff -r 60c2930eb951 -r 187882f1895a njs/njs_parser.c
--- a/njs/njs_parser.c Thu Nov 10 16:47:52 2016 +0300
+++ b/njs/njs_parser.c Thu Nov 10 18:45:10 2016 +0300
@@ -1984,7 +1984,7 @@ njs_parser_string_create(njs_vm_t *vm, n
if (nxt_fast_path(p != NULL)) {
memcpy(p, src->start, src->length);

- if (length > NJS_STRING_MAP_OFFSET && (size_t) length != src->length) {
+ if (length > NJS_STRING_MAP_STRIDE && (size_t) length != src->length) {
njs_string_offset_map_init(p, src->length);
}

@@ -2144,7 +2144,7 @@ njs_parser_escape_string_create(njs_vm_t
}

if (start != NULL) {
- if (length > NJS_STRING_MAP_OFFSET && length != size) {
+ if (length > NJS_STRING_MAP_STRIDE && length != size) {
njs_string_offset_map_init(start, size);
}

diff -r 60c2930eb951 -r 187882f1895a njs/njs_string.c
--- a/njs/njs_string.c Thu Nov 10 16:47:52 2016 +0300
+++ b/njs/njs_string.c Thu Nov 10 18:45:10 2016 +0300
@@ -179,7 +179,7 @@ njs_string_new(njs_vm_t *vm, njs_value_t
if (nxt_fast_path(p != NULL)) {
memcpy(p, start, size);

- if (size != length && length >= NJS_STRING_MAP_OFFSET) {
+ if (size != length && length >= NJS_STRING_MAP_STRIDE) {
njs_string_offset_map_init(p, size);
}

@@ -216,9 +216,8 @@ njs_string_alloc(njs_vm_t *vm, njs_value
value->data.external0 = 0;
value->data.string_size = size;

- if (size != length && length > NJS_STRING_MAP_OFFSET) {
- total = nxt_align_size(size, sizeof(uint32_t));
- total += ((length - 1) / NJS_STRING_MAP_OFFSET) * sizeof(uint32_t);
+ if (size != length && length > NJS_STRING_MAP_STRIDE) {
+ total = njs_string_map_offset(size) + njs_string_map_size(length);

} else {
total = size;
@@ -293,14 +292,13 @@ njs_string_validate(njs_vm_t *vm, njs_st
return length;
}

- if (length > NJS_STRING_MAP_OFFSET) {
+ if (length > NJS_STRING_MAP_STRIDE) {
/*
* Reallocate the long string with offset map
* after the string.
*/
- new_size = nxt_align_size(size, sizeof(uint32_t));
- new_size += ((length - 1) / NJS_STRING_MAP_OFFSET)
- * sizeof(uint32_t);
+ new_size = njs_string_map_offset(size)
+ + njs_string_map_size(length);

start = nxt_mem_cache_alloc(vm->mem_cache_pool, new_size);
if (nxt_slow_path(start == NULL)) {
@@ -473,15 +471,15 @@ njs_string_offset_map_init(const u_char
const u_char *p, *end;

end = start + size;
- map = (uint32_t *) nxt_align_ptr(end, sizeof(uint32_t));
+ map = njs_string_map_start(end);
p = start;
n = 0;
- offset = NJS_STRING_MAP_OFFSET;
+ offset = NJS_STRING_MAP_STRIDE;

do {
if (offset == 0) {
map[n++] = p - start;
- offset = NJS_STRING_MAP_OFFSET;
+ offset = NJS_STRING_MAP_STRIDE;
}

/* The UTF-8 string should be valid since its length is known. */
@@ -651,7 +649,7 @@ njs_string_prototype_concat(njs_vm_t *vm
p += string.size;
}

- if (length >= NJS_STRING_MAP_OFFSET && size != length) {
+ if (length >= NJS_STRING_MAP_STRIDE && size != length) {
njs_string_offset_map_init(start, size);
}

@@ -685,7 +683,7 @@ njs_string_prototype_from_utf8(njs_vm_t

if (length >= 0) {

- if (length < NJS_STRING_MAP_OFFSET || (size_t) length == slice.length) {
+ if (length < NJS_STRING_MAP_STRIDE || (size_t) length == slice.length) {
/* ASCII or short UTF-8 string. */
return njs_string_create(vm, &vm->retval, string.start,
slice.length, length);
@@ -769,7 +767,7 @@ njs_string_prototype_from_bytes(njs_vm_t
s = nxt_utf8_encode(s, *p);
}

- if (slice.length >= NJS_STRING_MAP_OFFSET || size != slice.length) {
+ if (slice.length >= NJS_STRING_MAP_STRIDE || size != slice.length) {
njs_string_offset_map_init(start, size);
}
}
@@ -1530,13 +1528,13 @@ njs_string_offset(const u_char *start, c
uint32_t *map;
nxt_uint_t skip;

- if (index >= NJS_STRING_MAP_OFFSET) {
- map = (uint32_t *) nxt_align_ptr(end, sizeof(uint32_t));
-
- start += map[index / NJS_STRING_MAP_OFFSET - 1];
+ if (index >= NJS_STRING_MAP_STRIDE) {
+ map = njs_string_map_start(end);
+
+ start += map[index / NJS_STRING_MAP_STRIDE - 1];
}

- for (skip = index % NJS_STRING_MAP_OFFSET; skip != 0; skip--) {
+ for (skip = index % NJS_STRING_MAP_STRIDE; skip != 0; skip--) {
start = nxt_utf8_next(start, end);
}

@@ -1562,16 +1560,16 @@ njs_string_index(njs_string_prop_t *stri
last = 0;
index = 0;

- if (string->length >= NJS_STRING_MAP_OFFSET) {
+ if (string->length >= NJS_STRING_MAP_STRIDE) {

end = string->start + string->size;
- map = (uint32_t *) nxt_align_ptr(end, sizeof(uint32_t));
-
- while (index + NJS_STRING_MAP_OFFSET < string->length
+ map = njs_string_map_start(end);
+
+ while (index + NJS_STRING_MAP_STRIDE < string->length
&& *map <= offset)
{
last = *map++;
- index += NJS_STRING_MAP_OFFSET;
+ index += NJS_STRING_MAP_STRIDE;
}
}

@@ -1631,7 +1629,7 @@ njs_string_prototype_to_lower_case(njs_v
size--;
}

- if (string.length >= NJS_STRING_MAP_OFFSET) {
+ if (string.length >= NJS_STRING_MAP_STRIDE) {
njs_string_offset_map_init(start, string.size);
}
}
@@ -1683,7 +1681,7 @@ njs_string_prototype_to_upper_case(njs_v
size--;
}

- if (string.length >= NJS_STRING_MAP_OFFSET) {
+ if (string.length >= NJS_STRING_MAP_STRIDE) {
njs_string_offset_map_init(start, string.size);
}
}
@@ -1867,7 +1865,7 @@ njs_string_prototype_repeat(njs_vm_t *vm
n--;
}

- if (length >= NJS_STRING_MAP_OFFSET && size != length) {
+ if (length >= NJS_STRING_MAP_STRIDE && size != length) {
njs_string_offset_map_init(start, size);
}

@@ -2884,7 +2882,7 @@ njs_string_replace_join(njs_vm_t *vm, nj
/* GC: release valid values. */
}

- if (length >= NJS_STRING_MAP_OFFSET && size != length) {
+ if (length >= NJS_STRING_MAP_STRIDE && size != length) {
njs_string_offset_map_init(string, size);
}

@@ -3655,10 +3653,9 @@ njs_value_index(njs_vm_t *vm, njs_parser

length = src->data.u.string->length;

- if (size != length && length > NJS_STRING_MAP_OFFSET) {
- size = nxt_align_size(size, sizeof(uint32_t));
- size += ((length - 1) / NJS_STRING_MAP_OFFSET)
- * sizeof(uint32_t);
+ if (size != length && length > NJS_STRING_MAP_STRIDE) {
+ size = njs_string_map_offset(size)
+ + njs_string_map_size(length);
}
}

diff -r 60c2930eb951 -r 187882f1895a njs/njs_string.h
--- a/njs/njs_string.h Thu Nov 10 16:47:52 2016 +0300
+++ b/njs/njs_string.h Thu Nov 10 18:45:10 2016 +0300
@@ -31,7 +31,15 @@
* division and remainder operations but no less than 16 because the maximum
* length of short string inlined in njs_value_t is less than 16 bytes.
*/
-#define NJS_STRING_MAP_OFFSET 32
+#define NJS_STRING_MAP_STRIDE 32
+
+#define njs_string_map_offset(size) nxt_align_size((size), sizeof(uint32_t))
+
+#define njs_string_map_start(p) \
+ ((uint32_t *) nxt_align_ptr((p), sizeof(uint32_t)))
+
+#define njs_string_map_size(length) \
+ (((length - 1) / NJS_STRING_MAP_STRIDE) * sizeof(uint32_t))

/*
* The JavaScript standard states that strings are stored in UTF-16.
@@ -44,7 +52,7 @@
* encoding does not allow to get quickly a character at specified position.
* To speed up this search a map of offsets is stored after the UTF-8 string.
* The map is aligned to uint32_t and contains byte positions of each
- * NJS_STRING_MAP_OFFSET UTF-8 character except zero position. The map
+ * NJS_STRING_MAP_STRIDE UTF-8 character except zero position. The map
* can be allocated and updated on demand. If a string come outside
* JavaScript as byte sequnece just to be concatenated or to be used in
* regular expressions the offset map is not required.
@@ -53,7 +61,7 @@
* 1) if the length is zero hence it is a byte string;
* 2) if the size and length are equal so the string contains only ASCII
* characters map is not required;
- * 3) if the length is less than NJS_STRING_MAP_OFFSET.
+ * 3) if the length is less than NJS_STRING_MAP_STRIDE.
*
* The current implementation does not support Unicode surrogate pairs.
* If offset in map points to surrogate pair then the previous offset
diff -r 60c2930eb951 -r 187882f1895a njs/njs_vm.c
--- a/njs/njs_vm.c Thu Nov 10 16:47:52 2016 +0300
+++ b/njs/njs_vm.c Thu Nov 10 18:45:10 2016 +0300
@@ -1556,7 +1556,7 @@ njs_vmcode_addition(njs_vm_t *vm, njs_va
(void) memcpy(start, string1.start, string1.size);
(void) memcpy(start + string1.size, string2.start, string2.size);

- if (length >= NJS_STRING_MAP_OFFSET && size != length) {
+ if (length >= NJS_STRING_MAP_STRIDE && size != length) {
njs_string_offset_map_init(start, size);
}


_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject Author Views Posted

[njs] Improved UTF-8 offset map related macros (no functional changes)

Valentin Bartenev 321 November 10, 2016 10:56AM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 236
Record Number of Users: 8 on April 13, 2023
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready