Welcome! Log In Create A New Profile

Advanced

Re: [PATCH 01 of 12] Win32: non-ASCII names support in autoindex (ticket #458)

Maxim Dounin
February 19, 2023 12:18PM
Hello!

On Fri, Feb 17, 2023 at 06:38:54PM +0400, Sergey Kandaurov wrote:

> > On 13 Jan 2023, at 01:35, Maxim Dounin <mdounin@mdounin.ru> wrote:
> >
> > # HG changeset patch
> > # User Maxim Dounin <mdounin@mdounin.ru>
> > # Date 1673548890 -10800
> > # Thu Jan 12 21:41:30 2023 +0300
> > # Node ID 60d845f9505fe1b97c1e04b680523b790e29fdb1
> > # Parent 07b0bee87f32be91a33210bc06973e07c4c1dac9
> > Win32: non-ASCII names support in autoindex (ticket #458).
> >
> > Notably, ngx_open_dir() now supports opening directories with non-ASCII
> > characters, and directory entries returned by ngx_read_dir() are properly
> > converted to UTF-8.
> >
> > diff -r 07b0bee87f32 -r 60d845f9505f src/os/win32/ngx_files.c
> > --- a/src/os/win32/ngx_files.c Wed Dec 21 14:53:27 2022 +0300
> > +++ b/src/os/win32/ngx_files.c Thu Jan 12 21:41:30 2023 +0300
> > @@ -13,7 +13,11 @@
> >
> > static ngx_int_t ngx_win32_check_filename(u_char *name, u_short *u,
> > size_t len);
> > -static u_short *ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len);
> > +static u_short *ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len,
> > + size_t reserved);
> > +static u_char *ngx_utf16_to_utf8(u_char *utf8, u_short *utf16, size_t *len,
> > + size_t *allocated);
> > +uint32_t ngx_utf16_decode(u_short **u, size_t n);
> >
> >
> > /* FILE_FLAG_BACKUP_SEMANTICS allows to obtain a handle to a directory */
> > @@ -28,7 +32,7 @@ ngx_open_file(u_char *name, u_long mode,
> > u_short utf16[NGX_UTF16_BUFLEN];
> >
> > len = NGX_UTF16_BUFLEN;
> > - u = ngx_utf8_to_utf16(utf16, name, &len);
> > + u = ngx_utf8_to_utf16(utf16, name, &len, 0);
> >
> > if (u == NULL) {
> > return INVALID_HANDLE_VALUE;
> > @@ -269,7 +273,7 @@ ngx_file_info(u_char *file, ngx_file_inf
> >
> > len = NGX_UTF16_BUFLEN;
> >
> > - u = ngx_utf8_to_utf16(utf16, file, &len);
> > + u = ngx_utf8_to_utf16(utf16, file, &len, 0);
> >
> > if (u == NULL) {
> > return NGX_FILE_ERROR;
> > @@ -427,49 +431,51 @@ ngx_realpath(u_char *path, u_char *resol
> > ngx_int_t
> > ngx_open_dir(ngx_str_t *name, ngx_dir_t *dir)
> > {
> > - u_char *pattern, *p;
> > + size_t len;
> > + u_short *u, *p;
> > ngx_err_t err;
> > + u_short utf16[NGX_UTF16_BUFLEN];
> >
> > - pattern = malloc(name->len + 3);
> > - if (pattern == NULL) {
> > + len = NGX_UTF16_BUFLEN - 2;
> > + u = ngx_utf8_to_utf16(utf16, name->data, &len, 2);
> > +
> > + if (u == NULL) {
> > return NGX_ERROR;
> > }
> >
> > - p = ngx_cpymem(pattern, name->data, name->len);
> > + if (ngx_win32_check_filename(name->data, u, len) != NGX_OK) {
> > + goto failed;
> > + }
> > +
> > + p = &u[len - 1];
> >
> > *p++ = '/';
> > *p++ = '*';
> > *p = '\0';
> >
> > - dir->dir = FindFirstFile((const char *) pattern, &dir->finddata);
> > + dir->dir = FindFirstFileW(u, &dir->finddata);
> >
> > if (dir->dir == INVALID_HANDLE_VALUE) {
> > - err = ngx_errno;
> > - ngx_free(pattern);
> > - ngx_set_errno(err);
> > - return NGX_ERROR;
> > + goto failed;
> > }
> >
> > - ngx_free(pattern);
> > + if (u != utf16) {
> > + ngx_free(u);
> > + }
> >
> > dir->valid_info = 1;
> > dir->ready = 1;
> > + dir->name = NULL;
> > + dir->allocated = 0;
> >
> > return NGX_OK;
> > -}
> >
> > +failed:
> >
> > -ngx_int_t
> > -ngx_read_dir(ngx_dir_t *dir)
> > -{
> > - if (dir->ready) {
> > - dir->ready = 0;
> > - return NGX_OK;
> > - }
> > -
> > - if (FindNextFile(dir->dir, &dir->finddata) != 0) {
> > - dir->type = 1;
> > - return NGX_OK;
> > + if (u != utf16) {
> > + err = ngx_errno;
> > + ngx_free(u);
> > + ngx_set_errno(err);
> > }
> >
> > return NGX_ERROR;
> > @@ -477,8 +483,56 @@ ngx_read_dir(ngx_dir_t *dir)
> >
> >
> > ngx_int_t
> > +ngx_read_dir(ngx_dir_t *dir)
> > +{
> > + u_char *name;
> > + size_t len, allocated;
> > +
> > + if (dir->ready) {
> > + dir->ready = 0;
> > + goto convert;
> > + }
> > +
> > + if (FindNextFileW(dir->dir, &dir->finddata) != 0) {
> > + dir->type = 1;
> > + goto convert;
> > + }
> > +
> > + return NGX_ERROR;
> > +
> > +convert:
> > +
> > + name = dir->name;
> > + len = dir->allocated;
> > +
> > + name = ngx_utf16_to_utf8(name, dir->finddata.cFileName, &len, &allocated);
> > + if (name == NULL) {
>
> Note that you just return on NULL, but see below.
>
> > + return NGX_ERROR;
> > + }
> > +
> > + if (name != dir->name) {
> > +
> > + if (dir->name) {
> > + ngx_free(dir->name);
> > + }
> > +
> > + dir->name = name;
> > + dir->allocated = allocated;
> > + }
> > +
> > + dir->namelen = len - 1;
> > +
> > + return NGX_OK;
> > +}
> > +
> > +
> > +ngx_int_t
> > ngx_close_dir(ngx_dir_t *dir)
> > {
> > + if (dir->name) {
> > + ngx_free(dir->name);
> > + }
> > +
> > if (FindClose(dir->dir) == 0) {
> > return NGX_ERROR;
> > }
> > @@ -816,7 +870,7 @@ failed:
> >
> >
> > static u_short *
> > -ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len)
> > +ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len, size_t reserved)
> > {
> > u_char *p;
> > u_short *u, *last;
> > @@ -865,7 +919,7 @@ ngx_utf8_to_utf16(u_short *utf16, u_char
> >
> > /* the given buffer is not enough, allocate a new one */
> >
> > - u = malloc(((p - utf8) + ngx_strlen(p) + 1) * sizeof(u_short));
> > + u = malloc(((p - utf8) + ngx_strlen(p) + 1 + reserved) * sizeof(u_short));
> > if (u == NULL) {
> > return NULL;
> > }
> > @@ -910,3 +964,170 @@ ngx_utf8_to_utf16(u_short *utf16, u_char
> >
> > /* unreachable */
> > }
> > +
> > +
> > +static u_char *
> > +ngx_utf16_to_utf8(u_char *utf8, u_short *utf16, size_t *len, size_t *allocated)
> > +{
> > + u_char *p, *last;
> > + u_short *u, *j;
> > + uint32_t n;
> > +
> > + u = utf16;
> > + p = utf8;
> > + last = utf8 + *len;
> > +
> > + while (p < last) {
> > +
> > + if (*u < 0x80) {
> > + *p++ = (u_char) *u;
> > +
> > + if (*u == 0) {
> > + *len = p - utf8;
> > + return utf8;
> > + }
> > +
> > + u++;
> > +
> > + continue;
> > + }
> > +
> > + if (p >= last - 4) {
> > + *len = p - utf8;
> > + break;
> > + }
> > +
> > + n = ngx_utf16_decode(&u, 2);
> > +
> > + if (n > 0x10ffff) {
> > + ngx_free(utf8);
> > + ngx_set_errno(NGX_EILSEQ);
>
> This can result in double-free on a subsequent ngx_close_dir() call.

Thanks for catching, that's slipped in from the second loop.
Removed ngx_free() here.

>
> > + return NULL;
> > + }
> > +
> > + if (n >= 0x10000) {
> > + *p++ = (u_char) (0xf0 + (n >> 18));
> > + *p++ = (u_char) (0x80 + ((n >> 12) & 0x3f));
> > + *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
> > + *p++ = (u_char) (0x80 + (n & 0x3f));
> > + continue;
> > +
>
> stray ws-only line (one more below)

Fixed both, thanks.

> > + }
> > +
> > + if (n >= 0x0800) {
> > + *p++ = (u_char) (0xe0 + (n >> 12));
> > + *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
> > + *p++ = (u_char) (0x80 + (n & 0x3f));
> > + continue;
> > + }
> > +
> > + *p++ = (u_char) (0xc0 + (n >> 6));
> > + *p++ = (u_char) (0x80 + (n & 0x3f));
> > + }
> > +
> > + /* the given buffer is not enough, allocate a new one */
> > +
> > + for (j = u; *j; j++) { /* void */ }
> > +
> > + p = malloc((j - utf16) * 4 + 1);
>
> [Note that pointer arithmetics is in u_short (2 bytes) units.]
> It seems you are allocating memory assuming that utf16 -> utf8
> conversion can result in memory expansion up to 4 bytes per unit.
> At first glance that's true, because UTF-8 symbol may take 4 bytes.
> IIUC, 4-byte UTF-8 can result only from UTF-16 surrogate pairs,
> this gives 2x growth (2-paired UTF-16 to 4-bytes UTF-8).
> Otherwise, 1/2/3-byte encoded UTF-8 can result from 2-byte UTF-16,
> which gives from 1x to 3x growth per unit.
> So, "malloc((j - utf16) * 3 + 1)" should be enough there.

That's correct, though this depends on not-directly-visible
encoding details. I would rather prefer to keep 4 here, since it
can be easily verified to be enough from the loop itself.

I don't think that a minor optimization of memory usage by going
from 4x to 3x of the particular name is important here. Further,
extra allocated bytes are likely to be used as a preallocation for
other names.

> > + if (p == NULL) {
> > + return NULL;
> > + }
>
> Old memory is still there, allocated and referenced by dir->name,
> see above. It seems ok, it should be freed by ngx_close_dir() call.
> If we happen to change callers to not call ngx_close_dir() on error,
> this could turn into memory leaks, though.

The ngx_close_dir() is certainly required (including for other
reasons), so this looks perfectly fine.

> > +
> > + if (allocated) {
> > + *allocated = (j - utf16) * 4 + 1;
> > + }
> > +
> > + ngx_memcpy(p, utf8, *len);
> > +
> > + utf8 = p;
> > + p += *len;
> > +
> > + for ( ;; ) {
> > +
> > + if (*u < 0x80) {
> > + *p++ = (u_char) *u;
> > +
> > + if (*u == 0) {
> > + *len = p - utf8;
> > + return utf8;
> > + }
> > +
> > + u++;
> > +
> > + continue;
> > + }
> > +
> > + n = ngx_utf16_decode(&u, 2);
> > +
> > + if (n > 0x10ffff) {
> > + ngx_free(utf8);
> > + ngx_set_errno(NGX_EILSEQ);
> > + return NULL;
> > + }
> > +
> > + if (n >= 0x10000) {
> > + *p++ = (u_char) (0xf0 + (n >> 18));
> > + *p++ = (u_char) (0x80 + ((n >> 12) & 0x3f));
> > + *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
> > + *p++ = (u_char) (0x80 + (n & 0x3f));
> > + continue;
> > +
> > + }
> > +
> > + if (n >= 0x0800) {
> > + *p++ = (u_char) (0xe0 + (n >> 12));
> > + *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
> > + *p++ = (u_char) (0x80 + (n & 0x3f));
> > + continue;
> > + }
> > +
> > + *p++ = (u_char) (0xc0 + (n >> 6));
> > + *p++ = (u_char) (0x80 + (n & 0x3f));
> > + }
> > +
> > + /* unreachable */
> > +}
> > +
> > +
> > +/*
> > + * ngx_utf16_decode() decodes one or two UTF-16 code units
> > + * the return values:
> > + * 0x80 - 0x10ffff valid character
> > + * 0x110000 - 0xfffffffd invalid sequence
> > + * 0xfffffffe incomplete sequence
> > + * 0xffffffff error
> > + */
> > +
> > +uint32_t
> > +ngx_utf16_decode(u_short **u, size_t n)
> > +{
> > + uint32_t k, m;
> > +
> > + k = **u;
> > +
> > + if (k < 0xd800 || k > 0xdfff) {
> > + (*u)++;
> > + return k;
> > + }
> > +
> > + if (k > 0xdbff) {
> > + (*u)++;
> > + return 0xffffffff;
> > + }
> > +
> > + if (n < 2) {
> > + return 0xfffffffe;
> > + }
> > +
> > + (*u)++;
> > +
> > + m = *(*u)++;
> > +
> > + if (m < 0xdc00 || m > 0xdfff) {
> > + return 0xffffffff;
> > +
> > + }
> > +
> > + return 0x10000 + ((k - 0xd800) << 10) + (m - 0xdc00);
> > +}
> > diff -r 07b0bee87f32 -r 60d845f9505f src/os/win32/ngx_files.h
> > --- a/src/os/win32/ngx_files.h Wed Dec 21 14:53:27 2022 +0300
> > +++ b/src/os/win32/ngx_files.h Thu Jan 12 21:41:30 2023 +0300
> > @@ -30,7 +30,11 @@ typedef struct {
> >
> > typedef struct {
> > HANDLE dir;
> > - WIN32_FIND_DATA finddata;
> > + WIN32_FIND_DATAW finddata;
> > +
> > + u_char *name;
> > + size_t namelen;
> > + size_t allocated;
> >
> > unsigned valid_info:1;
> > unsigned type:1;
> > @@ -205,8 +209,8 @@ ngx_int_t ngx_close_dir(ngx_dir_t *dir);
> > #define ngx_dir_access(a) (a)
> >
> >
> > -#define ngx_de_name(dir) ((u_char *) (dir)->finddata.cFileName)
> > -#define ngx_de_namelen(dir) ngx_strlen((dir)->finddata.cFileName)
> > +#define ngx_de_name(dir) (dir)->name
> > +#define ngx_de_namelen(dir) (dir)->namelen
> >
> > ngx_int_t ngx_de_info(u_char *name, ngx_dir_t *dir);
> > #define ngx_de_info_n "dummy()"
>
> Otherwise, looks good.

Fixes:

diff -r d7521ae9e138 src/os/win32/ngx_files.c
--- a/src/os/win32/ngx_files.c Sat Feb 18 12:08:25 2023 +0300
+++ b/src/os/win32/ngx_files.c Sat Feb 18 12:57:02 2023 +0300
@@ -506,6 +506,7 @@ convert:
len = dir->allocated;

name = ngx_utf16_to_utf8(name, dir->finddata.cFileName, &len, &allocated);
+
if (name == NULL) {
return NGX_ERROR;
}
@@ -1000,7 +1000,6 @@ ngx_utf16_to_utf8(u_char *utf8, u_short
n = ngx_utf16_decode(&u, 2);

if (n > 0x10ffff) {
- ngx_free(utf8);
ngx_set_errno(NGX_EILSEQ);
return NULL;
}
@@ -1011,7 +1010,6 @@ ngx_utf16_to_utf8(u_char *utf8, u_short
*p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
*p++ = (u_char) (0x80 + (n & 0x3f));
continue;
-
}

if (n >= 0x0800) {
@@ -1072,7 +1070,6 @@ ngx_utf16_to_utf8(u_char *utf8, u_short
*p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
*p++ = (u_char) (0x80 + (n & 0x3f));
continue;
-
}

if (n >= 0x0800) {


--
Maxim Dounin
http://mdounin.ru/
_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
https://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject Author Views Posted

[PATCH 00 of 12] win32 non-ASCII names support fixes

Maxim Dounin 822 January 12, 2023 04:40PM

[PATCH 03 of 12] Win32: non-ASCII directory names support in ngx_getcwd()

Maxim Dounin 184 January 12, 2023 04:40PM

Re: [PATCH 03 of 12] Win32: non-ASCII directory names support in ngx_getcwd()

Sergey Kandaurov 118 February 17, 2023 10:04AM

Re: [PATCH 03 of 12] Win32: non-ASCII directory names support in ngx_getcwd()

Maxim Dounin 177 February 19, 2023 12:24PM

Re: [PATCH 03 of 12] Win32: non-ASCII directory names support in ngx_getcwd()

Sergey Kandaurov 112 February 22, 2023 11:02AM

[PATCH 01 of 12] Win32: non-ASCII names support in autoindex (ticket #458)

Maxim Dounin 138 January 12, 2023 04:40PM

Re: [PATCH 01 of 12] Win32: non-ASCII names support in autoindex (ticket #458)

Sergey Kandaurov 127 February 17, 2023 09:40AM

Re: [PATCH 01 of 12] Win32: non-ASCII names support in autoindex (ticket #458)

Maxim Dounin 111 February 19, 2023 12:18PM

Re: [PATCH 01 of 12] Win32: non-ASCII names support in autoindex (ticket #458)

Sergey Kandaurov 118 February 22, 2023 10:40AM

[PATCH 02 of 12] Win32: non-ASCII names support in "include" with wildcards

Maxim Dounin 145 January 12, 2023 04:40PM

Re: [PATCH 02 of 12] Win32: non-ASCII names support in "include" with wildcards

Sergey Kandaurov 141 February 17, 2023 09:54AM

Re: [PATCH 02 of 12] Win32: non-ASCII names support in "include" with wildcards

Maxim Dounin 162 February 19, 2023 12:20PM

Re: [PATCH 02 of 12] Win32: non-ASCII names support in "include" with wildcards

Sergey Kandaurov 120 February 22, 2023 10:50AM

[PATCH 04 of 12] Win32: non-ASCII directory names support in ngx_create_dir()

Maxim Dounin 150 January 12, 2023 04:40PM

Re: [PATCH 04 of 12] Win32: non-ASCII directory names support in ngx_create_dir()

Sergey Kandaurov 137 February 17, 2023 10:14AM

[PATCH 05 of 12] Win32: non-ASCII directory names support in ngx_delete_dir()

Maxim Dounin 127 January 12, 2023 04:40PM

Re: [PATCH 05 of 12] Win32: non-ASCII directory names support in ngx_delete_dir()

Sergey Kandaurov 126 February 17, 2023 10:14AM

[PATCH 06 of 12] Win32: reworked ngx_win32_rename_file() to check errors

Maxim Dounin 152 January 12, 2023 04:40PM

[PATCH 07 of 12] Win32: reworked ngx_win32_rename_file() to use nginx wrappers

Maxim Dounin 123 January 12, 2023 04:40PM

[PATCH 09 of 12] Win32: non-ASCII names support in ngx_rename_file()

Maxim Dounin 173 January 12, 2023 04:40PM

[PATCH 10 of 12] Win32: non-ASCII names support in ngx_open_tempfile()

Maxim Dounin 132 January 12, 2023 04:40PM

[PATCH 08 of 12] Win32: non-ASCII names support in ngx_delete_file()

Maxim Dounin 126 January 12, 2023 04:40PM

[PATCH 12 of 12] Win32: non-ASCII names in ngx_fs_bsize(), ngx_fs_available()

Maxim Dounin 148 January 12, 2023 04:40PM

[PATCH 11 of 12] Win32: fixed ngx_fs_bsize() for symlinks

Maxim Dounin 150 January 12, 2023 04:40PM

Re: [PATCH 11 of 12] Win32: fixed ngx_fs_bsize() for symlinks

Sergey Kandaurov 121 February 17, 2023 10:18AM

Re: [PATCH 11 of 12] Win32: fixed ngx_fs_bsize() for symlinks

Maxim Dounin 125 February 19, 2023 12:24PM

Re: [PATCH 11 of 12] Win32: fixed ngx_fs_bsize() for symlinks

Sergey Kandaurov 116 February 22, 2023 11:02AM

Re: [PATCH 11 of 12] Win32: fixed ngx_fs_bsize() for symlinks

Maxim Dounin 119 February 23, 2023 01:48PM

Re: [PATCH 11 of 12] Win32: fixed ngx_fs_bsize() for symlinks

Sergey Kandaurov 124 February 24, 2023 05:42AM

Re: [PATCH 11 of 12] Win32: fixed ngx_fs_bsize() for symlinks

Sergey Kandaurov 132 March 21, 2023 07:26AM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 299
Record Number of Users: 8 on April 13, 2023
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready