Welcome! Log In Create A New Profile

Advanced

[PATCH 01 of 12] Win32: non-ASCII names support in autoindex (ticket #458)

Maxim Dounin
January 12, 2023 04:40PM
# HG changeset patch
# User Maxim Dounin <mdounin@mdounin.ru>
# Date 1673548890 -10800
# Thu Jan 12 21:41:30 2023 +0300
# Node ID 60d845f9505fe1b97c1e04b680523b790e29fdb1
# Parent 07b0bee87f32be91a33210bc06973e07c4c1dac9
Win32: non-ASCII names support in autoindex (ticket #458).

Notably, ngx_open_dir() now supports opening directories with non-ASCII
characters, and directory entries returned by ngx_read_dir() are properly
converted to UTF-8.

diff -r 07b0bee87f32 -r 60d845f9505f src/os/win32/ngx_files.c
--- a/src/os/win32/ngx_files.c Wed Dec 21 14:53:27 2022 +0300
+++ b/src/os/win32/ngx_files.c Thu Jan 12 21:41:30 2023 +0300
@@ -13,7 +13,11 @@

static ngx_int_t ngx_win32_check_filename(u_char *name, u_short *u,
size_t len);
-static u_short *ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len);
+static u_short *ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len,
+ size_t reserved);
+static u_char *ngx_utf16_to_utf8(u_char *utf8, u_short *utf16, size_t *len,
+ size_t *allocated);
+uint32_t ngx_utf16_decode(u_short **u, size_t n);


/* FILE_FLAG_BACKUP_SEMANTICS allows to obtain a handle to a directory */
@@ -28,7 +32,7 @@ ngx_open_file(u_char *name, u_long mode,
u_short utf16[NGX_UTF16_BUFLEN];

len = NGX_UTF16_BUFLEN;
- u = ngx_utf8_to_utf16(utf16, name, &len);
+ u = ngx_utf8_to_utf16(utf16, name, &len, 0);

if (u == NULL) {
return INVALID_HANDLE_VALUE;
@@ -269,7 +273,7 @@ ngx_file_info(u_char *file, ngx_file_inf

len = NGX_UTF16_BUFLEN;

- u = ngx_utf8_to_utf16(utf16, file, &len);
+ u = ngx_utf8_to_utf16(utf16, file, &len, 0);

if (u == NULL) {
return NGX_FILE_ERROR;
@@ -427,49 +431,51 @@ ngx_realpath(u_char *path, u_char *resol
ngx_int_t
ngx_open_dir(ngx_str_t *name, ngx_dir_t *dir)
{
- u_char *pattern, *p;
+ size_t len;
+ u_short *u, *p;
ngx_err_t err;
+ u_short utf16[NGX_UTF16_BUFLEN];

- pattern = malloc(name->len + 3);
- if (pattern == NULL) {
+ len = NGX_UTF16_BUFLEN - 2;
+ u = ngx_utf8_to_utf16(utf16, name->data, &len, 2);
+
+ if (u == NULL) {
return NGX_ERROR;
}

- p = ngx_cpymem(pattern, name->data, name->len);
+ if (ngx_win32_check_filename(name->data, u, len) != NGX_OK) {
+ goto failed;
+ }
+
+ p = &u[len - 1];

*p++ = '/';
*p++ = '*';
*p = '\0';

- dir->dir = FindFirstFile((const char *) pattern, &dir->finddata);
+ dir->dir = FindFirstFileW(u, &dir->finddata);

if (dir->dir == INVALID_HANDLE_VALUE) {
- err = ngx_errno;
- ngx_free(pattern);
- ngx_set_errno(err);
- return NGX_ERROR;
+ goto failed;
}

- ngx_free(pattern);
+ if (u != utf16) {
+ ngx_free(u);
+ }

dir->valid_info = 1;
dir->ready = 1;
+ dir->name = NULL;
+ dir->allocated = 0;

return NGX_OK;
-}

+failed:

-ngx_int_t
-ngx_read_dir(ngx_dir_t *dir)
-{
- if (dir->ready) {
- dir->ready = 0;
- return NGX_OK;
- }
-
- if (FindNextFile(dir->dir, &dir->finddata) != 0) {
- dir->type = 1;
- return NGX_OK;
+ if (u != utf16) {
+ err = ngx_errno;
+ ngx_free(u);
+ ngx_set_errno(err);
}

return NGX_ERROR;
@@ -477,8 +483,56 @@ ngx_read_dir(ngx_dir_t *dir)


ngx_int_t
+ngx_read_dir(ngx_dir_t *dir)
+{
+ u_char *name;
+ size_t len, allocated;
+
+ if (dir->ready) { /* finddata already holds an entry not yet returned */
+ dir->ready = 0;
+ goto convert;
+ }
+
+ if (FindNextFileW(dir->dir, &dir->finddata) != 0) {
+ dir->type = 1;
+ goto convert;
+ }
+
+ return NGX_ERROR;
+
+convert:
+
+ name = dir->name; /* offer the current name buffer for reuse */
+ len = dir->allocated;
+
+ name = ngx_utf16_to_utf8(name, dir->finddata.cFileName, &len, &allocated);
+ if (name == NULL) {
+ return NGX_ERROR;
+ }
+
+ if (name != dir->name) { /* the converter returned a different buffer */
+
+ if (dir->name) {
+ ngx_free(dir->name);
+ }
+
+ dir->name = name;
+ dir->allocated = allocated;
+ }
+
+ dir->namelen = len - 1; /* len counts the terminating NUL */
+
+ return NGX_OK;
+}
+
+
+ngx_int_t
ngx_close_dir(ngx_dir_t *dir)
{
+ if (dir->name) {
+ ngx_free(dir->name);
+ }
+
if (FindClose(dir->dir) == 0) {
return NGX_ERROR;
}
@@ -816,7 +870,7 @@ failed:


static u_short *
-ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len)
+ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len, size_t reserved)
{
u_char *p;
u_short *u, *last;
@@ -865,7 +919,7 @@ ngx_utf8_to_utf16(u_short *utf16, u_char

/* the given buffer is not enough, allocate a new one */

- u = malloc(((p - utf8) + ngx_strlen(p) + 1) * sizeof(u_short));
+ u = malloc(((p - utf8) + ngx_strlen(p) + 1 + reserved) * sizeof(u_short));
if (u == NULL) {
return NULL;
}
@@ -910,3 +964,170 @@ ngx_utf8_to_utf16(u_short *utf16, u_char

/* unreachable */
}
+
+
+/* UTF-16 to UTF-8 (NUL-terminated); malloc()s if utf8[*len] is too small */
+static u_char *
+ngx_utf16_to_utf8(u_char *utf8, u_short *utf16, size_t *len, size_t *allocated)
+{
+ u_char *p, *last;
+ u_short *u, *j;
+ uint32_t n;
+
+ u = utf16;
+ p = utf8;
+ last = utf8 + *len;
+
+ while (p < last) {
+
+ if (*u < 0x80) {
+ *p++ = (u_char) *u;
+
+ if (*u == 0) {
+ *len = p - utf8; /* includes the terminating NUL */
+ return utf8;
+ }
+
+ u++;
+
+ continue;
+ }
+
+ if (p >= last - 4) {
+ *len = p - utf8; /* bytes to carry over into the new buffer */
+ break;
+ }
+
+ n = ngx_utf16_decode(&u, 2);
+
+ if (n > 0x10ffff) { /* utf8 is still the caller's buffer: not ours to free */
+ ngx_set_errno(NGX_EILSEQ);
+ return NULL;
+ }
+
+ if (n >= 0x10000) {
+ *p++ = (u_char) (0xf0 + (n >> 18));
+ *p++ = (u_char) (0x80 + ((n >> 12) & 0x3f));
+ *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
+ *p++ = (u_char) (0x80 + (n & 0x3f));
+ continue;
+
+ }
+
+ if (n >= 0x0800) {
+ *p++ = (u_char) (0xe0 + (n >> 12));
+ *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
+ *p++ = (u_char) (0x80 + (n & 0x3f));
+ continue;
+ }
+
+ *p++ = (u_char) (0xc0 + (n >> 6));
+ *p++ = (u_char) (0x80 + (n & 0x3f));
+ }
+
+ /* the given buffer is not enough, allocate a new one */
+
+ for (j = u; *j; j++) { /* void */ }
+
+ p = malloc((j - utf16) * 4 + 1);
+ if (p == NULL) {
+ return NULL;
+ }
+
+ if (allocated) { /* only set when a new buffer is returned */
+ *allocated = (j - utf16) * 4 + 1;
+ }
+
+ ngx_memcpy(p, utf8, *len);
+
+ utf8 = p; /* from here on utf8 is the buffer allocated above */
+ p += *len;
+
+ for ( ;; ) {
+
+ if (*u < 0x80) {
+ *p++ = (u_char) *u;
+
+ if (*u == 0) {
+ *len = p - utf8; /* includes the terminating NUL */
+ return utf8;
+ }
+
+ u++;
+
+ continue;
+ }
+
+ n = ngx_utf16_decode(&u, 2);
+
+ if (n > 0x10ffff) {
+ ngx_free(utf8); /* our own allocation, safe to release */
+ ngx_set_errno(NGX_EILSEQ);
+ return NULL;
+ }
+
+ if (n >= 0x10000) {
+ *p++ = (u_char) (0xf0 + (n >> 18));
+ *p++ = (u_char) (0x80 + ((n >> 12) & 0x3f));
+ *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
+ *p++ = (u_char) (0x80 + (n & 0x3f));
+ continue;
+
+ }
+
+ if (n >= 0x0800) {
+ *p++ = (u_char) (0xe0 + (n >> 12));
+ *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
+ *p++ = (u_char) (0x80 + (n & 0x3f));
+ continue;
+ }
+
+ *p++ = (u_char) (0xc0 + (n >> 6));
+ *p++ = (u_char) (0x80 + (n & 0x3f));
+ }
+
+ /* unreachable */
+}
+
+
+/*
+ * ngx_utf16_decode() decodes one or two UTF-16 code units
+ * the return values:
+ * 0x80 - 0x10ffff valid character
+ * 0x110000 - 0xfffffffd invalid sequence
+ * 0xfffffffe incomplete sequence
+ * 0xffffffff error
+ */
+
+uint32_t
+ngx_utf16_decode(u_short **u, size_t n)
+{
+ uint32_t k, m;
+
+ k = **u;
+
+ if (k < 0xd800 || k > 0xdfff) { /* not a surrogate: one unit, returned as is */
+ (*u)++;
+ return k;
+ }
+
+ if (k > 0xdbff) { /* 0xdc00 - 0xdfff: low surrogate in the leading position */
+ (*u)++;
+ return 0xffffffff;
+ }
+
+ if (n < 2) { /* presumably n is the number of units available; *u not advanced */
+ return 0xfffffffe;
+ }
+
+ (*u)++;
+
+ m = *(*u)++; /* the second unit is consumed even if it proves invalid */
+
+ if (m < 0xdc00 || m > 0xdfff) { /* high surrogate not followed by a low one */
+ return 0xffffffff;
+
+ }
+
+ return 0x10000 + ((k - 0xd800) << 10) + (m - 0xdc00); /* combine the pair */
+}
diff -r 07b0bee87f32 -r 60d845f9505f src/os/win32/ngx_files.h
--- a/src/os/win32/ngx_files.h Wed Dec 21 14:53:27 2022 +0300
+++ b/src/os/win32/ngx_files.h Thu Jan 12 21:41:30 2023 +0300
@@ -30,7 +30,11 @@ typedef struct {

typedef struct {
HANDLE dir;
- WIN32_FIND_DATA finddata;
+ WIN32_FIND_DATAW finddata;
+
+ u_char *name;
+ size_t namelen;
+ size_t allocated;

unsigned valid_info:1;
unsigned type:1;
@@ -205,8 +209,8 @@ ngx_int_t ngx_close_dir(ngx_dir_t *dir);
#define ngx_dir_access(a) (a)


-#define ngx_de_name(dir) ((u_char *) (dir)->finddata.cFileName)
-#define ngx_de_namelen(dir) ngx_strlen((dir)->finddata.cFileName)
+#define ngx_de_name(dir) (dir)->name
+#define ngx_de_namelen(dir) (dir)->namelen

ngx_int_t ngx_de_info(u_char *name, ngx_dir_t *dir);
#define ngx_de_info_n "dummy()"
_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
https://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject Author Views Posted

[PATCH 00 of 12] win32 non-ASCII names support fixes

Maxim Dounin 825 January 12, 2023 04:40PM

[PATCH 03 of 12] Win32: non-ASCII directory names support in ngx_getcwd()

Maxim Dounin 184 January 12, 2023 04:40PM

Re: [PATCH 03 of 12] Win32: non-ASCII directory names support in ngx_getcwd()

Sergey Kandaurov 118 February 17, 2023 10:04AM

Re: [PATCH 03 of 12] Win32: non-ASCII directory names support in ngx_getcwd()

Maxim Dounin 177 February 19, 2023 12:24PM

Re: [PATCH 03 of 12] Win32: non-ASCII directory names support in ngx_getcwd()

Sergey Kandaurov 112 February 22, 2023 11:02AM

[PATCH 01 of 12] Win32: non-ASCII names support in autoindex (ticket #458)

Maxim Dounin 138 January 12, 2023 04:40PM

Re: [PATCH 01 of 12] Win32: non-ASCII names support in autoindex (ticket #458)

Sergey Kandaurov 128 February 17, 2023 09:40AM

Re: [PATCH 01 of 12] Win32: non-ASCII names support in autoindex (ticket #458)

Maxim Dounin 112 February 19, 2023 12:18PM

Re: [PATCH 01 of 12] Win32: non-ASCII names support in autoindex (ticket #458)

Sergey Kandaurov 118 February 22, 2023 10:40AM

[PATCH 02 of 12] Win32: non-ASCII names support in "include" with wildcards

Maxim Dounin 146 January 12, 2023 04:40PM

Re: [PATCH 02 of 12] Win32: non-ASCII names support in "include" with wildcards

Sergey Kandaurov 142 February 17, 2023 09:54AM

Re: [PATCH 02 of 12] Win32: non-ASCII names support in "include" with wildcards

Maxim Dounin 162 February 19, 2023 12:20PM

Re: [PATCH 02 of 12] Win32: non-ASCII names support in "include" with wildcards

Sergey Kandaurov 120 February 22, 2023 10:50AM

[PATCH 04 of 12] Win32: non-ASCII directory names support in ngx_create_dir()

Maxim Dounin 150 January 12, 2023 04:40PM

Re: [PATCH 04 of 12] Win32: non-ASCII directory names support in ngx_create_dir()

Sergey Kandaurov 137 February 17, 2023 10:14AM

[PATCH 05 of 12] Win32: non-ASCII directory names support in ngx_delete_dir()

Maxim Dounin 127 January 12, 2023 04:40PM

Re: [PATCH 05 of 12] Win32: non-ASCII directory names support in ngx_delete_dir()

Sergey Kandaurov 126 February 17, 2023 10:14AM

[PATCH 06 of 12] Win32: reworked ngx_win32_rename_file() to check errors

Maxim Dounin 152 January 12, 2023 04:40PM

[PATCH 07 of 12] Win32: reworked ngx_win32_rename_file() to use nginx wrappers

Maxim Dounin 124 January 12, 2023 04:40PM

[PATCH 09 of 12] Win32: non-ASCII names support in ngx_rename_file()

Maxim Dounin 173 January 12, 2023 04:40PM

[PATCH 10 of 12] Win32: non-ASCII names support in ngx_open_tempfile()

Maxim Dounin 132 January 12, 2023 04:40PM

[PATCH 08 of 12] Win32: non-ASCII names support in ngx_delete_file()

Maxim Dounin 126 January 12, 2023 04:40PM

[PATCH 12 of 12] Win32: non-ASCII names in ngx_fs_bsize(), ngx_fs_available()

Maxim Dounin 149 January 12, 2023 04:40PM

[PATCH 11 of 12] Win32: fixed ngx_fs_bsize() for symlinks

Maxim Dounin 150 January 12, 2023 04:40PM

Re: [PATCH 11 of 12] Win32: fixed ngx_fs_bsize() for symlinks

Sergey Kandaurov 121 February 17, 2023 10:18AM

Re: [PATCH 11 of 12] Win32: fixed ngx_fs_bsize() for symlinks

Maxim Dounin 125 February 19, 2023 12:24PM

Re: [PATCH 11 of 12] Win32: fixed ngx_fs_bsize() for symlinks

Sergey Kandaurov 116 February 22, 2023 11:02AM

Re: [PATCH 11 of 12] Win32: fixed ngx_fs_bsize() for symlinks

Maxim Dounin 119 February 23, 2023 01:48PM

Re: [PATCH 11 of 12] Win32: fixed ngx_fs_bsize() for symlinks

Sergey Kandaurov 125 February 24, 2023 05:42AM

Re: [PATCH 11 of 12] Win32: fixed ngx_fs_bsize() for symlinks

Sergey Kandaurov 132 March 21, 2023 07:26AM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 317
Record Number of Users: 8 on April 13, 2023
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready