[PATCH 01 of 12] Win32: non-ASCII names support in autoindex (ticket #458)
Maxim Dounin
mdounin at mdounin.ru
Thu Jan 12 21:35:24 UTC 2023
# HG changeset patch
# User Maxim Dounin <mdounin at mdounin.ru>
# Date 1673548890 -10800
# Thu Jan 12 21:41:30 2023 +0300
# Node ID 60d845f9505fe1b97c1e04b680523b790e29fdb1
# Parent 07b0bee87f32be91a33210bc06973e07c4c1dac9
Win32: non-ASCII names support in autoindex (ticket #458).
Notably, ngx_open_dir() now supports opening directories with non-ASCII
characters, and directory entries returned by ngx_read_dir() are properly
converted to UTF-8.
diff -r 07b0bee87f32 -r 60d845f9505f src/os/win32/ngx_files.c
--- a/src/os/win32/ngx_files.c Wed Dec 21 14:53:27 2022 +0300
+++ b/src/os/win32/ngx_files.c Thu Jan 12 21:41:30 2023 +0300
@@ -13,7 +13,11 @@
static ngx_int_t ngx_win32_check_filename(u_char *name, u_short *u,
size_t len);
-static u_short *ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len);
+static u_short *ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len,
+ size_t reserved);
+static u_char *ngx_utf16_to_utf8(u_char *utf8, u_short *utf16, size_t *len,
+ size_t *allocated);
+uint32_t ngx_utf16_decode(u_short **u, size_t n);
/* FILE_FLAG_BACKUP_SEMANTICS allows to obtain a handle to a directory */
@@ -28,7 +32,7 @@ ngx_open_file(u_char *name, u_long mode,
u_short utf16[NGX_UTF16_BUFLEN];
len = NGX_UTF16_BUFLEN;
- u = ngx_utf8_to_utf16(utf16, name, &len);
+ u = ngx_utf8_to_utf16(utf16, name, &len, 0);
if (u == NULL) {
return INVALID_HANDLE_VALUE;
@@ -269,7 +273,7 @@ ngx_file_info(u_char *file, ngx_file_inf
len = NGX_UTF16_BUFLEN;
- u = ngx_utf8_to_utf16(utf16, file, &len);
+ u = ngx_utf8_to_utf16(utf16, file, &len, 0);
if (u == NULL) {
return NGX_FILE_ERROR;
@@ -427,49 +431,51 @@ ngx_realpath(u_char *path, u_char *resol
ngx_int_t
ngx_open_dir(ngx_str_t *name, ngx_dir_t *dir)
{
- u_char *pattern, *p;
+ size_t len;
+ u_short *u, *p;
ngx_err_t err;
+ u_short utf16[NGX_UTF16_BUFLEN];
- pattern = malloc(name->len + 3);
- if (pattern == NULL) {
+ len = NGX_UTF16_BUFLEN - 2;
+ u = ngx_utf8_to_utf16(utf16, name->data, &len, 2);
+
+ if (u == NULL) {
return NGX_ERROR;
}
- p = ngx_cpymem(pattern, name->data, name->len);
+ if (ngx_win32_check_filename(name->data, u, len) != NGX_OK) {
+ goto failed;
+ }
+
+ p = &u[len - 1];
*p++ = '/';
*p++ = '*';
*p = '\0';
- dir->dir = FindFirstFile((const char *) pattern, &dir->finddata);
+ dir->dir = FindFirstFileW(u, &dir->finddata);
if (dir->dir == INVALID_HANDLE_VALUE) {
- err = ngx_errno;
- ngx_free(pattern);
- ngx_set_errno(err);
- return NGX_ERROR;
+ goto failed;
}
- ngx_free(pattern);
+ if (u != utf16) {
+ ngx_free(u);
+ }
dir->valid_info = 1;
dir->ready = 1;
+ dir->name = NULL;
+ dir->allocated = 0;
return NGX_OK;
-}
+failed:
-ngx_int_t
-ngx_read_dir(ngx_dir_t *dir)
-{
- if (dir->ready) {
- dir->ready = 0;
- return NGX_OK;
- }
-
- if (FindNextFile(dir->dir, &dir->finddata) != 0) {
- dir->type = 1;
- return NGX_OK;
+ if (u != utf16) {
+ err = ngx_errno;
+ ngx_free(u);
+ ngx_set_errno(err);
}
return NGX_ERROR;
@@ -477,8 +483,56 @@ ngx_read_dir(ngx_dir_t *dir)
ngx_int_t
+ngx_read_dir(ngx_dir_t *dir)
+{ /*
+   * Makes the next directory entry current and stores its name, converted
+   * from UTF-16 to UTF-8, in dir->name / dir->namelen.
+   *
+   * Returns NGX_OK when an entry is available, NGX_ERROR when
+   * FindNextFileW() reports no further entry or fails, or when the name
+   * cannot be converted.
+   */
+ u_char *name;
+ size_t len, allocated;
+
+ if (dir->ready) { /* ngx_open_dir() already fetched the first entry */
+ dir->ready = 0;
+ goto convert;
+ }
+
+ if (FindNextFileW(dir->dir, &dir->finddata) != 0) {
+ dir->type = 1;
+ goto convert;
+ }
+
+ return NGX_ERROR;
+
+convert:
+
+ name = dir->name; /* offer the buffer kept from the previous entry */
+ len = dir->allocated; /* in: buffer size, out: bytes written */
+
+ name = ngx_utf16_to_utf8(name, dir->finddata.cFileName, &len, &allocated);
+ if (name == NULL) {
+ return NGX_ERROR;
+ }
+
+ if (name != dir->name) { /* the converter had to allocate a new buffer */
+
+ if (dir->name) {
+ ngx_free(dir->name);
+ }
+
+ dir->name = name;
+ dir->allocated = allocated;
+ }
+
+ dir->namelen = len - 1; /* len counts the terminating null */
+
+ return NGX_OK;
+}
+
+
+ngx_int_t
ngx_close_dir(ngx_dir_t *dir)
{
+ if (dir->name) {
+ ngx_free(dir->name);
+ }
+
if (FindClose(dir->dir) == 0) {
return NGX_ERROR;
}
@@ -816,7 +870,7 @@ failed:
static u_short *
-ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len)
+ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len, size_t reserved)
{
u_char *p;
u_short *u, *last;
@@ -865,7 +919,7 @@ ngx_utf8_to_utf16(u_short *utf16, u_char
/* the given buffer is not enough, allocate a new one */
- u = malloc(((p - utf8) + ngx_strlen(p) + 1) * sizeof(u_short));
+ u = malloc(((p - utf8) + ngx_strlen(p) + 1 + reserved) * sizeof(u_short));
if (u == NULL) {
return NULL;
}
@@ -910,3 +964,170 @@ ngx_utf8_to_utf16(u_short *utf16, u_char
/* unreachable */
}
+
+
+/*
+ * ngx_utf16_to_utf8() converts a null-terminated UTF-16 string to UTF-8
+ *
+ * utf8 and *len describe an optional caller-provided buffer (utf8 may be
+ * NULL with *len set to 0).  If the converted string, including the
+ * terminating null, fits into it, the buffer is filled and utf8 itself
+ * is returned.  Otherwise a new buffer is obtained with malloc(), the
+ * part converted so far is copied into it, the conversion is finished
+ * there, and the new buffer is returned; its size is stored in
+ * *allocated if allocated is not NULL.
+ *
+ * The caller-provided buffer is never freed by this function: the caller
+ * (see ngx_read_dir() and ngx_close_dir()) still references it and frees
+ * it itself, so freeing it here would lead to a double free.
+ *
+ * On success *len is set to the number of bytes written, including the
+ * terminating null.  NULL is returned if malloc() fails, or, with errno
+ * set to NGX_EILSEQ, if the input contains an invalid UTF-16 sequence.
+ */
+
+static u_char *
+ngx_utf16_to_utf8(u_char *utf8, u_short *utf16, size_t *len, size_t *allocated)
+{
+    u_char    *p, *last;
+    size_t     size;
+    u_short   *u, *j;
+    uint32_t   n;
+
+    u = utf16;
+    p = utf8;
+    last = utf8 + *len;
+
+    /* first pass: convert into the caller-provided buffer while it lasts */
+
+    while (p < last) {
+
+        if (*u < 0x80) {
+            *p++ = (u_char) *u;
+
+            if (*u == 0) {
+                *len = p - utf8;
+                return utf8;
+            }
+
+            u++;
+
+            continue;
+        }
+
+        /*
+         * a non-ASCII character is only written while more than 4 bytes
+         * are left; compare lengths instead of computing "last - 4",
+         * which would point before the buffer when it is shorter than
+         * 4 bytes
+         */
+
+        if ((size_t) (last - p) <= 4) {
+            *len = p - utf8;
+            break;
+        }
+
+        n = ngx_utf16_decode(&u, 2);
+
+        if (n > 0x10ffff) {
+
+            /* utf8 is the caller's buffer here, it must not be freed */
+
+            ngx_set_errno(NGX_EILSEQ);
+            return NULL;
+        }
+
+        if (n >= 0x10000) {
+            *p++ = (u_char) (0xf0 + (n >> 18));
+            *p++ = (u_char) (0x80 + ((n >> 12) & 0x3f));
+            *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
+            *p++ = (u_char) (0x80 + (n & 0x3f));
+            continue;
+        }
+
+        if (n >= 0x0800) {
+            *p++ = (u_char) (0xe0 + (n >> 12));
+            *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
+            *p++ = (u_char) (0x80 + (n & 0x3f));
+            continue;
+        }
+
+        *p++ = (u_char) (0xc0 + (n >> 6));
+        *p++ = (u_char) (0x80 + (n & 0x3f));
+    }
+
+    /*
+     * the given buffer is not enough, allocate a new one;
+     * *len is the number of bytes converted so far: it was either set
+     * before the break above or the buffer was filled completely
+     */
+
+    for (j = u; *j; j++) { /* void */ }
+
+    /* at most 4 bytes per UTF-16 code unit, plus the terminating null */
+
+    size = (size_t) (j - utf16) * 4 + 1;
+
+    p = malloc(size);
+    if (p == NULL) {
+        return NULL;
+    }
+
+    if (allocated) {
+        *allocated = size;
+    }
+
+    if (*len) {
+        /* utf8 may be NULL when no buffer was provided, skip the copy */
+        ngx_memcpy(p, utf8, *len);
+    }
+
+    utf8 = p;
+    p += *len;
+
+    /* second pass: the new buffer is large enough, no bounds checks */
+
+    for ( ;; ) {
+
+        if (*u < 0x80) {
+            *p++ = (u_char) *u;
+
+            if (*u == 0) {
+                *len = p - utf8;
+                return utf8;
+            }
+
+            u++;
+
+            continue;
+        }
+
+        n = ngx_utf16_decode(&u, 2);
+
+        if (n > 0x10ffff) {
+
+            /* utf8 is the buffer allocated above, release it */
+
+            ngx_free(utf8);
+            ngx_set_errno(NGX_EILSEQ);
+            return NULL;
+        }
+
+        if (n >= 0x10000) {
+            *p++ = (u_char) (0xf0 + (n >> 18));
+            *p++ = (u_char) (0x80 + ((n >> 12) & 0x3f));
+            *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
+            *p++ = (u_char) (0x80 + (n & 0x3f));
+            continue;
+        }
+
+        if (n >= 0x0800) {
+            *p++ = (u_char) (0xe0 + (n >> 12));
+            *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
+            *p++ = (u_char) (0x80 + (n & 0x3f));
+            continue;
+        }
+
+        *p++ = (u_char) (0xc0 + (n >> 6));
+        *p++ = (u_char) (0x80 + (n & 0x3f));
+    }
+
+    /* unreachable */
+}
+
+
+/*
+ * ngx_utf16_decode() decodes one or two UTF-16 code units
+ * starting at *u and advances *u past the code units it consumed;
+ * n is the number of code units the caller allows it to read
+ *
+ * the return values:
+ * 0x80 - 0x10ffff valid character
+ * 0x110000 - 0xfffffffd invalid sequence
+ * 0xfffffffe incomplete sequence
+ * 0xffffffff error
+ *
+ * notes on the code below:
+ * - a code unit outside of the surrogate range 0xd800 - 0xdfff is
+ *   returned as is, so values below 0x80 are returned as well
+ * - 0xfffffffe is returned for a high surrogate when n < 2,
+ *   *u is not advanced in this case
+ * - 0xffffffff is returned for a low surrogate without a preceding
+ *   high one (one code unit consumed), and for a high surrogate not
+ *   followed by a low one (both code units consumed, even if the
+ *   second one is the terminating null)
+ * - no path in the code below returns a value in the
+ *   0x110000 - 0xfffffffd range
+ */
+
+uint32_t
+ngx_utf16_decode(u_short **u, size_t n)
+{
+ uint32_t k, m;
+
+ k = **u;
+
+ if (k < 0xd800 || k > 0xdfff) { /* not a surrogate */
+ (*u)++;
+ return k;
+ }
+
+ if (k > 0xdbff) { /* low surrogate comes first */
+ (*u)++;
+ return 0xffffffff;
+ }
+
+ if (n < 2) { /* high surrogate, but no second code unit may be read */
+ return 0xfffffffe;
+ }
+
+ (*u)++;
+
+ m = *(*u)++;
+
+ if (m < 0xdc00 || m > 0xdfff) { /* second unit is not a low surrogate */
+ return 0xffffffff;
+
+ }
+
+ return 0x10000 + ((k - 0xd800) << 10) + (m - 0xdc00);
+}
diff -r 07b0bee87f32 -r 60d845f9505f src/os/win32/ngx_files.h
--- a/src/os/win32/ngx_files.h Wed Dec 21 14:53:27 2022 +0300
+++ b/src/os/win32/ngx_files.h Thu Jan 12 21:41:30 2023 +0300
@@ -30,7 +30,11 @@ typedef struct {
typedef struct {
HANDLE dir;
- WIN32_FIND_DATA finddata;
+ WIN32_FIND_DATAW finddata;
+
+ u_char *name;
+ size_t namelen;
+ size_t allocated;
unsigned valid_info:1;
unsigned type:1;
@@ -205,8 +209,8 @@ ngx_int_t ngx_close_dir(ngx_dir_t *dir);
#define ngx_dir_access(a) (a)
-#define ngx_de_name(dir) ((u_char *) (dir)->finddata.cFileName)
-#define ngx_de_namelen(dir) ngx_strlen((dir)->finddata.cFileName)
+#define ngx_de_name(dir) (dir)->name
+#define ngx_de_namelen(dir) (dir)->namelen
ngx_int_t ngx_de_info(u_char *name, ngx_dir_t *dir);
#define ngx_de_info_n "dummy()"
More information about the nginx-devel
mailing list