[PATCH 01 of 12] Win32: non-ASCII names support in autoindex (ticket #458)

Maxim Dounin mdounin at mdounin.ru
Thu Jan 12 21:35:24 UTC 2023


# HG changeset patch
# User Maxim Dounin <mdounin at mdounin.ru>
# Date 1673548890 -10800
#      Thu Jan 12 21:41:30 2023 +0300
# Node ID 60d845f9505fe1b97c1e04b680523b790e29fdb1
# Parent  07b0bee87f32be91a33210bc06973e07c4c1dac9
Win32: non-ASCII names support in autoindex (ticket #458).

Notably, ngx_open_dir() now supports opening directories with non-ASCII
characters, and directory entries returned by ngx_read_dir() are properly
converted to UTF-8.

diff -r 07b0bee87f32 -r 60d845f9505f src/os/win32/ngx_files.c
--- a/src/os/win32/ngx_files.c	Wed Dec 21 14:53:27 2022 +0300
+++ b/src/os/win32/ngx_files.c	Thu Jan 12 21:41:30 2023 +0300
@@ -13,7 +13,11 @@
 
 static ngx_int_t ngx_win32_check_filename(u_char *name, u_short *u,
     size_t len);
-static u_short *ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len);
+static u_short *ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len,
+    size_t reserved);
+static u_char *ngx_utf16_to_utf8(u_char *utf8, u_short *utf16, size_t *len,
+    size_t *allocated);
+uint32_t ngx_utf16_decode(u_short **u, size_t n);
 
 
 /* FILE_FLAG_BACKUP_SEMANTICS allows to obtain a handle to a directory */
@@ -28,7 +32,7 @@ ngx_open_file(u_char *name, u_long mode,
     u_short     utf16[NGX_UTF16_BUFLEN];
 
     len = NGX_UTF16_BUFLEN;
-    u = ngx_utf8_to_utf16(utf16, name, &len);
+    u = ngx_utf8_to_utf16(utf16, name, &len, 0);
 
     if (u == NULL) {
         return INVALID_HANDLE_VALUE;
@@ -269,7 +273,7 @@ ngx_file_info(u_char *file, ngx_file_inf
 
     len = NGX_UTF16_BUFLEN;
 
-    u = ngx_utf8_to_utf16(utf16, file, &len);
+    u = ngx_utf8_to_utf16(utf16, file, &len, 0);
 
     if (u == NULL) {
         return NGX_FILE_ERROR;
@@ -427,49 +431,51 @@ ngx_realpath(u_char *path, u_char *resol
 ngx_int_t
 ngx_open_dir(ngx_str_t *name, ngx_dir_t *dir)
 {
-    u_char     *pattern, *p;
+    size_t      len;
+    u_short    *u, *p;
     ngx_err_t   err;
+    u_short     utf16[NGX_UTF16_BUFLEN];
 
-    pattern = malloc(name->len + 3);
-    if (pattern == NULL) {
+    len = NGX_UTF16_BUFLEN - 2;
+    u = ngx_utf8_to_utf16(utf16, name->data, &len, 2);
+
+    if (u == NULL) {
         return NGX_ERROR;
     }
 
-    p = ngx_cpymem(pattern, name->data, name->len);
+    if (ngx_win32_check_filename(name->data, u, len) != NGX_OK) {
+        goto failed;
+    }
+
+    p = &u[len - 1];
 
     *p++ = '/';
     *p++ = '*';
     *p = '\0';
 
-    dir->dir = FindFirstFile((const char *) pattern, &dir->finddata);
+    dir->dir = FindFirstFileW(u, &dir->finddata);
 
     if (dir->dir == INVALID_HANDLE_VALUE) {
-        err = ngx_errno;
-        ngx_free(pattern);
-        ngx_set_errno(err);
-        return NGX_ERROR;
+        goto failed;
     }
 
-    ngx_free(pattern);
+    if (u != utf16) {
+        ngx_free(u);
+    }
 
     dir->valid_info = 1;
     dir->ready = 1;
+    dir->name = NULL;
+    dir->allocated = 0;
 
     return NGX_OK;
-}
 
+failed:
 
-ngx_int_t
-ngx_read_dir(ngx_dir_t *dir)
-{
-    if (dir->ready) {
-        dir->ready = 0;
-        return NGX_OK;
-    }
-
-    if (FindNextFile(dir->dir, &dir->finddata) != 0) {
-        dir->type = 1;
-        return NGX_OK;
+    if (u != utf16) {
+        err = ngx_errno;
+        ngx_free(u);
+        ngx_set_errno(err);
     }
 
     return NGX_ERROR;
@@ -477,8 +483,56 @@ ngx_read_dir(ngx_dir_t *dir)
 
 
 ngx_int_t
+ngx_read_dir(ngx_dir_t *dir)
+{
+    u_char  *name;
+    size_t   len, allocated;
+
+    if (dir->ready) {
+        dir->ready = 0;
+        goto convert;
+    }
+
+    if (FindNextFileW(dir->dir, &dir->finddata) != 0) {
+        dir->type = 1;
+        goto convert;
+    }
+
+    return NGX_ERROR;
+
+convert:
+
+    name = dir->name;
+    len = dir->allocated;
+
+    name = ngx_utf16_to_utf8(name, dir->finddata.cFileName, &len, &allocated);
+    if (name == NULL) {
+        return NGX_ERROR;
+    }
+
+    if (name != dir->name) {
+
+        if (dir->name) {
+            ngx_free(dir->name);
+        }
+
+        dir->name = name;
+        dir->allocated = allocated;
+    }
+
+    dir->namelen = len - 1;
+
+    return NGX_OK;
+}
+
+
+ngx_int_t
 ngx_close_dir(ngx_dir_t *dir)
 {
+    if (dir->name) {
+        ngx_free(dir->name);
+    }
+
     if (FindClose(dir->dir) == 0) {
         return NGX_ERROR;
     }
@@ -816,7 +870,7 @@ failed:
 
 
 static u_short *
-ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len)
+ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len, size_t reserved)
 {
     u_char    *p;
     u_short   *u, *last;
@@ -865,7 +919,7 @@ ngx_utf8_to_utf16(u_short *utf16, u_char
 
     /* the given buffer is not enough, allocate a new one */
 
-    u = malloc(((p - utf8) + ngx_strlen(p) + 1) * sizeof(u_short));
+    u = malloc(((p - utf8) + ngx_strlen(p) + 1 + reserved) * sizeof(u_short));
     if (u == NULL) {
         return NULL;
     }
@@ -910,3 +964,170 @@ ngx_utf8_to_utf16(u_short *utf16, u_char
 
     /* unreachable */
 }
+
+
+static u_char *
+ngx_utf16_to_utf8(u_char *utf8, u_short *utf16, size_t *len, size_t *allocated)
+{
+    u_char    *p, *last;
+    u_short   *u, *j;
+    uint32_t   n;
+
+    u = utf16;
+    p = utf8;
+    last = utf8 + *len;
+
+    while (p < last) {
+
+        if (*u < 0x80) {
+            *p++ = (u_char) *u;
+
+            if (*u == 0) {
+                *len = p - utf8;
+                return utf8;
+            }
+
+            u++;
+
+            continue;
+        }
+
+        if (p >= last - 4) {
+            *len = p - utf8;
+            break;
+        }
+
+        n = ngx_utf16_decode(&u, 2);
+
+        if (n > 0x10ffff) {
+            ngx_free(utf8);
+            ngx_set_errno(NGX_EILSEQ);
+            return NULL;
+        }
+
+        if (n >= 0x10000) {
+            *p++ = (u_char) (0xf0 + (n >> 18));
+            *p++ = (u_char) (0x80 + ((n >> 12) & 0x3f));
+            *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
+            *p++ = (u_char) (0x80 + (n & 0x3f));
+            continue;
+            
+        }
+
+        if (n >= 0x0800) {
+            *p++ = (u_char) (0xe0 + (n >> 12));
+            *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
+            *p++ = (u_char) (0x80 + (n & 0x3f));
+            continue;
+        }
+
+        *p++ = (u_char) (0xc0 + (n >> 6));
+        *p++ = (u_char) (0x80 + (n & 0x3f));
+    }
+
+    /* the given buffer is not enough, allocate a new one */
+
+    for (j = u; *j; j++) { /* void */ }
+
+    p = malloc((j - utf16) * 4 + 1);
+    if (p == NULL) {
+        return NULL;
+    }
+
+    if (allocated) {
+        *allocated = (j - utf16) * 4 + 1;
+    }
+
+    ngx_memcpy(p, utf8, *len);
+
+    utf8 = p;
+    p += *len;
+
+    for ( ;; ) {
+
+        if (*u < 0x80) {
+            *p++ = (u_char) *u;
+
+            if (*u == 0) {
+                *len = p - utf8;
+                return utf8;
+            }
+
+            u++;
+
+            continue;
+        }
+
+        n = ngx_utf16_decode(&u, 2);
+
+        if (n > 0x10ffff) {
+            ngx_free(utf8);
+            ngx_set_errno(NGX_EILSEQ);
+            return NULL;
+        }
+
+        if (n >= 0x10000) {
+            *p++ = (u_char) (0xf0 + (n >> 18));
+            *p++ = (u_char) (0x80 + ((n >> 12) & 0x3f));
+            *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
+            *p++ = (u_char) (0x80 + (n & 0x3f));
+            continue;
+            
+        }
+
+        if (n >= 0x0800) {
+            *p++ = (u_char) (0xe0 + (n >> 12));
+            *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
+            *p++ = (u_char) (0x80 + (n & 0x3f));
+            continue;
+        }
+
+        *p++ = (u_char) (0xc0 + (n >> 6));
+        *p++ = (u_char) (0x80 + (n & 0x3f));
+    }
+
+    /* unreachable */
+}
+
+
+/*
+ * ngx_utf16_decode() decodes one or two UTF-16 code units
+ * the return values:
+ *    0x80 - 0x10ffff         valid character
+ *    0x110000 - 0xfffffffd   invalid sequence
+ *    0xfffffffe              incomplete sequence
+ *    0xffffffff              error
+ */
+
+uint32_t
+ngx_utf16_decode(u_short **u, size_t n)
+{
+    uint32_t  k, m;
+
+    k = **u;
+
+    if (k < 0xd800 || k > 0xdfff) {
+        (*u)++;
+        return k;
+    }
+
+    if (k > 0xdbff) {
+        (*u)++;
+        return 0xffffffff;
+    }
+
+    if (n < 2) {
+        return 0xfffffffe;
+    }
+
+    (*u)++;
+
+    m = *(*u)++;
+
+    if (m < 0xdc00 || m > 0xdfff) {
+        return 0xffffffff;
+
+    }
+
+    return 0x10000 + ((k - 0xd800) << 10) + (m - 0xdc00);
+}
diff -r 07b0bee87f32 -r 60d845f9505f src/os/win32/ngx_files.h
--- a/src/os/win32/ngx_files.h	Wed Dec 21 14:53:27 2022 +0300
+++ b/src/os/win32/ngx_files.h	Thu Jan 12 21:41:30 2023 +0300
@@ -30,7 +30,11 @@ typedef struct {
 
 typedef struct {
     HANDLE                          dir;
-    WIN32_FIND_DATA                 finddata;
+    WIN32_FIND_DATAW                finddata;
+
+    u_char                         *name;
+    size_t                          namelen;
+    size_t                          allocated;
 
     unsigned                        valid_info:1;
     unsigned                        type:1;
@@ -205,8 +209,8 @@ ngx_int_t ngx_close_dir(ngx_dir_t *dir);
 #define ngx_dir_access(a)           (a)
 
 
-#define ngx_de_name(dir)            ((u_char *) (dir)->finddata.cFileName)
-#define ngx_de_namelen(dir)         ngx_strlen((dir)->finddata.cFileName)
+#define ngx_de_name(dir)            (dir)->name
+#define ngx_de_namelen(dir)         (dir)->namelen
 
 ngx_int_t ngx_de_info(u_char *name, ngx_dir_t *dir);
 #define ngx_de_info_n               "dummy()"


More information about the nginx-devel mailing list