[PATCH 02 of 12] Win32: non-ASCII names support in "include" with wildcards

Sergey Kandaurov pluknet at nginx.com
Fri Feb 17 14:53:11 UTC 2023


> On 13 Jan 2023, at 01:35, Maxim Dounin <mdounin at mdounin.ru> wrote:
> 
> # HG changeset patch
> # User Maxim Dounin <mdounin at mdounin.ru>
> # Date 1673548899 -10800
> #      Thu Jan 12 21:41:39 2023 +0300
> # Node ID d05c0adf5890aecc68ce8906ef19ca07502ed071
> # Parent  60d845f9505fe1b97c1e04b680523b790e29fdb1
> Win32: non-ASCII names support in "include" with wildcards.
> 
> Notably, ngx_open_glob() now supports opening directories with non-ASCII
> characters, and pathnames returned by ngx_read_glob() are converted to UTF-8.
> 
> diff -r 60d845f9505f -r d05c0adf5890 src/os/win32/ngx_files.c
> --- a/src/os/win32/ngx_files.c	Thu Jan 12 21:41:30 2023 +0300
> +++ b/src/os/win32/ngx_files.c	Thu Jan 12 21:41:39 2023 +0300
> @@ -546,14 +546,27 @@ ngx_open_glob(ngx_glob_t *gl)
> {
>     u_char     *p;
>     size_t      len;
> +    u_short    *u;
>     ngx_err_t   err;
> +    u_short     utf16[NGX_UTF16_BUFLEN];
> 
> -    gl->dir = FindFirstFile((const char *) gl->pattern, &gl->finddata);
> +    len = NGX_UTF16_BUFLEN;
> +    u = ngx_utf8_to_utf16(utf16, gl->pattern, &len, 0);
> +
> +    if (u == NULL) {
> +        return NGX_ERROR;
> +    }
> +
> +    gl->dir = FindFirstFileW(u, &gl->finddata);
> 
>     if (gl->dir == INVALID_HANDLE_VALUE) {
> 
>         err = ngx_errno;
> 
> +        if (u != utf16) {
> +            ngx_free(u);
> +        }
> +
>         if ((err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND)
>              && gl->test)
>         {
> @@ -561,6 +574,8 @@ ngx_open_glob(ngx_glob_t *gl)
>             return NGX_OK;
>         }
> 
> +        ngx_set_errno(err);
> +
>         return NGX_ERROR;
>     }
> 
> @@ -570,18 +585,10 @@ ngx_open_glob(ngx_glob_t *gl)
>         }
>     }
> 
> -    len = ngx_strlen(gl->finddata.cFileName);
> -    gl->name.len = gl->last + len;
> -
> -    gl->name.data = ngx_alloc(gl->name.len + 1, gl->log);
> -    if (gl->name.data == NULL) {
> -        return NGX_ERROR;
> +    if (u != utf16) {
> +        ngx_free(u);
>     }
> 
> -    ngx_memcpy(gl->name.data, gl->pattern, gl->last);
> -    ngx_cpystrn(gl->name.data + gl->last, (u_char *) gl->finddata.cFileName,
> -                len + 1);
> -
>     gl->ready = 1;
> 
>     return NGX_OK;
> @@ -591,40 +598,25 @@ ngx_open_glob(ngx_glob_t *gl)
> ngx_int_t
> ngx_read_glob(ngx_glob_t *gl, ngx_str_t *name)
> {
> -    size_t     len;
> -    ngx_err_t  err;
> +    u_char     *p;
> +    size_t      len;
> +    ngx_err_t   err;
> +    u_char      utf8[NGX_UTF16_BUFLEN];

Mixing UTF16-specific macro with u_char utf8[] looks suspicious.
It may have sense to replace this with dynamic allocation
inside ngx_utf16_to_utf8(), similar to ngx_read_dir() 1st call,
or use NGX_MAX_PATH.

> 
>     if (gl->no_match) {
>         return NGX_DONE;
>     }
> 
>     if (gl->ready) {
> -        *name = gl->name;
> -
>         gl->ready = 0;
> -        return NGX_OK;
> +        goto convert;
>     }
> 
>     ngx_free(gl->name.data);
>     gl->name.data = NULL;
> 
> -    if (FindNextFile(gl->dir, &gl->finddata) != 0) {
> -
> -        len = ngx_strlen(gl->finddata.cFileName);
> -        gl->name.len = gl->last + len;
> -
> -        gl->name.data = ngx_alloc(gl->name.len + 1, gl->log);
> -        if (gl->name.data == NULL) {
> -            return NGX_ERROR;
> -        }
> -
> -        ngx_memcpy(gl->name.data, gl->pattern, gl->last);
> -        ngx_cpystrn(gl->name.data + gl->last, (u_char *) gl->finddata.cFileName,
> -                    len + 1);
> -
> -        *name = gl->name;
> -
> -        return NGX_OK;
> +    if (FindNextFileW(gl->dir, &gl->finddata) != 0) {
> +        goto convert;
>     }
> 
>     err = ngx_errno;
> @@ -637,6 +629,43 @@ ngx_read_glob(ngx_glob_t *gl, ngx_str_t 
>                   "FindNextFile(%s) failed", gl->pattern);
> 
>     return NGX_ERROR;
> +
> +convert:
> +
> +    len = NGX_UTF16_BUFLEN;
> +    p = ngx_utf16_to_utf8(utf8, gl->finddata.cFileName, &len, NULL);

Note that passing utf8 off the stack can result in an attempt to free
the stack pointer inside ngx_utf16_to_utf8() on NGX_EILSEQ.

> +

Nitpicking: there (and in subsequent patches) you're starting to add
an empty line after ngx_utf16_to_utf8(); it's not in the 1st path.

> +    if (p == NULL) {
> +        return NGX_ERROR;
> +    }
> +
> +    gl->name.len = gl->last + len - 1;
> +
> +    gl->name.data = ngx_alloc(gl->name.len + 1, gl->log);
> +    if (gl->name.data == NULL) {
> +        goto failed;
> +    }
> +
> +    ngx_memcpy(gl->name.data, gl->pattern, gl->last);
> +    ngx_cpystrn(gl->name.data + gl->last, p, len);
> +
> +    if (p != utf8) {
> +        ngx_free(p);
> +    }
> +
> +    *name = gl->name;
> +
> +    return NGX_OK;
> +
> +failed:
> +
> +    if (p != utf8) {
> +        err = ngx_errno;
> +        ngx_free(p);
> +        ngx_set_errno(err);
> +    }
> +
> +    return NGX_ERROR;
> }
> 
> 
> diff -r 60d845f9505f -r d05c0adf5890 src/os/win32/ngx_files.h
> --- a/src/os/win32/ngx_files.h	Thu Jan 12 21:41:30 2023 +0300
> +++ b/src/os/win32/ngx_files.h	Thu Jan 12 21:41:39 2023 +0300
> @@ -44,7 +44,7 @@ typedef struct {
> 
> typedef struct {
>     HANDLE                          dir;
> -    WIN32_FIND_DATA                 finddata;
> +    WIN32_FIND_DATAW                finddata;
> 
>     unsigned                        ready:1;
>     unsigned                        test:1;

-- 
Sergey Kandaurov


More information about the nginx-devel mailing list