cache: move open to thread pool

Maxim Dounin mdounin at mdounin.ru
Wed Aug 8 18:16:53 UTC 2018


Hello!

On Tue, Aug 07, 2018 at 02:26:01PM -0700, Ka-Hing Cheung via nginx-devel wrote:

> commit 0bcfefa040abdff674047c15701d237ff16a5f2b
> Author: Ka-Hing Cheung <kahing at cloudflare.com>
> Date:   Fri Aug 3 13:37:58 2018 -0700
> 
>     move open to thread pool
> 
>     At cloudflare we found that open() can block a long time, especially
>     at p99 and p999. Moving it to thread pool improved overall p99 by 6x
>     or so during peak time.
> 
>     This has the side effect of disabling nginx's open_file_cache when
>     "aio threads" is turned on. For us we found that to have no impact
>     (probably because we have way too many files for open file cache to
>     be effective anyway).
> 
>     thread pool does increase CPU usage somewhat but we have not measured
>     difference in CPU usage for stock "aio threads" compared to after this
>     patch.
> 
>     Only the cache hit open() is moved to thread pool.
> 
>     We wrote more about it here:
> https://blog.cloudflare.com/how-we-scaled-nginx-and-saved-the-world-54-years-every-day/

First of all, thank you for your work.  The numbers provided in 
the article looks promising.

We've thought about offloading more operations to thread pools, 
and that's basically why "aio_write" is a separate directive, so 
more directives like "aio_<some_operation>" can be added in the 
future.  It's good to see this is finally happening.

Unfortunately, there are number problems with the patch, and it 
needs more work before it can be committed.  In particular:

- The open() calls are offloaded to thread pools unconditionally.  
  This may actually reduce performance in various typical 
  workloads where open() does not block.  Instead, there should be 
  an "aio_open on|off" directive, similar to "aio_write".

- The code bypasses open file cache, and uses a direct call 
  in the http cache code instead.  While it might be ok in your 
  setup, it looks like an incomplete solution from the generic point 
  of view.  A better solution would be to introduce a generic 
  interface in ngx_open_cached_file() to allow use of thread 
  pools.

- The code calls ngx_open_and_stat_file() whithin a thread, which 
  is relatively complex function never designed to be thread safe.  
  While it looks like currently it is, a better solution would be to 
  introduce a separate simple function to execute within a thread, 
  similar to ngx_thread_read_handler().

- In the ngx_http_file_cache_thread_open() function you allocate 
  thread task of size sizeof(ngx_thread_file_open_ctx_t) and store 
  it in the file->thread_task.  There is no guarantee that this 
  task will be compatible with other tasks stored in 
  file->thread_task.  A better solution would be to extend 
  ngx_thread_file_ctx_t and use the same context for all 
  file-related threaded operations.

- A better place for the thread-specific code would be 
  src/os/unix/ngx_files.c, where ngx_thread_file_ctx_t and 
  existing threaded file operations are defined.

- The code uses memory pool operations wihin a thread, 
  specifically ngx_pool_cleanup_add().  Memory pools are not 
  thread safe, and the behaviour of this code is therefore 
  undefined.

- Also there are various style issues, including wrong 
  capitalization of the commit log summary line, strange commit 
  log indentation and header, missing empty lines.  Please take a 
  look at http://nginx.org/en/docs/contributing_changes.html for 
  various hints.  In particular, you may want to use Mercurial to 
  submit patches.

See also below for some in-line comments.

> 
> diff --git src/core/ngx_open_file_cache.c src/core/ngx_open_file_cache.c
> index b23ee78d..9177ebfd 100644
> --- src/core/ngx_open_file_cache.c
> +++ src/core/ngx_open_file_cache.c
> @@ -35,8 +35,6 @@ static ngx_fd_t ngx_open_file_wrapper(ngx_str_t *name,
>      ngx_int_t access, ngx_log_t *log);
>  static ngx_int_t ngx_file_info_wrapper(ngx_str_t *name,
>      ngx_open_file_info_t *of, ngx_file_info_t *fi, ngx_log_t *log);
> -static ngx_int_t ngx_open_and_stat_file(ngx_str_t *name,
> -    ngx_open_file_info_t *of, ngx_log_t *log);
>  static void ngx_open_file_add_event(ngx_open_file_cache_t *cache,
>      ngx_cached_open_file_t *file, ngx_open_file_info_t *of, ngx_log_t *log);
>  static void ngx_open_file_cleanup(void *data);
> @@ -836,7 +834,7 @@ ngx_file_info_wrapper(ngx_str_t *name,
> ngx_open_file_info_t *of,
>  }
> 
> 
> -static ngx_int_t
> +ngx_int_t
>  ngx_open_and_stat_file(ngx_str_t *name, ngx_open_file_info_t *of,
>      ngx_log_t *log)
>  {
> diff --git src/core/ngx_open_file_cache.h src/core/ngx_open_file_cache.h
> index d119c129..94b4d552 100644
> --- src/core/ngx_open_file_cache.h
> +++ src/core/ngx_open_file_cache.h
> @@ -124,6 +124,8 @@ ngx_open_file_cache_t
> *ngx_open_file_cache_init(ngx_pool_t *pool,
>      ngx_uint_t max, time_t inactive);
>  ngx_int_t ngx_open_cached_file(ngx_open_file_cache_t *cache, ngx_str_t *name,
>      ngx_open_file_info_t *of, ngx_pool_t *pool);
> +ngx_int_t ngx_open_and_stat_file(ngx_str_t *name,
> +    ngx_open_file_info_t *of, ngx_log_t *log);
> 
> 
>  #endif /* _NGX_OPEN_FILE_CACHE_H_INCLUDED_ */
> diff --git src/http/ngx_http_cache.h src/http/ngx_http_cache.h
> index f9e96640..2910f6a1 100644
> --- src/http/ngx_http_cache.h
> +++ src/http/ngx_http_cache.h
> @@ -114,6 +114,7 @@ struct ngx_http_cache_s {
>      unsigned                         exists:1;
>      unsigned                         temp_file:1;
>      unsigned                         purged:1;
> +    unsigned                         opening:1;
>      unsigned                         reading:1;
>      unsigned                         secondary:1;
>      unsigned                         background:1;
> diff --git src/http/ngx_http_file_cache.c src/http/ngx_http_file_cache.c
> index 56866fa4..9cb110bf 100644
> --- src/http/ngx_http_file_cache.c
> +++ src/http/ngx_http_file_cache.c
> @@ -18,6 +18,10 @@ static void
> ngx_http_file_cache_lock_wait(ngx_http_request_t *r,
>      ngx_http_cache_t *c);
>  static ngx_int_t ngx_http_file_cache_read(ngx_http_request_t *r,
>      ngx_http_cache_t *c);
> +static ngx_int_t ngx_http_file_cache_aio_open(ngx_http_request_t *r,
> +    ngx_http_cache_t *c, ngx_int_t *rv);
> +static ngx_int_t ngx_http_file_cache_do_aio_open(ngx_http_request_t *r,
> +    ngx_http_cache_t *c, ngx_open_file_info_t *of, ngx_int_t *rv);
>  static ssize_t ngx_http_file_cache_aio_read(ngx_http_request_t *r,
>      ngx_http_cache_t *c);
>  #if (NGX_HAVE_FILE_AIO)
> @@ -268,9 +272,7 @@ ngx_http_file_cache_open(ngx_http_request_t *r)
>      ngx_uint_t                 test;
>      ngx_http_cache_t          *c;
>      ngx_pool_cleanup_t        *cln;
> -    ngx_open_file_info_t       of;
>      ngx_http_file_cache_t     *cache;
> -    ngx_http_core_loc_conf_t  *clcf;
> 
>      c = r->cache;
> 
> @@ -278,6 +280,10 @@ ngx_http_file_cache_open(ngx_http_request_t *r)
>          return NGX_AGAIN;
>      }
> 
> +    if (c->opening) {
> +        return ngx_http_file_cache_aio_open(r, c, &rv);
> +    }
> +

The value of rv is uninitialized here.  While it is not strictly a 
bug, but this code indicate bad design.  Also, duplicate handling 
of "rv == NGX_DECLINED" below and in the 
ngx_http_file_cache_aio_open() function contributes to the 
problem, as well as passing the rv value to the threaded operation 
where it is not needed.  It might be a good idea to re-think the 
design.

>      if (c->reading) {
>          return ngx_http_file_cache_read(r, c);
>      }
> @@ -343,23 +349,33 @@ ngx_http_file_cache_open(ngx_http_request_t *r)
>          goto done;
>      }
> 
> -    clcf = ngx_http_get_module_loc_conf(r, ngx_http_core_module);
> +    return ngx_http_file_cache_aio_open(r, c, &rv);
> 
> -    ngx_memzero(&of, sizeof(ngx_open_file_info_t));
> +done:
> 
> -    of.uniq = c->uniq;
> -    of.valid = clcf->open_file_cache_valid;
> -    of.min_uses = clcf->open_file_cache_min_uses;
> -    of.events = clcf->open_file_cache_events;
> -    of.directio = NGX_OPEN_FILE_DIRECTIO_OFF;
> -    of.read_ahead = clcf->read_ahead;
> +    if (rv == NGX_DECLINED) {
> +        return ngx_http_file_cache_lock(r, c);
> +    }
> 
> -    if (ngx_open_cached_file(clcf->open_file_cache, &c->file.name,
> &of, r->pool)
> -        != NGX_OK)
> -    {
> -        switch (of.err) {
> +    return rv;
> +}
> +
> +static ngx_int_t

Style: there should be two empty lines between functions.

> +ngx_http_file_cache_aio_open(ngx_http_request_t *r, ngx_http_cache_t *c,
> +                             ngx_int_t *rv)

Style: function arguments should be indented with 4 spaces.

> +{
> +    ngx_int_t                  rc;
> +    ngx_open_file_info_t       of;
> +    ngx_http_file_cache_t     *cache = c->file_cache;
> 
> -        case 0:
> +    rc = ngx_http_file_cache_do_aio_open(r, c, &of, rv);
> +    if (rc == NGX_AGAIN) {
> +        return rc;
> +    }
> +
> +    if (rc != NGX_OK) {
> +        switch (of.err) {
> +        case NGX_OK:
>              return NGX_ERROR;
> 
>          case NGX_ENOENT:
> @@ -391,14 +407,13 @@ ngx_http_file_cache_open(ngx_http_request_t *r)
> 
>  done:
> 
> -    if (rv == NGX_DECLINED) {
> +    if (*rv == NGX_DECLINED) {
>          return ngx_http_file_cache_lock(r, c);
>      }
> 
> -    return rv;
> +    return *rv;
>  }
> 
> -
>  static ngx_int_t

Style: there should be two empty lines between functions:

>  ngx_http_file_cache_lock(ngx_http_request_t *r, ngx_http_cache_t *c)
>  {
> @@ -663,6 +678,127 @@ ngx_http_file_cache_read(ngx_http_request_t *r,
> ngx_http_cache_t *c)
>  }
> 
> 
> +#if (NGX_THREADS)
> +typedef struct {

Style: as long as #if contains something complex, it should be 
separated with empty lines.

> +    ngx_str_t               name;
> +    ngx_pool_t             *pool;
> +    ngx_open_file_info_t    of;
> +    ngx_int_t               rv;
> +} ngx_thread_file_open_ctx_t;
> +
> +static void
> +ngx_http_file_cache_thread_open_handler(void *data, ngx_log_t *log)
> +{
> +    ngx_thread_file_open_ctx_t      *ctx = data;
> +    ngx_pool_t                      *pool = ctx->pool;
> +    ngx_open_file_info_t            *of = &ctx->of;
> +    ngx_pool_cleanup_t              *cln;
> +    ngx_pool_cleanup_file_t         *clnf;
> +    ngx_int_t                        rc;

Style: variable types should be sorted from shortest to longest; 
variable names should be separated with two spaces from the 
longest type; assignments should be written separately, except 
some specific cases where one assignment is allowed in a separate 
variables block.

> +
> +    ngx_log_debug0(NGX_LOG_DEBUG_CORE, log, 0, "thread read handler");

The "read" here is incorrect, should be "open".

> +
> +    cln = ngx_pool_cleanup_add(pool, sizeof(ngx_pool_cleanup_file_t));
> +    if (cln == NULL) {
> +        ctx->of.err = NGX_ERROR;
> +        return;
> +    }

The code is executed in a thread, and using request pool here is 
not allowed.

> +
> +    of->fd = NGX_INVALID_FILE;
> +
> +    rc = ngx_open_and_stat_file(&ctx->name, of, pool->log);
> +    if (rc == NGX_OK && !of->is_dir) {
> +        cln->handler = ngx_pool_cleanup_file;
> +        clnf = cln->data;
> +
> +        clnf->fd = of->fd;
> +        clnf->name = ctx->name.data;
> +        clnf->log = pool->log;
> +    }
> +}
> +
> +
> +static ngx_int_t
> +ngx_http_file_cache_thread_open(ngx_http_cache_t *c, ngx_open_file_info_t *of,
> +                                ngx_int_t *rv, ngx_pool_t *pool)
> +{
> +    ngx_thread_task_t           *task;
> +    ngx_thread_file_open_ctx_t  *ctx;
> +    ngx_file_t                  *file = &c->file;

Style, see above.

> +
> +    task = file->thread_task;
> +
> +    if (task == NULL) {
> +        task = ngx_thread_task_alloc(pool, sizeof(ngx_thread_file_open_ctx_t));
> +        if (task == NULL) {
> +            return NGX_ERROR;
> +        }
> +
> +        file->thread_task = task;
> +    }

See above, this needs some guarantee that what's stored in 
file->thread_task is compatible with all operations.

> +
> +    ctx = task->ctx;
> +
> +    if (task->event.complete) {
> +        task->event.complete = 0;
> +
> +        *of = ctx->of;
> +        *rv = ctx->rv;
> +        return of->err;
> +    }
> +
> +    task->handler = ngx_http_file_cache_thread_open_handler;
> +
> +    ctx->pool = pool;
> +    ctx->name = file->name;
> +    ctx->of = *of;
> +    ctx->rv = *rv;
> +
> +    if (file->thread_handler(task, file) != NGX_OK) {
> +        return NGX_ERROR;
> +    }
> +
> +    return NGX_AGAIN;
> +}
> +#endif
> +
> +static ngx_int_t

Style: there should be an empty line before "#endif", and two 
empty lines between "#endif" and the next function.

> +ngx_http_file_cache_do_aio_open(ngx_http_request_t *r, ngx_http_cache_t *c,
> +    ngx_open_file_info_t *of, ngx_int_t *rv)
> +{
> +    ngx_http_core_loc_conf_t  *clcf;
> +
> +    clcf = ngx_http_get_module_loc_conf(r, ngx_http_core_module);
> +
> +    ngx_memzero(of, sizeof(ngx_open_file_info_t));
> +
> +    of->uniq = c->uniq;
> +    of->valid = clcf->open_file_cache_valid;
> +    of->min_uses = clcf->open_file_cache_min_uses;
> +    of->events = clcf->open_file_cache_events;
> +    of->directio = NGX_OPEN_FILE_DIRECTIO_OFF;
> +    of->read_ahead = clcf->read_ahead;
> +
> +#if (NGX_THREADS)
> +    if (clcf->aio == NGX_HTTP_AIO_THREADS) {
> +        ngx_int_t rc;

Style: there should be two spaces between a type and a variable 
name; there should be an empty line after #if (and before #endif).

> +
> +        c->file.thread_task = c->thread_task;
> +        c->file.thread_handler = ngx_http_cache_thread_handler;
> +        c->file.thread_ctx = r;
> +
> +        rc = ngx_http_file_cache_thread_open(c, of, rv, r->pool);
> +
> +        c->thread_task = c->file.thread_task;
> +        c->opening = (rc == NGX_AGAIN);
> +
> +        return rc;
> +    }
> +#endif
> +
> +    return ngx_open_cached_file(clcf->open_file_cache, &c->file.name,
> of, r->pool);

Style: maximum text width is 80 characters.

> +}
> +
>  static ssize_t
>  ngx_http_file_cache_aio_read(ngx_http_request_t *r, ngx_http_cache_t *c)
>  {
> _______________________________________________
> nginx-devel mailing list
> nginx-devel at nginx.org
> http://mailman.nginx.org/mailman/listinfo/nginx-devel

-- 
Maxim Dounin
http://mdounin.ru/


More information about the nginx-devel mailing list