remove unneeded io_getevents syscall.

Maxim Dounin mdounin at mdounin.ru
Mon Jan 7 04:24:48 UTC 2019


Hello!

On Mon, Jan 07, 2019 at 12:40:28AM +0300, Vadim Fedorenko wrote:

> # HG changeset patch
> # User Vadim Fedorenko <vadimjunk at gmail.com>
> # Date 1546810494 0
> #      Sun Jan 06 21:34:54 2019 +0000
> # Node ID 3e538f6b8267d36f97b7dd67714c6ba0fba5e5bc
> # Parent  6d15e452fa2eaf19408e24a0d0fcc3a31344a289
> remove unneeded io_getevents syscall.
> 
> This work is based on Cloudflare's work on optimizing Linux AIO:
> https://blog.cloudflare.com/io_submit-the-epoll-alternative-youve-never-heard-about/
> The code is adapted from the libaio library; it eliminates an unnecessary
> context switch when getting new AIO events by doing all possible work in
> userspace.
> 
> diff -r 6d15e452fa2e -r 3e538f6b8267 src/event/modules/ngx_epoll_module.c
> --- a/src/event/modules/ngx_epoll_module.c    Tue Dec 25 17:53:03 2018 +0300
> +++ b/src/event/modules/ngx_epoll_module.c    Sun Jan 06 21:34:54 2019 +0000
> @@ -100,6 +100,35 @@
>      ngx_uint_t  aio_requests;
>  } ngx_epoll_conf_t;
> 
> +#if (NGX_HAVE_FILE_AIO)
> +
> +/* Stolen from kernel arch/x86_64.h */
> +#ifdef __x86_64__
> +#define read_barrier() __asm__ __volatile__("lfence" ::: "memory")
> +#else
> +#ifdef __i386__
> +#define read_barrier() __asm__ __volatile__("" : : : "memory")
> +#else
> +#define read_barrier() __sync_synchronize()
> +#endif
> +#endif
> +
> +/* Stolen from kernel fs/aio.c */
> +#define AIO_RING_MAGIC                  0xa10a10a1
> +struct aio_ring {
> +    unsigned        id;     /* kernel internal index number */
> +    unsigned        nr;     /* number of io_events */
> +    unsigned        head;
> +    unsigned        tail;
> +    unsigned        magic;
> +    unsigned        compat_features;
> +    unsigned        incompat_features;
> +    unsigned        header_length;  /* size of aio_ring */
> +    struct io_event events[0];
> +};
> +
> +#endif
> +
> 
>  static ngx_int_t ngx_epoll_init(ngx_cycle_t *cycle, ngx_msec_t timer);
>  #if (NGX_HAVE_EVENTFD)
> @@ -241,6 +270,39 @@
>  io_getevents(aio_context_t ctx, long min_nr, long nr, struct io_event *events,
>      struct timespec *tmo)
>  {
> +    /* Code based on cloudflare-blog */
> +    ngx_int_t i = 0;
> +
> +    struct aio_ring *ring = (struct aio_ring *)ctx;
> +    if (ring == NULL || ring->magic != AIO_RING_MAGIC) {
> +        goto do_syscall;
> +    }
> +
> +    while (i < nr) {
> +        unsigned head = ring->head;
> +        if (head == ring->tail) {
> +            /* There are no more completions */
> +            break;
> +        } else {
> +            /* There is another completion to reap */
> +            events[i] = ring->events[head];
> +            read_barrier();
> +            ring->head = (head + 1) % ring->nr;
> +            i++;
> +        }
> +    }
> +
> +    if (i == 0 && tmo != NULL && tmo->tv_sec == 0 &&
> +        tmo->tv_nsec == 0) {
> +        /* Requested non blocking operation. */
> +        return 0;
> +    }
> +
> +    if (i && i >= min_nr) {
> +        return i;
> +    }
> +
> +do_syscall:
>      return syscall(SYS_io_getevents, ctx, min_nr, nr, events, tmo);
>  }

Thank you for the patch.

This looks like a hack which relies on implementation details of 
the kernel code.  Even if the hack is indeed correct for the 
current kernel code (I actually doubt it is, since it accesses 
without locking a structure that seems to be protected by a lock in 
the kernel), I do not think we want such code in nginx.

-- 
Maxim Dounin
http://mdounin.ru/


More information about the nginx-devel mailing list