[PATCH] Use io_uring for async io access
Zhao, Ping
ping.zhao at intel.com
Wed Jan 13 04:46:29 UTC 2021
Hi Vladimir,
It could be because of some new commits on the tree. I developed the patch based on the 1.19.6 tag (-r 83c4622053b0), and I saw that several new patches were merged on Jan. 12th. That's OK; I'll submit a new patch that supports both legacy libaio and io_uring, so users can still use libaio if they want. I hope you can review the new one when it's ready.
Thanks,
Ping
-----Original Message-----
From: nginx-devel <nginx-devel-bounces at nginx.org> On Behalf Of Vladimir Homutov
Sent: Tuesday, January 12, 2021 9:46 PM
To: nginx-devel at nginx.org
Subject: Re: [PATCH] Use io_uring for async io access
On Mon, Jan 11, 2021 at 07:05:28AM +0000, Zhao, Ping wrote:
> Hello Nginx Developers,
>
> This is a patch of Nginx io_uring for async io access. Would like to receive your comments.
>
> Thanks,
> Ping
Hi Zhao Ping,
Unfortunately, I was not able to apply the patch properly; it shows a lot of rejections. Nevertheless, I took a look, and from what I see you are trying to completely replace the AIO implementation with some new code.
It would be nice to see a modular approach that adds a new aio method without breaking the existing code. Thank you for sharing!
>
> # HG changeset patch
> # User Ping Zhao <ping.zhao at intel.com<mailto:ping.zhao at intel.com>>
> # Date 1610370434 18000
> # Mon Jan 11 08:07:14 2021 -0500
> # Node ID 3677cf19b98b054614030b80f73728b02fdda832
> # Parent 82228f955153527fba12211f52bf102c90f38dfb
> Use io_uring for async io access.
>
> Replace aio with io_uring in async disk io access.
>
> Io_uring is a new kernel feature to async io access. Nginx can use it
> for legacy disk aio access(for example, disk cache file access)
>
> Check with iostat that shows nvme disk io has 30%+ performance improvement with 1 thread.
> Test with wrk with 100 threads 200 connections(-t 100 -c 1000) with 25000 random requests.
>
> iostat(B/s)
> libaio 1.0 GB/s
> io_uring 1.3+ GB/s
>
> Patch contributor: Carter Li, Ping Zhao
>
> diff -r 82228f955153 -r 3677cf19b98b auto/unix
> --- a/auto/unix Tue Dec 15 17:41:39 2020 +0300
> +++ b/auto/unix Mon Jan 11 08:07:14 2021 -0500
> @@ -532,44 +532,23 @@
>
> if [ $ngx_found = no ]; then
>
> - ngx_feature="Linux AIO support"
> + ngx_feature="Linux io_uring support (liburing)"
> ngx_feature_name="NGX_HAVE_FILE_AIO"
> ngx_feature_run=no
> - ngx_feature_incs="#include <linux/aio_abi.h>
> - #include <sys/eventfd.h>"
> + ngx_feature_incs="#include <liburing.h>"
> ngx_feature_path=
> - ngx_feature_libs=
> - ngx_feature_test="struct iocb iocb;
> - iocb.aio_lio_opcode = IOCB_CMD_PREAD;
> - iocb.aio_flags = IOCB_FLAG_RESFD;
> - iocb.aio_resfd = -1;
> - (void) iocb;
> - (void) eventfd(0, 0)"
> + ngx_feature_libs="-luring"
> + ngx_feature_test="struct io_uring ring;
> + int ret = io_uring_queue_init(64, &ring, 0);
> + if (ret < 0) return 1;
> + io_uring_queue_exit(&ring);"
> . auto/feature
>
> if [ $ngx_found = yes ]; then
> have=NGX_HAVE_EVENTFD . auto/have
> have=NGX_HAVE_SYS_EVENTFD_H . auto/have
> CORE_SRCS="$CORE_SRCS $LINUX_AIO_SRCS"
> - fi
> - fi
> -
> - if [ $ngx_found = no ]; then
> -
> - ngx_feature="Linux AIO support (SYS_eventfd)"
> - ngx_feature_incs="#include <linux/aio_abi.h>
> - #include <sys/syscall.h>"
> - ngx_feature_test="struct iocb iocb;
> - iocb.aio_lio_opcode = IOCB_CMD_PREAD;
> - iocb.aio_flags = IOCB_FLAG_RESFD;
> - iocb.aio_resfd = -1;
> - (void) iocb;
> - (void) SYS_eventfd"
> - . auto/feature
> -
> - if [ $ngx_found = yes ]; then
> - have=NGX_HAVE_EVENTFD . auto/have
> - CORE_SRCS="$CORE_SRCS $LINUX_AIO_SRCS"
> + CORE_LIBS="$CORE_LIBS -luring"
> fi
> fi
>
> @@ -577,7 +556,7 @@
> cat << END
>
> $0: no supported file AIO was found
> -Currently file AIO is supported on FreeBSD 4.3+ and Linux 2.6.22+ only
> +Currently file AIO is supported on FreeBSD 4.3+ and Linux 5.1.0+
> +(requires liburing) only
>
> END
> exit 1
> diff -r 82228f955153 -r 3677cf19b98b src/core/ngx_open_file_cache.c
> --- a/src/core/ngx_open_file_cache.c Tue Dec 15 17:41:39 2020 +0300
> +++ b/src/core/ngx_open_file_cache.c Mon Jan 11 08:07:14 2021 -0500
> @@ -869,8 +869,8 @@
> if (!of->log) {
>
> /*
> - * Use non-blocking open() not to hang on FIFO files, etc.
> - * This flag has no effect on a regular files.
> + * Differs from plain read, IORING_OP_READV with O_NONBLOCK
> + * will return -EAGAIN if the operation may block.
> */
>
> fd = ngx_open_file_wrapper(name, of,
> NGX_FILE_RDONLY|NGX_FILE_NONBLOCK,
> diff -r 82228f955153 -r 3677cf19b98b src/core/ngx_output_chain.c
> --- a/src/core/ngx_output_chain.c Tue Dec 15 17:41:39 2020 +0300
> +++ b/src/core/ngx_output_chain.c Mon Jan 11 08:07:14 2021 -0500
> @@ -589,6 +589,20 @@
> if (ctx->aio_handler) {
> n = ngx_file_aio_read(src->file, dst->pos, (size_t) size,
> src->file_pos, ctx->pool);
> +
> + if (n > 0 && n < size) {
> + ngx_log_error(NGX_LOG_INFO, ctx->pool->log, 0,
> + ngx_read_file_n " Try again, read only %z of %O from \"%s\"",
> + n, size, src->file->name.data);
> +
> + src->file_pos += n;
> + dst->last += n;
> +
> + n = ngx_file_aio_read(src->file, dst->pos+n, (size_t) size-n,
> + src->file_pos, ctx->pool);
> +
> + }
> +
> if (n == NGX_AGAIN) {
> ctx->aio_handler(ctx, src->file);
> return NGX_AGAIN;
> diff -r 82228f955153 -r 3677cf19b98b src/event/modules/ngx_epoll_module.c
> --- a/src/event/modules/ngx_epoll_module.c Tue Dec 15 17:41:39 2020 +0300
> +++ b/src/event/modules/ngx_epoll_module.c Mon Jan 11 08:07:14 2021 -0500
> @@ -9,6 +9,10 @@
> #include <ngx_core.h>
> #include <ngx_event.h>
>
> +#if (NGX_HAVE_FILE_AIO)
> +#include <liburing.h>
> +#endif
> +
>
> #if (NGX_TEST_BUILD_EPOLL)
>
> @@ -75,23 +79,6 @@
> #define SYS_eventfd 323
> #endif
>
> -#if (NGX_HAVE_FILE_AIO)
> -
> -#define SYS_io_setup 245
> -#define SYS_io_destroy 246
> -#define SYS_io_getevents 247
> -
> -typedef u_int aio_context_t;
> -
> -struct io_event {
> - uint64_t data; /* the data field from the iocb */
> - uint64_t obj; /* what iocb this event came from */
> - int64_t res; /* result code for this event */
> - int64_t res2; /* secondary result */
> -};
> -
> -
> -#endif
> #endif /* NGX_TEST_BUILD_EPOLL */
>
>
> @@ -124,7 +111,7 @@
> ngx_uint_t flags);
>
> #if (NGX_HAVE_FILE_AIO)
> -static void ngx_epoll_eventfd_handler(ngx_event_t *ev);
> +static void ngx_epoll_io_uring_handler(ngx_event_t *ev);
> #endif
>
> static void *ngx_epoll_create_conf(ngx_cycle_t *cycle);
> @@ -141,13 +128,11 @@
> #endif
>
> #if (NGX_HAVE_FILE_AIO)
> -
> -int ngx_eventfd = -1;
> -aio_context_t ngx_aio_ctx = 0;
> +struct io_uring ngx_ring;
> +struct io_uring_params ngx_ring_params;
>
> -static ngx_event_t ngx_eventfd_event;
> -static ngx_connection_t ngx_eventfd_conn;
> -
> +static ngx_event_t ngx_ring_event;
> +static ngx_connection_t ngx_ring_conn;
> #endif
>
> #if (NGX_HAVE_EPOLLRDHUP)
> @@ -217,102 +202,40 @@
>
> #if (NGX_HAVE_FILE_AIO)
>
> -/*
> - * We call io_setup(), io_destroy() io_submit(), and io_getevents() directly
> - * as syscalls instead of libaio usage, because the library header file
> - * supports eventfd() since 0.3.107 version only.
> - */
> -
> -static int
> -io_setup(u_int nr_reqs, aio_context_t *ctx) -{
> - return syscall(SYS_io_setup, nr_reqs, ctx);
> -}
> -
> -
> -static int
> -io_destroy(aio_context_t ctx)
> -{
> - return syscall(SYS_io_destroy, ctx);
> -}
> -
> -
> -static int
> -io_getevents(aio_context_t ctx, long min_nr, long nr, struct io_event *events,
> - struct timespec *tmo)
> -{
> - return syscall(SYS_io_getevents, ctx, min_nr, nr, events, tmo);
> -}
> -
> -
> static void
> ngx_epoll_aio_init(ngx_cycle_t *cycle, ngx_epoll_conf_t *epcf) {
> - int n;
> struct epoll_event ee;
>
> -#if (NGX_HAVE_SYS_EVENTFD_H)
> - ngx_eventfd = eventfd(0, 0);
> -#else
> - ngx_eventfd = syscall(SYS_eventfd, 0);
> -#endif
> -
> - if (ngx_eventfd == -1) {
> + if (io_uring_queue_init_params(32763, &ngx_ring,
> + &ngx_ring_params) < 0) {
> ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
> - "eventfd() failed");
> - ngx_file_aio = 0;
> - return;
> - }
> -
> - ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
> - "eventfd: %d", ngx_eventfd);
> -
> - n = 1;
> -
> - if (ioctl(ngx_eventfd, FIONBIO, &n) == -1) {
> - ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
> - "ioctl(eventfd, FIONBIO) failed");
> + "io_uring_queue_init_params() failed");
> goto failed;
> }
>
> - if (io_setup(epcf->aio_requests, &ngx_aio_ctx) == -1) {
> - ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
> - "io_setup() failed");
> - goto failed;
> - }
> -
> - ngx_eventfd_event.data = &ngx_eventfd_conn;
> - ngx_eventfd_event.handler = ngx_epoll_eventfd_handler;
> - ngx_eventfd_event.log = cycle->log;
> - ngx_eventfd_event.active = 1;
> - ngx_eventfd_conn.fd = ngx_eventfd;
> - ngx_eventfd_conn.read = &ngx_eventfd_event;
> - ngx_eventfd_conn.log = cycle->log;
> + ngx_ring_event.data = &ngx_ring_conn;
> + ngx_ring_event.handler = ngx_epoll_io_uring_handler;
> + ngx_ring_event.log = cycle->log;
> + ngx_ring_event.active = 1;
> + ngx_ring_conn.fd = ngx_ring.ring_fd;
> + ngx_ring_conn.read = &ngx_ring_event;
> + ngx_ring_conn.log = cycle->log;
>
> ee.events = EPOLLIN|EPOLLET;
> - ee.data.ptr = &ngx_eventfd_conn;
> + ee.data.ptr = &ngx_ring_conn;
>
> - if (epoll_ctl(ep, EPOLL_CTL_ADD, ngx_eventfd, &ee) != -1) {
> + if (epoll_ctl(ep, EPOLL_CTL_ADD, ngx_ring.ring_fd, &ee) != -1) {
> return;
> }
>
> ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
> "epoll_ctl(EPOLL_CTL_ADD, eventfd) failed");
>
> - if (io_destroy(ngx_aio_ctx) == -1) {
> - ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
> - "io_destroy() failed");
> - }
> + io_uring_queue_exit(&ngx_ring);
>
> failed:
>
> - if (close(ngx_eventfd) == -1) {
> - ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
> - "eventfd close() failed");
> - }
> -
> - ngx_eventfd = -1;
> - ngx_aio_ctx = 0;
> + ngx_ring.ring_fd = 0;
> ngx_file_aio = 0;
> }
>
> @@ -549,23 +472,11 @@
>
> #if (NGX_HAVE_FILE_AIO)
>
> - if (ngx_eventfd != -1) {
> -
> - if (io_destroy(ngx_aio_ctx) == -1) {
> - ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
> - "io_destroy() failed");
> - }
> -
> - if (close(ngx_eventfd) == -1) {
> - ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
> - "eventfd close() failed");
> - }
> -
> - ngx_eventfd = -1;
> + if (ngx_ring.ring_fd != 0) {
> + io_uring_queue_exit(&ngx_ring);
> + ngx_ring.ring_fd = 0;
> }
>
> - ngx_aio_ctx = 0;
> -
> #endif
>
> ngx_free(event_list);
> @@ -939,84 +850,36 @@
> #if (NGX_HAVE_FILE_AIO)
>
> static void
> -ngx_epoll_eventfd_handler(ngx_event_t *ev)
> +ngx_epoll_io_uring_handler(ngx_event_t *ev)
> {
> - int n, events;
> - long i;
> - uint64_t ready;
> - ngx_err_t err;
> ngx_event_t *e;
> + struct io_uring_cqe *cqe;
> + unsigned head;
> + unsigned cqe_count = 0;
> ngx_event_aio_t *aio;
> - struct io_event event[64];
> - struct timespec ts;
>
> - ngx_log_debug0(NGX_LOG_DEBUG_EVENT, ev->log, 0, "eventfd handler");
> -
> - n = read(ngx_eventfd, &ready, 8);
> + ngx_log_debug(NGX_LOG_DEBUG_EVENT, ev->log, 0,
> + "io_uring_peek_cqe: START");
>
> - err = ngx_errno;
> -
> - ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0, "eventfd: %d", n);
> + io_uring_for_each_cqe(&ngx_ring, head, cqe) {
> + ngx_log_debug3(NGX_LOG_DEBUG_EVENT, ev->log, 0,
> + "io_event: %p %d %d",
> + cqe->user_data, cqe->res, cqe->flags);
>
> - if (n != 8) {
> - if (n == -1) {
> - if (err == NGX_EAGAIN) {
> - return;
> - }
> + e = (ngx_event_t *) io_uring_cqe_get_data(cqe);
> + e->complete = 1;
> + e->active = 0;
> + e->ready = 1;
>
> - ngx_log_error(NGX_LOG_ALERT, ev->log, err, "read(eventfd) failed");
> - return;
> - }
> + aio = e->data;
> + aio->res = cqe->res;
>
> - ngx_log_error(NGX_LOG_ALERT, ev->log, 0,
> - "read(eventfd) returned only %d bytes", n);
> - return;
> + ++cqe_count;
> +
> + ngx_post_event(e, &ngx_posted_events);
> }
>
> - ts.tv_sec = 0;
> - ts.tv_nsec = 0;
> -
> - while (ready) {
> -
> - events = io_getevents(ngx_aio_ctx, 1, 64, event, &ts);
> -
> - ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0,
> - "io_getevents: %d", events);
> -
> - if (events > 0) {
> - ready -= events;
> -
> - for (i = 0; i < events; i++) {
> -
> - ngx_log_debug4(NGX_LOG_DEBUG_EVENT, ev->log, 0,
> - "io_event: %XL %XL %L %L",
> - event[i].data, event[i].obj,
> - event[i].res, event[i].res2);
> -
> - e = (ngx_event_t *) (uintptr_t) event[i].data;
> -
> - e->complete = 1;
> - e->active = 0;
> - e->ready = 1;
> -
> - aio = e->data;
> - aio->res = event[i].res;
> -
> - ngx_post_event(e, &ngx_posted_events);
> - }
> -
> - continue;
> - }
> -
> - if (events == 0) {
> - return;
> - }
> -
> - /* events == -1 */
> - ngx_log_error(NGX_LOG_ALERT, ev->log, ngx_errno,
> - "io_getevents() failed");
> - return;
> - }
> + io_uring_cq_advance(&ngx_ring, cqe_count);
> }
>
> #endif
> diff -r 82228f955153 -r 3677cf19b98b src/event/ngx_event.h
> --- a/src/event/ngx_event.h Tue Dec 15 17:41:39 2020 +0300
> +++ b/src/event/ngx_event.h Mon Jan 11 08:07:14 2021 -0500
> @@ -160,7 +160,9 @@
> size_t nbytes;
> #endif
>
> - ngx_aiocb_t aiocb;
> + /* Make sure that this iov has the same lifecycle with its associated aio event */
> + struct iovec iov;
> +
> ngx_event_t event;
> };
>
> diff -r 82228f955153 -r 3677cf19b98b src/os/unix/ngx_linux_aio_read.c
> --- a/src/os/unix/ngx_linux_aio_read.c Tue Dec 15 17:41:39 2020 +0300
> +++ b/src/os/unix/ngx_linux_aio_read.c Mon Jan 11 08:07:14 2021 -0500
> @@ -9,20 +9,16 @@
> #include <ngx_core.h>
> #include <ngx_event.h>
>
> +#include <liburing.h>
>
> -extern int ngx_eventfd;
> -extern aio_context_t ngx_aio_ctx;
> +
> +extern struct io_uring ngx_ring;
> +extern struct io_uring_params ngx_ring_params;
>
>
> static void ngx_file_aio_event_handler(ngx_event_t *ev);
>
>
> -static int
> -io_submit(aio_context_t ctx, long n, struct iocb **paiocb) -{
> - return syscall(SYS_io_submit, ctx, n, paiocb);
> -}
> -
>
> ngx_int_t
> ngx_file_aio_init(ngx_file_t *file, ngx_pool_t *pool) @@ -50,10 +46,10
> @@ ngx_file_aio_read(ngx_file_t *file, u_char *buf, size_t size, off_t
> offset,
> ngx_pool_t *pool)
> {
> - ngx_err_t err;
> - struct iocb *piocb[1];
> - ngx_event_t *ev;
> - ngx_event_aio_t *aio;
> + ngx_err_t err;
> + ngx_event_t *ev;
> + ngx_event_aio_t *aio;
> + struct io_uring_sqe *sqe;
>
> if (!ngx_file_aio) {
> return ngx_read_file(file, buf, size, offset); @@ -93,22
> +89,41 @@
> return NGX_ERROR;
> }
>
> - ngx_memzero(&aio->aiocb, sizeof(struct iocb));
> + sqe = io_uring_get_sqe(&ngx_ring);
> +
> + if (!sqe) {
> + ngx_log_debug4(NGX_LOG_DEBUG_CORE, file->log, 0,
> + "aio no sqe left:%d @%O:%uz %V",
> + ev->complete, offset, size, &file->name);
> + return ngx_read_file(file, buf, size, offset);
> + }
>
> - aio->aiocb.aio_data = (uint64_t) (uintptr_t) ev;
> - aio->aiocb.aio_lio_opcode = IOCB_CMD_PREAD;
> - aio->aiocb.aio_fildes = file->fd;
> - aio->aiocb.aio_buf = (uint64_t) (uintptr_t) buf;
> - aio->aiocb.aio_nbytes = size;
> - aio->aiocb.aio_offset = offset;
> - aio->aiocb.aio_flags = IOCB_FLAG_RESFD;
> - aio->aiocb.aio_resfd = ngx_eventfd;
> + if (__builtin_expect(!!(ngx_ring_params.features & IORING_FEAT_CUR_PERSONALITY), 1)) {
> + /*
> + * `io_uring_prep_read` is faster than `io_uring_prep_readv`, because the kernel
> + * doesn't need to import iovecs in advance.
> + *
> + * If the kernel supports `IORING_FEAT_CUR_PERSONALITY`, it should support
> + * non-vectored read/write commands too.
> + *
> + * It's not perfect, but avoids an extra feature-test syscall.
> + */
> + io_uring_prep_read(sqe, file->fd, buf, size, offset);
> + } else {
> + /*
> + * We must store iov into heap to prevent kernel from returning -EFAULT
> + * in case `IORING_FEAT_SUBMIT_STABLE` is not supported
> + */
> + aio->iov.iov_base = buf;
> + aio->iov.iov_len = size;
> + io_uring_prep_readv(sqe, file->fd, &aio->iov, 1, offset);
> + }
> + io_uring_sqe_set_data(sqe, ev);
> +
>
> ev->handler = ngx_file_aio_event_handler;
>
> - piocb[0] = &aio->aiocb;
> -
> - if (io_submit(ngx_aio_ctx, 1, piocb) == 1) {
> + if (io_uring_submit(&ngx_ring) == 1) {
> ev->active = 1;
> ev->ready = 0;
> ev->complete = 0;
> diff -r 82228f955153 -r 3677cf19b98b src/os/unix/ngx_linux_config.h
> --- a/src/os/unix/ngx_linux_config.h Tue Dec 15 17:41:39 2020 +0300
> +++ b/src/os/unix/ngx_linux_config.h Mon Jan 11 08:07:14 2021 -0500
> @@ -93,10 +93,6 @@
> #include <sys/eventfd.h>
> #endif
> #include <sys/syscall.h>
> -#if (NGX_HAVE_FILE_AIO)
> -#include <linux/aio_abi.h>
> -typedef struct iocb ngx_aiocb_t;
> -#endif
>
>
> #if (NGX_HAVE_CAPABILITIES)
>
> _______________________________________________
> nginx-devel mailing list
> nginx-devel at nginx.org
> http://mailman.nginx.org/mailman/listinfo/nginx-devel
_______________________________________________
nginx-devel mailing list
nginx-devel at nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel
More information about the nginx-devel
mailing list