[PATCH] Add io_uring support in AIO(async io) module
Zhao, Ping
ping.zhao at intel.com
Thu Jan 14 05:53:17 UTC 2021
# HG changeset patch
# User Ping Zhao <ping.zhao at intel.com>
# Date 1610554205 18000
# Wed Jan 13 11:10:05 2021 -0500
# Node ID 95886c3353dc80a3da215027c1e0f2141e47e911
# Parent b055bb6ef87e49232a7fcb4e5334b8efda3b6499
Add io_uring support in AIO(async io) module.
Hello, This is a patch to support io_uring in AIO(async io) module.
Basically you don't need change your configurations. If you're using new kernel(above v5.1) which supports io_uring, and you have "aio on" in your configuration. Nginx will use io_uring for FILE_AIO access which can achieve performance improvement than legacy libaio.
Checked with iostat which shows nvme disk io has 30%+ performance improvement with 1 thread.
Use wrk with 100 threads 200 connections(-t 100 -c 200) with 25000 random requests.
iostat(B/s)
libaio ~1.0 GB/s
io_uring 1.3+ GB/s
diff -r b055bb6ef87e -r 95886c3353dc auto/unix
--- a/auto/unix Mon Jan 11 22:06:27 2021 +0300
+++ b/auto/unix Wed Jan 13 11:10:05 2021 -0500
@@ -531,6 +531,30 @@
fi
if [ $ngx_found = no ]; then
+ ngx_feature="Linux AIO support(IO_URING)"
+ ngx_feature_name="NGX_HAVE_FILE_AIO"
+ ngx_feature_incs="#include <liburing.h>"
+ ngx_feature_path=
+ ngx_feature_libs="-luring"
+ ngx_feature_test="struct io_uring ring;
+ struct io_uring_params params;
+ int ret;
+ memset(¶ms, 0, sizeof(params));
+ ret = io_uring_queue_init_params(64, &ring, ¶ms);
+ if (ret < 0) return 1;
+ if (!(params.features & IORING_FEAT_FAST_POLL)) return 1;
+ io_uring_queue_exit(&ring)"
+ . auto/feature
+
+ if [ $ngx_found = yes ]; then
+ have=NGX_HAVE_EVENTFD . auto/have
+ have=NGX_HAVE_FILE_IOURING . auto/have
+ CORE_LIBS="$CORE_LIBS -luring"
+ CORE_SRCS="$CORE_SRCS $LINUX_AIO_SRCS"
+ fi
+ fi
+
+ if [ $ngx_found = no ]; then
ngx_feature="Linux AIO support"
ngx_feature_name="NGX_HAVE_FILE_AIO"
diff -r b055bb6ef87e -r 95886c3353dc src/core/ngx_output_chain.c
--- a/src/core/ngx_output_chain.c Mon Jan 11 22:06:27 2021 +0300
+++ b/src/core/ngx_output_chain.c Wed Jan 13 11:10:05 2021 -0500
@@ -589,6 +589,20 @@
if (ctx->aio_handler) {
n = ngx_file_aio_read(src->file, dst->pos, (size_t) size,
src->file_pos, ctx->pool);
+#if (NGX_HAVE_FILE_IOURING)
+ if (n > 0 && n < size) {
+ ngx_log_error(NGX_LOG_INFO, ctx->pool->log, 0,
+ ngx_read_file_n " Try again, only read %z of %O from \"%s\"",
+ n, size, src->file->name.data);
+
+ src->file_pos += n;
+ dst->last += n;
+
+ n = ngx_file_aio_read(src->file, dst->pos+n, (size_t) size-n,
+ src->file_pos, ctx->pool);
+
+ }
+#endif
if (n == NGX_AGAIN) {
ctx->aio_handler(ctx, src->file);
return NGX_AGAIN;
diff -r b055bb6ef87e -r 95886c3353dc src/event/modules/ngx_epoll_module.c
--- a/src/event/modules/ngx_epoll_module.c Mon Jan 11 22:06:27 2021 +0300
+++ b/src/event/modules/ngx_epoll_module.c Wed Jan 13 11:10:05 2021 -0500
@@ -9,6 +9,9 @@
#include <ngx_core.h>
#include <ngx_event.h>
+#if (NGX_HAVE_FILE_IOURING)
+#include <liburing.h>
+#endif
#if (NGX_TEST_BUILD_EPOLL)
@@ -77,6 +80,9 @@
#if (NGX_HAVE_FILE_AIO)
+#if (NGX_HAVE_FILE_IOURING)
+#else
+
#define SYS_io_setup 245
#define SYS_io_destroy 246
#define SYS_io_getevents 247
@@ -89,9 +95,9 @@
int64_t res; /* result code for this event */
int64_t res2; /* secondary result */
};
-
+#endif /* NGX_HAVE_FILE_IOURING */
+#endif /* NGX_HAVE_FILE_AIO */
-#endif
#endif /* NGX_TEST_BUILD_EPOLL */
@@ -124,8 +130,25 @@
ngx_uint_t flags);
#if (NGX_HAVE_FILE_AIO)
+#if (NGX_HAVE_FILE_IOURING)
+static void ngx_epoll_io_uring_handler(ngx_event_t *ev);
+
+struct io_uring ngx_ring;
+struct io_uring_params ngx_ring_params;
+
+static ngx_event_t ngx_ring_event;
+static ngx_connection_t ngx_ring_conn;
+
+#else
static void ngx_epoll_eventfd_handler(ngx_event_t *ev);
-#endif
+
+int ngx_eventfd = -1;
+aio_context_t ngx_aio_ctx = 0;
+
+static ngx_event_t ngx_eventfd_event;
+static ngx_connection_t ngx_eventfd_conn;
+#endif /* NGX_HAVE_FILE_IOURING */
+#endif /* NGX_HAVE_FILE_AIO */
static void *ngx_epoll_create_conf(ngx_cycle_t *cycle);
static char *ngx_epoll_init_conf(ngx_cycle_t *cycle, void *conf);
@@ -140,16 +163,6 @@
static ngx_connection_t notify_conn;
#endif
-#if (NGX_HAVE_FILE_AIO)
-
-int ngx_eventfd = -1;
-aio_context_t ngx_aio_ctx = 0;
-
-static ngx_event_t ngx_eventfd_event;
-static ngx_connection_t ngx_eventfd_conn;
-
-#endif
-
#if (NGX_HAVE_EPOLLRDHUP)
ngx_uint_t ngx_use_epoll_rdhup;
#endif
@@ -217,6 +230,47 @@
#if (NGX_HAVE_FILE_AIO)
+#if (NGX_HAVE_FILE_IOURING)
+
+static void
+ngx_epoll_aio_init(ngx_cycle_t *cycle, ngx_epoll_conf_t *epcf)
+{
+ struct epoll_event ee;
+
+ if (io_uring_queue_init_params(32763, &ngx_ring, &ngx_ring_params) < 0) {
+ ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
+ "io_uring_queue_init_params() failed");
+ goto failed;
+ }
+
+ ngx_ring_event.data = &ngx_ring_conn;
+ ngx_ring_event.handler = ngx_epoll_io_uring_handler;
+ ngx_ring_event.log = cycle->log;
+ ngx_ring_event.active = 1;
+ ngx_ring_conn.fd = ngx_ring.ring_fd;
+ ngx_ring_conn.read = &ngx_ring_event;
+ ngx_ring_conn.log = cycle->log;
+
+ ee.events = EPOLLIN|EPOLLET;
+ ee.data.ptr = &ngx_ring_conn;
+
+ if (epoll_ctl(ep, EPOLL_CTL_ADD, ngx_ring.ring_fd, &ee) != -1) {
+ return;
+ }
+
+ ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
+ "epoll_ctl(EPOLL_CTL_ADD, eventfd) failed");
+
+ io_uring_queue_exit(&ngx_ring);
+
+failed:
+
+ ngx_ring.ring_fd = 0;
+ ngx_file_aio = 0;
+}
+
+#else
+
/*
* We call io_setup(), io_destroy() io_submit(), and io_getevents() directly
* as syscalls instead of libaio usage, because the library header file
@@ -316,8 +370,8 @@
ngx_file_aio = 0;
}
-#endif
-
+#endif /*NGX_HAVE_FILE_IOURING*/
+#endif /*NGX_HAVE_FILE_AIO*/
static ngx_int_t
ngx_epoll_init(ngx_cycle_t *cycle, ngx_msec_t timer)
@@ -548,6 +602,13 @@
#endif
#if (NGX_HAVE_FILE_AIO)
+#if (NGX_HAVE_FILE_IOURING)
+ if (ngx_ring.ring_fd != 0) {
+ io_uring_queue_exit(&ngx_ring);
+ ngx_ring.ring_fd = 0;
+ }
+
+#else
if (ngx_eventfd != -1) {
@@ -566,7 +627,8 @@
ngx_aio_ctx = 0;
-#endif
+#endif /*NGX_HAVE_FILE_IOURING*/
+#endif /*NGX_HAVE_FILE_AIO*/
ngx_free(event_list);
@@ -935,8 +997,42 @@
return NGX_OK;
}
+#if (NGX_HAVE_FILE_AIO)
+#if (NGX_HAVE_FILE_IOURING)
+static void
+ngx_epoll_io_uring_handler(ngx_event_t *ev)
+{
+ ngx_event_t *e;
+ struct io_uring_cqe *cqe;
+ unsigned head;
+ unsigned cqe_count = 0;
+ ngx_event_aio_t *aio;
-#if (NGX_HAVE_FILE_AIO)
+ ngx_log_debug(NGX_LOG_DEBUG_EVENT, ev->log, 0,
+ "io_uring_peek_cqe: START");
+
+ io_uring_for_each_cqe(&ngx_ring, head, cqe) {
+ ngx_log_debug3(NGX_LOG_DEBUG_EVENT, ev->log, 0,
+ "io_event: %p %d %d",
+ cqe->user_data, cqe->res, cqe->flags);
+
+ e = (ngx_event_t *) io_uring_cqe_get_data(cqe);
+ e->complete = 1;
+ e->active = 0;
+ e->ready = 1;
+
+ aio = e->data;
+ aio->res = cqe->res;
+
+ ++cqe_count;
+
+ ngx_post_event(e, &ngx_posted_events);
+ }
+
+ io_uring_cq_advance(&ngx_ring, cqe_count);
+}
+
+#else
static void
ngx_epoll_eventfd_handler(ngx_event_t *ev)
@@ -1019,8 +1115,8 @@
}
}
-#endif
-
+#endif /*NGX_HAVE_FILE_IOURING*/
+#endif /*NGX_HAVE_FILE_AIO*/
static void *
ngx_epoll_create_conf(ngx_cycle_t *cycle)
diff -r b055bb6ef87e -r 95886c3353dc src/event/ngx_event.h
--- a/src/event/ngx_event.h Mon Jan 11 22:06:27 2021 +0300
+++ b/src/event/ngx_event.h Wed Jan 13 11:10:05 2021 -0500
@@ -160,7 +160,11 @@
size_t nbytes;
#endif
+#if (NGX_HAVE_FILE_IOURING)
+ struct iovec iov;
+#else
ngx_aiocb_t aiocb;
+#endif
ngx_event_t event;
};
diff -r b055bb6ef87e -r 95886c3353dc src/os/unix/ngx_linux_aio_read.c
--- a/src/os/unix/ngx_linux_aio_read.c Mon Jan 11 22:06:27 2021 +0300
+++ b/src/os/unix/ngx_linux_aio_read.c Wed Jan 13 11:10:05 2021 -0500
@@ -9,20 +9,24 @@
#include <ngx_core.h>
#include <ngx_event.h>
+#if (NGX_HAVE_FILE_IOURING)
+#include <liburing.h>
+extern struct io_uring ngx_ring;
+extern struct io_uring_params ngx_ring_params;
+
+#else
extern int ngx_eventfd;
extern aio_context_t ngx_aio_ctx;
-
-static void ngx_file_aio_event_handler(ngx_event_t *ev);
-
-
static int
io_submit(aio_context_t ctx, long n, struct iocb **paiocb)
{
return syscall(SYS_io_submit, ctx, n, paiocb);
}
+#endif
+static void ngx_file_aio_event_handler(ngx_event_t *ev);
ngx_int_t
ngx_file_aio_init(ngx_file_t *file, ngx_pool_t *pool)
@@ -45,7 +49,114 @@
return NGX_OK;
}
+#if (NGX_HAVE_FILE_IOURING)
+ssize_t
+ngx_file_aio_read(ngx_file_t *file, u_char *buf, size_t size, off_t offset,
+ ngx_pool_t *pool)
+{
+ ngx_err_t err;
+ ngx_event_t *ev;
+ ngx_event_aio_t *aio;
+ struct io_uring_sqe *sqe;
+ if (!ngx_file_aio) {
+ return ngx_read_file(file, buf, size, offset);
+ }
+
+ if (file->aio == NULL && ngx_file_aio_init(file, pool) != NGX_OK) {
+ return NGX_ERROR;
+ }
+
+ aio = file->aio;
+ ev = &aio->event;
+
+ if (!ev->ready) {
+ ngx_log_error(NGX_LOG_ALERT, file->log, 0,
+ "second aio post for \"%V\"", &file->name);
+ return NGX_AGAIN;
+ }
+
+ ngx_log_debug4(NGX_LOG_DEBUG_CORE, file->log, 0,
+ "aio complete:%d @%O:%uz %V",
+ ev->complete, offset, size, &file->name);
+
+ if (ev->complete) {
+ ev->active = 0;
+ ev->complete = 0;
+
+ if (aio->res >= 0) {
+ ngx_set_errno(0);
+ return aio->res;
+ }
+
+ ngx_set_errno(-aio->res);
+
+ ngx_log_error(NGX_LOG_CRIT, file->log, ngx_errno,
+ "aio read \"%s\" failed", file->name.data);
+
+ return NGX_ERROR;
+ }
+
+ sqe = io_uring_get_sqe(&ngx_ring);
+
+ if (!sqe) {
+ ngx_log_debug4(NGX_LOG_DEBUG_CORE, file->log, 0,
+ "aio no sqe left:%d @%O:%uz %V",
+ ev->complete, offset, size, &file->name);
+ return ngx_read_file(file, buf, size, offset);
+ }
+
+ if (__builtin_expect(!!(ngx_ring_params.features & IORING_FEAT_CUR_PERSONALITY), 1)) {
+ /*
+ * `io_uring_prep_read` is faster than `io_uring_prep_readv`, because the kernel
+ * doesn't need to import iovecs in advance.
+ *
+ * If the kernel supports `IORING_FEAT_CUR_PERSONALITY`, it should support
+ * non-vectored read/write commands too.
+ *
+ * It's not perfect, but avoids an extra feature-test syscall.
+ */
+ io_uring_prep_read(sqe, file->fd, buf, size, offset);
+ } else {
+ /*
+ * We must store iov into heap to prevent kernel from returning -EFAULT
+ * in case `IORING_FEAT_SUBMIT_STABLE` is not supported
+ */
+ aio->iov.iov_base = buf;
+ aio->iov.iov_len = size;
+ io_uring_prep_readv(sqe, file->fd, &aio->iov, 1, offset);
+ }
+ io_uring_sqe_set_data(sqe, ev);
+
+
+ ev->handler = ngx_file_aio_event_handler;
+
+ if (io_uring_submit(&ngx_ring) == 1) {
+ ev->active = 1;
+ ev->ready = 0;
+ ev->complete = 0;
+
+ return NGX_AGAIN;
+ }
+
+ err = ngx_errno;
+
+ if (err == NGX_EAGAIN) {
+ return ngx_read_file(file, buf, size, offset);
+ }
+
+ ngx_log_error(NGX_LOG_CRIT, file->log, err,
+ "io_submit(\"%V\") failed", &file->name);
+
+ if (err == NGX_ENOSYS) {
+ ngx_file_aio = 0;
+ return ngx_read_file(file, buf, size, offset);
+ }
+
+ return NGX_ERROR;
+}
+
+#else
ssize_t
ngx_file_aio_read(ngx_file_t *file, u_char *buf, size_t size, off_t offset,
ngx_pool_t *pool)
@@ -132,7 +243,7 @@
return NGX_ERROR;
}
-
+#endif
static void
ngx_file_aio_event_handler(ngx_event_t *ev)
diff -r b055bb6ef87e -r 95886c3353dc src/os/unix/ngx_linux_config.h
--- a/src/os/unix/ngx_linux_config.h Mon Jan 11 22:06:27 2021 +0300
+++ b/src/os/unix/ngx_linux_config.h Wed Jan 13 11:10:05 2021 -0500
@@ -93,11 +93,15 @@
#include <sys/eventfd.h>
#endif
#include <sys/syscall.h>
+
#if (NGX_HAVE_FILE_AIO)
+#if (NGX_HAVE_FILE_IOURING)
+
+#else
#include <linux/aio_abi.h>
typedef struct iocb ngx_aiocb_t;
#endif
-
+#endif
#if (NGX_HAVE_CAPABILITIES)
#include <linux/capability.h>
More information about the nginx-devel
mailing list