[PATCH] Use io_uring for async io access
Zhao, Ping
ping.zhao at intel.com
Tue Jan 12 01:32:53 UTC 2021
There's a typo in the mail:
Test with wrk with 100 threads 200 connections(-t 100 -c 1000) with 25000 random requests.
It should be "-c 200". In fact, with -c 1000 the io_uring performance gain is even more significant, because libaio performance drops more with 1000 connections than with 200 connections.
Regards,
Ping
From: nginx-devel <nginx-devel-bounces at nginx.org> On Behalf Of Zhao, Ping
Sent: Monday, January 11, 2021 3:05 PM
To: nginx-devel at nginx.org
Subject: [PATCH] Use io_uring for async io access
Hello Nginx Developers,
This is a patch that makes Nginx use io_uring for async I/O access. I would like to receive your comments.
Thanks,
Ping
# HG changeset patch
# User Ping Zhao <ping.zhao at intel.com>
# Date 1610370434 18000
# Mon Jan 11 08:07:14 2021 -0500
# Node ID 3677cf19b98b054614030b80f73728b02fdda832
# Parent 82228f955153527fba12211f52bf102c90f38dfb
Use io_uring for async io access.
Replace aio with io_uring in async disk io access.
io_uring is a new kernel feature for async I/O access. Nginx can use it for legacy disk AIO access (for example, disk cache file access).
Checking with iostat shows that NVMe disk I/O has a 30%+ performance improvement with 1 thread.
Tested with wrk using 100 threads and 200 connections (-t 100 -c 200) with 25000 random requests.
iostat(B/s)
libaio 1.0 GB/s
io_uring 1.3+ GB/s
Patch contributor: Carter Li, Ping Zhao
diff -r 82228f955153 -r 3677cf19b98b auto/unix
--- a/auto/unix Tue Dec 15 17:41:39 2020 +0300
+++ b/auto/unix Mon Jan 11 08:07:14 2021 -0500
@@ -532,44 +532,23 @@
if [ $ngx_found = no ]; then
- ngx_feature="Linux AIO support"
+ ngx_feature="Linux io_uring support (liburing)"
ngx_feature_name="NGX_HAVE_FILE_AIO"
ngx_feature_run=no
- ngx_feature_incs="#include <linux/aio_abi.h>
- #include <sys/eventfd.h>"
+ ngx_feature_incs="#include <liburing.h>"
ngx_feature_path=
- ngx_feature_libs=
- ngx_feature_test="struct iocb iocb;
- iocb.aio_lio_opcode = IOCB_CMD_PREAD;
- iocb.aio_flags = IOCB_FLAG_RESFD;
- iocb.aio_resfd = -1;
- (void) iocb;
- (void) eventfd(0, 0)"
+ ngx_feature_libs="-luring"
+ ngx_feature_test="struct io_uring ring;
+ int ret = io_uring_queue_init(64, &ring, 0);
+ if (ret < 0) return 1;
+ io_uring_queue_exit(&ring);"
. auto/feature
if [ $ngx_found = yes ]; then
have=NGX_HAVE_EVENTFD . auto/have
have=NGX_HAVE_SYS_EVENTFD_H . auto/have
CORE_SRCS="$CORE_SRCS $LINUX_AIO_SRCS"
- fi
- fi
-
- if [ $ngx_found = no ]; then
-
- ngx_feature="Linux AIO support (SYS_eventfd)"
- ngx_feature_incs="#include <linux/aio_abi.h>
- #include <sys/syscall.h>"
- ngx_feature_test="struct iocb iocb;
- iocb.aio_lio_opcode = IOCB_CMD_PREAD;
- iocb.aio_flags = IOCB_FLAG_RESFD;
- iocb.aio_resfd = -1;
- (void) iocb;
- (void) SYS_eventfd"
- . auto/feature
-
- if [ $ngx_found = yes ]; then
- have=NGX_HAVE_EVENTFD . auto/have
- CORE_SRCS="$CORE_SRCS $LINUX_AIO_SRCS"
+ CORE_LIBS="$CORE_LIBS -luring"
fi
fi
@@ -577,7 +556,7 @@
cat << END
$0: no supported file AIO was found
-Currently file AIO is supported on FreeBSD 4.3+ and Linux 2.6.22+ only
+Currently file AIO is supported on FreeBSD 4.3+ and Linux 5.1.0+ (requires liburing) only
END
exit 1
diff -r 82228f955153 -r 3677cf19b98b src/core/ngx_open_file_cache.c
--- a/src/core/ngx_open_file_cache.c Tue Dec 15 17:41:39 2020 +0300
+++ b/src/core/ngx_open_file_cache.c Mon Jan 11 08:07:14 2021 -0500
@@ -869,8 +869,8 @@
if (!of->log) {
/*
- * Use non-blocking open() not to hang on FIFO files, etc.
- * This flag has no effect on a regular files.
+ * Differs from plain read, IORING_OP_READV with O_NONBLOCK
+ * will return -EAGAIN if the operation may block.
*/
fd = ngx_open_file_wrapper(name, of, NGX_FILE_RDONLY|NGX_FILE_NONBLOCK,
diff -r 82228f955153 -r 3677cf19b98b src/core/ngx_output_chain.c
--- a/src/core/ngx_output_chain.c Tue Dec 15 17:41:39 2020 +0300
+++ b/src/core/ngx_output_chain.c Mon Jan 11 08:07:14 2021 -0500
@@ -589,6 +589,20 @@
if (ctx->aio_handler) {
n = ngx_file_aio_read(src->file, dst->pos, (size_t) size,
src->file_pos, ctx->pool);
+
+ if (n > 0 && n < size) {
+ ngx_log_error(NGX_LOG_INFO, ctx->pool->log, 0,
+ ngx_read_file_n " Try again, read only %z of %O from \"%s\"",
+ n, size, src->file->name.data);
+
+ src->file_pos += n;
+ dst->last += n;
+
+ n = ngx_file_aio_read(src->file, dst->pos+n, (size_t) size-n,
+ src->file_pos, ctx->pool);
+
+ }
+
if (n == NGX_AGAIN) {
ctx->aio_handler(ctx, src->file);
return NGX_AGAIN;
diff -r 82228f955153 -r 3677cf19b98b src/event/modules/ngx_epoll_module.c
--- a/src/event/modules/ngx_epoll_module.c Tue Dec 15 17:41:39 2020 +0300
+++ b/src/event/modules/ngx_epoll_module.c Mon Jan 11 08:07:14 2021 -0500
@@ -9,6 +9,10 @@
#include <ngx_core.h>
#include <ngx_event.h>
+#if (NGX_HAVE_FILE_AIO)
+#include <liburing.h>
+#endif
+
#if (NGX_TEST_BUILD_EPOLL)
@@ -75,23 +79,6 @@
#define SYS_eventfd 323
#endif
-#if (NGX_HAVE_FILE_AIO)
-
-#define SYS_io_setup 245
-#define SYS_io_destroy 246
-#define SYS_io_getevents 247
-
-typedef u_int aio_context_t;
-
-struct io_event {
- uint64_t data; /* the data field from the iocb */
- uint64_t obj; /* what iocb this event came from */
- int64_t res; /* result code for this event */
- int64_t res2; /* secondary result */
-};
-
-
-#endif
#endif /* NGX_TEST_BUILD_EPOLL */
@@ -124,7 +111,7 @@
ngx_uint_t flags);
#if (NGX_HAVE_FILE_AIO)
-static void ngx_epoll_eventfd_handler(ngx_event_t *ev);
+static void ngx_epoll_io_uring_handler(ngx_event_t *ev);
#endif
static void *ngx_epoll_create_conf(ngx_cycle_t *cycle);
@@ -141,13 +128,11 @@
#endif
#if (NGX_HAVE_FILE_AIO)
-
-int ngx_eventfd = -1;
-aio_context_t ngx_aio_ctx = 0;
+struct io_uring ngx_ring;
+struct io_uring_params ngx_ring_params;
-static ngx_event_t ngx_eventfd_event;
-static ngx_connection_t ngx_eventfd_conn;
-
+static ngx_event_t ngx_ring_event;
+static ngx_connection_t ngx_ring_conn;
#endif
#if (NGX_HAVE_EPOLLRDHUP)
@@ -217,102 +202,40 @@
#if (NGX_HAVE_FILE_AIO)
-/*
- * We call io_setup(), io_destroy() io_submit(), and io_getevents() directly
- * as syscalls instead of libaio usage, because the library header file
- * supports eventfd() since 0.3.107 version only.
- */
-
-static int
-io_setup(u_int nr_reqs, aio_context_t *ctx)
-{
- return syscall(SYS_io_setup, nr_reqs, ctx);
-}
-
-
-static int
-io_destroy(aio_context_t ctx)
-{
- return syscall(SYS_io_destroy, ctx);
-}
-
-
-static int
-io_getevents(aio_context_t ctx, long min_nr, long nr, struct io_event *events,
- struct timespec *tmo)
-{
- return syscall(SYS_io_getevents, ctx, min_nr, nr, events, tmo);
-}
-
-
static void
ngx_epoll_aio_init(ngx_cycle_t *cycle, ngx_epoll_conf_t *epcf)
{
- int n;
struct epoll_event ee;
-#if (NGX_HAVE_SYS_EVENTFD_H)
- ngx_eventfd = eventfd(0, 0);
-#else
- ngx_eventfd = syscall(SYS_eventfd, 0);
-#endif
-
- if (ngx_eventfd == -1) {
+ if (io_uring_queue_init_params(32763, &ngx_ring, &ngx_ring_params) < 0) {
ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
- "eventfd() failed");
- ngx_file_aio = 0;
- return;
- }
-
- ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
- "eventfd: %d", ngx_eventfd);
-
- n = 1;
-
- if (ioctl(ngx_eventfd, FIONBIO, &n) == -1) {
- ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
- "ioctl(eventfd, FIONBIO) failed");
+ "io_uring_queue_init_params() failed");
goto failed;
}
- if (io_setup(epcf->aio_requests, &ngx_aio_ctx) == -1) {
- ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
- "io_setup() failed");
- goto failed;
- }
-
- ngx_eventfd_event.data = &ngx_eventfd_conn;
- ngx_eventfd_event.handler = ngx_epoll_eventfd_handler;
- ngx_eventfd_event.log = cycle->log;
- ngx_eventfd_event.active = 1;
- ngx_eventfd_conn.fd = ngx_eventfd;
- ngx_eventfd_conn.read = &ngx_eventfd_event;
- ngx_eventfd_conn.log = cycle->log;
+ ngx_ring_event.data = &ngx_ring_conn;
+ ngx_ring_event.handler = ngx_epoll_io_uring_handler;
+ ngx_ring_event.log = cycle->log;
+ ngx_ring_event.active = 1;
+ ngx_ring_conn.fd = ngx_ring.ring_fd;
+ ngx_ring_conn.read = &ngx_ring_event;
+ ngx_ring_conn.log = cycle->log;
ee.events = EPOLLIN|EPOLLET;
- ee.data.ptr = &ngx_eventfd_conn;
+ ee.data.ptr = &ngx_ring_conn;
- if (epoll_ctl(ep, EPOLL_CTL_ADD, ngx_eventfd, &ee) != -1) {
+ if (epoll_ctl(ep, EPOLL_CTL_ADD, ngx_ring.ring_fd, &ee) != -1) {
return;
}
ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
"epoll_ctl(EPOLL_CTL_ADD, eventfd) failed");
- if (io_destroy(ngx_aio_ctx) == -1) {
- ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
- "io_destroy() failed");
- }
+ io_uring_queue_exit(&ngx_ring);
failed:
- if (close(ngx_eventfd) == -1) {
- ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
- "eventfd close() failed");
- }
-
- ngx_eventfd = -1;
- ngx_aio_ctx = 0;
+ ngx_ring.ring_fd = 0;
ngx_file_aio = 0;
}
@@ -549,23 +472,11 @@
#if (NGX_HAVE_FILE_AIO)
- if (ngx_eventfd != -1) {
-
- if (io_destroy(ngx_aio_ctx) == -1) {
- ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
- "io_destroy() failed");
- }
-
- if (close(ngx_eventfd) == -1) {
- ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
- "eventfd close() failed");
- }
-
- ngx_eventfd = -1;
+ if (ngx_ring.ring_fd != 0) {
+ io_uring_queue_exit(&ngx_ring);
+ ngx_ring.ring_fd = 0;
}
- ngx_aio_ctx = 0;
-
#endif
ngx_free(event_list);
@@ -939,84 +850,36 @@
#if (NGX_HAVE_FILE_AIO)
static void
-ngx_epoll_eventfd_handler(ngx_event_t *ev)
+ngx_epoll_io_uring_handler(ngx_event_t *ev)
{
- int n, events;
- long i;
- uint64_t ready;
- ngx_err_t err;
ngx_event_t *e;
+ struct io_uring_cqe *cqe;
+ unsigned head;
+ unsigned cqe_count = 0;
ngx_event_aio_t *aio;
- struct io_event event[64];
- struct timespec ts;
- ngx_log_debug0(NGX_LOG_DEBUG_EVENT, ev->log, 0, "eventfd handler");
-
- n = read(ngx_eventfd, &ready, 8);
+ ngx_log_debug(NGX_LOG_DEBUG_EVENT, ev->log, 0,
+ "io_uring_peek_cqe: START");
- err = ngx_errno;
-
- ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0, "eventfd: %d", n);
+ io_uring_for_each_cqe(&ngx_ring, head, cqe) {
+ ngx_log_debug3(NGX_LOG_DEBUG_EVENT, ev->log, 0,
+ "io_event: %p %d %d",
+ cqe->user_data, cqe->res, cqe->flags);
- if (n != 8) {
- if (n == -1) {
- if (err == NGX_EAGAIN) {
- return;
- }
+ e = (ngx_event_t *) io_uring_cqe_get_data(cqe);
+ e->complete = 1;
+ e->active = 0;
+ e->ready = 1;
- ngx_log_error(NGX_LOG_ALERT, ev->log, err, "read(eventfd) failed");
- return;
- }
+ aio = e->data;
+ aio->res = cqe->res;
- ngx_log_error(NGX_LOG_ALERT, ev->log, 0,
- "read(eventfd) returned only %d bytes", n);
- return;
+ ++cqe_count;
+
+ ngx_post_event(e, &ngx_posted_events);
}
- ts.tv_sec = 0;
- ts.tv_nsec = 0;
-
- while (ready) {
-
- events = io_getevents(ngx_aio_ctx, 1, 64, event, &ts);
-
- ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0,
- "io_getevents: %d", events);
-
- if (events > 0) {
- ready -= events;
-
- for (i = 0; i < events; i++) {
-
- ngx_log_debug4(NGX_LOG_DEBUG_EVENT, ev->log, 0,
- "io_event: %XL %XL %L %L",
- event[i].data, event[i].obj,
- event[i].res, event[i].res2);
-
- e = (ngx_event_t *) (uintptr_t) event[i].data;
-
- e->complete = 1;
- e->active = 0;
- e->ready = 1;
-
- aio = e->data;
- aio->res = event[i].res;
-
- ngx_post_event(e, &ngx_posted_events);
- }
-
- continue;
- }
-
- if (events == 0) {
- return;
- }
-
- /* events == -1 */
- ngx_log_error(NGX_LOG_ALERT, ev->log, ngx_errno,
- "io_getevents() failed");
- return;
- }
+ io_uring_cq_advance(&ngx_ring, cqe_count);
}
#endif
diff -r 82228f955153 -r 3677cf19b98b src/event/ngx_event.h
--- a/src/event/ngx_event.h Tue Dec 15 17:41:39 2020 +0300
+++ b/src/event/ngx_event.h Mon Jan 11 08:07:14 2021 -0500
@@ -160,7 +160,9 @@
size_t nbytes;
#endif
- ngx_aiocb_t aiocb;
+ /* Make sure that this iov has the same lifecycle with its associated aio event */
+ struct iovec iov;
+
ngx_event_t event;
};
diff -r 82228f955153 -r 3677cf19b98b src/os/unix/ngx_linux_aio_read.c
--- a/src/os/unix/ngx_linux_aio_read.c Tue Dec 15 17:41:39 2020 +0300
+++ b/src/os/unix/ngx_linux_aio_read.c Mon Jan 11 08:07:14 2021 -0500
@@ -9,20 +9,16 @@
#include <ngx_core.h>
#include <ngx_event.h>
+#include <liburing.h>
-extern int ngx_eventfd;
-extern aio_context_t ngx_aio_ctx;
+
+extern struct io_uring ngx_ring;
+extern struct io_uring_params ngx_ring_params;
static void ngx_file_aio_event_handler(ngx_event_t *ev);
-static int
-io_submit(aio_context_t ctx, long n, struct iocb **paiocb)
-{
- return syscall(SYS_io_submit, ctx, n, paiocb);
-}
-
ngx_int_t
ngx_file_aio_init(ngx_file_t *file, ngx_pool_t *pool)
@@ -50,10 +46,10 @@
ngx_file_aio_read(ngx_file_t *file, u_char *buf, size_t size, off_t offset,
ngx_pool_t *pool)
{
- ngx_err_t err;
- struct iocb *piocb[1];
- ngx_event_t *ev;
- ngx_event_aio_t *aio;
+ ngx_err_t err;
+ ngx_event_t *ev;
+ ngx_event_aio_t *aio;
+ struct io_uring_sqe *sqe;
if (!ngx_file_aio) {
return ngx_read_file(file, buf, size, offset);
@@ -93,22 +89,41 @@
return NGX_ERROR;
}
- ngx_memzero(&aio->aiocb, sizeof(struct iocb));
+ sqe = io_uring_get_sqe(&ngx_ring);
+
+ if (!sqe) {
+ ngx_log_debug4(NGX_LOG_DEBUG_CORE, file->log, 0,
+ "aio no sqe left:%d @%O:%uz %V",
+ ev->complete, offset, size, &file->name);
+ return ngx_read_file(file, buf, size, offset);
+ }
- aio->aiocb.aio_data = (uint64_t) (uintptr_t) ev;
- aio->aiocb.aio_lio_opcode = IOCB_CMD_PREAD;
- aio->aiocb.aio_fildes = file->fd;
- aio->aiocb.aio_buf = (uint64_t) (uintptr_t) buf;
- aio->aiocb.aio_nbytes = size;
- aio->aiocb.aio_offset = offset;
- aio->aiocb.aio_flags = IOCB_FLAG_RESFD;
- aio->aiocb.aio_resfd = ngx_eventfd;
+ if (__builtin_expect(!!(ngx_ring_params.features & IORING_FEAT_CUR_PERSONALITY), 1)) {
+ /*
+ * `io_uring_prep_read` is faster than `io_uring_prep_readv`, because the kernel
+ * doesn't need to import iovecs in advance.
+ *
+ * If the kernel supports `IORING_FEAT_CUR_PERSONALITY`, it should support
+ * non-vectored read/write commands too.
+ *
+ * It's not perfect, but avoids an extra feature-test syscall.
+ */
+ io_uring_prep_read(sqe, file->fd, buf, size, offset);
+ } else {
+ /*
+ * We must store iov into heap to prevent kernel from returning -EFAULT
+ * in case `IORING_FEAT_SUBMIT_STABLE` is not supported
+ */
+ aio->iov.iov_base = buf;
+ aio->iov.iov_len = size;
+ io_uring_prep_readv(sqe, file->fd, &aio->iov, 1, offset);
+ }
+ io_uring_sqe_set_data(sqe, ev);
+
ev->handler = ngx_file_aio_event_handler;
- piocb[0] = &aio->aiocb;
-
- if (io_submit(ngx_aio_ctx, 1, piocb) == 1) {
+ if (io_uring_submit(&ngx_ring) == 1) {
ev->active = 1;
ev->ready = 0;
ev->complete = 0;
diff -r 82228f955153 -r 3677cf19b98b src/os/unix/ngx_linux_config.h
--- a/src/os/unix/ngx_linux_config.h Tue Dec 15 17:41:39 2020 +0300
+++ b/src/os/unix/ngx_linux_config.h Mon Jan 11 08:07:14 2021 -0500
@@ -93,10 +93,6 @@
#include <sys/eventfd.h>
#endif
#include <sys/syscall.h>
-#if (NGX_HAVE_FILE_AIO)
-#include <linux/aio_abi.h>
-typedef struct iocb ngx_aiocb_t;
-#endif
#if (NGX_HAVE_CAPABILITIES)
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.nginx.org/pipermail/nginx-devel/attachments/20210112/7039b288/attachment-0001.htm>
More information about the nginx-devel
mailing list