[PATCH] Add io_uring support in AIO(async io) module

Zhao, Ping ping.zhao at intel.com
Thu Jan 14 05:53:17 UTC 2021


# HG changeset patch
# User Ping Zhao <ping.zhao at intel.com>
# Date 1610554205 18000
#      Wed Jan 13 11:10:05 2021 -0500
# Node ID 95886c3353dc80a3da215027c1e0f2141e47e911
# Parent  b055bb6ef87e49232a7fcb4e5334b8efda3b6499
Add io_uring support in AIO(async io) module.

Hello, This is a patch to support io_uring in AIO(async io) module.
Basically you don't need change your configurations. If you're using new kernel(above v5.1) which supports io_uring, and you have   "aio on" in your configuration. Nginx will use io_uring for FILE_AIO access which can achieve performance improvement than legacy   libaio.

Checked with iostat which shows nvme disk io has 30%+ performance improvement with 1 thread.
Use wrk with 100 threads 200 connections(-t 100 -c 200) with 25000 random requests.

                  iostat(B/s)
libaio        ~1.0 GB/s
io_uring   1.3+ GB/s

diff -r b055bb6ef87e -r 95886c3353dc auto/unix
--- a/auto/unix	Mon Jan 11 22:06:27 2021 +0300
+++ b/auto/unix	Wed Jan 13 11:10:05 2021 -0500
@@ -531,6 +531,30 @@
     fi
 
     if [ $ngx_found = no ]; then
+        ngx_feature="Linux AIO support(IO_URING)"
+        ngx_feature_name="NGX_HAVE_FILE_AIO"
+        ngx_feature_incs="#include <liburing.h>"
+        ngx_feature_path=
+        ngx_feature_libs="-luring"
+        ngx_feature_test="struct io_uring ring;
+                        struct io_uring_params params;
+                        int ret;
+                        memset(&params, 0, sizeof(params));
+                        ret = io_uring_queue_init_params(64, &ring, &params);
+                        if (ret < 0) return 1;
+                        if (!(params.features & IORING_FEAT_FAST_POLL)) return 1;
+                        io_uring_queue_exit(&ring)"
+        . auto/feature
+
+        if [ $ngx_found = yes ]; then
+            have=NGX_HAVE_EVENTFD . auto/have
+            have=NGX_HAVE_FILE_IOURING . auto/have
+            CORE_LIBS="$CORE_LIBS -luring"
+            CORE_SRCS="$CORE_SRCS $LINUX_AIO_SRCS"
+        fi
+    fi
+
+    if [ $ngx_found = no ]; then
 
         ngx_feature="Linux AIO support"
         ngx_feature_name="NGX_HAVE_FILE_AIO"
diff -r b055bb6ef87e -r 95886c3353dc src/core/ngx_output_chain.c
--- a/src/core/ngx_output_chain.c	Mon Jan 11 22:06:27 2021 +0300
+++ b/src/core/ngx_output_chain.c	Wed Jan 13 11:10:05 2021 -0500
@@ -589,6 +589,20 @@
         if (ctx->aio_handler) {
             n = ngx_file_aio_read(src->file, dst->pos, (size_t) size,
                                   src->file_pos, ctx->pool);
+#if (NGX_HAVE_FILE_IOURING)
+           if (n > 0 && n < size) {
+                ngx_log_error(NGX_LOG_INFO, ctx->pool->log, 0,
+                      ngx_read_file_n " Try again, only read %z of %O from \"%s\"",
+                      n, size, src->file->name.data);
+
+                src->file_pos += n;
+                dst->last += n;
+
+                n = ngx_file_aio_read(src->file, dst->pos+n, (size_t) size-n,
+                                  src->file_pos, ctx->pool);
+
+            }
+#endif
             if (n == NGX_AGAIN) {
                 ctx->aio_handler(ctx, src->file);
                 return NGX_AGAIN;
diff -r b055bb6ef87e -r 95886c3353dc src/event/modules/ngx_epoll_module.c
--- a/src/event/modules/ngx_epoll_module.c	Mon Jan 11 22:06:27 2021 +0300
+++ b/src/event/modules/ngx_epoll_module.c	Wed Jan 13 11:10:05 2021 -0500
@@ -9,6 +9,9 @@
 #include <ngx_core.h>
 #include <ngx_event.h>
 
+#if (NGX_HAVE_FILE_IOURING)
+#include <liburing.h>
+#endif
 
 #if (NGX_TEST_BUILD_EPOLL)
 
@@ -77,6 +80,9 @@
 
 #if (NGX_HAVE_FILE_AIO)
 
+#if (NGX_HAVE_FILE_IOURING)
+#else
+
 #define SYS_io_setup      245
 #define SYS_io_destroy    246
 #define SYS_io_getevents  247
@@ -89,9 +95,9 @@
     int64_t   res;   /* result code for this event */
     int64_t   res2;  /* secondary result */
 };
-
+#endif  /* NGX_HAVE_FILE_IOURING */
+#endif  /* NGX_HAVE_FILE_AIO */
 
-#endif
 #endif /* NGX_TEST_BUILD_EPOLL */
 
 
@@ -124,8 +130,25 @@
     ngx_uint_t flags);
 
 #if (NGX_HAVE_FILE_AIO)
+#if (NGX_HAVE_FILE_IOURING)
+static void ngx_epoll_io_uring_handler(ngx_event_t *ev);
+
+struct io_uring             ngx_ring;
+struct io_uring_params      ngx_ring_params;
+
+static ngx_event_t          ngx_ring_event;
+static ngx_connection_t     ngx_ring_conn;
+
+#else
 static void ngx_epoll_eventfd_handler(ngx_event_t *ev);
-#endif
+
+int                         ngx_eventfd = -1;
+aio_context_t               ngx_aio_ctx = 0;
+
+static ngx_event_t          ngx_eventfd_event;
+static ngx_connection_t     ngx_eventfd_conn;
+#endif  /* NGX_HAVE_FILE_IOURING */
+#endif  /* NGX_HAVE_FILE_AIO */
 
 static void *ngx_epoll_create_conf(ngx_cycle_t *cycle);
 static char *ngx_epoll_init_conf(ngx_cycle_t *cycle, void *conf);
@@ -140,16 +163,6 @@
 static ngx_connection_t     notify_conn;
 #endif
 
-#if (NGX_HAVE_FILE_AIO)
-
-int                         ngx_eventfd = -1;
-aio_context_t               ngx_aio_ctx = 0;
-
-static ngx_event_t          ngx_eventfd_event;
-static ngx_connection_t     ngx_eventfd_conn;
-
-#endif
-
 #if (NGX_HAVE_EPOLLRDHUP)
 ngx_uint_t                  ngx_use_epoll_rdhup;
 #endif
@@ -217,6 +230,47 @@
 
 #if (NGX_HAVE_FILE_AIO)
 
+#if (NGX_HAVE_FILE_IOURING)
+
+static void
+ngx_epoll_aio_init(ngx_cycle_t *cycle, ngx_epoll_conf_t *epcf)
+{
+    struct epoll_event  ee;
+
+    if (io_uring_queue_init_params(32763, &ngx_ring, &ngx_ring_params) < 0) {
+        ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
+                      "io_uring_queue_init_params() failed");
+        goto failed;
+    }
+
+    ngx_ring_event.data = &ngx_ring_conn;
+    ngx_ring_event.handler = ngx_epoll_io_uring_handler;
+    ngx_ring_event.log = cycle->log;
+    ngx_ring_event.active = 1;
+    ngx_ring_conn.fd = ngx_ring.ring_fd;
+    ngx_ring_conn.read = &ngx_ring_event;
+    ngx_ring_conn.log = cycle->log;
+
+    ee.events = EPOLLIN|EPOLLET;
+    ee.data.ptr = &ngx_ring_conn;
+
+    if (epoll_ctl(ep, EPOLL_CTL_ADD, ngx_ring.ring_fd, &ee) != -1) {
+        return;
+    }
+
+    ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
+                  "epoll_ctl(EPOLL_CTL_ADD, eventfd) failed");
+
+    io_uring_queue_exit(&ngx_ring);
+
+failed:
+
+    ngx_ring.ring_fd = 0;
+    ngx_file_aio = 0;
+}
+
+#else
+
 /*
  * We call io_setup(), io_destroy() io_submit(), and io_getevents() directly
  * as syscalls instead of libaio usage, because the library header file
@@ -316,8 +370,8 @@
     ngx_file_aio = 0;
 }
 
-#endif
-
+#endif  /*NGX_HAVE_FILE_IOURING*/
+#endif  /*NGX_HAVE_FILE_AIO*/
 
 static ngx_int_t
 ngx_epoll_init(ngx_cycle_t *cycle, ngx_msec_t timer)
@@ -548,6 +602,13 @@
 #endif
 
 #if (NGX_HAVE_FILE_AIO)
+#if (NGX_HAVE_FILE_IOURING)
+    if (ngx_ring.ring_fd != 0) {
+        io_uring_queue_exit(&ngx_ring);
+        ngx_ring.ring_fd = 0;
+    }
+
+#else
 
     if (ngx_eventfd != -1) {
 
@@ -566,7 +627,8 @@
 
     ngx_aio_ctx = 0;
 
-#endif
+#endif  /*NGX_HAVE_FILE_IOURING*/
+#endif  /*NGX_HAVE_FILE_AIO*/
 
     ngx_free(event_list);
 
@@ -935,8 +997,42 @@
     return NGX_OK;
 }
 
+#if (NGX_HAVE_FILE_AIO)
+#if (NGX_HAVE_FILE_IOURING)
+static void
+ngx_epoll_io_uring_handler(ngx_event_t *ev)
+{
+    ngx_event_t      *e;
+    struct io_uring_cqe  *cqe;
+    unsigned head;
+    unsigned cqe_count = 0;
+    ngx_event_aio_t  *aio;
 
-#if (NGX_HAVE_FILE_AIO)
+    ngx_log_debug(NGX_LOG_DEBUG_EVENT, ev->log, 0,
+                   "io_uring_peek_cqe: START");
+
+    io_uring_for_each_cqe(&ngx_ring, head, cqe) {
+        ngx_log_debug3(NGX_LOG_DEBUG_EVENT, ev->log, 0,
+                       "io_event: %p %d %d",
+                       cqe->user_data, cqe->res, cqe->flags);
+
+        e = (ngx_event_t *) io_uring_cqe_get_data(cqe);
+        e->complete = 1;
+        e->active = 0;
+        e->ready = 1;
+
+        aio = e->data;
+        aio->res = cqe->res;
+
+        ++cqe_count;
+
+        ngx_post_event(e, &ngx_posted_events);
+    }
+
+    io_uring_cq_advance(&ngx_ring, cqe_count);
+}
+
+#else
 
 static void
 ngx_epoll_eventfd_handler(ngx_event_t *ev)
@@ -1019,8 +1115,8 @@
     }
 }
 
-#endif
-
+#endif  /*NGX_HAVE_FILE_IOURING*/
+#endif  /*NGX_HAVE_FILE_AIO*/
 
 static void *
 ngx_epoll_create_conf(ngx_cycle_t *cycle)
diff -r b055bb6ef87e -r 95886c3353dc src/event/ngx_event.h
--- a/src/event/ngx_event.h	Mon Jan 11 22:06:27 2021 +0300
+++ b/src/event/ngx_event.h	Wed Jan 13 11:10:05 2021 -0500
@@ -160,7 +160,11 @@
     size_t                     nbytes;
 #endif
 
+#if (NGX_HAVE_FILE_IOURING)
+    struct iovec               iov;
+#else
     ngx_aiocb_t                aiocb;
+#endif
     ngx_event_t                event;
 };
 
diff -r b055bb6ef87e -r 95886c3353dc src/os/unix/ngx_linux_aio_read.c
--- a/src/os/unix/ngx_linux_aio_read.c	Mon Jan 11 22:06:27 2021 +0300
+++ b/src/os/unix/ngx_linux_aio_read.c	Wed Jan 13 11:10:05 2021 -0500
@@ -9,20 +9,24 @@
 #include <ngx_core.h>
 #include <ngx_event.h>
 
+#if (NGX_HAVE_FILE_IOURING)
+#include <liburing.h>
 
+extern struct io_uring          ngx_ring;
+extern struct io_uring_params   ngx_ring_params;
+
+#else
 extern int            ngx_eventfd;
 extern aio_context_t  ngx_aio_ctx;
 
-
-static void ngx_file_aio_event_handler(ngx_event_t *ev);
-
-
 static int
 io_submit(aio_context_t ctx, long n, struct iocb **paiocb)
 {
     return syscall(SYS_io_submit, ctx, n, paiocb);
 }
+#endif
 
+static void ngx_file_aio_event_handler(ngx_event_t *ev);
 
 ngx_int_t
 ngx_file_aio_init(ngx_file_t *file, ngx_pool_t *pool)
@@ -45,7 +49,114 @@
     return NGX_OK;
 }
 
+#if (NGX_HAVE_FILE_IOURING)
+ssize_t
+ngx_file_aio_read(ngx_file_t *file, u_char *buf, size_t size, off_t offset,
+    ngx_pool_t *pool)
+{
+    ngx_err_t             err;
+    ngx_event_t          *ev;
+    ngx_event_aio_t      *aio;
+    struct io_uring_sqe  *sqe;
 
+    if (!ngx_file_aio) {
+        return ngx_read_file(file, buf, size, offset);
+    }
+
+    if (file->aio == NULL && ngx_file_aio_init(file, pool) != NGX_OK) {
+        return NGX_ERROR;
+    }
+
+    aio = file->aio;
+    ev = &aio->event;
+
+    if (!ev->ready) {
+        ngx_log_error(NGX_LOG_ALERT, file->log, 0,
+                      "second aio post for \"%V\"", &file->name);
+        return NGX_AGAIN;
+    }
+
+    ngx_log_debug4(NGX_LOG_DEBUG_CORE, file->log, 0,
+                   "aio complete:%d @%O:%uz %V",
+                   ev->complete, offset, size, &file->name);
+
+    if (ev->complete) {
+        ev->active = 0;
+        ev->complete = 0;
+
+        if (aio->res >= 0) {
+            ngx_set_errno(0);
+            return aio->res;
+        }
+
+        ngx_set_errno(-aio->res);
+
+        ngx_log_error(NGX_LOG_CRIT, file->log, ngx_errno,
+                      "aio read \"%s\" failed", file->name.data);
+
+        return NGX_ERROR;
+    }
+
+    sqe = io_uring_get_sqe(&ngx_ring);
+
+    if (!sqe) {
+        ngx_log_debug4(NGX_LOG_DEBUG_CORE, file->log, 0,
+                       "aio no sqe left:%d @%O:%uz %V",
+                       ev->complete, offset, size, &file->name);
+        return ngx_read_file(file, buf, size, offset);
+    }
+
+    if (__builtin_expect(!!(ngx_ring_params.features & IORING_FEAT_CUR_PERSONALITY), 1)) {
+        /*
+         * `io_uring_prep_read` is faster than `io_uring_prep_readv`, because the kernel
+         * doesn't need to import iovecs in advance.
+         *
+         * If the kernel supports `IORING_FEAT_CUR_PERSONALITY`, it should support
+         * non-vectored read/write commands too.
+         *
+         * It's not perfect, but avoids an extra feature-test syscall.
+         */
+        io_uring_prep_read(sqe, file->fd, buf, size, offset);
+    } else {
+        /*
+         * We must store iov into heap to prevent kernel from returning -EFAULT
+         * in case `IORING_FEAT_SUBMIT_STABLE` is not supported
+         */
+        aio->iov.iov_base = buf;
+        aio->iov.iov_len = size;
+        io_uring_prep_readv(sqe, file->fd, &aio->iov, 1, offset);
+    }
+    io_uring_sqe_set_data(sqe, ev);
+
+
+    ev->handler = ngx_file_aio_event_handler;
+
+    if (io_uring_submit(&ngx_ring) == 1) {
+        ev->active = 1;
+        ev->ready = 0;
+        ev->complete = 0;
+
+        return NGX_AGAIN;
+    }
+
+    err = ngx_errno;
+
+    if (err == NGX_EAGAIN) {
+        return ngx_read_file(file, buf, size, offset);
+    }
+
+    ngx_log_error(NGX_LOG_CRIT, file->log, err,
+                  "io_submit(\"%V\") failed", &file->name);
+
+    if (err == NGX_ENOSYS) {
+        ngx_file_aio = 0;
+        return ngx_read_file(file, buf, size, offset);
+    }
+
+    return NGX_ERROR;
+}
+
+#else
 ssize_t
 ngx_file_aio_read(ngx_file_t *file, u_char *buf, size_t size, off_t offset,
     ngx_pool_t *pool)
@@ -132,7 +243,7 @@
 
     return NGX_ERROR;
 }
-
+#endif
 
 static void
 ngx_file_aio_event_handler(ngx_event_t *ev)
diff -r b055bb6ef87e -r 95886c3353dc src/os/unix/ngx_linux_config.h
--- a/src/os/unix/ngx_linux_config.h	Mon Jan 11 22:06:27 2021 +0300
+++ b/src/os/unix/ngx_linux_config.h	Wed Jan 13 11:10:05 2021 -0500
@@ -93,11 +93,15 @@
 #include <sys/eventfd.h>
 #endif
 #include <sys/syscall.h>
+
 #if (NGX_HAVE_FILE_AIO)
+#if (NGX_HAVE_FILE_IOURING)
+
+#else
 #include <linux/aio_abi.h>
 typedef struct iocb  ngx_aiocb_t;
 #endif
-
+#endif
 
 #if (NGX_HAVE_CAPABILITIES)
 #include <linux/capability.h>


More information about the nginx-devel mailing list