[PATCH] Use io_uring for async io access

Zhao, Ping ping.zhao at intel.com
Mon Jan 11 07:05:28 UTC 2021


Hello Nginx Developers,

This patch makes Nginx use io_uring for asynchronous file I/O. I would appreciate your comments.

Thanks,
Ping

# HG changeset patch
# User Ping Zhao <ping.zhao at intel.com>
# Date 1610370434 18000
#      Mon Jan 11 08:07:14 2021 -0500
# Node ID 3677cf19b98b054614030b80f73728b02fdda832
# Parent  82228f955153527fba12211f52bf102c90f38dfb
Use io_uring for async io access.

Replace AIO with io_uring for asynchronous disk I/O access.

io_uring is a new kernel feature for asynchronous I/O. Nginx can use it in place of legacy disk AIO access (for example, disk cache file access).

Measured with iostat, NVMe disk I/O shows a 30%+ performance improvement with 1 thread.
Tested using wrk with 100 threads and 200 connections (-t 100 -c 1000) and 25000 random requests.

                             iostat(B/s)
libaio                   1.0  GB/s
io_uring              1.3+ GB/s

Patch contributor: Carter Li, Ping Zhao

diff -r 82228f955153 -r 3677cf19b98b auto/unix
--- a/auto/unix  Tue Dec 15 17:41:39 2020 +0300
+++ b/auto/unix              Mon Jan 11 08:07:14 2021 -0500
@@ -532,44 +532,23 @@

     if [ $ngx_found = no ]; then

-        ngx_feature="Linux AIO support"
+        ngx_feature="Linux io_uring support (liburing)"
         ngx_feature_name="NGX_HAVE_FILE_AIO"
         ngx_feature_run=no
-        ngx_feature_incs="#include <linux/aio_abi.h>
-                          #include <sys/eventfd.h>"
+        ngx_feature_incs="#include <liburing.h>"
         ngx_feature_path=
-        ngx_feature_libs=
-        ngx_feature_test="struct iocb  iocb;
-                          iocb.aio_lio_opcode = IOCB_CMD_PREAD;
-                          iocb.aio_flags = IOCB_FLAG_RESFD;
-                          iocb.aio_resfd = -1;
-                          (void) iocb;
-                          (void) eventfd(0, 0)"
+        ngx_feature_libs="-luring"
+        ngx_feature_test="struct io_uring  ring;
+                          int ret = io_uring_queue_init(64, &ring, 0);
+                          if (ret < 0) return 1;
+                          io_uring_queue_exit(&ring);"
         . auto/feature

         if [ $ngx_found = yes ]; then
             have=NGX_HAVE_EVENTFD . auto/have
             have=NGX_HAVE_SYS_EVENTFD_H . auto/have
             CORE_SRCS="$CORE_SRCS $LINUX_AIO_SRCS"
-        fi
-    fi
-
-    if [ $ngx_found = no ]; then
-
-        ngx_feature="Linux AIO support (SYS_eventfd)"
-        ngx_feature_incs="#include <linux/aio_abi.h>
-                          #include <sys/syscall.h>"
-        ngx_feature_test="struct iocb  iocb;
-                          iocb.aio_lio_opcode = IOCB_CMD_PREAD;
-                          iocb.aio_flags = IOCB_FLAG_RESFD;
-                          iocb.aio_resfd = -1;
-                          (void) iocb;
-                          (void) SYS_eventfd"
-        . auto/feature
-
-        if [ $ngx_found = yes ]; then
-            have=NGX_HAVE_EVENTFD . auto/have
-            CORE_SRCS="$CORE_SRCS $LINUX_AIO_SRCS"
+            CORE_LIBS="$CORE_LIBS -luring"
         fi
     fi

@@ -577,7 +556,7 @@
         cat << END

 $0: no supported file AIO was found
-Currently file AIO is supported on FreeBSD 4.3+ and Linux 2.6.22+ only
+Currently file AIO is supported on FreeBSD 4.3+ and Linux 5.1.0+ (requires liburing) only

 END
         exit 1
diff -r 82228f955153 -r 3677cf19b98b src/core/ngx_open_file_cache.c
--- a/src/core/ngx_open_file_cache.c     Tue Dec 15 17:41:39 2020 +0300
+++ b/src/core/ngx_open_file_cache.c  Mon Jan 11 08:07:14 2021 -0500
@@ -869,8 +869,8 @@
     if (!of->log) {

         /*
-         * Use non-blocking open() not to hang on FIFO files, etc.
-         * This flag has no effect on a regular files.
+         * Unlike a plain read(), IORING_OP_READV on an O_NONBLOCK
+         * descriptor returns -EAGAIN if the operation may block.
          */

         fd = ngx_open_file_wrapper(name, of, NGX_FILE_RDONLY|NGX_FILE_NONBLOCK,
diff -r 82228f955153 -r 3677cf19b98b src/core/ngx_output_chain.c
--- a/src/core/ngx_output_chain.c           Tue Dec 15 17:41:39 2020 +0300
+++ b/src/core/ngx_output_chain.c        Mon Jan 11 08:07:14 2021 -0500
@@ -589,6 +589,20 @@
         if (ctx->aio_handler) {
             n = ngx_file_aio_read(src->file, dst->pos, (size_t) size,
                                   src->file_pos, ctx->pool);
+
+                if (n > 0 && n < size) {
+             ngx_log_error(NGX_LOG_INFO, ctx->pool->log, 0,
+                      ngx_read_file_n " Try again, read only %z of %O from \"%s\"",
+                      n, size, src->file->name.data);
+
+                           src->file_pos += n;
+                 dst->last += n;
+
+                            n = ngx_file_aio_read(src->file, dst->pos+n, (size_t) size-n,
+                                  src->file_pos, ctx->pool);
+
+            }
+
             if (n == NGX_AGAIN) {
                 ctx->aio_handler(ctx, src->file);
                 return NGX_AGAIN;
diff -r 82228f955153 -r 3677cf19b98b src/event/modules/ngx_epoll_module.c
--- a/src/event/modules/ngx_epoll_module.c     Tue Dec 15 17:41:39 2020 +0300
+++ b/src/event/modules/ngx_epoll_module.c  Mon Jan 11 08:07:14 2021 -0500
@@ -9,6 +9,10 @@
#include <ngx_core.h>
#include <ngx_event.h>

+#if (NGX_HAVE_FILE_AIO)
+#include <liburing.h>
+#endif
+

 #if (NGX_TEST_BUILD_EPOLL)

@@ -75,23 +79,6 @@
#define SYS_eventfd       323
#endif

-#if (NGX_HAVE_FILE_AIO)
-
-#define SYS_io_setup      245
-#define SYS_io_destroy    246
-#define SYS_io_getevents  247
-
-typedef u_int  aio_context_t;
-
-struct io_event {
-    uint64_t  data;  /* the data field from the iocb */
-    uint64_t  obj;   /* what iocb this event came from */
-    int64_t   res;   /* result code for this event */
-    int64_t   res2;  /* secondary result */
-};
-
-
-#endif
#endif /* NGX_TEST_BUILD_EPOLL */


@@ -124,7 +111,7 @@
     ngx_uint_t flags);

 #if (NGX_HAVE_FILE_AIO)
-static void ngx_epoll_eventfd_handler(ngx_event_t *ev);
+static void ngx_epoll_io_uring_handler(ngx_event_t *ev);
#endif

 static void *ngx_epoll_create_conf(ngx_cycle_t *cycle);
@@ -141,13 +128,11 @@
#endif

 #if (NGX_HAVE_FILE_AIO)
-
-int                         ngx_eventfd = -1;
-aio_context_t               ngx_aio_ctx = 0;
+struct io_uring             ngx_ring;
+struct io_uring_params      ngx_ring_params;

-static ngx_event_t          ngx_eventfd_event;
-static ngx_connection_t     ngx_eventfd_conn;
-
+static ngx_event_t          ngx_ring_event;
+static ngx_connection_t     ngx_ring_conn;
#endif

 #if (NGX_HAVE_EPOLLRDHUP)
@@ -217,102 +202,40 @@

 #if (NGX_HAVE_FILE_AIO)

-/*
- * We call io_setup(), io_destroy() io_submit(), and io_getevents() directly
- * as syscalls instead of libaio usage, because the library header file
- * supports eventfd() since 0.3.107 version only.
- */
-
-static int
-io_setup(u_int nr_reqs, aio_context_t *ctx)
-{
-    return syscall(SYS_io_setup, nr_reqs, ctx);
-}
-
-
-static int
-io_destroy(aio_context_t ctx)
-{
-    return syscall(SYS_io_destroy, ctx);
-}
-
-
-static int
-io_getevents(aio_context_t ctx, long min_nr, long nr, struct io_event *events,
-    struct timespec *tmo)
-{
-    return syscall(SYS_io_getevents, ctx, min_nr, nr, events, tmo);
-}
-
-
static void
ngx_epoll_aio_init(ngx_cycle_t *cycle, ngx_epoll_conf_t *epcf)
{
-    int                 n;
     struct epoll_event  ee;

-#if (NGX_HAVE_SYS_EVENTFD_H)
-    ngx_eventfd = eventfd(0, 0);
-#else
-    ngx_eventfd = syscall(SYS_eventfd, 0);
-#endif
-
-    if (ngx_eventfd == -1) {
+    if (io_uring_queue_init_params(32763, &ngx_ring, &ngx_ring_params) < 0) {
         ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
-                      "eventfd() failed");
-        ngx_file_aio = 0;
-        return;
-    }
-
-    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
-                   "eventfd: %d", ngx_eventfd);
-
-    n = 1;
-
-    if (ioctl(ngx_eventfd, FIONBIO, &n) == -1) {
-        ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
-                      "ioctl(eventfd, FIONBIO) failed");
+                      "io_uring_queue_init_params() failed");
         goto failed;
     }

-    if (io_setup(epcf->aio_requests, &ngx_aio_ctx) == -1) {
-        ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
-                      "io_setup() failed");
-        goto failed;
-    }
-
-    ngx_eventfd_event.data = &ngx_eventfd_conn;
-    ngx_eventfd_event.handler = ngx_epoll_eventfd_handler;
-    ngx_eventfd_event.log = cycle->log;
-    ngx_eventfd_event.active = 1;
-    ngx_eventfd_conn.fd = ngx_eventfd;
-    ngx_eventfd_conn.read = &ngx_eventfd_event;
-    ngx_eventfd_conn.log = cycle->log;
+    ngx_ring_event.data = &ngx_ring_conn;
+    ngx_ring_event.handler = ngx_epoll_io_uring_handler;
+    ngx_ring_event.log = cycle->log;
+    ngx_ring_event.active = 1;
+    ngx_ring_conn.fd = ngx_ring.ring_fd;
+    ngx_ring_conn.read = &ngx_ring_event;
+    ngx_ring_conn.log = cycle->log;

     ee.events = EPOLLIN|EPOLLET;
-    ee.data.ptr = &ngx_eventfd_conn;
+    ee.data.ptr = &ngx_ring_conn;

-    if (epoll_ctl(ep, EPOLL_CTL_ADD, ngx_eventfd, &ee) != -1) {
+    if (epoll_ctl(ep, EPOLL_CTL_ADD, ngx_ring.ring_fd, &ee) != -1) {
         return;
     }

     ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
                   "epoll_ctl(EPOLL_CTL_ADD, eventfd) failed");

-    if (io_destroy(ngx_aio_ctx) == -1) {
-        ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
-                      "io_destroy() failed");
-    }
+    io_uring_queue_exit(&ngx_ring);

 failed:

-    if (close(ngx_eventfd) == -1) {
-        ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
-                      "eventfd close() failed");
-    }
-
-    ngx_eventfd = -1;
-    ngx_aio_ctx = 0;
+    ngx_ring.ring_fd = 0;
     ngx_file_aio = 0;
}

@@ -549,23 +472,11 @@

 #if (NGX_HAVE_FILE_AIO)

-    if (ngx_eventfd != -1) {
-
-        if (io_destroy(ngx_aio_ctx) == -1) {
-            ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
-                          "io_destroy() failed");
-        }
-
-        if (close(ngx_eventfd) == -1) {
-            ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
-                          "eventfd close() failed");
-        }
-
-        ngx_eventfd = -1;
+    if (ngx_ring.ring_fd != 0) {
+        io_uring_queue_exit(&ngx_ring);
+        ngx_ring.ring_fd = 0;
     }

-    ngx_aio_ctx = 0;
-
#endif

     ngx_free(event_list);
@@ -939,84 +850,36 @@
#if (NGX_HAVE_FILE_AIO)

 static void
-ngx_epoll_eventfd_handler(ngx_event_t *ev)
+ngx_epoll_io_uring_handler(ngx_event_t *ev)
{
-    int               n, events;
-    long              i;
-    uint64_t          ready;
-    ngx_err_t         err;
     ngx_event_t      *e;
+    struct io_uring_cqe  *cqe;
+    unsigned head;
+    unsigned cqe_count = 0;
     ngx_event_aio_t  *aio;
-    struct io_event   event[64];
-    struct timespec   ts;

-    ngx_log_debug0(NGX_LOG_DEBUG_EVENT, ev->log, 0, "eventfd handler");
-
-    n = read(ngx_eventfd, &ready, 8);
+    ngx_log_debug(NGX_LOG_DEBUG_EVENT, ev->log, 0,
+                   "io_uring_peek_cqe: START");

-    err = ngx_errno;
-
-    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0, "eventfd: %d", n);
+    io_uring_for_each_cqe(&ngx_ring, head, cqe) {
+        ngx_log_debug3(NGX_LOG_DEBUG_EVENT, ev->log, 0,
+                       "io_event: %p %d %d",
+                       cqe->user_data, cqe->res, cqe->flags);

-    if (n != 8) {
-        if (n == -1) {
-            if (err == NGX_EAGAIN) {
-                return;
-            }
+        e = (ngx_event_t *) io_uring_cqe_get_data(cqe);
+        e->complete = 1;
+        e->active = 0;
+        e->ready = 1;

-            ngx_log_error(NGX_LOG_ALERT, ev->log, err, "read(eventfd) failed");
-            return;
-        }
+        aio = e->data;
+        aio->res = cqe->res;

-        ngx_log_error(NGX_LOG_ALERT, ev->log, 0,
-                      "read(eventfd) returned only %d bytes", n);
-        return;
+        ++cqe_count;
+
+        ngx_post_event(e, &ngx_posted_events);
     }

-    ts.tv_sec = 0;
-    ts.tv_nsec = 0;
-
-    while (ready) {
-
-        events = io_getevents(ngx_aio_ctx, 1, 64, event, &ts);
-
-        ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0,
-                       "io_getevents: %d", events);
-
-        if (events > 0) {
-            ready -= events;
-
-            for (i = 0; i < events; i++) {
-
-                ngx_log_debug4(NGX_LOG_DEBUG_EVENT, ev->log, 0,
-                               "io_event: %XL %XL %L %L",
-                                event[i].data, event[i].obj,
-                                event[i].res, event[i].res2);
-
-                e = (ngx_event_t *) (uintptr_t) event[i].data;
-
-                e->complete = 1;
-                e->active = 0;
-                e->ready = 1;
-
-                aio = e->data;
-                aio->res = event[i].res;
-
-                ngx_post_event(e, &ngx_posted_events);
-            }
-
-            continue;
-        }
-
-        if (events == 0) {
-            return;
-        }
-
-        /* events == -1 */
-        ngx_log_error(NGX_LOG_ALERT, ev->log, ngx_errno,
-                      "io_getevents() failed");
-        return;
-    }
+    io_uring_cq_advance(&ngx_ring, cqe_count);
}

 #endif
diff -r 82228f955153 -r 3677cf19b98b src/event/ngx_event.h
--- a/src/event/ngx_event.h       Tue Dec 15 17:41:39 2020 +0300
+++ b/src/event/ngx_event.h     Mon Jan 11 08:07:14 2021 -0500
@@ -160,7 +160,9 @@
     size_t                     nbytes;
#endif

-    ngx_aiocb_t                aiocb;
+    /* Make sure that this iov has the same lifecycle with its associated aio event */
+    struct iovec               iov;
+
     ngx_event_t                event;
};

diff -r 82228f955153 -r 3677cf19b98b src/os/unix/ngx_linux_aio_read.c
--- a/src/os/unix/ngx_linux_aio_read.c   Tue Dec 15 17:41:39 2020 +0300
+++ b/src/os/unix/ngx_linux_aio_read.c Mon Jan 11 08:07:14 2021 -0500
@@ -9,20 +9,16 @@
#include <ngx_core.h>
#include <ngx_event.h>

+#include <liburing.h>

-extern int            ngx_eventfd;
-extern aio_context_t  ngx_aio_ctx;
+
+extern struct io_uring          ngx_ring;
+extern struct io_uring_params   ngx_ring_params;


 static void ngx_file_aio_event_handler(ngx_event_t *ev);


-static int
-io_submit(aio_context_t ctx, long n, struct iocb **paiocb)
-{
-    return syscall(SYS_io_submit, ctx, n, paiocb);
-}
-

 ngx_int_t
ngx_file_aio_init(ngx_file_t *file, ngx_pool_t *pool)
@@ -50,10 +46,10 @@
ngx_file_aio_read(ngx_file_t *file, u_char *buf, size_t size, off_t offset,
     ngx_pool_t *pool)
{
-    ngx_err_t         err;
-    struct iocb      *piocb[1];
-    ngx_event_t      *ev;
-    ngx_event_aio_t  *aio;
+    ngx_err_t             err;
+    ngx_event_t          *ev;
+    ngx_event_aio_t      *aio;
+    struct io_uring_sqe  *sqe;

     if (!ngx_file_aio) {
         return ngx_read_file(file, buf, size, offset);
@@ -93,22 +89,41 @@
         return NGX_ERROR;
     }

-    ngx_memzero(&aio->aiocb, sizeof(struct iocb));
+    sqe = io_uring_get_sqe(&ngx_ring);
+
+    if (!sqe) {
+        ngx_log_debug4(NGX_LOG_DEBUG_CORE, file->log, 0,
+                       "aio no sqe left:%d @%O:%uz %V",
+                       ev->complete, offset, size, &file->name);
+        return ngx_read_file(file, buf, size, offset);
+    }

-    aio->aiocb.aio_data = (uint64_t) (uintptr_t) ev;
-    aio->aiocb.aio_lio_opcode = IOCB_CMD_PREAD;
-    aio->aiocb.aio_fildes = file->fd;
-    aio->aiocb.aio_buf = (uint64_t) (uintptr_t) buf;
-    aio->aiocb.aio_nbytes = size;
-    aio->aiocb.aio_offset = offset;
-    aio->aiocb.aio_flags = IOCB_FLAG_RESFD;
-    aio->aiocb.aio_resfd = ngx_eventfd;
+    if (__builtin_expect(!!(ngx_ring_params.features & IORING_FEAT_CUR_PERSONALITY), 1)) {
+        /*
+         * `io_uring_prep_read` is faster than `io_uring_prep_readv`, because the kernel
+         * doesn't need to import iovecs in advance.
+         *
+         * If the kernel supports `IORING_FEAT_CUR_PERSONALITY`, it should support
+         * non-vectored read/write commands too.
+         *
+         * It's not perfect, but avoids an extra feature-test syscall.
+         */
+        io_uring_prep_read(sqe, file->fd, buf, size, offset);
+    } else {
+        /*
+         * We must store iov into heap to prevent kernel from returning -EFAULT
+         * in case `IORING_FEAT_SUBMIT_STABLE` is not supported
+         */
+        aio->iov.iov_base = buf;
+        aio->iov.iov_len = size;
+        io_uring_prep_readv(sqe, file->fd, &aio->iov, 1, offset);
+    }
+    io_uring_sqe_set_data(sqe, ev);
+

     ev->handler = ngx_file_aio_event_handler;

-    piocb[0] = &aio->aiocb;
-
-    if (io_submit(ngx_aio_ctx, 1, piocb) == 1) {
+    if (io_uring_submit(&ngx_ring) == 1) {
         ev->active = 1;
         ev->ready = 0;
         ev->complete = 0;
diff -r 82228f955153 -r 3677cf19b98b src/os/unix/ngx_linux_config.h
--- a/src/os/unix/ngx_linux_config.h       Tue Dec 15 17:41:39 2020 +0300
+++ b/src/os/unix/ngx_linux_config.h    Mon Jan 11 08:07:14 2021 -0500
@@ -93,10 +93,6 @@
#include <sys/eventfd.h>
#endif
#include <sys/syscall.h>
-#if (NGX_HAVE_FILE_AIO)
-#include <linux/aio_abi.h>
-typedef struct iocb  ngx_aiocb_t;
-#endif


 #if (NGX_HAVE_CAPABILITIES)

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.nginx.org/pipermail/nginx-devel/attachments/20210111/1f0d3cef/attachment-0001.htm>


More information about the nginx-devel mailing list