Welcome! Log In Create A New Profile

Advanced

[PATCH] Use io_uring for async io access

Zhao, Ping
January 11, 2021 02:06AM
Hello Nginx Developers,

This is a patch that makes Nginx use io_uring for async I/O access. I would like to receive your comments.

Thanks,
Ping

# HG changeset patch
# User Ping Zhao <ping.zhao@intel.com>
# Date 1610370434 18000
# Mon Jan 11 08:07:14 2021 -0500
# Node ID 3677cf19b98b054614030b80f73728b02fdda832
# Parent 82228f955153527fba12211f52bf102c90f38dfb
Use io_uring for async io access.

Replace aio with io_uring in async disk io access.

io_uring is a new kernel feature for async I/O access. Nginx can use it wherever it previously used legacy disk AIO (for example, disk cache file access).

Checking with iostat shows that NVMe disk I/O has a 30%+ performance improvement with 1 thread.
Tested with wrk using 100 threads and 200 connections (-t 100 -c 1000) and 25000 random requests.

iostat(B/s)
libaio 1.0 GB/s
io_uring 1.3+ GB/s

Patch contributor: Carter Li, Ping Zhao

diff -r 82228f955153 -r 3677cf19b98b auto/unix
--- a/auto/unix Tue Dec 15 17:41:39 2020 +0300
+++ b/auto/unix Mon Jan 11 08:07:14 2021 -0500
@@ -532,44 +532,23 @@

if [ $ngx_found = no ]; then

- ngx_feature="Linux AIO support"
+ ngx_feature="Linux io_uring support (liburing)"
ngx_feature_name="NGX_HAVE_FILE_AIO"
ngx_feature_run=no
- ngx_feature_incs="#include <linux/aio_abi.h>
- #include <sys/eventfd.h>"
+ ngx_feature_incs="#include <liburing.h>"
ngx_feature_path=
- ngx_feature_libs=
- ngx_feature_test="struct iocb iocb;
- iocb.aio_lio_opcode = IOCB_CMD_PREAD;
- iocb.aio_flags = IOCB_FLAG_RESFD;
- iocb.aio_resfd = -1;
- (void) iocb;
- (void) eventfd(0, 0)"
+ ngx_feature_libs="-luring"
+ ngx_feature_test="struct io_uring ring;
+ int ret = io_uring_queue_init(64, &ring, 0);
+ if (ret < 0) return 1;
+ io_uring_queue_exit(&ring);"
. auto/feature

if [ $ngx_found = yes ]; then
have=NGX_HAVE_EVENTFD . auto/have
have=NGX_HAVE_SYS_EVENTFD_H . auto/have
CORE_SRCS="$CORE_SRCS $LINUX_AIO_SRCS"
- fi
- fi
-
- if [ $ngx_found = no ]; then
-
- ngx_feature="Linux AIO support (SYS_eventfd)"
- ngx_feature_incs="#include <linux/aio_abi.h>
- #include <sys/syscall.h>"
- ngx_feature_test="struct iocb iocb;
- iocb.aio_lio_opcode = IOCB_CMD_PREAD;
- iocb.aio_flags = IOCB_FLAG_RESFD;
- iocb.aio_resfd = -1;
- (void) iocb;
- (void) SYS_eventfd"
- . auto/feature
-
- if [ $ngx_found = yes ]; then
- have=NGX_HAVE_EVENTFD . auto/have
- CORE_SRCS="$CORE_SRCS $LINUX_AIO_SRCS"
+ CORE_LIBS="$CORE_LIBS -luring"
fi
fi

@@ -577,7 +556,7 @@
cat << END

$0: no supported file AIO was found
-Currently file AIO is supported on FreeBSD 4.3+ and Linux 2.6.22+ only
+Currently file AIO is supported on FreeBSD 4.3+ and Linux 5.1.0+ (requires liburing) only

END
exit 1
diff -r 82228f955153 -r 3677cf19b98b src/core/ngx_open_file_cache.c
--- a/src/core/ngx_open_file_cache.c Tue Dec 15 17:41:39 2020 +0300
+++ b/src/core/ngx_open_file_cache.c Mon Jan 11 08:07:14 2021 -0500
@@ -869,8 +869,8 @@
if (!of->log) {

/*
- * Use non-blocking open() not to hang on FIFO files, etc.
- * This flag has no effect on a regular files.
+ * Unlike a plain read(), IORING_OP_READV on a file opened with
+ * O_NONBLOCK will return -EAGAIN if the operation may block.
*/

fd = ngx_open_file_wrapper(name, of, NGX_FILE_RDONLY|NGX_FILE_NONBLOCK,
diff -r 82228f955153 -r 3677cf19b98b src/core/ngx_output_chain.c
--- a/src/core/ngx_output_chain.c Tue Dec 15 17:41:39 2020 +0300
+++ b/src/core/ngx_output_chain.c Mon Jan 11 08:07:14 2021 -0500
@@ -589,6 +589,20 @@
if (ctx->aio_handler) {
n = ngx_file_aio_read(src->file, dst->pos, (size_t) size,
src->file_pos, ctx->pool);
+
+ if (n > 0 && n < size) {
+ ngx_log_error(NGX_LOG_INFO, ctx->pool->log, 0,
+ ngx_read_file_n " Try again, read only %z of %O from \"%s\"",
+ n, size, src->file->name.data);
+
+ src->file_pos += n;
+ dst->last += n;
+
+ n = ngx_file_aio_read(src->file, dst->pos+n, (size_t) size-n,
+ src->file_pos, ctx->pool);
+
+ }
+
if (n == NGX_AGAIN) {
ctx->aio_handler(ctx, src->file);
return NGX_AGAIN;
diff -r 82228f955153 -r 3677cf19b98b src/event/modules/ngx_epoll_module.c
--- a/src/event/modules/ngx_epoll_module.c Tue Dec 15 17:41:39 2020 +0300
+++ b/src/event/modules/ngx_epoll_module.c Mon Jan 11 08:07:14 2021 -0500
@@ -9,6 +9,10 @@
#include <ngx_core.h>
#include <ngx_event.h>

+#if (NGX_HAVE_FILE_AIO)
+#include <liburing.h>
+#endif
+

#if (NGX_TEST_BUILD_EPOLL)

@@ -75,23 +79,6 @@
#define SYS_eventfd 323
#endif

-#if (NGX_HAVE_FILE_AIO)
-
-#define SYS_io_setup 245
-#define SYS_io_destroy 246
-#define SYS_io_getevents 247
-
-typedef u_int aio_context_t;
-
-struct io_event {
- uint64_t data; /* the data field from the iocb */
- uint64_t obj; /* what iocb this event came from */
- int64_t res; /* result code for this event */
- int64_t res2; /* secondary result */
-};
-
-
-#endif
#endif /* NGX_TEST_BUILD_EPOLL */


@@ -124,7 +111,7 @@
ngx_uint_t flags);

#if (NGX_HAVE_FILE_AIO)
-static void ngx_epoll_eventfd_handler(ngx_event_t *ev);
+static void ngx_epoll_io_uring_handler(ngx_event_t *ev);
#endif

static void *ngx_epoll_create_conf(ngx_cycle_t *cycle);
@@ -141,13 +128,11 @@
#endif

#if (NGX_HAVE_FILE_AIO)
-
-int ngx_eventfd = -1;
-aio_context_t ngx_aio_ctx = 0;
+struct io_uring ngx_ring;
+struct io_uring_params ngx_ring_params;

-static ngx_event_t ngx_eventfd_event;
-static ngx_connection_t ngx_eventfd_conn;
-
+static ngx_event_t ngx_ring_event;
+static ngx_connection_t ngx_ring_conn;
#endif

#if (NGX_HAVE_EPOLLRDHUP)
@@ -217,102 +202,40 @@

#if (NGX_HAVE_FILE_AIO)

-/*
- * We call io_setup(), io_destroy() io_submit(), and io_getevents() directly
- * as syscalls instead of libaio usage, because the library header file
- * supports eventfd() since 0.3.107 version only.
- */
-
-static int
-io_setup(u_int nr_reqs, aio_context_t *ctx)
-{
- return syscall(SYS_io_setup, nr_reqs, ctx);
-}
-
-
-static int
-io_destroy(aio_context_t ctx)
-{
- return syscall(SYS_io_destroy, ctx);
-}
-
-
-static int
-io_getevents(aio_context_t ctx, long min_nr, long nr, struct io_event *events,
- struct timespec *tmo)
-{
- return syscall(SYS_io_getevents, ctx, min_nr, nr, events, tmo);
-}
-
-
static void
ngx_epoll_aio_init(ngx_cycle_t *cycle, ngx_epoll_conf_t *epcf)
{
- int n;
struct epoll_event ee;

-#if (NGX_HAVE_SYS_EVENTFD_H)
- ngx_eventfd = eventfd(0, 0);
-#else
- ngx_eventfd = syscall(SYS_eventfd, 0);
-#endif
-
- if (ngx_eventfd == -1) {
+ if (io_uring_queue_init_params(32763, &ngx_ring, &ngx_ring_params) < 0) {
ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
- "eventfd() failed");
- ngx_file_aio = 0;
- return;
- }
-
- ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
- "eventfd: %d", ngx_eventfd);
-
- n = 1;
-
- if (ioctl(ngx_eventfd, FIONBIO, &n) == -1) {
- ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
- "ioctl(eventfd, FIONBIO) failed");
+ "io_uring_queue_init_params() failed");
goto failed;
}

- if (io_setup(epcf->aio_requests, &ngx_aio_ctx) == -1) {
- ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
- "io_setup() failed");
- goto failed;
- }
-
- ngx_eventfd_event.data = &ngx_eventfd_conn;
- ngx_eventfd_event.handler = ngx_epoll_eventfd_handler;
- ngx_eventfd_event.log = cycle->log;
- ngx_eventfd_event.active = 1;
- ngx_eventfd_conn.fd = ngx_eventfd;
- ngx_eventfd_conn.read = &ngx_eventfd_event;
- ngx_eventfd_conn.log = cycle->log;
+ ngx_ring_event.data = &ngx_ring_conn;
+ ngx_ring_event.handler = ngx_epoll_io_uring_handler;
+ ngx_ring_event.log = cycle->log;
+ ngx_ring_event.active = 1;
+ ngx_ring_conn.fd = ngx_ring.ring_fd;
+ ngx_ring_conn.read = &ngx_ring_event;
+ ngx_ring_conn.log = cycle->log;

ee.events = EPOLLIN|EPOLLET;
- ee.data.ptr = &ngx_eventfd_conn;
+ ee.data.ptr = &ngx_ring_conn;

- if (epoll_ctl(ep, EPOLL_CTL_ADD, ngx_eventfd, &ee) != -1) {
+ if (epoll_ctl(ep, EPOLL_CTL_ADD, ngx_ring.ring_fd, &ee) != -1) {
return;
}

ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
"epoll_ctl(EPOLL_CTL_ADD, eventfd) failed");

- if (io_destroy(ngx_aio_ctx) == -1) {
- ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
- "io_destroy() failed");
- }
+ io_uring_queue_exit(&ngx_ring);

failed:

- if (close(ngx_eventfd) == -1) {
- ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
- "eventfd close() failed");
- }
-
- ngx_eventfd = -1;
- ngx_aio_ctx = 0;
+ ngx_ring.ring_fd = 0;
ngx_file_aio = 0;
}

@@ -549,23 +472,11 @@

#if (NGX_HAVE_FILE_AIO)

- if (ngx_eventfd != -1) {
-
- if (io_destroy(ngx_aio_ctx) == -1) {
- ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
- "io_destroy() failed");
- }
-
- if (close(ngx_eventfd) == -1) {
- ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
- "eventfd close() failed");
- }
-
- ngx_eventfd = -1;
+ if (ngx_ring.ring_fd != 0) {
+ io_uring_queue_exit(&ngx_ring);
+ ngx_ring.ring_fd = 0;
}

- ngx_aio_ctx = 0;
-
#endif

ngx_free(event_list);
@@ -939,84 +850,36 @@
#if (NGX_HAVE_FILE_AIO)

static void
-ngx_epoll_eventfd_handler(ngx_event_t *ev)
+ngx_epoll_io_uring_handler(ngx_event_t *ev)
{
- int n, events;
- long i;
- uint64_t ready;
- ngx_err_t err;
ngx_event_t *e;
+ struct io_uring_cqe *cqe;
+ unsigned head;
+ unsigned cqe_count = 0;
ngx_event_aio_t *aio;
- struct io_event event[64];
- struct timespec ts;

- ngx_log_debug0(NGX_LOG_DEBUG_EVENT, ev->log, 0, "eventfd handler");
-
- n = read(ngx_eventfd, &ready, 8);
+ ngx_log_debug(NGX_LOG_DEBUG_EVENT, ev->log, 0,
+ "io_uring_peek_cqe: START");

- err = ngx_errno;
-
- ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0, "eventfd: %d", n);
+ io_uring_for_each_cqe(&ngx_ring, head, cqe) {
+ ngx_log_debug3(NGX_LOG_DEBUG_EVENT, ev->log, 0,
+ "io_event: %p %d %d",
+ cqe->user_data, cqe->res, cqe->flags);

- if (n != 8) {
- if (n == -1) {
- if (err == NGX_EAGAIN) {
- return;
- }
+ e = (ngx_event_t *) io_uring_cqe_get_data(cqe);
+ e->complete = 1;
+ e->active = 0;
+ e->ready = 1;

- ngx_log_error(NGX_LOG_ALERT, ev->log, err, "read(eventfd) failed");
- return;
- }
+ aio = e->data;
+ aio->res = cqe->res;

- ngx_log_error(NGX_LOG_ALERT, ev->log, 0,
- "read(eventfd) returned only %d bytes", n);
- return;
+ ++cqe_count;
+
+ ngx_post_event(e, &ngx_posted_events);
}

- ts.tv_sec = 0;
- ts.tv_nsec = 0;
-
- while (ready) {
-
- events = io_getevents(ngx_aio_ctx, 1, 64, event, &ts);
-
- ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0,
- "io_getevents: %d", events);
-
- if (events > 0) {
- ready -= events;
-
- for (i = 0; i < events; i++) {
-
- ngx_log_debug4(NGX_LOG_DEBUG_EVENT, ev->log, 0,
- "io_event: %XL %XL %L %L",
- event[i].data, event[i].obj,
- event[i].res, event[i].res2);
-
- e = (ngx_event_t *) (uintptr_t) event[i].data;
-
- e->complete = 1;
- e->active = 0;
- e->ready = 1;
-
- aio = e->data;
- aio->res = event[i].res;
-
- ngx_post_event(e, &ngx_posted_events);
- }
-
- continue;
- }
-
- if (events == 0) {
- return;
- }
-
- /* events == -1 */
- ngx_log_error(NGX_LOG_ALERT, ev->log, ngx_errno,
- "io_getevents() failed");
- return;
- }
+ io_uring_cq_advance(&ngx_ring, cqe_count);
}

#endif
diff -r 82228f955153 -r 3677cf19b98b src/event/ngx_event.h
--- a/src/event/ngx_event.h Tue Dec 15 17:41:39 2020 +0300
+++ b/src/event/ngx_event.h Mon Jan 11 08:07:14 2021 -0500
@@ -160,7 +160,9 @@
size_t nbytes;
#endif

- ngx_aiocb_t aiocb;
+ /* Make sure that this iov has the same lifetime as its associated aio event */
+ struct iovec iov;
+
ngx_event_t event;
};

diff -r 82228f955153 -r 3677cf19b98b src/os/unix/ngx_linux_aio_read.c
--- a/src/os/unix/ngx_linux_aio_read.c Tue Dec 15 17:41:39 2020 +0300
+++ b/src/os/unix/ngx_linux_aio_read.c Mon Jan 11 08:07:14 2021 -0500
@@ -9,20 +9,16 @@
#include <ngx_core.h>
#include <ngx_event.h>

+#include <liburing.h>

-extern int ngx_eventfd;
-extern aio_context_t ngx_aio_ctx;
+
+extern struct io_uring ngx_ring;
+extern struct io_uring_params ngx_ring_params;


static void ngx_file_aio_event_handler(ngx_event_t *ev);


-static int
-io_submit(aio_context_t ctx, long n, struct iocb **paiocb)
-{
- return syscall(SYS_io_submit, ctx, n, paiocb);
-}
-

ngx_int_t
ngx_file_aio_init(ngx_file_t *file, ngx_pool_t *pool)
@@ -50,10 +46,10 @@
ngx_file_aio_read(ngx_file_t *file, u_char *buf, size_t size, off_t offset,
ngx_pool_t *pool)
{
- ngx_err_t err;
- struct iocb *piocb[1];
- ngx_event_t *ev;
- ngx_event_aio_t *aio;
+ ngx_err_t err;
+ ngx_event_t *ev;
+ ngx_event_aio_t *aio;
+ struct io_uring_sqe *sqe;

if (!ngx_file_aio) {
return ngx_read_file(file, buf, size, offset);
@@ -93,22 +89,41 @@
return NGX_ERROR;
}

- ngx_memzero(&aio->aiocb, sizeof(struct iocb));
+ sqe = io_uring_get_sqe(&ngx_ring);
+
+ if (!sqe) {
+ ngx_log_debug4(NGX_LOG_DEBUG_CORE, file->log, 0,
+ "aio no sqe left:%d @%O:%uz %V",
+ ev->complete, offset, size, &file->name);
+ return ngx_read_file(file, buf, size, offset);
+ }

- aio->aiocb.aio_data = (uint64_t) (uintptr_t) ev;
- aio->aiocb.aio_lio_opcode = IOCB_CMD_PREAD;
- aio->aiocb.aio_fildes = file->fd;
- aio->aiocb.aio_buf = (uint64_t) (uintptr_t) buf;
- aio->aiocb.aio_nbytes = size;
- aio->aiocb.aio_offset = offset;
- aio->aiocb.aio_flags = IOCB_FLAG_RESFD;
- aio->aiocb.aio_resfd = ngx_eventfd;
+ if (__builtin_expect(!!(ngx_ring_params.features & IORING_FEAT_CUR_PERSONALITY), 1)) {
+ /*
+ * `io_uring_prep_read` is faster than `io_uring_prep_readv`, because the kernel
+ * doesn't need to import iovecs in advance.
+ *
+ * If the kernel supports `IORING_FEAT_CUR_PERSONALITY`, it should support
+ * non-vectored read/write commands too.
+ *
+ * It's not perfect, but avoids an extra feature-test syscall.
+ */
+ io_uring_prep_read(sqe, file->fd, buf, size, offset);
+ } else {
+ /*
+ * We must store iov into heap to prevent kernel from returning -EFAULT
+ * in case `IORING_FEAT_SUBMIT_STABLE` is not supported
+ */
+ aio->iov.iov_base = buf;
+ aio->iov.iov_len = size;
+ io_uring_prep_readv(sqe, file->fd, &aio->iov, 1, offset);
+ }
+ io_uring_sqe_set_data(sqe, ev);
+

ev->handler = ngx_file_aio_event_handler;

- piocb[0] = &aio->aiocb;
-
- if (io_submit(ngx_aio_ctx, 1, piocb) == 1) {
+ if (io_uring_submit(&ngx_ring) == 1) {
ev->active = 1;
ev->ready = 0;
ev->complete = 0;
diff -r 82228f955153 -r 3677cf19b98b src/os/unix/ngx_linux_config.h
--- a/src/os/unix/ngx_linux_config.h Tue Dec 15 17:41:39 2020 +0300
+++ b/src/os/unix/ngx_linux_config.h Mon Jan 11 08:07:14 2021 -0500
@@ -93,10 +93,6 @@
#include <sys/eventfd.h>
#endif
#include <sys/syscall.h>
-#if (NGX_HAVE_FILE_AIO)
-#include <linux/aio_abi.h>
-typedef struct iocb ngx_aiocb_t;
-#endif


#if (NGX_HAVE_CAPABILITIES)

_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject Author Views Posted

[PATCH] Use io_uring for async io access

Zhao, Ping 94 January 11, 2021 02:06AM

RE: [PATCH] Use io_uring for async io access

Zhao, Ping 13 January 11, 2021 08:34PM

Re: [PATCH] Use io_uring for async io access

Vladimir Homutov 16 January 12, 2021 08:46AM

RE: [PATCH] Use io_uring for async io access

Zhao, Ping 23 January 12, 2021 11:48PM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 47
Record Number of Users: 6 on February 13, 2018
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready