From 7b75935b00567d29ac982903e5f188ab78b4ba9b Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 18 Nov 2024 16:13:28 -0500 Subject: [PATCH] kqueue: use EVFILT_USER for async if available (#4588) Establishes a user event for kqueue to eliminate the overhead of the pipe and the system call read(2) per wakeup event. Relands commit 27134547ff using VSCode merge, since it shows the conflict is just on the order of #ifdef calls. Co-authored-by: Andy Pan --- src/unix/async.c | 83 +++++++++++++++++++++++++++++++++++++++++++++ src/unix/internal.h | 22 ++++++++++++ src/unix/kqueue.c | 11 ++++++ 3 files changed, 116 insertions(+) diff --git a/src/unix/async.c b/src/unix/async.c index 0ff2669e..aef25f6b 100644 --- a/src/unix/async.c +++ b/src/unix/async.c @@ -38,6 +38,34 @@ #include #endif +#if UV__KQUEUE_EVFILT_USER +static uv_once_t kqueue_runtime_detection_guard = UV_ONCE_INIT; +static int kqueue_evfilt_user_support = 1; + + +static void uv__kqueue_runtime_detection(void) { + int kq; + struct kevent ev[2]; + struct timespec timeout = {0, 0}; + + /* Perform the runtime detection to ensure that kqueue with + * EVFILT_USER actually works. */ + kq = kqueue(); + EV_SET(ev, UV__KQUEUE_EVFILT_USER_IDENT, EVFILT_USER, + EV_ADD | EV_CLEAR, 0, 0, 0); + EV_SET(ev + 1, UV__KQUEUE_EVFILT_USER_IDENT, EVFILT_USER, + 0, NOTE_TRIGGER, 0, 0); + if (kevent(kq, ev, 2, ev, 1, &timeout) < 1 || + ev[0].filter != EVFILT_USER || + ev[0].ident != UV__KQUEUE_EVFILT_USER_IDENT || + ev[0].flags & EV_ERROR) + /* If we wind up here, we can assume that EVFILT_USER is defined but + * broken on the current system. */ + kqueue_evfilt_user_support = 0; + uv__close(kq); +} +#endif + static void uv__async_send(uv_loop_t* loop); static int uv__async_start(uv_loop_t* loop); static void uv__cpu_relax(void); @@ -139,7 +167,11 @@ static void uv__async_io(uv_loop_t* loop, uv__io_t* w, unsigned int events) { assert(w == &loop->async_io_watcher); +#if UV__KQUEUE_EVFILT_USER + for (;!kqueue_evfilt_user_support;) { +#else for (;;) { +#endif r = read(w->fd, buf, sizeof(buf)); if (r == sizeof(buf)) @@ -195,6 +227,17 @@ static void uv__async_send(uv_loop_t* loop) { len = sizeof(val); fd = loop->async_io_watcher.fd; /* eventfd */ } +#elif defined(UV__KQUEUE_EVFILT_USER) + struct kevent ev; + + if (kqueue_evfilt_user_support) { + fd = loop->async_io_watcher.fd; /* magic number for EVFILT_USER */ + EV_SET(&ev, fd, EVFILT_USER, 0, NOTE_TRIGGER, 0, 0); + r = kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL); + if (r == 0) + return; + abort(); + } #endif do @@ -215,6 +258,9 @@ static void uv__async_send(uv_loop_t* loop) { static int uv__async_start(uv_loop_t* loop) { int pipefd[2]; int err; +#if UV__KQUEUE_EVFILT_USER + struct kevent ev; +#endif if (loop->async_io_watcher.fd != -1) return 0; @@ -226,6 +272,36 @@ static int uv__async_start(uv_loop_t* loop) { pipefd[0] = err; pipefd[1] = -1; +#elif UV__KQUEUE_EVFILT_USER + uv_once(&kqueue_runtime_detection_guard, uv__kqueue_runtime_detection); + if (kqueue_evfilt_user_support) { + /* In order not to break the generic pattern of I/O polling, a valid + * file descriptor is required to take up a room in loop->watchers, + * thus we create one for that, but this fd will not be actually used, + * it's just a placeholder and magic number which is going to be closed + * during the cleanup, as other FDs. */ + err = uv__open_cloexec("/dev/null", O_RDONLY); + if (err < 0) + return err; + + pipefd[0] = err; + pipefd[1] = -1; + + /* When using EVFILT_USER event to wake up the kqueue, this event must be + * registered beforehand. Otherwise, calling kevent() to issue an + * unregistered EVFILT_USER event will get an ENOENT. + * Since uv__async_send() may happen before uv__io_poll() with multi-threads, + * we can't defer this registration of EVFILT_USER event as we did for other + * events, but must perform it right away. */ + EV_SET(&ev, err, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, 0); + err = kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL); + if (err < 0) + return UV__ERR(errno); + } else { + err = uv__make_pipe(pipefd, UV_NONBLOCK_PIPE); + if (err < 0) + return err; + } #else err = uv__make_pipe(pipefd, UV_NONBLOCK_PIPE); if (err < 0) @@ -236,6 +312,13 @@ static int uv__async_start(uv_loop_t* loop) { uv__io_start(loop, &loop->async_io_watcher, POLLIN); loop->async_wfd = pipefd[1]; +#if UV__KQUEUE_EVFILT_USER + /* Prevent the EVFILT_USER event from being added to kqueue redundantly + * and mistakenly later in uv__io_poll(). */ + if (kqueue_evfilt_user_support) + loop->async_io_watcher.events = loop->async_io_watcher.pevents; +#endif + return 0; } diff --git a/src/unix/internal.h b/src/unix/internal.h index 8d586b0b..698caf17 100644 --- a/src/unix/internal.h +++ b/src/unix/internal.h @@ -35,6 +35,10 @@ #include #include #include +#if defined(__APPLE__) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__NetBSD__) +#include +#endif #define uv__msan_unpoison(p, n) \ do { \ @@ -504,4 +508,22 @@ int uv__get_constrained_cpu(uv__cpu_constraint* constraint); #endif #endif +#if defined(EVFILT_USER) && defined(NOTE_TRIGGER) +/* EVFILT_USER is available since OS X 10.6, DragonFlyBSD 4.0, + * FreeBSD 8.1, and NetBSD 10.0. + * + * Note that even though EVFILT_USER is defined on the current system, + * it may still fail to work at runtime somehow. In that case, we fall + * back to pipe-based signaling. + */ +#define UV__KQUEUE_EVFILT_USER 1 +/* Magic number of identifier used for EVFILT_USER during runtime detection. + * There are no Google hits for this number when I create it. That way, + * people will be directed here if this number gets printed due to some + * kqueue error and they google for help. */ +#define UV__KQUEUE_EVFILT_USER_IDENT 0x1e7e7711 +#else +#define UV__KQUEUE_EVFILT_USER 0 +#endif + #endif /* UV_UNIX_INTERNAL_H_ */ diff --git a/src/unix/kqueue.c b/src/unix/kqueue.c index 66aa166f..876b7170 100644 --- a/src/unix/kqueue.c +++ b/src/unix/kqueue.c @@ -367,6 +367,17 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { continue; } +#if UV__KQUEUE_EVFILT_USER + if (ev->filter == EVFILT_USER) { + w = &loop->async_io_watcher; + assert(fd == w->fd); + uv__metrics_update_idle_time(loop); + w->cb(loop, w, w->events); + nevents++; + continue; + } +#endif + if (ev->filter == EVFILT_VNODE) { assert(w->events == POLLIN); assert(w->pevents == POLLIN);