diff --git a/CMakeLists.txt b/CMakeLists.txt index 0217c01b..de1272a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -210,7 +210,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Android") src/unix/pthread-fixes.c src/unix/random-getentropy.c src/unix/random-getrandom.c - src/unix/random-sysctl-linux.c) + src/unix/random-sysctl-linux.c + src/unix/epoll.c) endif() if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "Android|Linux") @@ -254,7 +255,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux") src/unix/linux-syscalls.c src/unix/procfs-exepath.c src/unix/random-getrandom.c - src/unix/random-sysctl-linux.c) + src/unix/random-sysctl-linux.c + src/unix/epoll.c) endif() if(CMAKE_SYSTEM_NAME STREQUAL "NetBSD") @@ -325,7 +327,10 @@ endif() if(CMAKE_SYSTEM_NAME STREQUAL "SunOS") list(APPEND uv_defines __EXTENSIONS__ _XOPEN_SOURCE=500) list(APPEND uv_libraries kstat nsl sendfile socket) - list(APPEND uv_sources src/unix/no-proctitle.c src/unix/sunos.c) + list(APPEND uv_sources + src/unix/no-proctitle.c + src/unix/sunos.c + src/unix/epoll.c) endif() if(CMAKE_SYSTEM_NAME STREQUAL "Haiku") diff --git a/Makefile.am b/Makefile.am index 6f2d018c..4d8fb40d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -391,7 +391,8 @@ libuv_la_CFLAGS += -D_GNU_SOURCE libuv_la_SOURCES += src/unix/android-ifaddrs.c \ src/unix/pthread-fixes.c \ src/unix/random-getrandom.c \ - src/unix/random-sysctl-linux.c + src/unix/random-sysctl-linux.c \ + src/unix/epoll.c endif if CYGWIN @@ -472,7 +473,8 @@ libuv_la_SOURCES += src/unix/linux-core.c \ src/unix/procfs-exepath.c \ src/unix/proctitle.c \ src/unix/random-getrandom.c \ - src/unix/random-sysctl-linux.c + src/unix/random-sysctl-linux.c \ + src/unix/epoll.c test_run_tests_LDFLAGS += -lutil endif @@ -516,6 +518,7 @@ libuv_la_CFLAGS += -D__EXTENSIONS__ \ -D_XOPEN_SOURCE=500 \ -D_REENTRANT libuv_la_SOURCES += src/unix/no-proctitle.c \ + src/unix/epoll.c \ src/unix/sunos.c endif diff --git a/src/unix/epoll.c b/src/unix/epoll.c new file mode 100644 index 00000000..97348e25 --- /dev/null +++ b/src/unix/epoll.c @@ -0,0 +1,422 @@ +/* Copyright libuv contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "uv.h" +#include "internal.h" +#include +#include + +int uv__epoll_init(uv_loop_t* loop) { + int fd; + fd = epoll_create1(O_CLOEXEC); + + /* epoll_create1() can fail either because it's not implemented (old kernel) + * or because it doesn't understand the O_CLOEXEC flag. 
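The fallback described in the comment above amounts to only a few lines. The sketch below is an illustrative stand-alone equivalent, not libuv's code: plain fcntl() stands in for libuv's internal uv__cloexec() helper, and the helper name is made up.

/* Sketch only: a stand-alone equivalent of the fallback in uv__epoll_init(),
 * with fcntl() standing in for libuv's internal uv__cloexec() helper. */
#include <sys/epoll.h>
#include <fcntl.h>
#include <errno.h>

static int open_epoll_fd(void) {
  int fd;

  fd = epoll_create1(O_CLOEXEC);   /* atomic close-on-exec when supported */

  if (fd == -1 && (errno == ENOSYS || errno == EINVAL)) {
    /* Old kernel: epoll_create()'s size argument is only a hint (ignored
     * since 2.6.8) and the close-on-exec flag has to be set in a second,
     * non-atomic step. */
    fd = epoll_create(256);
    if (fd != -1)
      fcntl(fd, F_SETFD, FD_CLOEXEC);
  }

  return fd;   /* -1 on failure; the caller maps errno to a libuv error */
}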
+ */ + if (fd == -1 && (errno == ENOSYS || errno == EINVAL)) { + fd = epoll_create(256); + + if (fd != -1) + uv__cloexec(fd, 1); + } + + loop->backend_fd = fd; + if (fd == -1) + return UV__ERR(errno); + + return 0; +} + + +void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) { + struct epoll_event* events; + struct epoll_event dummy; + uintptr_t i; + uintptr_t nfds; + + assert(loop->watchers != NULL); + assert(fd >= 0); + + events = (struct epoll_event*) loop->watchers[loop->nwatchers]; + nfds = (uintptr_t) loop->watchers[loop->nwatchers + 1]; + if (events != NULL) + /* Invalidate events with same file descriptor */ + for (i = 0; i < nfds; i++) + if (events[i].data.fd == fd) + events[i].data.fd = -1; + + /* Remove the file descriptor from the epoll. + * This avoids a problem where the same file description remains open + * in another process, causing repeated junk epoll events. + * + * We pass in a dummy epoll_event, to work around a bug in old kernels. + */ + if (loop->backend_fd >= 0) { + /* Work around a bug in kernels 3.10 to 3.19 where passing a struct that + * has the EPOLLWAKEUP flag set generates spurious audit syslog warnings. + */ + memset(&dummy, 0, sizeof(dummy)); + epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy); + } +} + + +int uv__io_check_fd(uv_loop_t* loop, int fd) { + struct epoll_event e; + int rc; + + memset(&e, 0, sizeof(e)); + e.events = POLLIN; + e.data.fd = -1; + + rc = 0; + if (epoll_ctl(loop->backend_fd, EPOLL_CTL_ADD, fd, &e)) + if (errno != EEXIST) + rc = UV__ERR(errno); + + if (rc == 0) + if (epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &e)) + abort(); + + return rc; +} + + +void uv__io_poll(uv_loop_t* loop, int timeout) { + /* A bug in kernels < 2.6.37 makes timeouts larger than ~30 minutes + * effectively infinite on 32 bits architectures. To avoid blocking + * indefinitely, we cap the timeout and poll again if necessary. + * + * Note that "30 minutes" is a simplification because it depends on + * the value of CONFIG_HZ. The magic constant assumes CONFIG_HZ=1200, + * that being the largest value I have seen in the wild (and only once.) + */ + static const int max_safe_timeout = 1789569; + static int no_epoll_pwait_cached; + static int no_epoll_wait_cached; + int no_epoll_pwait; + int no_epoll_wait; + struct epoll_event events[1024]; + struct epoll_event* pe; + struct epoll_event e; + int real_timeout; + QUEUE* q; + uv__io_t* w; + sigset_t sigset; + uint64_t sigmask; + uint64_t base; + int have_signals; + int nevents; + int count; + int nfds; + int fd; + int op; + int i; + int user_timeout; + int reset_timeout; + + if (loop->nfds == 0) { + assert(QUEUE_EMPTY(&loop->watcher_queue)); + return; + } + + memset(&e, 0, sizeof(e)); + + while (!QUEUE_EMPTY(&loop->watcher_queue)) { + q = QUEUE_HEAD(&loop->watcher_queue); + QUEUE_REMOVE(q); + QUEUE_INIT(q); + + w = QUEUE_DATA(q, uv__io_t, watcher_queue); + assert(w->pevents != 0); + assert(w->fd >= 0); + assert(w->fd < (int) loop->nwatchers); + + e.events = w->pevents; + e.data.fd = w->fd; + + if (w->events == 0) + op = EPOLL_CTL_ADD; + else + op = EPOLL_CTL_MOD; + + /* XXX Future optimization: do EPOLL_CTL_MOD lazily if we stop watching + * events, skip the syscall and squelch the events after epoll_wait(). + */ + if (epoll_ctl(loop->backend_fd, op, w->fd, &e)) { + if (errno != EEXIST) + abort(); + + assert(op == EPOLL_CTL_ADD); + + /* We've reactivated a file descriptor that's been watched before. 
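uv__io_check_fd() above relies on epoll refusing descriptors it cannot watch (regular files, for instance, fail EPOLL_CTL_ADD with EPERM). Below is a minimal stand-alone sketch of the same probe; the helper name, the test file, and the use of a throwaway epoll fd are all hypothetical.

/* Hypothetical demo of the probe used by uv__io_check_fd(): try to add the
 * fd (EEXIST also counts as success), then remove it again so the probe
 * leaves no trace. */
#include <sys/epoll.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int fd_is_pollable(int epfd, int fd) {
  struct epoll_event e;

  memset(&e, 0, sizeof(e));
  e.events = EPOLLIN;
  e.data.fd = -1;

  if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &e) && errno != EEXIST)
    return 0;                               /* e.g. EPERM for regular files */

  epoll_ctl(epfd, EPOLL_CTL_DEL, fd, &e);   /* undo the probe */
  return 1;
}

int main(void) {
  int epfd = epoll_create1(0);
  int file = open("/etc/hostname", O_RDONLY);   /* hypothetical test file */

  printf("fd 0 pollable: %d\n", fd_is_pollable(epfd, STDIN_FILENO));
  if (file != -1) {
    printf("regular file pollable: %d\n", fd_is_pollable(epfd, file));
    close(file);
  }
  close(epfd);
  return 0;
}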
*/ + if (epoll_ctl(loop->backend_fd, EPOLL_CTL_MOD, w->fd, &e)) + abort(); + } + + w->events = w->pevents; + } + + sigmask = 0; + if (loop->flags & UV_LOOP_BLOCK_SIGPROF) { + sigemptyset(&sigset); + sigaddset(&sigset, SIGPROF); + sigmask |= 1 << (SIGPROF - 1); + } + + assert(timeout >= -1); + base = loop->time; + count = 48; /* Benchmarks suggest this gives the best throughput. */ + real_timeout = timeout; + + if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) { + reset_timeout = 1; + user_timeout = timeout; + timeout = 0; + } else { + reset_timeout = 0; + user_timeout = 0; + } + + /* You could argue there is a dependency between these two but + * ultimately we don't care about their ordering with respect + * to one another. Worst case, we make a few system calls that + * could have been avoided because another thread already knows + * they fail with ENOSYS. Hardly the end of the world. + */ + no_epoll_pwait = uv__load_relaxed(&no_epoll_pwait_cached); + no_epoll_wait = uv__load_relaxed(&no_epoll_wait_cached); + + for (;;) { + /* Only need to set the provider_entry_time if timeout != 0. The function + * will return early if the loop isn't configured with UV_METRICS_IDLE_TIME. + */ + if (timeout != 0) + uv__metrics_set_provider_entry_time(loop); + + /* See the comment for max_safe_timeout for an explanation of why + * this is necessary. Executive summary: kernel bug workaround. + */ + if (sizeof(int32_t) == sizeof(long) && timeout >= max_safe_timeout) + timeout = max_safe_timeout; + + if (sigmask != 0 && no_epoll_pwait != 0) + if (pthread_sigmask(SIG_BLOCK, &sigset, NULL)) + abort(); + + if (no_epoll_wait != 0 || (sigmask != 0 && no_epoll_pwait == 0)) { + nfds = epoll_pwait(loop->backend_fd, + events, + ARRAY_SIZE(events), + timeout, + &sigset); + if (nfds == -1 && errno == ENOSYS) { + uv__store_relaxed(&no_epoll_pwait_cached, 1); + no_epoll_pwait = 1; + } + } else { + nfds = epoll_wait(loop->backend_fd, + events, + ARRAY_SIZE(events), + timeout); + if (nfds == -1 && errno == ENOSYS) { + uv__store_relaxed(&no_epoll_wait_cached, 1); + no_epoll_wait = 1; + } + } + + if (sigmask != 0 && no_epoll_pwait != 0) + if (pthread_sigmask(SIG_UNBLOCK, &sigset, NULL)) + abort(); + + /* Update loop->time unconditionally. It's tempting to skip the update when + * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the + * operating system didn't reschedule our process while in the syscall. + */ + SAVE_ERRNO(uv__update_time(loop)); + + if (nfds == 0) { + assert(timeout != -1); + + if (reset_timeout != 0) { + timeout = user_timeout; + reset_timeout = 0; + } + + if (timeout == -1) + continue; + + if (timeout == 0) + return; + + /* We may have been inside the system call for longer than |timeout| + * milliseconds so we need to update the timestamp to avoid drift. + */ + goto update_timeout; + } + + if (nfds == -1) { + if (errno == ENOSYS) { + /* epoll_wait() or epoll_pwait() failed, try the other system call. */ + assert(no_epoll_wait == 0 || no_epoll_pwait == 0); + continue; + } + + if (errno != EINTR) + abort(); + + if (reset_timeout != 0) { + timeout = user_timeout; + reset_timeout = 0; + } + + if (timeout == -1) + continue; + + if (timeout == 0) + return; + + /* Interrupted by a signal. Update timeout and poll again. */ + goto update_timeout; + } + + have_signals = 0; + nevents = 0; + + { + /* Squelch a -Waddress-of-packed-member warning with gcc >= 9. 
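The UV_LOOP_BLOCK_SIGPROF handling above has two paths: epoll_pwait() installs the supplied signal mask atomically for the duration of the wait, while kernels that return ENOSYS for it get an explicit pthread_sigmask() block/unblock around plain epoll_wait(). A hedged stand-alone sketch of the same idea follows; the function name is made up and the ENOSYS caching is omitted here.

/* Sketch (not libuv's exact code): keep SIGPROF from interrupting the wait.
 * epoll_pwait() swaps the supplied mask in and out atomically; if the kernel
 * lacks it, approximate with an explicit block/unblock around epoll_wait(). */
#include <sys/epoll.h>
#include <signal.h>
#include <errno.h>

static int wait_blocking_sigprof(int epfd,
                                 struct epoll_event* events,
                                 int maxevents,
                                 int timeout) {
  sigset_t set;
  int nfds;

  sigemptyset(&set);
  sigaddset(&set, SIGPROF);

  nfds = epoll_pwait(epfd, events, maxevents, timeout, &set);
  if (nfds != -1 || errno != ENOSYS)
    return nfds;

  /* Old kernel: no epoll_pwait(). Block the signal for the duration of the
   * plain epoll_wait() instead (a small window remains around the calls). */
  pthread_sigmask(SIG_BLOCK, &set, NULL);
  nfds = epoll_wait(epfd, events, maxevents, timeout);
  pthread_sigmask(SIG_UNBLOCK, &set, NULL);
  return nfds;
}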
*/ + union { + struct epoll_event* events; + uv__io_t* watchers; + } x; + + x.events = events; + assert(loop->watchers != NULL); + loop->watchers[loop->nwatchers] = x.watchers; + loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds; + } + + for (i = 0; i < nfds; i++) { + pe = events + i; + fd = pe->data.fd; + + /* Skip invalidated events, see uv__platform_invalidate_fd */ + if (fd == -1) + continue; + + assert(fd >= 0); + assert((unsigned) fd < loop->nwatchers); + + w = loop->watchers[fd]; + + if (w == NULL) { + /* File descriptor that we've stopped watching, disarm it. + * + * Ignore all errors because we may be racing with another thread + * when the file descriptor is closed. + */ + epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, pe); + continue; + } + + /* Give users only events they're interested in. Prevents spurious + * callbacks when previous callback invocation in this loop has stopped + * the current watcher. Also, filters out events that users has not + * requested us to watch. + */ + pe->events &= w->pevents | POLLERR | POLLHUP; + + /* Work around an epoll quirk where it sometimes reports just the + * EPOLLERR or EPOLLHUP event. In order to force the event loop to + * move forward, we merge in the read/write events that the watcher + * is interested in; uv__read() and uv__write() will then deal with + * the error or hangup in the usual fashion. + * + * Note to self: happens when epoll reports EPOLLIN|EPOLLHUP, the user + * reads the available data, calls uv_read_stop(), then sometime later + * calls uv_read_start() again. By then, libuv has forgotten about the + * hangup and the kernel won't report EPOLLIN again because there's + * nothing left to read. If anything, libuv is to blame here. The + * current hack is just a quick bandaid; to properly fix it, libuv + * needs to remember the error/hangup event. We should get that for + * free when we switch over to edge-triggered I/O. + */ + if (pe->events == POLLERR || pe->events == POLLHUP) + pe->events |= + w->pevents & (POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI); + + if (pe->events != 0) { + /* Run signal watchers last. This also affects child process watchers + * because those are implemented in terms of signal watchers. + */ + if (w == &loop->signal_io_watcher) { + have_signals = 1; + } else { + uv__metrics_update_idle_time(loop); + w->cb(loop, w, pe->events); + } + + nevents++; + } + } + + if (reset_timeout != 0) { + timeout = user_timeout; + reset_timeout = 0; + } + + if (have_signals != 0) { + uv__metrics_update_idle_time(loop); + loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN); + } + + loop->watchers[loop->nwatchers] = NULL; + loop->watchers[loop->nwatchers + 1] = NULL; + + if (have_signals != 0) + return; /* Event loop should cycle now so don't poll again. */ + + if (nevents != 0) { + if (nfds == ARRAY_SIZE(events) && --count != 0) { + /* Poll for more events but don't block this time. 
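The two masking steps above are easy to get backwards, so here is a tiny worked example of the arithmetic; the values and variable names are illustrative, and libuv's real merge also folds in UV__POLLRDHUP and UV__POLLPRI. A watcher registered for POLLIN that receives a bare POLLHUP ends up with POLLIN|POLLHUP, so the normal read path runs and can report EOF.

/* Minimal, hypothetical walk-through of the per-event masking. */
#include <poll.h>
#include <stdio.h>

int main(void) {
  short watched = POLLIN;                    /* what the watcher asked for */
  short reported = POLLHUP;                  /* what epoll handed back */

  /* Step 1: drop anything the watcher did not ask for, but always let
   * error/hangup through. */
  reported &= watched | POLLERR | POLLHUP;   /* still POLLHUP */

  /* Step 2: if only POLLERR/POLLHUP remain, merge the watcher's own
   * read/write interest back in so the read/write path can surface the
   * hangup or socket error. */
  if (reported == POLLERR || reported == POLLHUP)
    reported |= watched & (POLLIN | POLLOUT);

  printf("callback sees events 0x%x (POLLIN|POLLHUP)\n", reported);
  return 0;
}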
*/ + timeout = 0; + continue; + } + return; + } + + if (timeout == 0) + return; + + if (timeout == -1) + continue; + +update_timeout: + assert(timeout > 0); + + real_timeout -= (loop->time - base); + if (real_timeout <= 0) + return; + + timeout = real_timeout; + } +} + diff --git a/src/unix/internal.h b/src/unix/internal.h index 69084ed2..5365cc71 100644 --- a/src/unix/internal.h +++ b/src/unix/internal.h @@ -259,6 +259,7 @@ int uv__signal_loop_fork(uv_loop_t* loop); /* platform specific */ uint64_t uv__hrtime(uv_clocktype_t type); int uv__kqueue_init(uv_loop_t* loop); +int uv__epoll_init(uv_loop_t* loop); int uv__platform_loop_init(uv_loop_t* loop); void uv__platform_loop_delete(uv_loop_t* loop); void uv__platform_invalidate_fd(uv_loop_t* loop, int fd); diff --git a/src/unix/linux-core.c b/src/unix/linux-core.c index 8746744d..eaf97599 100644 --- a/src/unix/linux-core.c +++ b/src/unix/linux-core.c @@ -82,29 +82,12 @@ static int read_times(FILE* statfile_fp, static void read_speeds(unsigned int numcpus, uv_cpu_info_t* ci); static uint64_t read_cpufreq(unsigned int cpunum); - int uv__platform_loop_init(uv_loop_t* loop) { - int fd; - fd = epoll_create1(O_CLOEXEC); - - /* epoll_create1() can fail either because it's not implemented (old kernel) - * or because it doesn't understand the O_CLOEXEC flag. - */ - if (fd == -1 && (errno == ENOSYS || errno == EINVAL)) { - fd = epoll_create(256); - - if (fd != -1) - uv__cloexec(fd, 1); - } - - loop->backend_fd = fd; + loop->inotify_fd = -1; loop->inotify_watchers = NULL; - if (fd == -1) - return UV__ERR(errno); - - return 0; + return uv__epoll_init(loop); } @@ -134,380 +117,6 @@ void uv__platform_loop_delete(uv_loop_t* loop) { } -void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) { - struct epoll_event* events; - struct epoll_event dummy; - uintptr_t i; - uintptr_t nfds; - - assert(loop->watchers != NULL); - assert(fd >= 0); - - events = (struct epoll_event*) loop->watchers[loop->nwatchers]; - nfds = (uintptr_t) loop->watchers[loop->nwatchers + 1]; - if (events != NULL) - /* Invalidate events with same file descriptor */ - for (i = 0; i < nfds; i++) - if (events[i].data.fd == fd) - events[i].data.fd = -1; - - /* Remove the file descriptor from the epoll. - * This avoids a problem where the same file description remains open - * in another process, causing repeated junk epoll events. - * - * We pass in a dummy epoll_event, to work around a bug in old kernels. - */ - if (loop->backend_fd >= 0) { - /* Work around a bug in kernels 3.10 to 3.19 where passing a struct that - * has the EPOLLWAKEUP flag set generates spurious audit syslog warnings. - */ - memset(&dummy, 0, sizeof(dummy)); - epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy); - } -} - - -int uv__io_check_fd(uv_loop_t* loop, int fd) { - struct epoll_event e; - int rc; - - memset(&e, 0, sizeof(e)); - e.events = POLLIN; - e.data.fd = -1; - - rc = 0; - if (epoll_ctl(loop->backend_fd, EPOLL_CTL_ADD, fd, &e)) - if (errno != EEXIST) - rc = UV__ERR(errno); - - if (rc == 0) - if (epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &e)) - abort(); - - return rc; -} - - -void uv__io_poll(uv_loop_t* loop, int timeout) { - /* A bug in kernels < 2.6.37 makes timeouts larger than ~30 minutes - * effectively infinite on 32 bits architectures. To avoid blocking - * indefinitely, we cap the timeout and poll again if necessary. - * - * Note that "30 minutes" is a simplification because it depends on - * the value of CONFIG_HZ. 
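The update_timeout path at the top of this hunk shrinks the remaining budget instead of restarting it, so an EINTR or a capped wait cannot extend the caller's deadline. The hypothetical helper below, with illustrative names, makes that bookkeeping explicit; libuv does it inline with real_timeout, base and loop->time.

#include <stdint.h>

/* Hypothetical sketch of the update_timeout bookkeeping. 'base_ms' is the
 * loop time when polling started, 'now_ms' the time after the syscall came
 * back, 'budget_ms' the caller's original timeout in milliseconds. */
static int remaining_timeout(uint64_t base_ms, uint64_t now_ms, int budget_ms) {
  uint64_t spent = now_ms - base_ms;

  if (spent >= (uint64_t) budget_ms)
    return 0;                        /* budget exhausted; uv__io_poll returns */

  return budget_ms - (int) spent;    /* poll again with what is left */
}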
The magic constant assumes CONFIG_HZ=1200, - * that being the largest value I have seen in the wild (and only once.) - */ - static const int max_safe_timeout = 1789569; - static int no_epoll_pwait_cached; - static int no_epoll_wait_cached; - int no_epoll_pwait; - int no_epoll_wait; - struct epoll_event events[1024]; - struct epoll_event* pe; - struct epoll_event e; - int real_timeout; - QUEUE* q; - uv__io_t* w; - sigset_t sigset; - uint64_t sigmask; - uint64_t base; - int have_signals; - int nevents; - int count; - int nfds; - int fd; - int op; - int i; - int user_timeout; - int reset_timeout; - - if (loop->nfds == 0) { - assert(QUEUE_EMPTY(&loop->watcher_queue)); - return; - } - - memset(&e, 0, sizeof(e)); - - while (!QUEUE_EMPTY(&loop->watcher_queue)) { - q = QUEUE_HEAD(&loop->watcher_queue); - QUEUE_REMOVE(q); - QUEUE_INIT(q); - - w = QUEUE_DATA(q, uv__io_t, watcher_queue); - assert(w->pevents != 0); - assert(w->fd >= 0); - assert(w->fd < (int) loop->nwatchers); - - e.events = w->pevents; - e.data.fd = w->fd; - - if (w->events == 0) - op = EPOLL_CTL_ADD; - else - op = EPOLL_CTL_MOD; - - /* XXX Future optimization: do EPOLL_CTL_MOD lazily if we stop watching - * events, skip the syscall and squelch the events after epoll_wait(). - */ - if (epoll_ctl(loop->backend_fd, op, w->fd, &e)) { - if (errno != EEXIST) - abort(); - - assert(op == EPOLL_CTL_ADD); - - /* We've reactivated a file descriptor that's been watched before. */ - if (epoll_ctl(loop->backend_fd, EPOLL_CTL_MOD, w->fd, &e)) - abort(); - } - - w->events = w->pevents; - } - - sigmask = 0; - if (loop->flags & UV_LOOP_BLOCK_SIGPROF) { - sigemptyset(&sigset); - sigaddset(&sigset, SIGPROF); - sigmask |= 1 << (SIGPROF - 1); - } - - assert(timeout >= -1); - base = loop->time; - count = 48; /* Benchmarks suggest this gives the best throughput. */ - real_timeout = timeout; - - if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) { - reset_timeout = 1; - user_timeout = timeout; - timeout = 0; - } else { - reset_timeout = 0; - user_timeout = 0; - } - - /* You could argue there is a dependency between these two but - * ultimately we don't care about their ordering with respect - * to one another. Worst case, we make a few system calls that - * could have been avoided because another thread already knows - * they fail with ENOSYS. Hardly the end of the world. - */ - no_epoll_pwait = uv__load_relaxed(&no_epoll_pwait_cached); - no_epoll_wait = uv__load_relaxed(&no_epoll_wait_cached); - - for (;;) { - /* Only need to set the provider_entry_time if timeout != 0. The function - * will return early if the loop isn't configured with UV_METRICS_IDLE_TIME. - */ - if (timeout != 0) - uv__metrics_set_provider_entry_time(loop); - - /* See the comment for max_safe_timeout for an explanation of why - * this is necessary. Executive summary: kernel bug workaround. 
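For reference, the workaround this comment summarizes is just a clamp plus another trip around the loop. The sketch below assumes the same magic constant as the patch and uses an illustrative helper name: on an ILP32 build the requested timeout is capped, and the drift correction then carries the remainder into the next iteration, so callers still get their full wait.

#include <stdint.h>

/* Sketch of the 32-bit clamp: a no-op with 64-bit longs, caps the wait on
 * kernels where a roughly >= 30 minute timeout would otherwise never expire. */
static int clamp_poll_timeout(int timeout_ms) {
  static const int max_safe_timeout = 1789569;  /* same constant as the patch */

  if (sizeof(int32_t) == sizeof(long) && timeout_ms > max_safe_timeout)
    return max_safe_timeout;

  return timeout_ms;
}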
- */ - if (sizeof(int32_t) == sizeof(long) && timeout >= max_safe_timeout) - timeout = max_safe_timeout; - - if (sigmask != 0 && no_epoll_pwait != 0) - if (pthread_sigmask(SIG_BLOCK, &sigset, NULL)) - abort(); - - if (no_epoll_wait != 0 || (sigmask != 0 && no_epoll_pwait == 0)) { - nfds = epoll_pwait(loop->backend_fd, - events, - ARRAY_SIZE(events), - timeout, - &sigset); - if (nfds == -1 && errno == ENOSYS) { - uv__store_relaxed(&no_epoll_pwait_cached, 1); - no_epoll_pwait = 1; - } - } else { - nfds = epoll_wait(loop->backend_fd, - events, - ARRAY_SIZE(events), - timeout); - if (nfds == -1 && errno == ENOSYS) { - uv__store_relaxed(&no_epoll_wait_cached, 1); - no_epoll_wait = 1; - } - } - - if (sigmask != 0 && no_epoll_pwait != 0) - if (pthread_sigmask(SIG_UNBLOCK, &sigset, NULL)) - abort(); - - /* Update loop->time unconditionally. It's tempting to skip the update when - * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the - * operating system didn't reschedule our process while in the syscall. - */ - SAVE_ERRNO(uv__update_time(loop)); - - if (nfds == 0) { - assert(timeout != -1); - - if (reset_timeout != 0) { - timeout = user_timeout; - reset_timeout = 0; - } - - if (timeout == -1) - continue; - - if (timeout == 0) - return; - - /* We may have been inside the system call for longer than |timeout| - * milliseconds so we need to update the timestamp to avoid drift. - */ - goto update_timeout; - } - - if (nfds == -1) { - if (errno == ENOSYS) { - /* epoll_wait() or epoll_pwait() failed, try the other system call. */ - assert(no_epoll_wait == 0 || no_epoll_pwait == 0); - continue; - } - - if (errno != EINTR) - abort(); - - if (reset_timeout != 0) { - timeout = user_timeout; - reset_timeout = 0; - } - - if (timeout == -1) - continue; - - if (timeout == 0) - return; - - /* Interrupted by a signal. Update timeout and poll again. */ - goto update_timeout; - } - - have_signals = 0; - nevents = 0; - - { - /* Squelch a -Waddress-of-packed-member warning with gcc >= 9. */ - union { - struct epoll_event* events; - uv__io_t* watchers; - } x; - - x.events = events; - assert(loop->watchers != NULL); - loop->watchers[loop->nwatchers] = x.watchers; - loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds; - } - - for (i = 0; i < nfds; i++) { - pe = events + i; - fd = pe->data.fd; - - /* Skip invalidated events, see uv__platform_invalidate_fd */ - if (fd == -1) - continue; - - assert(fd >= 0); - assert((unsigned) fd < loop->nwatchers); - - w = loop->watchers[fd]; - - if (w == NULL) { - /* File descriptor that we've stopped watching, disarm it. - * - * Ignore all errors because we may be racing with another thread - * when the file descriptor is closed. - */ - epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, pe); - continue; - } - - /* Give users only events they're interested in. Prevents spurious - * callbacks when previous callback invocation in this loop has stopped - * the current watcher. Also, filters out events that users has not - * requested us to watch. - */ - pe->events &= w->pevents | POLLERR | POLLHUP; - - /* Work around an epoll quirk where it sometimes reports just the - * EPOLLERR or EPOLLHUP event. In order to force the event loop to - * move forward, we merge in the read/write events that the watcher - * is interested in; uv__read() and uv__write() will then deal with - * the error or hangup in the usual fashion. 
- * - * Note to self: happens when epoll reports EPOLLIN|EPOLLHUP, the user - * reads the available data, calls uv_read_stop(), then sometime later - * calls uv_read_start() again. By then, libuv has forgotten about the - * hangup and the kernel won't report EPOLLIN again because there's - * nothing left to read. If anything, libuv is to blame here. The - * current hack is just a quick bandaid; to properly fix it, libuv - * needs to remember the error/hangup event. We should get that for - * free when we switch over to edge-triggered I/O. - */ - if (pe->events == POLLERR || pe->events == POLLHUP) - pe->events |= - w->pevents & (POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI); - - if (pe->events != 0) { - /* Run signal watchers last. This also affects child process watchers - * because those are implemented in terms of signal watchers. - */ - if (w == &loop->signal_io_watcher) { - have_signals = 1; - } else { - uv__metrics_update_idle_time(loop); - w->cb(loop, w, pe->events); - } - - nevents++; - } - } - - if (reset_timeout != 0) { - timeout = user_timeout; - reset_timeout = 0; - } - - if (have_signals != 0) { - uv__metrics_update_idle_time(loop); - loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN); - } - - loop->watchers[loop->nwatchers] = NULL; - loop->watchers[loop->nwatchers + 1] = NULL; - - if (have_signals != 0) - return; /* Event loop should cycle now so don't poll again. */ - - if (nevents != 0) { - if (nfds == ARRAY_SIZE(events) && --count != 0) { - /* Poll for more events but don't block this time. */ - timeout = 0; - continue; - } - return; - } - - if (timeout == 0) - return; - - if (timeout == -1) - continue; - -update_timeout: - assert(timeout > 0); - - real_timeout -= (loop->time - base); - if (real_timeout <= 0) - return; - - timeout = real_timeout; - } -} - uint64_t uv__hrtime(uv_clocktype_t type) { static clock_t fast_clock_id = -1; diff --git a/src/unix/sunos.c b/src/unix/sunos.c index d511c18b..5e2ec688 100644 --- a/src/unix/sunos.c +++ b/src/unix/sunos.c @@ -65,24 +65,9 @@ int uv__platform_loop_init(uv_loop_t* loop) { - int err; - int fd; - loop->fs_fd = -1; - loop->backend_fd = -1; - fd = port_create(); - if (fd == -1) - return UV__ERR(errno); - - err = uv__cloexec(fd, 1); - if (err) { - uv__close(fd); - return err; - } - loop->backend_fd = fd; - - return 0; + return uv__epoll_init(loop); } @@ -111,264 +96,6 @@ int uv__io_fork(uv_loop_t* loop) { } -void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) { - struct port_event* events; - uintptr_t i; - uintptr_t nfds; - - assert(loop->watchers != NULL); - assert(fd >= 0); - - events = (struct port_event*) loop->watchers[loop->nwatchers]; - nfds = (uintptr_t) loop->watchers[loop->nwatchers + 1]; - if (events == NULL) - return; - - /* Invalidate events with same file descriptor */ - for (i = 0; i < nfds; i++) - if ((int) events[i].portev_object == fd) - events[i].portev_object = -1; -} - - -int uv__io_check_fd(uv_loop_t* loop, int fd) { - if (port_associate(loop->backend_fd, PORT_SOURCE_FD, fd, POLLIN, 0)) - return UV__ERR(errno); - - if (port_dissociate(loop->backend_fd, PORT_SOURCE_FD, fd)) { - perror("(libuv) port_dissociate()"); - abort(); - } - - return 0; -} - - -void uv__io_poll(uv_loop_t* loop, int timeout) { - struct port_event events[1024]; - struct port_event* pe; - struct timespec spec; - QUEUE* q; - uv__io_t* w; - sigset_t* pset; - sigset_t set; - uint64_t base; - uint64_t diff; - uint64_t idle_poll; - unsigned int nfds; - unsigned int i; - int saved_errno; - int have_signals; - int nevents; - 
int count; - int err; - int fd; - int user_timeout; - int reset_timeout; - - if (loop->nfds == 0) { - assert(QUEUE_EMPTY(&loop->watcher_queue)); - return; - } - - while (!QUEUE_EMPTY(&loop->watcher_queue)) { - q = QUEUE_HEAD(&loop->watcher_queue); - QUEUE_REMOVE(q); - QUEUE_INIT(q); - - w = QUEUE_DATA(q, uv__io_t, watcher_queue); - assert(w->pevents != 0); - - if (port_associate(loop->backend_fd, - PORT_SOURCE_FD, - w->fd, - w->pevents, - 0)) { - perror("(libuv) port_associate()"); - abort(); - } - - w->events = w->pevents; - } - - pset = NULL; - if (loop->flags & UV_LOOP_BLOCK_SIGPROF) { - pset = &set; - sigemptyset(pset); - sigaddset(pset, SIGPROF); - } - - assert(timeout >= -1); - base = loop->time; - count = 48; /* Benchmarks suggest this gives the best throughput. */ - - if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) { - reset_timeout = 1; - user_timeout = timeout; - timeout = 0; - } else { - reset_timeout = 0; - } - - for (;;) { - /* Only need to set the provider_entry_time if timeout != 0. The function - * will return early if the loop isn't configured with UV_METRICS_IDLE_TIME. - */ - if (timeout != 0) - uv__metrics_set_provider_entry_time(loop); - - if (timeout != -1) { - spec.tv_sec = timeout / 1000; - spec.tv_nsec = (timeout % 1000) * 1000000; - } - - /* Work around a kernel bug where nfds is not updated. */ - events[0].portev_source = 0; - - nfds = 1; - saved_errno = 0; - - if (pset != NULL) - pthread_sigmask(SIG_BLOCK, pset, NULL); - - err = port_getn(loop->backend_fd, - events, - ARRAY_SIZE(events), - &nfds, - timeout == -1 ? NULL : &spec); - - if (pset != NULL) - pthread_sigmask(SIG_UNBLOCK, pset, NULL); - - if (err) { - /* Work around another kernel bug: port_getn() may return events even - * on error. - */ - if (errno == EINTR || errno == ETIME) { - saved_errno = errno; - } else { - perror("(libuv) port_getn()"); - abort(); - } - } - - /* Update loop->time unconditionally. It's tempting to skip the update when - * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the - * operating system didn't reschedule our process while in the syscall. - */ - SAVE_ERRNO(uv__update_time(loop)); - - if (events[0].portev_source == 0) { - if (reset_timeout != 0) { - timeout = user_timeout; - reset_timeout = 0; - } - - if (timeout == 0) - return; - - if (timeout == -1) - continue; - - goto update_timeout; - } - - if (nfds == 0) { - assert(timeout != -1); - return; - } - - have_signals = 0; - nevents = 0; - - assert(loop->watchers != NULL); - loop->watchers[loop->nwatchers] = (void*) events; - loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds; - for (i = 0; i < nfds; i++) { - pe = events + i; - fd = pe->portev_object; - - /* Skip invalidated events, see uv__platform_invalidate_fd */ - if (fd == -1) - continue; - - assert(fd >= 0); - assert((unsigned) fd < loop->nwatchers); - - w = loop->watchers[fd]; - - /* File descriptor that we've stopped watching, ignore. */ - if (w == NULL) - continue; - - /* Run signal watchers last. This also affects child process watchers - * because those are implemented in terms of signal watchers. - */ - if (w == &loop->signal_io_watcher) { - have_signals = 1; - } else { - uv__metrics_update_idle_time(loop); - w->cb(loop, w, pe->portev_events); - } - - nevents++; - - if (w != loop->watchers[fd]) - continue; /* Disabled by callback. */ - - /* Events Ports operates in oneshot mode, rearm timer on next run. 
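For contrast with epoll's level-triggered behaviour, the comment above points at the main quirk of the backend being deleted: Solaris event ports are oneshot, so a descriptor stops reporting after each delivered event until it is re-associated (the removed code re-queued the watcher so the next loop iteration called port_associate() again). A small illustrative sketch with hypothetical names:

#include <port.h>
#include <stdint.h>

/* Oneshot semantics: after port_getn() hands back an event for this fd,
 * the association is gone; re-arm it or the fd goes silent. */
static void handle_and_rearm(int port, port_event_t* pe, int wanted_events) {
  int fd = (int) pe->portev_object;

  /* ... dispatch pe->portev_events to the fd's callback here ... */

  port_associate(port, PORT_SOURCE_FD, (uintptr_t) fd, wanted_events, NULL);
}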
*/ - if (w->pevents != 0 && QUEUE_EMPTY(&w->watcher_queue)) - QUEUE_INSERT_TAIL(&loop->watcher_queue, &w->watcher_queue); - } - - if (reset_timeout != 0) { - timeout = user_timeout; - reset_timeout = 0; - } - - if (have_signals != 0) { - uv__metrics_update_idle_time(loop); - loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN); - } - - loop->watchers[loop->nwatchers] = NULL; - loop->watchers[loop->nwatchers + 1] = NULL; - - if (have_signals != 0) - return; /* Event loop should cycle now so don't poll again. */ - - if (nevents != 0) { - if (nfds == ARRAY_SIZE(events) && --count != 0) { - /* Poll for more events but don't block this time. */ - timeout = 0; - continue; - } - return; - } - - if (saved_errno == ETIME) { - assert(timeout != -1); - return; - } - - if (timeout == 0) - return; - - if (timeout == -1) - continue; - -update_timeout: - assert(timeout > 0); - - diff = loop->time - base; - if (diff >= (uint64_t) timeout) - return; - - timeout -= diff; - } -} - - uint64_t uv__hrtime(uv_clocktype_t type) { return gethrtime(); }