unix: don't accept() connections in a loop (#3696)

After analyzing many real-world programs, I've come to the conclusion
that accepting connections in a loop is nearly always suboptimal.

1. 99.9% of the time, the second accept() call fails with EAGAIN because
   there are no additional connections to accept. The wasted system call
   is not expensive in isolation, but it adds up.

2. When there are more connections to accept but the listen socket is
   shared between multiple processes (e.g. the Node.js cluster module),
   libuv's greedy behavior necessitated the UV_TCP_SINGLE_ACCEPT hack
   to slow it down and give the other processes a chance.

Accepting a single connection and relying on level-triggered polling to
be notified of the next incoming connection both simplifies the code and
optimizes for the common case.
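To illustrate the strategy: a minimal standalone sketch, not libuv code,
assuming Linux, level-triggered epoll, and a hypothetical port 8080. One
accept() per wakeup; while the backlog is non-empty, the next epoll_wait()
call reports the listen socket as readable again, so no loop is needed.

#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void) {
  struct sockaddr_in addr;
  struct epoll_event ev;
  int lfd;
  int ep;

  /* Non-blocking listen socket on 127.0.0.1:8080 (hypothetical port). */
  lfd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
  memset(&addr, 0, sizeof(addr));
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
  addr.sin_port = htons(8080);
  if (bind(lfd, (struct sockaddr*) &addr, sizeof(addr)) || listen(lfd, 128))
    return perror("listen"), 1;

  ep = epoll_create1(0);
  ev.events = EPOLLIN;  /* Level-triggered by default: stays readable
                         * as long as connections are queued. */
  ev.data.fd = lfd;
  epoll_ctl(ep, EPOLL_CTL_ADD, lfd, &ev);

  for (;;) {
    struct epoll_event out;
    int cfd;

    if (epoll_wait(ep, &out, 1, -1) != 1)
      continue;

    /* Accept exactly one connection. If more are queued, the next
     * epoll_wait() reports the socket as readable again right away;
     * no accept() loop (and no trailing EAGAIN call) is needed. */
    cfd = accept(out.data.fd, NULL, NULL);
    if (cfd < 0)
      continue;  /* EAGAIN, ECONNABORTED, etc.: just wait again. */

    close(cfd);  /* A real server would hand the fd to a handler here. */
  }
}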
Ben Noordhuis, 2022-07-20 12:42:50 +02:00, committed by GitHub
commit 6c692ad1cb (parent ac5180e29c)
4 changed files with 12 additions and 78 deletions

src/unix/core.c

@@ -867,11 +867,6 @@ void uv__io_init(uv__io_t* w, uv__io_cb cb, int fd) {
   w->fd = fd;
   w->events = 0;
   w->pevents = 0;
-
-#if defined(UV_HAVE_KQUEUE)
-  w->rcount = 0;
-  w->wcount = 0;
-#endif /* defined(UV_HAVE_KQUEUE) */
 }
 
 

src/unix/kqueue.c

@@ -333,7 +333,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) {
       if (ev->filter == EVFILT_READ) {
         if (w->pevents & POLLIN) {
           revents |= POLLIN;
-          w->rcount = ev->data;
         } else {
           /* TODO batch up */
           struct kevent events[1];
@@ -349,7 +348,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) {
       if (ev->filter == EV_OOBAND) {
         if (w->pevents & UV__POLLPRI) {
           revents |= UV__POLLPRI;
-          w->rcount = ev->data;
         } else {
           /* TODO batch up */
           struct kevent events[1];
@@ -363,7 +361,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) {
       if (ev->filter == EVFILT_WRITE) {
         if (w->pevents & POLLOUT) {
           revents |= POLLOUT;
-          w->wcount = ev->data;
         } else {
           /* TODO batch up */
           struct kevent events[1];
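Background on the deleted rcount/wcount bookkeeping: on kqueue platforms, an
EVFILT_READ event on a listening socket carries the backlog length in its
data field, and the old accept loop cached that value to know how many
accept() calls could succeed before hitting EAGAIN. With one accept() per
wakeup, the count is unnecessary. A small sketch of that kqueue property
(hypothetical helper, assumes a BSD or macOS system):

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

/* Hypothetical helper: block until listen_fd is readable and return the
 * number of completed connections waiting in its backlog, as reported by
 * kevent() in the data field of the EVFILT_READ event. */
int pending_connections(int kq, int listen_fd) {
  struct kevent change;
  struct kevent event;

  EV_SET(&change, listen_fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
  if (kevent(kq, &change, 1, &event, 1, NULL) != 1)
    return -1;

  return (int) event.data;  /* Backlog length at the time of the event. */
}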

src/unix/stream.c

@@ -495,76 +495,34 @@ static int uv__emfile_trick(uv_loop_t* loop, int accept_fd) {
 }
 
 
-#if defined(UV_HAVE_KQUEUE)
-# define UV_DEC_BACKLOG(w) w->rcount--;
-#else
-# define UV_DEC_BACKLOG(w) /* no-op */
-#endif /* defined(UV_HAVE_KQUEUE) */
-
-
 void uv__server_io(uv_loop_t* loop, uv__io_t* w, unsigned int events) {
   uv_stream_t* stream;
   int err;
+  int fd;
 
   stream = container_of(w, uv_stream_t, io_watcher);
   assert(events & POLLIN);
   assert(stream->accepted_fd == -1);
   assert(!(stream->flags & UV_HANDLE_CLOSING));
 
-  uv__io_start(stream->loop, &stream->io_watcher, POLLIN);
+  fd = uv__stream_fd(stream);
+  err = uv__accept(fd);
 
-  /* connection_cb can close the server socket while we're
-   * in the loop so check it on each iteration.
-   */
-  while (uv__stream_fd(stream) != -1) {
-    assert(stream->accepted_fd == -1);
+  if (err == UV_EMFILE || err == UV_ENFILE)
+    err = uv__emfile_trick(loop, fd);  /* Shed load. */
 
-#if defined(UV_HAVE_KQUEUE)
-    if (w->rcount <= 0)
-      return;
-#endif /* defined(UV_HAVE_KQUEUE) */
+  if (err < 0)
+    return;
 
-    err = uv__accept(uv__stream_fd(stream));
-    if (err < 0) {
-      if (err == UV_EAGAIN || err == UV__ERR(EWOULDBLOCK))
-        return;  /* Not an error. */
+  stream->accepted_fd = err;
+  stream->connection_cb(stream, 0);
 
-      if (err == UV_ECONNABORTED)
-        continue;  /* Ignore. Nothing we can do about that. */
-
-      if (err == UV_EMFILE || err == UV_ENFILE) {
-        err = uv__emfile_trick(loop, uv__stream_fd(stream));
-        if (err == UV_EAGAIN || err == UV__ERR(EWOULDBLOCK))
-          break;
-      }
-
-      stream->connection_cb(stream, err);
-      continue;
-    }
-
-    UV_DEC_BACKLOG(w)
-    stream->accepted_fd = err;
-    stream->connection_cb(stream, 0);
-
-    if (stream->accepted_fd != -1) {
-      /* The user hasn't yet accepted called uv_accept() */
-      uv__io_stop(loop, &stream->io_watcher, POLLIN);
-      return;
-    }
-
-    if (stream->type == UV_TCP &&
-        (stream->flags & UV_HANDLE_TCP_SINGLE_ACCEPT)) {
-      /* Give other processes a chance to accept connections. */
-      struct timespec timeout = { 0, 1 };
-      nanosleep(&timeout, NULL);
-    }
-  }
+  if (stream->accepted_fd != -1)
+    /* The user hasn't yet accepted called uv_accept() */
+    uv__io_stop(loop, &stream->io_watcher, POLLIN);
 }
 
-#undef UV_DEC_BACKLOG
-
 
 int uv_accept(uv_stream_t* server, uv_stream_t* client) {
   int err;
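The caller-facing contract is unchanged by the rewrite above: connection_cb
still fires once per accepted connection, and a callback that does not claim
the fd with uv_accept() still causes libuv to stop watching the server
socket until the pending connection is claimed. A minimal caller-side
sketch (hypothetical program, not part of this commit; assumes a
hypothetical port 8080):

#include <stdlib.h>
#include <uv.h>

static void on_close(uv_handle_t* handle) {
  free(handle);
}

static void on_connection(uv_stream_t* server, int status) {
  uv_tcp_t* client;

  if (status < 0)
    return;

  client = malloc(sizeof(*client));
  uv_tcp_init(server->loop, client);

  /* Claim the pending connection; exactly one is available per callback. */
  if (uv_accept(server, (uv_stream_t*) client)) {
    uv_close((uv_handle_t*) client, on_close);
    return;
  }

  /* A real server would start reading here; this sketch just closes. */
  uv_close((uv_handle_t*) client, on_close);
}

int main(void) {
  struct sockaddr_in addr;
  uv_tcp_t server;

  uv_tcp_init(uv_default_loop(), &server);
  uv_ip4_addr("127.0.0.1", 8080, &addr);
  uv_tcp_bind(&server, (const struct sockaddr*) &addr, 0);
  uv_listen((uv_stream_t*) &server, 128, on_connection);

  return uv_run(uv_default_loop(), UV_RUN_DEFAULT);
}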

src/unix/tcp.c

@@ -338,24 +338,12 @@ int uv_tcp_close_reset(uv_tcp_t* handle, uv_close_cb close_cb) {
 
 
 int uv__tcp_listen(uv_tcp_t* tcp, int backlog, uv_connection_cb cb) {
-  static int single_accept_cached = -1;
   unsigned long flags;
-  int single_accept;
   int err;
 
   if (tcp->delayed_error)
     return tcp->delayed_error;
 
-  single_accept = uv__load_relaxed(&single_accept_cached);
-  if (single_accept == -1) {
-    const char* val = getenv("UV_TCP_SINGLE_ACCEPT");
-    single_accept = (val != NULL && atoi(val) != 0);  /* Off by default. */
-    uv__store_relaxed(&single_accept_cached, single_accept);
-  }
-
-  if (single_accept)
-    tcp->flags |= UV_HANDLE_TCP_SINGLE_ACCEPT;
-
   flags = 0;
 #if defined(__MVS__)
   /* on zOS the listen call does not bind automatically
@@ -460,10 +448,6 @@ int uv_tcp_keepalive(uv_tcp_t* handle, int on, unsigned int delay) {
 
 
 int uv_tcp_simultaneous_accepts(uv_tcp_t* handle, int enable) {
-  if (enable)
-    handle->flags &= ~UV_HANDLE_TCP_SINGLE_ACCEPT;
-  else
-    handle->flags |= UV_HANDLE_TCP_SINGLE_ACCEPT;
-
   return 0;
 }