From 4f5c8da191a29a8567e69c8586c9e275ab465232 Mon Sep 17 00:00:00 2001 From: Ben Noordhuis Date: Mon, 10 Sep 2012 16:20:56 +0200 Subject: [PATCH] unix: reimplement accept() EMFILE trick Implement a best effort approach to mitigating accept() EMFILE errors. We have a spare file descriptor stashed away that we close to get below the EMFILE limit. Next, we accept all pending connections and close them immediately to signal the clients that we're overloaded - and we are, but we still keep on trucking. There is one caveat: it's not reliable in a multi-threaded environment. The file descriptor limit is per process. Our party trick fails if another thread opens a file or creates a socket in the time window between us calling close() and accept(). Fixes #315. --- include/uv-private/uv-unix.h | 1 + src/unix/loop.c | 6 ++ src/unix/stream.c | 134 +++++++++++++++++++++++++++++------ 3 files changed, 120 insertions(+), 21 deletions(-) diff --git a/include/uv-private/uv-unix.h b/include/uv-private/uv-unix.h index 6cdea672..91ffbe4c 100644 --- a/include/uv-private/uv-unix.h +++ b/include/uv-private/uv-unix.h @@ -137,6 +137,7 @@ typedef struct { uint64_t time; \ void* signal_ctx; \ uv_signal_t child_watcher; \ + int emfile_fd; \ UV_PLATFORM_LOOP_FIELDS \ #define UV_REQ_TYPE_PRIVATE /* empty */ diff --git a/src/unix/loop.c b/src/unix/loop.c index 70cdc824..5cd3bd0c 100644 --- a/src/unix/loop.c +++ b/src/unix/loop.c @@ -51,6 +51,7 @@ int uv__loop_init(uv_loop_t* loop, int default_loop) { loop->time = uv_hrtime() / 1000000; loop->async_pipefd[0] = -1; loop->async_pipefd[1] = -1; + loop->emfile_fd = -1; loop->ev = (default_loop ? 
ev_default_loop : ev_loop_new)(flags); ev_set_userdata(loop->ev, loop); eio_channel_init(&loop->uv_eio_channel, loop); @@ -73,4 +74,9 @@ void uv__loop_delete(uv_loop_t* loop) { uv__platform_loop_delete(loop); uv__signal_unregister(loop); ev_loop_destroy(loop->ev); + + if (loop->emfile_fd != -1) { + close(loop->emfile_fd); + loop->emfile_fd = -1; + } } diff --git a/src/unix/stream.c b/src/unix/stream.c index e3bd51bf..9e004740 100644 --- a/src/unix/stream.c +++ b/src/unix/stream.c @@ -62,6 +62,29 @@ static void uv__read(uv_stream_t* stream); static void uv__stream_io(uv_loop_t* loop, uv__io_t* w, int events); +/* Used by the accept() EMFILE party trick. */ +static int uv__open_cloexec(const char* path, int flags) { + int fd; + +#if defined(__linux__) + fd = open(path, flags | UV__O_CLOEXEC); + if (fd != -1) + return fd; + + if (errno != EINVAL) + return -1; + + /* O_CLOEXEC not supported. */ +#endif + + fd = open(path, flags); + if (fd != -1) + uv__cloexec(fd, 1); + + return fd; +} + + static size_t uv__buf_count(uv_buf_t bufs[], int bufcnt) { size_t total = 0; int i; @@ -90,6 +113,9 @@ void uv__stream_init(uv_loop_t* loop, ngx_queue_init(&stream->write_completed_queue); stream->write_queue_size = 0; + if (loop->emfile_fd == -1) + loop->emfile_fd = uv__open_cloexec("/", O_RDONLY); + #if defined(__APPLE__) stream->select = NULL; #endif /* defined(__APPLE_) */ @@ -370,10 +396,56 @@ static void uv__next_accept(uv_idle_t* idle, int status) { } -void uv__server_io(uv_loop_t* loop, uv__io_t* w, int events) { +/* Implements a best effort approach to mitigating accept() EMFILE errors. + * We have a spare file descriptor stashed away that we close to get below + * the EMFILE limit. Next, we accept all pending connections and close them + * immediately to signal the clients that we're overloaded - and we are, but + * we still keep on trucking. + * + * There is one caveat: it's not reliable in a multi-threaded environment. + * The file descriptor limit is per process. 
Our party trick fails if another + * thread opens a file or creates a socket in the time window between us + * calling close() and accept(). + */ +static int uv__emfile_trick(uv_loop_t* loop, int accept_fd) { int fd; - uv_stream_t* stream = container_of(w, uv_stream_t, read_watcher); + int r; + if (loop->emfile_fd == -1) + return -1; + + close(loop->emfile_fd); + + for (;;) { + fd = uv__accept(accept_fd); + + if (fd != -1) { + close(fd); + continue; + } + + if (errno == EINTR) + continue; + + if (errno == EAGAIN || errno == EWOULDBLOCK) + r = 0; + else + r = -1; + + loop->emfile_fd = uv__open_cloexec("/", O_RDONLY); + + return r; + } +} + + +void uv__server_io(uv_loop_t* loop, uv__io_t* w, int events) { + static __read_mostly int use_emfile_trick = -1; + uv_stream_t* stream; + int fd; + int r; + + stream = container_of(w, uv_stream_t, read_watcher); assert(events == UV__IO_READ); assert(!(stream->flags & UV_CLOSING)); @@ -389,28 +461,48 @@ void uv__server_io(uv_loop_t* loop, uv__io_t* w, int events) { assert(stream->accepted_fd < 0); fd = uv__accept(stream->fd); - if (fd < 0) { - if (errno == EAGAIN || errno == EWOULDBLOCK) { - /* No problem. */ - return; - } else if (errno == ECONNABORTED) { - /* ignore */ - continue; - } else { - uv__set_sys_error(stream->loop, errno); - stream->connection_cb((uv_stream_t*)stream, -1); - } - } else { - stream->accepted_fd = fd; - stream->connection_cb(stream, 0); + if (fd == -1) { + switch (errno) { +#if EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif + case EAGAIN: + return; /* Not an error. */ - if (stream->accepted_fd != -1 || - (stream->type == UV_TCP && stream->flags == UV_TCP_SINGLE_ACCEPT)) { - /* The user hasn't yet accepted called uv_accept() */ - uv__io_stop(stream->loop, &stream->read_watcher); - break; + case ECONNABORTED: + continue; /* Ignore. 
*/ + + case EMFILE: + case ENFILE: + if (use_emfile_trick == -1) { + const char* val = getenv("UV_ACCEPT_EMFILE_TRICK"); + use_emfile_trick = (val == NULL || atoi(val) != 0); + } + + if (use_emfile_trick) { + SAVE_ERRNO(r = uv__emfile_trick(loop, stream->fd)); + if (r == 0) + continue; + } + + /* Fall through. */ + + default: + uv__set_sys_error(loop, errno); + stream->connection_cb(stream, -1); + continue; } } + + stream->accepted_fd = fd; + stream->connection_cb(stream, 0); + + if (stream->accepted_fd != -1 || + (stream->type == UV_TCP && stream->flags == UV_TCP_SINGLE_ACCEPT)) { + /* The user hasn't yet called uv_accept() */ + uv__io_stop(loop, &stream->read_watcher); + break; + } } if (stream->fd != -1 &&