unix: rethink relaxed accept() approach

Benchmarks demonstrated that the idle handle approach didn't balance the load
fairly enough: the majority of new connections still ended up in just one or
two processes.

The new approach voluntarily gives up a scheduler timeslice by calling
nanosleep() with a one nanosecond timeout.

Why not sched_yield()? Because on Linux (and this is probably true for other
Unices as well), sched_yield() only yields if there are other processes running
on the same CPU.

nanosleep() on the other hand always forces the process to sleep, which gives
other processes a chance to accept our pending connections.
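
As a rough sketch of the idea (plain POSIX, not libuv code; accept_loop, the
connection hand-off, and the shared listen_fd are illustrative only), each
worker accepts a connection and then sleeps for one nanosecond so that sibling
processes blocked in accept() on the same socket get a turn:

#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>
#include <time.h>
#include <unistd.h>

/* Hypothetical per-worker accept loop; listen_fd is a listening socket
 * shared with sibling processes (e.g. inherited across fork()). */
static void accept_loop(int listen_fd) {
  static const struct timespec timeout = { 0, 1 };  /* one nanosecond */

  for (;;) {
    int fd = accept(listen_fd, NULL, NULL);

    if (fd == -1) {
      if (errno == EINTR || errno == ECONNABORTED)
        continue;
      perror("accept");
      break;
    }

    /* ... hand the connection off to the application here ... */
    close(fd);

    /* Unlike sched_yield(), which may be a no-op when nothing else is
     * runnable on this CPU, nanosleep() always puts the process to sleep,
     * giving a sibling blocked in accept() a chance to win the next
     * connection. */
    nanosleep(&timeout, NULL);
  }
}

In the change below, uv__server_io() only performs this sleep when the handle
carries the UV_TCP_SINGLE_ACCEPT flag, which uv_tcp_listen() sets by default.
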
Ben Noordhuis 2012-09-18 00:04:50 +02:00
parent 37dc7472d7
commit be2a2176ce
3 changed files with 10 additions and 59 deletions

@@ -185,8 +185,7 @@ typedef struct {
   int fd; \
   UV_STREAM_PRIVATE_PLATFORM_FIELDS \
 
-#define UV_TCP_PRIVATE_FIELDS \
-  uv_idle_t* idle_handle; /* for UV_TCP_SINGLE_ACCEPT handles */ \
+#define UV_TCP_PRIVATE_FIELDS /* empty */
 
 #define UV_UDP_PRIVATE_FIELDS \
   int fd; \

@@ -386,16 +386,6 @@ void uv__stream_destroy(uv_stream_t* stream) {
 }
 
-static void uv__next_accept(uv_idle_t* idle, int status) {
-  uv_stream_t* stream = idle->data;
-
-  uv_idle_stop(idle);
-
-  if (stream->accepted_fd == -1)
-    uv__io_start(stream->loop, &stream->read_watcher);
-}
-
 /* Implements a best effort approach to mitigating accept() EMFILE errors.
  * We have a spare file descriptor stashed away that we close to get below
  * the EMFILE limit. Next, we accept all pending connections and close them
@@ -497,40 +487,17 @@ void uv__server_io(uv_loop_t* loop, uv__io_t* w, int events) {
     stream->accepted_fd = fd;
     stream->connection_cb(stream, 0);
 
-    if (stream->accepted_fd != -1 ||
-        (stream->type == UV_TCP && stream->flags == UV_TCP_SINGLE_ACCEPT)) {
+    if (stream->accepted_fd != -1) {
       /* The user hasn't yet accepted called uv_accept() */
       uv__io_stop(loop, &stream->read_watcher);
-      break;
+      return;
     }
-  }
 
-  if (stream->fd != -1 &&
-      stream->accepted_fd == -1 &&
-      (stream->type == UV_TCP && stream->flags == UV_TCP_SINGLE_ACCEPT))
-  {
-    /* Defer the next accept() syscall to the next event loop tick.
-     * This lets us guarantee fair load balancing in in multi-process setups.
-     * The problem is as follows:
-     *
-     * 1. Multiple processes listen on the same socket.
-     * 2. The OS scheduler commonly gives preference to one process to
-     *    avoid task switches.
-     * 3. That process therefore accepts most of the new connections,
-     *    leading to a (sometimes very) unevenly distributed load.
-     *
-     * Here is how we mitigate this issue:
-     *
-     * 1. Accept a connection.
-     * 2. Start an idle watcher.
-     * 3. Don't accept new connections until the idle callback fires.
-     *
-     * This works because the callback only fires when there have been
-     * no recent events, i.e. none of the watched file descriptors have
-     * recently been readable or writable.
-     */
-    uv_tcp_t* tcp = (uv_tcp_t*) stream;
-    uv_idle_start(tcp->idle_handle, uv__next_accept);
+    if (stream->type == UV_TCP && (stream->flags & UV_TCP_SINGLE_ACCEPT)) {
+      /* Give other processes a chance to accept connections. */
+      struct timespec timeout = { 0, 1 };
+      nanosleep(&timeout, NULL);
+    }
   }
 }

@@ -30,7 +30,6 @@
 
 int uv_tcp_init(uv_loop_t* loop, uv_tcp_t* tcp) {
   uv__stream_init(loop, (uv_stream_t*)tcp, UV_TCP);
-  tcp->idle_handle = NULL;
   return 0;
 }
@@ -245,20 +244,9 @@ int uv_tcp_listen(uv_tcp_t* tcp, int backlog, uv_connection_cb cb) {
     single_accept = (val == NULL) || (atoi(val) != 0); /* on by default */
   }
 
-  if (!single_accept)
-    goto no_single_accept;
-
-  tcp->idle_handle = malloc(sizeof(*tcp->idle_handle));
-  if (tcp->idle_handle == NULL)
-    return uv__set_sys_error(tcp->loop, ENOMEM);
-
-  if (uv_idle_init(tcp->loop, tcp->idle_handle))
-    abort();
-
-  tcp->idle_handle->flags |= UV__HANDLE_INTERNAL;
-  tcp->flags |= UV_TCP_SINGLE_ACCEPT;
+  if (single_accept)
+    tcp->flags |= UV_TCP_SINGLE_ACCEPT;
 
-no_single_accept:
   if (maybe_new_socket(tcp, AF_INET, UV_STREAM_READABLE))
     return -1;
@@ -397,8 +385,5 @@ int uv_tcp_simultaneous_accepts(uv_tcp_t* handle, int enable) {
 
 void uv__tcp_close(uv_tcp_t* handle) {
-  if (handle->idle_handle)
-    uv_close((uv_handle_t*)handle->idle_handle, (uv_close_cb)free);
-
   uv__stream_close((uv_stream_t*)handle);
 }
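
For context on the benchmark claim above, a throwaway harness along the
following lines (entirely hypothetical, not part of libuv or its test suite)
makes the load distribution visible: fork a few workers that share one
listening socket, let each count its accepted connections, and compare the
per-process totals with and without the nanosleep() call.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <time.h>
#include <unistd.h>

#define NUM_WORKERS 4

/* Each worker accepts connections on the shared socket, closes them right
 * away and periodically prints its running total. The nanosleep() after
 * every accept() is the trick introduced by this commit; comment it out to
 * see the distribution skew towards one or two workers. */
static void worker(int listen_fd) {
  struct timespec timeout = { 0, 1 };  /* one nanosecond */
  unsigned long accepted = 0;

  for (;;) {
    int fd = accept(listen_fd, NULL, NULL);

    if (fd == -1)
      continue;

    close(fd);

    if (++accepted % 1000 == 0)
      printf("pid %ld accepted %lu connections\n", (long) getpid(), accepted);

    nanosleep(&timeout, NULL);
  }
}

int main(void) {
  struct sockaddr_in addr;
  int listen_fd;
  int i;

  listen_fd = socket(AF_INET, SOCK_STREAM, 0);
  if (listen_fd == -1)
    abort();

  memset(&addr, 0, sizeof(addr));
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
  addr.sin_port = htons(8000);  /* arbitrary port for the test */

  if (bind(listen_fd, (struct sockaddr*) &addr, sizeof(addr)) != 0 ||
      listen(listen_fd, 511) != 0)
    abort();

  /* The workers inherit listen_fd across fork() and compete in accept(). */
  for (i = 0; i < NUM_WORKERS; i++)
    if (fork() == 0)
      worker(listen_fd);

  for (;;)
    pause();  /* parent idles; kill the process group to stop the test */
}

Driving this with any TCP load generator against the chosen port and comparing
the per-pid counts is enough to see whether the accepts spread out evenly.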