From 1752791c9ea89dbf54e2a20a9d9f899119a2d179 Mon Sep 17 00:00:00 2001
From: Ben Noordhuis
Date: Tue, 20 Jun 2023 13:01:12 +0200
Subject: [PATCH] linux: work around io_uring IORING_OP_CLOSE bug (#4059)

Work around a poorly understood bug in older kernels where closing a file
descriptor pointing to /foo/bar results in ETXTBSY errors when trying to
execve("/foo/bar") later on. The bug seems to have been fixed somewhere
between 5.15.85 and 5.15.90. I couldn't pinpoint the responsible commit
but good candidates are the several data race fixes. Interestingly, it
seems to manifest only when running under Docker so the possibility of
a Docker bug can't be completely ruled out either.

This commit moves uv__kernel_version() from fs.c to linux.c because the
latter now uses it more than the former.

Fixes: https://github.com/nodejs/node/issues/48444
---
 src/unix/fs.c    | 26 --------------------------
 src/unix/linux.c | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/src/unix/fs.c b/src/unix/fs.c
index 80b6a239..17c2d8fa 100644
--- a/src/unix/fs.c
+++ b/src/unix/fs.c
@@ -57,7 +57,6 @@
 
 #if defined(__linux__)
 # include
-# include <sys/utsname.h>
 #endif
 
 #if defined(__sun)
@@ -899,31 +898,6 @@ out:
 
 #ifdef __linux__
 
-unsigned uv__kernel_version(void) {
-  static _Atomic unsigned cached_version;
-  struct utsname u;
-  unsigned version;
-  unsigned major;
-  unsigned minor;
-  unsigned patch;
-
-  version = atomic_load_explicit(&cached_version, memory_order_relaxed);
-  if (version != 0)
-    return version;
-
-  if (-1 == uname(&u))
-    return 0;
-
-  if (3 != sscanf(u.release, "%u.%u.%u", &major, &minor, &patch))
-    return 0;
-
-  version = major * 65536 + minor * 256 + patch;
-  atomic_store_explicit(&cached_version, version, memory_order_relaxed);
-
-  return version;
-}
-
-
 /* Pre-4.20 kernels have a bug where CephFS uses the RADOS copy-from command
  * in copy_file_range() when it shouldn't. There is no workaround except to
  * fall back to a regular copy.
diff --git a/src/unix/linux.c b/src/unix/linux.c
index 29af6c07..48b9c2c4 100644
--- a/src/unix/linux.c
+++ b/src/unix/linux.c
@@ -48,6 +48,7 @@
 #include
 #include
 #include
+#include <sys/utsname.h>
 #include
 #include
 
@@ -309,6 +310,31 @@ static struct watcher_root* uv__inotify_watchers(uv_loop_t* loop) {
 }
 
 
+unsigned uv__kernel_version(void) {
+  static _Atomic unsigned cached_version;
+  struct utsname u;
+  unsigned version;
+  unsigned major;
+  unsigned minor;
+  unsigned patch;
+
+  version = atomic_load_explicit(&cached_version, memory_order_relaxed);
+  if (version != 0)
+    return version;
+
+  if (-1 == uname(&u))
+    return 0;
+
+  if (3 != sscanf(u.release, "%u.%u.%u", &major, &minor, &patch))
+    return 0;
+
+  version = major * 65536 + minor * 256 + patch;
+  atomic_store_explicit(&cached_version, version, memory_order_relaxed);
+
+  return version;
+}
+
+
 ssize_t
 uv__fs_copy_file_range(int fd_in,
                        off_t* off_in,
@@ -731,6 +757,17 @@ int uv__iou_fs_close(uv_loop_t* loop, uv_fs_t* req) {
   struct uv__io_uring_sqe* sqe;
   struct uv__iou* iou;
 
+  /* Work around a poorly understood bug in older kernels where closing a file
+   * descriptor pointing to /foo/bar results in ETXTBSY errors when trying to
+   * execve("/foo/bar") later on. The bug seems to have been fixed somewhere
+   * between 5.15.85 and 5.15.90. I couldn't pinpoint the responsible commit
+   * but good candidates are the several data race fixes. Interestingly, it
+   * seems to manifest only when running under Docker so the possibility of
+   * a Docker bug can't be completely ruled out either. Yay, computers.
+   */
+  if (uv__kernel_version() < /* 5.15.90 */ 0x050F5A)
+    return 0;
+
   iou = &uv__get_internal_fields(loop)->iou;
 
   sqe = uv__iou_get_sqe(iou, loop, req);