unix,win: fix off-by-one in uv_wtf8_to_utf16() (#4609)

uv_wtf8_length_as_utf16() checks if codepoints are > 0xFFFF (to see if
it should be encoded as a surrogate pair), therefore uv_wtf8_to_utf16()
should too. Instead it checked > 0x10000. Harmonize the checks.

Fixes: https://github.com/nodejs/node/issues/55914
Ben Noordhuis 2024-11-19 19:09:03 +01:00 committed by GitHub
parent 7b75935b00
commit c6b67af390
3 changed files with 15 additions and 1 deletion
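As background for the diffs below, a minimal standalone sketch of the
UTF-16 encoding decision at issue (illustrative only, not libuv's code;
the function name is made up). Code points up to U+FFFF fit in one
16-bit unit; U+10000 is the first code point that needs a surrogate
pair, so the branch test has to be > 0xFFFF:

#include <assert.h>
#include <stdint.h>

/* Illustrative encoder: write one code point as UTF-16, return the
 * number of 16-bit units written (1 or 2). */
static int encode_one_utf16(uint32_t code_point, uint16_t* out) {
  assert(code_point <= 0x10FFFF);
  if (code_point > 0xFFFF) {  /* the corrected boundary check */
    out[0] = 0xD800 + ((code_point - 0x10000) >> 10);   /* high surrogate */
    out[1] = 0xDC00 + ((code_point - 0x10000) & 0x3FF); /* low surrogate */
    return 2;
  }
  out[0] = (uint16_t) code_point;  /* BMP code point: single unit */
  return 1;
}

int main(void) {
  uint16_t u[2];
  assert(encode_one_utf16(0x0041, u) == 1 && u[0] == 0x0041);  /* 'A' */
  assert(encode_one_utf16(0x1F600, u) == 2);                   /* U+1F600 */
  assert(u[0] == 0xD83D && u[1] == 0xDE00);
  return 0;
}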

src/idna.c

@@ -393,7 +393,7 @@ void uv_wtf8_to_utf16(const char* source_ptr,
     code_point = uv__wtf8_decode1(&source_ptr);
     /* uv_wtf8_length_as_utf16 should have been called and checked first. */
     assert(code_point >= 0);
-    if (code_point > 0x10000) {
+    if (code_point > 0xFFFF) {
       assert(code_point < 0x10FFFF);
       *w_target++ = (((code_point - 0x10000) >> 10) + 0xD800);
       *w_target++ = ((code_point - 0x10000) & 0x3FF) + 0xDC00;
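The off-by-one is easiest to see at the boundary itself: with the old
check, code point 0x10000 failed > 0x10000, fell through to the
single-unit path, and was truncated to 0x0000, one unit short of the
two that uv_wtf8_length_as_utf16() had counted for it. A small
standalone check of that claim (not part of the patch):

#include <assert.h>
#include <stdint.h>

int main(void) {
  uint32_t cp = 0x10000;            /* first supplementary code point */
  assert(!(cp > 0x10000));          /* old check: surrogate branch skipped */
  assert((uint16_t) cp == 0x0000);  /* single-unit path truncates it */
  assert(cp > 0xFFFF);              /* new check: pair emitted, matching
                                       uv_wtf8_length_as_utf16() */
  return 0;
}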

test/test-idna.c

@@ -218,3 +218,15 @@ TEST_IMPL(idna_toascii) {
 #undef T
 
 #endif /* __MVS__ */
+
+TEST_IMPL(wtf8) {
+  static const char input[] = "ᜄȺy𐞲:𞢢𘴇𐀀'¥3̞[<i$";
+  uint16_t buf[32];
+  ssize_t len;
+
+  len = uv_wtf8_length_as_utf16(input);
+  ASSERT_GT(len, 0);
+  ASSERT_LT(len, ARRAY_SIZE(buf));
+  uv_wtf8_to_utf16(input, buf, len);
+  return 0;
+}
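Note the test input mixes BMP and supplementary-plane characters and
includes 𐀀 (U+10000), the exact boundary code point, so the fixed
branch is exercised. If one wanted to assert on the converted contents
as well, a hypothetical helper (not in the patch) could scan for the
surrogate pair that U+10000 must encode to:

#include <stddef.h>
#include <stdint.h>

/* Hypothetical helper, not part of the patch: returns 1 if buf holds
 * the surrogate pair 0xD800 0xDC00, the UTF-16 encoding of U+10000. */
static int has_u10000_pair(const uint16_t* buf, size_t n) {
  size_t i;
  for (i = 0; i + 1 < n; i++)
    if (buf[i] == 0xD800 && buf[i + 1] == 0xDC00)
      return 1;
  return 0;
}

Before the fix the pair is absent (U+10000 came out as the truncated
unit 0x0000); after it, has_u10000_pair(buf, len) holds.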

test/test-list.h

@@ -572,6 +572,7 @@ TEST_DECLARE (fork_threadpool_queue_work_simple)
 TEST_DECLARE (iouring_pollhup)
+TEST_DECLARE (wtf8)
 TEST_DECLARE (idna_toascii)
 TEST_DECLARE (utf8_decode1)
 TEST_DECLARE (utf8_decode1_overrun)

@@ -1223,6 +1224,7 @@ TASK_LIST_START
 TEST_ENTRY (iouring_pollhup)
+TEST_ENTRY (wtf8)
 TEST_ENTRY (utf8_decode1)
 TEST_ENTRY (utf8_decode1_overrun)
 TEST_ENTRY (uname)