idna: fix OOB read in punycode decoder

libuv was vulnerable to out-of-bounds reads in the uv__idna_toascii()
function which is used to convert strings to ASCII. This is called by
the DNS resolution function and can lead to information disclosures or
crashes.

Reported by Eric Sesterhenn in collaboration with Cure53 and ExpressVPN.

Reported-By: Eric Sesterhenn <eric.sesterhenn@x41-dsec.de>
Fixes: https://github.com/libuv/libuv/issues/3147
PR-URL: https://github.com/libuv/libuv-private/pull/1
Refs: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22918
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: Richard Lau <riclau@uk.ibm.com>
2021-05-21 11:23:36 +02:00 · 2021-05-21 11:23:36 +02:00 · b7466e31e4
commit b7466e31e4
parent 4a27d87a69
3 changed files with 57 additions and 13 deletions
--- a/src/idna.c
+++ b/src/idna.c
@@ -19,6 +19,7 @@

 #include "uv.h"
 #include "idna.h"
+#include <assert.h>
 #include <string.h>

 static unsigned uv__utf8_decode1_slow(const char** p,
@@ -32,7 +33,7 @@ static unsigned uv__utf8_decode1_slow(const char** p,
  if (a > 0xF7)
    return -1;

-  switch (*p - pe) {
+  switch (pe - *p) {
  default:
    if (a > 0xEF) {
      min = 0x10000;
@@ -62,6 +63,8 @@ static unsigned uv__utf8_decode1_slow(const char** p,
      a = 0;
      break;
    }
+    /* Fall through. */
+  case 0:
    return -1;  /* Invalid continuation byte. */
  }

@@ -88,6 +91,8 @@ static unsigned uv__utf8_decode1_slow(const char** p,
 unsigned uv__utf8_decode1(const char** p, const char* pe) {
  unsigned a;

+  assert(*p < pe);
+
  a = (unsigned char) *(*p)++;

  if (a < 128)
@@ -96,9 +101,6 @@ unsigned uv__utf8_decode1(const char** p, const char* pe) {
  return uv__utf8_decode1_slow(p, pe, a);
 }

-#define foreach_codepoint(c, p, pe) \
-  for (; (void) (*p <= pe && (c = uv__utf8_decode1(p, pe))), *p <= pe;)
-
 static int uv__idna_toascii_label(const char* s, const char* se,
                                  char** d, char* de) {
  static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
@@ -121,15 +123,22 @@ static int uv__idna_toascii_label(const char* s, const char* se,
  ss = s;
  todo = 0;

-  foreach_codepoint(c, &s, se) {
+  /* Note: after this loop we've visited all UTF-8 characters and know
+   * they're legal so we no longer need to check for decode errors.
+   */
+  while (s < se) {
+    c = uv__utf8_decode1(&s, se);
+
+    if (c == -1u)
+      return UV_EINVAL;
+
    if (c < 128)
      h++;
-    else if (c == (unsigned) -1)
-      return UV_EINVAL;
    else
      todo++;
  }

+  /* Only write "xn--" when there are non-ASCII characters. */
  if (todo > 0) {
    if (*d < de) *(*d)++ = 'x';
    if (*d < de) *(*d)++ = 'n';
@@ -137,9 +146,13 @@ static int uv__idna_toascii_label(const char* s, const char* se,
    if (*d < de) *(*d)++ = '-';
  }

+  /* Write ASCII characters. */
  x = 0;
  s = ss;
-  foreach_codepoint(c, &s, se) {
+  while (s < se) {
+    c = uv__utf8_decode1(&s, se);
+    assert(c != -1u);
+
    if (c > 127)
      continue;

@@ -166,10 +179,15 @@ static int uv__idna_toascii_label(const char* s, const char* se,
  while (todo > 0) {
    m = -1;
    s = ss;
-    foreach_codepoint(c, &s, se)
+
+    while (s < se) {
+      c = uv__utf8_decode1(&s, se);
+      assert(c != -1u);
+
      if (c >= n)
        if (c < m)
          m = c;
+    }

    x = m - n;
    y = h + 1;
@@ -181,7 +199,10 @@ static int uv__idna_toascii_label(const char* s, const char* se,
    n = m;

    s = ss;
-    foreach_codepoint(c, &s, se) {
+    while (s < se) {
+      c = uv__utf8_decode1(&s, se);
+      assert(c != -1u);
+
      if (c < n)
        if (++delta == 0)
          return UV_E2BIG;  /* Overflow. */
@@ -245,8 +266,6 @@ static int uv__idna_toascii_label(const char* s, const char* se,
  return 0;
 }

-#undef foreach_codepoint
-
 long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
  const char* si;
  const char* st;
@@ -256,10 +275,14 @@ long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {

  ds = d;

-  for (si = s; si < se; /* empty */) {
+  si = s;
+  while (si < se) {
    st = si;
    c = uv__utf8_decode1(&si, se);

+    if (c == -1u)
+      return UV_EINVAL;
+
    if (c != '.')
      if (c != 0x3002)  /* 。 */
        if (c != 0xFF0E)  /* ． */
--- a/test/test-idna.c
+++ b/test/test-idna.c
@@ -96,6 +96,25 @@ TEST_IMPL(utf8_decode1) {
  return 0;
 }

+TEST_IMPL(utf8_decode1_overrun) {
+  const char* p;
+  char b[1];
+
+  /* Single byte. */
+  p = b;
+  b[0] = 0x7F;
+  ASSERT_EQ(0x7F, uv__utf8_decode1(&p, b + 1));
+  ASSERT_EQ(p, b + 1);
+
+  /* Multi-byte. */
+  p = b;
+  b[0] = 0xC0;
+  ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&p, b + 1));
+  ASSERT_EQ(p, b + 1);
+
+  return 0;
+}
+
 /* Doesn't work on z/OS because that platform uses EBCDIC, not ASCII. */
 #ifndef __MVS__

--- a/test/test-list.h
+++ b/test/test-list.h
@@ -528,6 +528,7 @@ TEST_DECLARE  (fork_threadpool_queue_work_simple)

 TEST_DECLARE  (idna_toascii)
 TEST_DECLARE  (utf8_decode1)
+TEST_DECLARE  (utf8_decode1_overrun)
 TEST_DECLARE  (uname)

 TEST_DECLARE  (metrics_idle_time)
@@ -1124,6 +1125,7 @@ TASK_LIST_START
 #endif

  TEST_ENTRY  (utf8_decode1)
+  TEST_ENTRY  (utf8_decode1_overrun)
  TEST_ENTRY  (uname)

 /* Doesn't work on z/OS because that platform uses EBCDIC, not ASCII. */