strparse: speed up the hex parser somewhat

Around 2.3x speed-up when parsing many large hexadecimal numbers. The decimal
and octal parsers get marginally faster.

Still very readable, compact and easy to follow code.

Tweaks

- combine the max and the overflow check, gains 3ns/num (use a separate
  check outside of the loop instead for max < base)
- one less indirection in the pointer, gains 3ns/num
- using the table lookup for hex nums, gains 5ns/num
- unfold the num_digit() macro, gains 3ns/num
- use the hexasciitable unconditionally, gains 2ns/num
- use post-increment pointer in the table lookup, gains 1ns/num
- improved valid_digit() using the table for the hex case,
  gains 26ns/num
- use "max char" in valid_digit(), gains 3ns/num

Behavior changes:

- no longer returns STRE_BIG - only STRE_OVERFLOW
- does not move the char ** on error, which is probably better

Updated and extended test 1664 (significantly).

Closes #16374
This commit is contained in:
Daniel Stenberg 2025-02-17 22:34:21 +01:00
parent 3fd1dfc829
commit ad700a0917
No known key found for this signature in database
GPG Key ID: 5CC908FDB71E12C2
6 changed files with 310 additions and 56 deletions

View File

@ -995,7 +995,7 @@ static CURLcode cf_socket_ctx_init(struct cf_socket_ctx *ctx,
p = getenv("CURL_DBG_SOCK_RMAX");
if(p) {
curl_off_t l;
if(!Curl_str_number(&p, &l, SIZE_T_MAX))
if(!Curl_str_number(&p, &l, CURL_OFF_T_MAX))
ctx->recv_max = (size_t)l;
}
}

View File

@ -882,14 +882,14 @@ CURLcode Curl_conn_send(struct Curl_easy *data, int sockindex,
DEBUGASSERT(data->conn);
conn = data->conn;
#ifdef DEBUGBUILD
{
if(write_len) {
/* Allow debug builds to override this logic to force short sends
*/
const char *p = getenv("CURL_SMALLSENDS");
if(p) {
curl_off_t altsize;
if(!Curl_str_number(&p, &altsize, SIZE_T_MAX))
write_len = CURLMIN(write_len, (size_t)altsize);
if(!Curl_str_number(&p, &altsize, write_len))
write_len = (size_t)altsize;
}
}
#endif

View File

@ -195,11 +195,13 @@ static CURLcode xfer_send(struct Curl_easy *data,
/* Allow debug builds to override this logic to force short initial
sends */
size_t body_len = blen - hds_len;
const char *p = getenv("CURL_SMALLREQSEND");
if(p) {
curl_off_t body_small;
if(!Curl_str_number(&p, &body_small, body_len))
blen = hds_len + (size_t)body_small;
if(body_len) {
const char *p = getenv("CURL_SMALLREQSEND");
if(p) {
curl_off_t body_small;
if(!Curl_str_number(&p, &body_small, body_len))
blen = hds_len + (size_t)body_small;
}
}
}
#endif

View File

@ -104,40 +104,57 @@ int Curl_str_singlespace(const char **linep)
return Curl_str_single(linep, ' ');
}
/* given an ASCII hexadecimal character, return the value */
#define HEXDIGIT2NUM(x) \
(((x) > '9') ? Curl_raw_tolower(x) - 'a' + 10 : x - '0')
/* given an ASCII character and a given base, return TRUE if valid */
#define valid_digit(digit, base) \
(((base == 10) && ISDIGIT(digit)) || \
((base == 16) && ISXDIGIT(digit)) || \
((base == 8) && ISODIGIT(digit)))
/* given an ASCII character and a given base, return the value */
#define num_digit(digit, base) \
((base != 16) ? digit - '0' : HEXDIGIT2NUM(digit))
/* given an ASCII character and max ascii, return TRUE if valid */
#define valid_digit(x,m) \
(((x) >= '0') && ((x) <= m) && hexasciitable[(x)-'0'])
/* no support for 0x prefix nor leading spaces */
static int str_num_base(const char **linep, curl_off_t *nump, curl_off_t max,
int base) /* 8, 10 or 16, nothing else */
{
/* We use 16 for the zero index (and the necessary bitwise AND in the loop)
to be able to have a non-zero value there to make valid_digit() able to
use the info */
static const unsigned char hexasciitable[] = {
16, 1, 2, 3, 4, 5, 6, 7, 8, 9, /* 0x30: 0 - 9 */
0, 0, 0, 0, 0, 0, 0,
10, 11, 12, 13, 14, 15, /* 0x41: A - F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
10, 11, 12, 13, 14, 15 /* 0x61: a - f */
};
curl_off_t num = 0;
const char *p;
int m = (base == 10) ? '9' : /* the largest digit possible */
(base == 16) ? 'f' : '7';
DEBUGASSERT(linep && *linep && nump);
DEBUGASSERT((base == 8) || (base == 10) || (base == 16));
DEBUGASSERT(max >= 0); /* mostly to catch SIZE_T_MAX, which is too large */
*nump = 0;
if(!valid_digit(**linep, base))
p = *linep;
if(!valid_digit(*p, m))
return STRE_NO_NUM;
do {
int n = num_digit(**linep, base);
if(num > ((CURL_OFF_T_MAX - n) / base))
return STRE_OVERFLOW;
num = num * base + n;
if(num > max)
return STRE_BIG; /** too big */
(*linep)++;
} while(valid_digit(**linep, base));
if(max < base) {
/* special-case low max scenario because check needs to be different */
do {
int n = hexasciitable[*p++ - '0'] & 0x0f;
num = num * base + n;
if(num > max)
return STRE_OVERFLOW;
} while(valid_digit(*p, m));
}
else {
do {
int n = hexasciitable[*p++ - '0'] & 0x0f;
if(num > ((max - n) / base))
return STRE_OVERFLOW;
num = num * base + n;
} while(valid_digit(*p, m));
}
*nump = num;
*linep = p;
return STRE_OK;
}

View File

@ -85,10 +85,10 @@ Curl_str_single
6: ("") 5, line 0
Curl_str_number
0: ("1") 0, [1] line 1
1: ("10000") 1, [0] line 4
1: ("10000") 7, [0] line 0
2: ("1234") 0, [1234] line 4
3: ("1235") 0, [1235] line 4
4: ("1236") 1, [0] line 3
4: ("1236") 7, [0] line 0
5: ("01234") 0, [1234] line 5
6: ("00000000000000000000000000001234") 0, [1234] line 32
7: ("0123 345") 0, [123] line 4
@ -96,31 +96,95 @@ Curl_str_number
9: ("-12") 8, [0] line 0
10: (" 123") 8, [0] line 0
11: ("") 8, [0] line 0
Curl_str_number varying max
0: ("00") max 8 == 0, [0]
1: ("1") max 8 == 0, [1]
2: ("1") max 1 == 0, [1]
3: ("2") max 1 == 7, [0]
4: ("2") max 2 == 0, [2]
5: ("5") max 6 == 0, [5]
6: ("000000000000000000000006") max 6 == 0, [6]
7: ("7") max 6 == 7, [0]
8: ("8") max 6 == 7, [0]
9: ("9") max 8 == 7, [0]
10: ("10") max 10 == 0, [10]
11: ("11") max 10 == 7, [0]
12: ("12") max 10 == 7, [0]
Curl_str_hex varying max
0: ("00") max 8 == 0, [0]
1: ("1") max 8 == 0, [1]
2: ("1") max 1 == 0, [1]
3: ("2") max 1 == 7, [0]
4: ("2") max 2 == 0, [2]
5: ("5") max 6 == 0, [5]
6: ("000000000000000000000006") max 6 == 0, [6]
7: ("7") max 6 == 7, [0]
8: ("8") max 6 == 7, [0]
9: ("9") max 8 == 7, [0]
10: ("a") max 14 == 0, [10]
11: ("b") max 14 == 0, [11]
12: ("c") max 14 == 0, [12]
13: ("d") max 14 == 0, [13]
14: ("e") max 14 == 0, [14]
15: ("f") max 14 == 7, [0]
16: ("f") max 15 == 0, [15]
17: ("10") max 16 == 0, [16]
18: ("11") max 16 == 7, [0]
19: ("12") max 16 == 7, [0]
Curl_str_octal varying max
0: ("00") max 4 == 0, [0]
1: ("1") max 4 == 0, [1]
2: ("1") max 4 == 0, [1]
3: ("2") max 4 == 0, [2]
4: ("3") max 4 == 0, [3]
5: ("4") max 4 == 0, [4]
6: ("5") max 4 == 7, [0]
7: ("000000000000000000000006") max 6 == 0, [6]
8: ("7") max 7 == 0, [7]
9: ("10") max 8 == 0, [8]
10: ("11") max 8 == 7, [0]
11: ("11") max 9 == 0, [9]
12: ("12") max 9 == 7, [0]
13: ("13") max 9 == 7, [0]
14: ("8") max 10 == 8, [0]
Curl_str_number / max
0: ("9223372036854775807") 0, [9223372036854775807] line 19
1: ("9223372036854775808") 7, [0] line 18
2: ("18446744073709551615") 7, [0] line 19
3: ("18446744073709551616") 7, [0] line 19
4: ("18446744073709551617") 7, [0] line 19
1: ("9223372036854775808") 7, [0] line 0
2: ("18446744073709551615") 7, [0] line 0
3: ("18446744073709551616") 7, [0] line 0
4: ("18446744073709551617") 7, [0] line 0
5: ("0123456799a") 0, [123456799] line 10
6: ("0123456789") 0, [123456789] line 10
7: ("123498760b") 0, [123498760] line 9
8: ("1234987607611298232") 0, [1234987607611298232] line 19
9: ("1111111111111111111") 0, [1111111111111111111] line 19
10: ("2222222222222222222") 0, [2222222222222222222] line 19
11: ("00000000000000000000000000000009223372036854775807") 0, [9223372036854775807] line 50
12: ("3333333333333333333") 0, [3333333333333333333] line 19
13: ("4444444444444444444") 0, [4444444444444444444] line 19
14: ("5555555555555555555") 0, [5555555555555555555] line 19
15: ("6666666666666666666") 0, [6666666666666666666] line 19
16: ("7777777777777777777") 0, [7777777777777777777] line 19
17: ("8888888888888888888") 0, [8888888888888888888] line 19
18: ("999999999999999999") 0, [999999999999999999] line 18
Curl_str_newline
0: ("a") 6, line 0
1: ("aa") 6, line 0
2: ("A") 6, line 0
3: ("b") 6, line 0
4: ("\") 6, line 0
5: (" ") 6, line 0
6: ("
") 0, line 1
7: (" ") 0, line 1
8: ("
") 0, line 1
9: ("") 6, line 0
0: (%61) 6, line 0
1: (%61) 6, line 0
2: (%41) 6, line 0
3: (%62) 6, line 0
4: (%5c) 6, line 0
5: (%20) 6, line 0
6: (%0a) 0, line 1
7: (%0d) 0, line 1
8: (%0d) 0, line 1
9: (%0c) 6, line 0
10: (%00) 6, line 0
Curl_str_hex
0: ("1") 0, [1] line 1
1: ("1000") 0, [4096] line 4
2: ("1234") 0, [4660] line 4
3: ("1235") 0, [4661] line 4
4: ("1236") 1, [0] line 3
4: ("1236") 7, [0] line 0
5: ("01234") 0, [4660] line 5
6: ("00000000000000000000000000001234") 0, [4660] line 32
7: ("0123 345") 0, [291] line 4
@ -133,7 +197,7 @@ Curl_str_octal
1: ("1000") 0, [512] line 4
2: ("1234") 0, [668] line 4
3: ("1235") 0, [669] line 4
4: ("1236") 1, [0] line 3
4: ("1236") 7, [0] line 0
5: ("01234") 0, [668] line 5
6: ("00000000000000000000000000001234") 0, [668] line 32
7: ("0123 345") 0, [83] line 4
@ -143,10 +207,34 @@ Curl_str_octal
11: ("") 8, [0] line 0
Curl_str_octal / max
0: ("777777777777777777777") 0, [9223372036854775807] line 21
1: ("1000000000000000000000") 7, [0] line 21
1: ("1000000000000000000000") 7, [0] line 0
2: ("111111111111111111111") 0, [1317624576693539401] line 21
3: ("222222222222222222222") 0, [2635249153387078802] line 21
4: ("333333333333333333333") 0, [3952873730080618203] line 21
5: ("444444444444444444444") 0, [5270498306774157604] line 21
6: ("555555555555555555555") 0, [6588122883467697005] line 21
7: ("666666666666666666666") 0, [7905747460161236406] line 21
Curl_str_hex / max
0: ("7FFFFFFFFFFFFFFF") 0, [9223372036854775807] line 16
1: ("8000000000000000") 7, [0] line 15
1: ("8000000000000000") 7, [0] line 0
2: ("1111111111111111") 0, [1229782938247303441] line 16
3: ("2222222222222222") 0, [2459565876494606882] line 16
4: ("3333333333333333") 0, [3689348814741910323] line 16
5: ("4444444444444444") 0, [4919131752989213764] line 16
6: ("5555555555555555") 0, [6148914691236517205] line 16
7: ("6666666666666666") 0, [7378697629483820646] line 16
8: ("7777777777777777") 0, [8608480567731124087] line 16
9: ("888888888888888") 0, [614891469123651720] line 15
10: ("999999999999999") 0, [691752902764108185] line 15
11: ("aaaaaaaaAAAAAAA") 0, [768614336404564650] line 15
12: ("bbbbbbbbBBBBBBB") 0, [845475770045021115] line 15
13: ("BBBBBBBBbbbbbbb") 0, [845475770045021115] line 15
14: ("ccccccccCCCCCCC") 0, [922337203685477580] line 15
15: ("ddddddddDDDDDDD") 0, [999198637325934045] line 15
16: ("eeeeeeeeEEEEEEE") 0, [1076060070966390510] line 15
17: ("ffffffffFFFFFFF") 0, [1152921504606846975] line 15
18: ("abcdef") 0, [11259375] line 6
19: ("ABCDEF") 0, [11259375] line 6
</stdout>
</verify>
</testcase>

View File

@ -205,6 +205,114 @@ UNITTEST_START
}
}
{
struct t {
const char *str;
curl_off_t max;
};
static struct t nums[] = {
{ "00", 8},
{ "1", 8},
{ "1", 1},
{ "2", 1},
{ "2", 2},
{ "5", 6},
{ "000000000000000000000006", 6},
{ "7", 6},
{ "8", 6},
{ "9", 8},
{ "10", 10},
{ "11", 10},
{ "12", 10},
{NULL, 0}
};
printf("Curl_str_number varying max\n");
for(i = 0; nums[i].str; i++) {
curl_off_t num;
const char *line = nums[i].str;
const char *orgline = line;
int rc = Curl_str_number(&line, &num, nums[i].max);
printf("%u: (\"%s\") max %" CURL_FORMAT_CURL_OFF_T
" == %d, [%" CURL_FORMAT_CURL_OFF_T "]\n",
i, orgline, nums[i].max, rc, num);
}
}
{
struct t {
const char *str;
curl_off_t max;
};
static struct t nums[] = {
{ "00", 8},
{ "1", 8},
{ "1", 1},
{ "2", 1},
{ "2", 2},
{ "5", 6},
{ "000000000000000000000006", 6},
{ "7", 6},
{ "8", 6},
{ "9", 8},
{ "a", 14},
{ "b", 14},
{ "c", 14},
{ "d", 14},
{ "e", 14},
{ "f", 14},
{ "f", 15},
{ "10", 16},
{ "11", 16},
{ "12", 16},
{NULL, 0}
};
printf("Curl_str_hex varying max\n");
for(i = 0; nums[i].str; i++) {
curl_off_t num;
const char *line = nums[i].str;
const char *orgline = line;
int rc = Curl_str_hex(&line, &num, nums[i].max);
printf("%u: (\"%s\") max %" CURL_FORMAT_CURL_OFF_T
" == %d, [%" CURL_FORMAT_CURL_OFF_T "]\n",
i, orgline, nums[i].max, rc, num);
}
}
{
struct t {
const char *str;
curl_off_t max;
};
static struct t nums[] = {
{ "00", 4},
{ "1", 4},
{ "1", 4},
{ "2", 4},
{ "3", 4},
{ "4", 4},
{ "5", 4},
{ "000000000000000000000006", 6},
{ "7", 7},
{ "10", 8},
{ "11", 8},
{ "11", 9},
{ "12", 9},
{ "13", 9},
{ "8", 10},
{NULL, 0}
};
printf("Curl_str_octal varying max\n");
for(i = 0; nums[i].str; i++) {
curl_off_t num;
const char *line = nums[i].str;
const char *orgline = line;
int rc = Curl_str_octal(&line, &num, nums[i].max);
printf("%u: (\"%s\") max %" CURL_FORMAT_CURL_OFF_T
" == %d, [%" CURL_FORMAT_CURL_OFF_T "]\n",
i, orgline, nums[i].max, rc, num);
}
}
{
/* CURL_OFF_T is typically 9223372036854775807 */
static const char *nums[] = {
@ -213,6 +321,20 @@ UNITTEST_START
"18446744073709551615", /* 2^64 - 1 */
"18446744073709551616", /* 2^64 */
"18446744073709551617", /* 2^64 + 1 */
"0123456799a",
"0123456789",
"123498760b",
"1234987607611298232",
"1111111111111111111",
"2222222222222222222",
"00000000000000000000000000000009223372036854775807",
"3333333333333333333",
"4444444444444444444",
"5555555555555555555",
"6666666666666666666",
"7777777777777777777",
"8888888888888888888",
"999999999999999999",
NULL
};
printf("Curl_str_number / max\n");
@ -237,6 +359,7 @@ UNITTEST_START
"\n",
"\r",
"\r\n",
"\x0c",
"",
NULL
};
@ -245,8 +368,8 @@ UNITTEST_START
const char *line = newl[i];
const char *orgline = line;
int rc = Curl_str_newline(&line);
printf("%u: (\"%s\") %d, line %d\n",
i, orgline, rc, (int)(line - orgline));
printf("%u: (%%%02x) %d, line %d\n",
i, *orgline, rc, (int)(line - orgline));
}
}
@ -309,6 +432,12 @@ UNITTEST_START
static const char *nums[] = {
"777777777777777777777", /* 2^63 -1 */
"1000000000000000000000", /* 2^63 */
"111111111111111111111",
"222222222222222222222",
"333333333333333333333",
"444444444444444444444",
"555555555555555555555",
"666666666666666666666",
NULL
};
printf("Curl_str_octal / max\n");
@ -327,6 +456,24 @@ UNITTEST_START
static const char *nums[] = {
"7FFFFFFFFFFFFFFF", /* 2^63 -1 */
"8000000000000000", /* 2^63 */
"1111111111111111",
"2222222222222222",
"3333333333333333",
"4444444444444444",
"5555555555555555",
"6666666666666666",
"7777777777777777",
"888888888888888",
"999999999999999",
"aaaaaaaaAAAAAAA",
"bbbbbbbbBBBBBBB",
"BBBBBBBBbbbbbbb",
"ccccccccCCCCCCC",
"ddddddddDDDDDDD",
"eeeeeeeeEEEEEEE",
"ffffffffFFFFFFF",
"abcdef",
"ABCDEF",
NULL
};
printf("Curl_str_hex / max\n");