hiredis/parser.c
2011-09-30 18:18:07 +02:00

524 lines
17 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include "parser.h"
#define ERRNO(code) RPE_##code
#define SET_ERRNO(code) do { \
parser->err = ERRNO(code); \
} while(0)
/* The redis_protocol argument to the callback function must be provided by
* the caller because "ISO C99 requires rest arguments to be used". */
#define CALLBACK(X, ...) do { \
if (callbacks && callbacks->on_##X) { \
if (callbacks->on_##X(parser, __VA_ARGS__) != 0) { \
SET_ERRNO(CALLBACK); \
goto error; \
} \
} \
} while(0)
#define RESET_PROTOCOL(ptr) do { \
redis_protocol *__tmp = (ptr); \
__tmp->poff = 0; \
__tmp->plen = 0; \
__tmp->coff = 0; \
__tmp->clen = 0; \
__tmp->type = 0; \
__tmp->size = -1; \
__tmp->cursor = -1; \
__tmp->data = NULL; \
} while(0)
#define REDIS_PARSER_STATES(_X) \
_X(unused) /* = 0 in enum */ \
_X(type_char) \
_X(integer_start) \
_X(integer_pos_19) \
_X(integer_pos_09) \
_X(integer_neg_19) \
_X(integer_neg_09) \
_X(integer_cr) \
_X(integer_lf) \
_X(bulk) \
_X(bulk_cr) \
_X(bulk_lf) \
_X(line) \
_X(line_lf) \
#define _GEN(name) s_##name,
enum state {
REDIS_PARSER_STATES(_GEN)
};
#undef _GEN
#define _GEN(code, description) description,
static const char *strerror_map[] = {
REDIS_PARSER_ERRNO_MAP(_GEN)
};
#undef _GEN
#define STATE(st) \
case s_##st: \
l_##st: \
state = s_##st; \
if (pos >= end) { /* No more data */ \
goto finalize; \
}
#define ADVANCE(bytes) do { \
pos += (bytes); nread += (bytes); \
} while(0)
#define MOVE(st) do { \
goto l_##st; \
} while(0)
#define ADVANCE_AND_MOVE(st) do { \
ADVANCE(1); \
MOVE(st); \
} while(0)
#ifdef DEBUG
#define _ENUM_GEN(s) #s,
static const char *state_str[] = {
REDIS_PARSER_STATES(_ENUM_GEN)
};
#undef _ENUM_GEN
#define LOG(fmt, ...) do { \
fprintf(stderr, fmt "\n", __VA_ARGS__); \
fflush(stderr); \
} while(0)
#include <ctype.h>
/* Can hold 10 char representations per LOG call */
static char _chrtos_buf[10][8];
static int _chrtos_idx = 0;
static const char *chrtos(char byte) {
char *buf = _chrtos_buf[_chrtos_idx++ %
(sizeof(_chrtos_buf) / sizeof(_chrtos_buf[0]))];
switch(byte) {
case '\\':
case '"':
sprintf(buf,"\\%c",byte);
break;
case '\n': sprintf(buf,"\\n"); break;
case '\r': sprintf(buf,"\\r"); break;
case '\t': sprintf(buf,"\\t"); break;
case '\a': sprintf(buf,"\\a"); break;
case '\b': sprintf(buf,"\\b"); break;
default:
if (isprint(byte))
sprintf(buf,"%c",byte);
else
sprintf(buf,"\\x%02x",(unsigned char)byte);
break;
}
return buf;
}
#else
#define LOG(fmt, ...) do { ; } while (0)
#endif
void redis_parser_init(redis_parser *parser, const redis_parser_callbacks *callbacks) {
parser->stackidx = -1;
parser->callbacks = callbacks;
parser->err = 0;
}
/* Execute the parser against len bytes in buf. When a full message was read,
* the "dst" pointer is populated with the address of the root payload object.
* This pointer is set to NULL when no full message could be parsed.
*
* This function returns the number of bytes that could be parsed. When no full
* message was parsed and the return value is smaller than the number of bytes
* that were available, an error occured and the parser should be
* re-initialized before parsing more data.
*/
size_t redis_parser_execute(redis_parser *parser, redis_protocol **dst, const char *buf, size_t len) {
redis_protocol *stack = parser->stack;
const redis_parser_callbacks *callbacks = parser->callbacks;
const char *pos;
const char *end;
size_t nread;
int stackidx;
unsigned char state;
struct redis_parser_int64_s i64;
redis_protocol *cur;
/* Abort immediately if the parser is in an error state. It should be
* re-initialized before attempting to execute it with new data. */
if (parser->err) {
return 0;
}
/* Reset destination */
if (dst) *dst = NULL;
/* Reset root protocol object for new messages */
if (parser->stackidx == -1) {
RESET_PROTOCOL(&stack[0]);
parser->nread = 0;
parser->stackidx = 0;
parser->state = s_type_char;
}
pos = buf;
end = buf+len;
nread = parser->nread;
stackidx = parser->stackidx;
state = parser->state;
i64 = parser->i64;
while (pos < end && stackidx >= 0) {
cur = &stack[stackidx];
cur->parent = stackidx > 0 ? &stack[stackidx-1] : NULL;
switch (state) {
STATE(type_char) {
cur->poff = nread;
switch (*pos) {
case '$':
cur->type = REDIS_STRING;
ADVANCE_AND_MOVE(integer_start);
case '*':
cur->type = REDIS_ARRAY;
ADVANCE_AND_MOVE(integer_start);
case ':':
cur->type = REDIS_INTEGER;
ADVANCE_AND_MOVE(integer_start);
case '+':
cur->type = REDIS_STATUS;
cur->cursor = 0;
ADVANCE_AND_MOVE(line);
case '-':
cur->type = REDIS_ERROR;
cur->cursor = 0;
ADVANCE_AND_MOVE(line);
}
SET_ERRNO(INVALID_TYPE);
goto error;
}
STATE(integer_start) {
char ch = *pos;
i64.i64 = 0;
i64.ui64 = 0;
/* Start unsigned, thus positive */
if (ch >= '1' && ch <= '9') {
i64.ui64 = ch - '0';
ADVANCE_AND_MOVE(integer_pos_09);
}
/* Start with negative sign */
if (ch == '-') {
ADVANCE_AND_MOVE(integer_neg_19);
}
/* Start with positive sign */
if (ch == '+') {
ADVANCE_AND_MOVE(integer_pos_19);
}
/* Single integer character is a zero */
if (ch == '0') {
ADVANCE_AND_MOVE(integer_cr);
}
SET_ERRNO(INVALID_INT);
goto error;
}
STATE(integer_pos_19) {
char ch = *pos;
if (ch >= '1' && ch <= '9') {
i64.ui64 = ch - '0';
ADVANCE_AND_MOVE(integer_pos_09);
}
SET_ERRNO(INVALID_INT);
goto error;
}
STATE(integer_pos_09) {
char ch = *pos;
if (ch >= '0' && ch <= '9') {
if (i64.ui64 > ((uint64_t)INT64_MAX / 10)) { /* Overflow */
SET_ERRNO(OVERFLOW);
goto error;
}
i64.ui64 *= 10;
if (i64.ui64 > ((uint64_t)INT64_MAX - (ch - '0'))) { /* Overflow */
SET_ERRNO(OVERFLOW);
goto error;
}
i64.ui64 += ch - '0';
ADVANCE_AND_MOVE(integer_pos_09);
} else if (ch == '\r') {
i64.i64 = i64.ui64;
ADVANCE_AND_MOVE(integer_lf);
}
SET_ERRNO(INVALID_INT);
goto error;
}
STATE(integer_neg_19) {
char ch = *pos;
if (ch >= '1' && ch <= '9') {
i64.ui64 = ch - '0';
ADVANCE_AND_MOVE(integer_neg_09);
}
SET_ERRNO(INVALID_INT);
goto error;
}
STATE(integer_neg_09) {
char ch = *pos;
if (ch >= '0' && ch <= '9') {
if (i64.ui64 > (((uint64_t)-(INT64_MIN+1)+1) / 10)) { /* Overflow */
SET_ERRNO(OVERFLOW);
goto error;
}
i64.ui64 *= 10;
if (i64.ui64 > (((uint64_t)-(INT64_MIN+1)+1) - (ch - '0'))) { /* Overflow */
SET_ERRNO(OVERFLOW);
goto error;
}
i64.ui64 += ch - '0';
ADVANCE_AND_MOVE(integer_neg_09);
} else if (ch == '\r') {
i64.i64 = -i64.ui64;
ADVANCE_AND_MOVE(integer_lf);
}
SET_ERRNO(INVALID_INT);
goto error;
}
STATE(integer_cr) {
if (*pos == '\r') {
ADVANCE_AND_MOVE(integer_lf);
}
SET_ERRNO(EXPECTED_CR);
goto error;
}
STATE(integer_lf) {
if (*pos != '\n') {
SET_ERRNO(EXPECTED_LF);
goto error;
}
/* Protocol length can be set regardless of type */
cur->plen = nread - cur->poff + 1; /* include \n */
if (cur->type == REDIS_STRING) {
if (i64.i64 < 0) { /* nil bulk */
cur->type = REDIS_NIL;
CALLBACK(nil, cur);
goto done;
}
/* Setup content offset and length */
cur->coff = nread + 1; /* include \n */
cur->clen = (unsigned)i64.i64;
cur->plen += cur->clen + 2; /* include \r\n */
/* Store size of complete bulk */
cur->size = (unsigned)i64.i64;
cur->cursor = 0;
ADVANCE_AND_MOVE(bulk);
}
if (cur->type == REDIS_ARRAY) {
if (i64.i64 < 0) { /* nil multi bulk */
cur->type = REDIS_NIL;
CALLBACK(nil, cur);
goto done;
}
/* Store size of complete multi bulk */
cur->size = (unsigned)i64.i64;
cur->cursor = 0;
CALLBACK(array, cur, cur->size);
goto done;
}
if (cur->type == REDIS_INTEGER) {
/* Setup content offset and length */
cur->coff = cur->poff + 1;
cur->clen = nread - cur->coff - 1; /* remove \r */
CALLBACK(integer, cur, i64.i64);
goto done;
}
assert(NULL && "unexpected object type in s_integer_lf");
}
STATE(bulk) {
size_t remaining = cur->size - cur->cursor;
size_t available = (end-pos);
/* Everything can be read */
if (remaining <= available) {
CALLBACK(string, cur, pos, remaining);
ADVANCE(remaining);
MOVE(bulk_cr);
}
/* Not everything can be read */
CALLBACK(string, cur, pos, available);
ADVANCE(available);
/* Add number of processed bytes to cursor */
cur->cursor += available;
goto finalize;
}
STATE(bulk_cr) {
if (*pos == '\r') {
ADVANCE_AND_MOVE(bulk_lf);
}
SET_ERRNO(EXPECTED_CR);
goto error;
}
STATE(bulk_lf) {
if (*pos == '\n') {
goto done;
}
SET_ERRNO(EXPECTED_LF);
goto error;
}
STATE(line) {
const char *mark = pos;
/* Remove tight loop and add function-wide "line mark" once
* limits on line length are added. */
while(pos < end) {
if (*pos == '\r') {
cur->coff = cur->poff + 1;
cur->clen = nread - cur->coff;
CALLBACK(string, cur, mark, pos-mark);
ADVANCE_AND_MOVE(line_lf);
}
ADVANCE(1);
}
/* No more data */
CALLBACK(string, cur, mark, pos-mark);
/* Add number of processed bytes to cursor */
cur->cursor += pos-mark;
goto finalize;
}
STATE(line_lf) {
if (*pos == '\n') {
cur->plen = nread - cur->poff + 1; /* include \n */
goto done;
}
SET_ERRNO(EXPECTED_LF);
goto error;
}
}
/* Transitions should be made from within the switch */
assert(NULL && "invalid code path");
done:
/* Message is done when root object is done */
do {
/* Move to nested object when we see an incomplete array */
if (cur->type == REDIS_ARRAY && (cur->cursor < cur->size)) {
RESET_PROTOCOL(&stack[++stackidx]);
break;
}
/* Aggregate plen for nested objects */
if (stackidx > 0) {
assert(stack[stackidx-1].type == REDIS_ARRAY);
stack[stackidx-1].plen += cur->plen;
stack[stackidx-1].cursor++;
}
cur = &stack[--stackidx];
} while (stackidx >= 0);
/* Always move back to start state */
state = s_type_char;
ADVANCE(1);
continue;
}
/* Set destination pointer when full message was read */
if (stackidx == -1) {
if (dst) *dst = &stack[0];
}
finalize:
parser->nread = nread;
parser->stackidx = stackidx;
parser->state = state;
parser->i64 = i64;
return pos-buf;
error:
if (parser->err == ERRNO(OK)) {
SET_ERRNO(UNKNOWN);
}
return pos-buf;
}
void redis_parser_destroy(redis_parser *parser) {
/* Only trigger destroy callback when the parser has unfinished
* redis_protocol instances. */
if (parser->stackidx >= 0 &&
parser->callbacks &&
parser->callbacks->destroy)
{
parser->callbacks->destroy(parser, redis_parser_root(parser));
}
}
redis_protocol *redis_parser_root(redis_parser *parser) {
return &parser->stack[0];
}
enum redis_parser_errno redis_parser_err(redis_parser *parser) {
return parser->err;
}
const char *redis_parser_strerror(enum redis_parser_errno err) {
if (err < (sizeof(strerror_map)/sizeof(strerror_map[0])))
return strerror_map[err];
return NULL;
}