lib: Add minimal JSON library

This is a minimal JSON parser (and string encoder helper).  It was
originally written for the NATS client sample project, but since it's
a generic bit of code, it's also being provided as a library outside
the NATS application source.

It's limited (no support for arrays, nested objects, only integer
numbers, etc.), but it is sufficient for the NATS protocol to work.

Jira: ZEP-1012
Change-Id: Ibfe64aa1884e8763576ec5862f77e81b4fd54b69
Signed-off-by: Leandro Pereira <leandro.pereira@intel.com>
diff --git a/lib/json/Kconfig b/lib/json/Kconfig
new file mode 100644
index 0000000..755aa84
--- /dev/null
+++ b/lib/json/Kconfig
@@ -0,0 +1,19 @@
+# Kconfig - JSON library
+
+#
+# Copyright (c) 2016 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+menu "Additional libraries"
+
+config JSON_LIBRARY
+	bool
+	default n
+	prompt "Build JSON library"
+	help
+	  Build a minimal JSON parsing/encoding library. Used by sample
+	  applications such as the NATS client.
+
+endmenu
diff --git a/lib/json/Makefile b/lib/json/Makefile
new file mode 100644
index 0000000..6ddb50a
--- /dev/null
+++ b/lib/json/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_JSON_LIBRARY) += json.o
diff --git a/lib/json/json.c b/lib/json/json.c
new file mode 100644
index 0000000..5e9591f
--- /dev/null
+++ b/lib/json/json.c
@@ -0,0 +1,636 @@
+/*
+ * Copyright (c) 2017 Intel Corporation
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "json.h"
+
+/* A lexed token: its type and the [start, end) span it covers in the input */
+struct token {
+	enum json_tokens type;
+	char *start;
+	char *end;
+};
+
+/*
+ * Lexer state machine.  Each state function consumes input and returns the
+ * next state, or NULL once lexing is over.  At most one token is buffered
+ * in the token member at any time.
+ */
+struct lexer {
+	void *(*state)(struct lexer *lexer);
+	char *start;
+	char *pos;
+	char *end;
+	struct token token;
+};
+
+struct json_obj {
+	struct lexer lexer;
+};
+
+/* One "key": value pair; key is NULL once the end of the object is reached */
+struct json_obj_key_value {
+	const char *key;
+	size_t key_len;
+	struct token value;
+};
+
+/*
+ * Hands the buffered token over to the caller and marks the buffer with
+ * empty_token.  Returns false if the buffer already held empty_token.
+ */
+static bool lexer_consume(struct lexer *lexer, struct token *token,
+			  enum json_tokens empty_token)
+{
+	if (lexer->token.type == empty_token) {
+		return false;
+	}
+
+	*token = lexer->token;
+	lexer->token.type = empty_token;
+
+	return true;
+}
+
+/*
+ * Runs the state machine until a token is available and stores it in
+ * *token.  Returns false when nothing is left: the EOF token itself is
+ * never handed out, while a final error token is delivered exactly once.
+ */
+static bool lexer_next(struct lexer *lexer, struct token *token)
+{
+	while (lexer->state) {
+		if (lexer_consume(lexer, token, JSON_TOK_NONE)) {
+			return true;
+		}
+
+		lexer->state = lexer->state(lexer);
+	}
+
+	return lexer_consume(lexer, token, JSON_TOK_EOF);
+}
+
+static void *lexer_json(struct lexer *lexer);
+
+/* Buffers a token of the given type spanning [start, pos) */
+static void emit(struct lexer *lexer, enum json_tokens token)
+{
+	lexer->token.type = token;
+	lexer->token.start = lexer->start;
+	lexer->token.end = lexer->pos;
+	lexer->start = lexer->pos;
+}
+
+/*
+ * Returns the next input character and advances past it, or '\0' at the
+ * end of input.  At the end, pos is parked one past end so that a
+ * following backup() lands exactly on end.
+ */
+static char next(struct lexer *lexer)
+{
+	if (lexer->pos >= lexer->end) {
+		lexer->pos = lexer->end + 1;
+
+		return '\0';
+	}
+
+	return *lexer->pos++;
+}
+
+/* Drops the input consumed so far from the token being built */
+static void ignore(struct lexer *lexer)
+{
+	lexer->start = lexer->pos;
+}
+
+/* Steps back over the character returned by the last next() */
+static void backup(struct lexer *lexer)
+{
+	lexer->pos--;
+}
+
+/* Returns the next input character without consuming it */
+static char peek(struct lexer *lexer)
+{
+	char chr = next(lexer);
+
+	backup(lexer);
+
+	return chr;
+}
+
+/* Consumes the characters of run in order; false at the first mismatch */
+static bool accept_run(struct lexer *lexer, const char *run)
+{
+	for (; *run; run++) {
+		if (next(lexer) != *run) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Lexes a string; the opening quote has already been consumed.  The token
+ * covers the raw text between the quotes: escape sequences are validated
+ * but not decoded.
+ */
+static void *lexer_string(struct lexer *lexer)
+{
+	int i;
+
+	ignore(lexer);
+
+	while (true) {
+		char chr = next(lexer);
+
+		if (chr == '\0') {
+			goto error;
+		}
+
+		if (chr == '\\') {
+			switch (next(lexer)) {
+			case '"':
+			case '\\':
+			case '/':
+			case 'b':
+			case 'f':
+			case 'n':
+			case 'r':
+			case 't':
+				continue;
+			case 'u':
+				/* A 'u' escape needs exactly four hex digits */
+				for (i = 0; i < 4; i++) {
+					chr = next(lexer);
+					if (!isxdigit((unsigned char)chr)) {
+						goto error;
+					}
+				}
+
+				continue;
+			default:
+				goto error;
+			}
+		}
+
+		if (chr == '"') {
+			/* Emit without the closing quote, then skip it */
+			backup(lexer);
+			emit(lexer, JSON_TOK_STRING);
+
+			next(lexer);
+			ignore(lexer);
+
+			return lexer_json;
+		}
+	}
+
+error:
+	emit(lexer, JSON_TOK_ERROR);
+	return NULL;
+}
+
+/* Lexes "true" or "false"; the first letter has already been consumed */
+static void *lexer_boolean(struct lexer *lexer)
+{
+	/* Re-read the letter that selected this state to pick the literal */
+	backup(lexer);
+
+	switch (next(lexer)) {
+	case 't':
+		if (!accept_run(lexer, "rue")) {
+			goto error;
+		}
+
+		emit(lexer, JSON_TOK_TRUE);
+		return lexer_json;
+	case 'f':
+		if (!accept_run(lexer, "alse")) {
+			goto error;
+		}
+
+		emit(lexer, JSON_TOK_FALSE);
+		return lexer_json;
+	default:
+		break;
+	}
+
+error:
+	emit(lexer, JSON_TOK_ERROR);
+	return NULL;
+}
+
+/* Lexes "null"; the leading 'n' has already been consumed */
+static void *lexer_null(struct lexer *lexer)
+{
+	if (!accept_run(lexer, "ull")) {
+		emit(lexer, JSON_TOK_ERROR);
+		return NULL;
+	}
+
+	emit(lexer, JSON_TOK_NULL);
+	return lexer_json;
+}
+
+/*
+ * Lexes a run of digits and dots; the first character (a digit or '-') has
+ * already been consumed.  Whether the run is a valid integer is decided
+ * later by decode_num().
+ */
+static void *lexer_number(struct lexer *lexer)
+{
+	while (true) {
+		char chr = next(lexer);
+
+		if (isdigit((unsigned char)chr) || chr == '.') {
+			continue;
+		}
+
+		backup(lexer);
+		emit(lexer, JSON_TOK_NUMBER);
+
+		return lexer_json;
+	}
+}
+
+/* Top-level state: skips whitespace and dispatches on the next character */
+static void *lexer_json(struct lexer *lexer)
+{
+	while (true) {
+		char chr = next(lexer);
+
+		switch (chr) {
+		case '\0':
+			emit(lexer, JSON_TOK_EOF);
+			return NULL;
+		case '}':
+		case '{':
+		case ',':
+		case ':':
+			/* Punctuation tokens use their character as type */
+			emit(lexer, (enum json_tokens)chr);
+			return lexer_json;
+		case '"':
+			return lexer_string;
+		case 'n':
+			return lexer_null;
+		case 't':
+		case 'f':
+			return lexer_boolean;
+		case '-':
+			if (isdigit((unsigned char)peek(lexer))) {
+				return lexer_number;
+			}
+
+			/* fallthrough */
+		default:
+			if (isspace((unsigned char)chr)) {
+				continue;
+			}
+
+			if (isdigit((unsigned char)chr)) {
+				return lexer_number;
+			}
+
+			emit(lexer, JSON_TOK_ERROR);
+			return NULL;
+		}
+	}
+}
+
+/* Points the lexer at the len bytes starting at data */
+static void lexer_init(struct lexer *lexer, char *data, size_t len)
+{
+	lexer->state = lexer_json;
+	lexer->start = data;
+	lexer->pos = data;
+	lexer->end = data + len;
+	lexer->token.type = JSON_TOK_NONE;
+}
+
+/* Starts parsing an object: the input must open with '{' */
+static int obj_init(struct json_obj *json, char *data, size_t len)
+{
+	struct token token;
+
+	lexer_init(&json->lexer, data, len);
+
+	if (!lexer_next(&json->lexer, &token)) {
+		return -EINVAL;
+	}
+
+	if (token.type != JSON_TOK_OBJECT_START) {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Reads the next key/value pair into kv.  At the closing '}' it returns 0
+ * with kv->key set to NULL and kv->value holding the '}' token.  Returns
+ * -EINVAL on malformed input or on a value type that is not supported.
+ */
+static int obj_next(struct json_obj *json, struct json_obj_key_value *kv)
+{
+	struct token token;
+
+	if (!lexer_next(&json->lexer, &token)) {
+		return -EINVAL;
+	}
+
+	/* Match end of object or next key */
+	switch (token.type) {
+	case JSON_TOK_OBJECT_END:
+		kv->key = NULL;
+		kv->key_len = 0;
+		kv->value = token;
+
+		return 0;
+	case JSON_TOK_COMMA:
+		if (!lexer_next(&json->lexer, &token)) {
+			return -EINVAL;
+		}
+
+		if (token.type != JSON_TOK_STRING) {
+			return -EINVAL;
+		}
+
+		/* fallthrough */
+	case JSON_TOK_STRING:
+		kv->key = token.start;
+		kv->key_len = (size_t)(token.end - token.start);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Match : after key */
+	if (!lexer_next(&json->lexer, &token)) {
+		return -EINVAL;
+	}
+
+	if (token.type != JSON_TOK_COLON) {
+		return -EINVAL;
+	}
+
+	/* Match value */
+	if (!lexer_next(&json->lexer, &kv->value)) {
+		return -EINVAL;
+	}
+
+	switch (kv->value.type) {
+	case JSON_TOK_STRING:
+	case JSON_TOK_NUMBER:
+	case JSON_TOK_TRUE:
+	case JSON_TOK_FALSE:
+	case JSON_TOK_NULL:
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Converts a number token to an int32_t.  Returns 0 on success, -EINVAL if
+ * the token is not a plain decimal integer, -ERANGE if the value does not
+ * fit in an int32_t, or the negated errno left behind by strtol().  *num is
+ * only written on success.
+ */
+static int decode_num(const struct token *token, int32_t *num)
+{
+	/* FIXME: strtod() is not available in newlib/minimal libc,
+	 * so using strtol() here; this means no floating point
+	 * numbers.
+	 */
+	char *endptr;
+	char prev_end;
+	long value;
+
+	/* Temporarily NUL-terminate the token in place for strtol() */
+	prev_end = *token->end;
+	*token->end = '\0';
+
+	errno = 0;
+	value = strtol(token->start, &endptr, 10);
+
+	*token->end = prev_end;
+
+	if (errno != 0) {
+		return -errno;
+	}
+
+	/* The terminator has been restored by now, so compare positions
+	 * instead of testing *endptr: the whole token must be consumed.
+	 */
+	if (endptr != token->end) {
+		return -EINVAL;
+	}
+
+	if (value < INT32_MIN || value > INT32_MAX) {
+		return -ERANGE;
+	}
+
+	*num = (int32_t)value;
+
+	return 0;
+}
+
+/* true and false are interchangeable: both describe a boolean field */
+static bool equivalent_types(enum json_tokens type1, enum json_tokens type2)
+{
+	if (type1 == JSON_TOK_TRUE || type1 == JSON_TOK_FALSE) {
+		return type2 == JSON_TOK_TRUE || type2 == JSON_TOK_FALSE;
+	}
+
+	return type1 == type2;
+}
+
+/*
+ * Parses the object in payload and stores every field listed in descr into
+ * the struct at val.  Keys that match no descriptor are skipped.  The
+ * payload is modified: decoded strings are NUL-terminated in place and the
+ * stored pointers refer into it.  Returns the bitmap of decoded fields if
+ * every descriptor was matched, a negative errno value otherwise.
+ */
+int json_obj_parse(char *payload, size_t len,
+	const struct json_obj_descr *descr, size_t descr_len,
+	void *val)
+{
+	struct json_obj obj;
+	struct json_obj_key_value kv;
+	int32_t decoded_fields = 0;
+	size_t i;
+	int ret;
+
+	assert(descr_len < (sizeof(decoded_fields) * CHAR_BIT - 1));
+
+	ret = obj_init(&obj, payload, len);
+	if (ret < 0) {
+		return ret;
+	}
+
+	while (!obj_next(&obj, &kv)) {
+		if (kv.value.type == JSON_TOK_OBJECT_END) {
+			/* Succeed only if every described field was seen */
+			if (decoded_fields == (1 << descr_len) - 1) {
+				return decoded_fields;
+			}
+
+			return -EINVAL;
+		}
+
+		for (i = 0; i < descr_len; i++) {
+			void *field = (char *)val + descr[i].offset;
+
+			/* Field has been decoded already, skip */
+			if (decoded_fields & (1 << i)) {
+				continue;
+			}
+
+			/* Check if it's the i-th field */
+			if (kv.key_len != descr[i].field_name_len) {
+				continue;
+			}
+
+			if (memcmp(kv.key, descr[i].field_name,
+				   descr[i].field_name_len)) {
+				continue;
+			}
+
+			/* Is the value of the expected type? */
+			if (!equivalent_types(kv.value.type, descr[i].type)) {
+				return -EINVAL;
+			}
+
+			/* Store the decoded value */
+			switch (descr[i].type) {
+			case JSON_TOK_FALSE:
+			case JSON_TOK_TRUE: {
+				bool *value = field;
+
+				/* Take the value from the parsed token; the
+				 * descriptor only says the field is a boolean.
+				 */
+				*value = kv.value.type == JSON_TOK_TRUE;
+
+				break;
+			}
+			case JSON_TOK_NUMBER: {
+				int32_t *num = field;
+
+				if (decode_num(&kv.value, num) < 0) {
+					return -EINVAL;
+				}
+
+				break;
+			}
+			case JSON_TOK_STRING: {
+				char **str = field;
+
+				/* Overwrite the closing quote to terminate */
+				*kv.value.end = '\0';
+				*str = kv.value.start;
+
+				break;
+			}
+			default:
+				return -EINVAL;
+			}
+
+			decoded_fields |= 1 << i;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Characters that need a backslash escape, in the same order as the
+ * replacement letters used by json_escape_internal().
+ */
+static const char escapable[] = "\"\\/\b\f\n\r\t";
+
+/*
+ * Escapes the *len bytes at str in place, going through a temporary stack
+ * buffer, and NUL-terminates the result.  buf_size must be the escaped
+ * length as computed by json_calc_escaped_len(), and str must have room
+ * for buf_size + 1 bytes.  *len is updated to the escaped length.
+ */
+static int json_escape_internal(char *str, size_t *len, size_t buf_size)
+{
+	char tmp_buf[buf_size + 1];
+	const char *end = str + *len;
+	const char *cur, *escape;
+	char *out = tmp_buf;
+
+	/* Walk exactly *len bytes, like json_calc_escaped_len() does, so
+	 * the output can never outgrow tmp_buf.
+	 */
+	for (cur = str; cur < end; cur++) {
+		escape = memchr(escapable, *cur, sizeof(escapable) - 1);
+		if (escape) {
+			*out++ = '\\';
+			*out++ = "\"\\/bfnrt"[escape - escapable];
+		} else {
+			*out++ = *cur;
+		}
+	}
+
+	*out = '\0';
+	*len = (size_t)(out - tmp_buf);
+
+	/* Copy the terminator as well so str remains a valid C string */
+	memcpy(str, tmp_buf, *len + 1);
+
+	return 0;
+}
+
+size_t json_calc_escaped_len(const char *str, size_t len)
+{
+	size_t escaped_len = len;
+	size_t pos;
+
+	/* Every escapable character grows by one byte: the backslash */
+	for (pos = 0; pos < len; pos++) {
+		if (memchr(escapable, str[pos], sizeof(escapable) - 1)) {
+			escaped_len++;
+		}
+	}
+
+	return escaped_len;
+}
+
+ssize_t json_escape(char *str, size_t *len, size_t buf_size)
+{
+	size_t escaped_len;
+
+	escaped_len = json_calc_escaped_len(str, *len);
+
+	if (escaped_len == *len) {
+		/* If no escape is necessary, don't bother using up temporary
+		 * stack space to copy the string.
+		 */
+		return 0;
+	}
+
+	/* The escaped string plus its terminating NUL must fit in str */
+	if (escaped_len >= buf_size) {
+		return -ENOMEM;
+	}
+
+	return json_escape_internal(str, len, escaped_len);
+}
diff --git a/lib/json/json.h b/lib/json/json.h
new file mode 100644
index 0000000..1934cf3
--- /dev/null
+++ b/lib/json/json.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2017 Intel Corporation
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __JSON_H
+#define __JSON_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+enum json_tokens {
+	JSON_TOK_NONE = '_',
+	JSON_TOK_OBJECT_START = '{',
+	JSON_TOK_OBJECT_END = '}',
+	JSON_TOK_STRING = '"',
+	JSON_TOK_COLON = ':',
+	JSON_TOK_COMMA = ',',
+	JSON_TOK_NUMBER = '0',
+	JSON_TOK_TRUE = 't',
+	JSON_TOK_FALSE = 'f',
+	JSON_TOK_NULL = 'n',
+	JSON_TOK_ERROR = '!',
+	JSON_TOK_EOF = '\0',
+};
+
+struct json_obj_descr {
+	const char *field_name;
+	size_t field_name_len;
+	size_t offset;
+
+	/* Valid values here: JSON_TOK_STRING, JSON_TOK_NUMBER,
+	 * JSON_TOK_TRUE, JSON_TOK_FALSE. (Any other type makes
+	 * json_obj_parse() fail when it reaches that field.) */
+	enum json_tokens type;
+};
+
+/**
+ * @brief Parses the JSON-encoded object pointed to by @a json, with
+ * size @a len, according to the descriptor pointed to by @a descr.
+ * Values are stored in a struct pointed to by @a val.  Set up the
+ * descriptor like this:
+ *
+ *    struct s { int32_t foo; char *bar; };
+ *    struct json_obj_descr descr[] = {
+ *       { .field_name = "foo",
+ *         .field_name_len = 3,
+ *         .offset = offsetof(struct s, foo),
+ *         .type = JSON_TOK_NUMBER },
+ *       { .field_name = "bar",
+ *         .field_name_len = 3,
+ *         .offset = offsetof(struct s, bar),
+ *         .type = JSON_TOK_STRING }
+ *    };
+ *
+ * Since this parser is designed for machine-to-machine communications,
+ * some liberties were taken to simplify the design: (1) strings are not
+ * unescaped; (2) no UTF-8 validation is performed; (3) only integer
+ * numbers are supported; (4) nested objects are not supported, including
+ * arrays and objects within objects.
+ *
+ * @param json Pointer to JSON-encoded value to be parsed; the buffer is
+ * modified in place and decoded strings point into it
+ * @param len Length of JSON-encoded value
+ *
+ * @param descr Pointer to the descriptor array
+ *
+ * @param descr_len Number of elements in the descriptor array. Must be less
+ * than 31 due to implementation detail reasons (if more fields are
+ * necessary, use two descriptors)
+ *
+ * @param val Pointer to the struct to hold the decoded values
+ *
+ * @return < 0 if error, bitmap of decoded fields on success (bit 0
+ * is set if first field in the descriptor has been properly decoded, etc).
+ */
+int json_obj_parse(char *json, size_t len,
+	const struct json_obj_descr *descr, size_t descr_len,
+	void *val);
+
+/**
+ * @brief Escapes the string so it can be used to encode JSON objects
+ *
+ * @param str The string to escape; the escaped string is stored in the
+ * buffer pointed to by this parameter
+ *
+ * @param len Points to a size_t containing the size before and after
+ * the escaping process
+ *
+ * @param buf_size The size of buffer str points to
+ *
+ * @return 0 if string has been escaped properly, or -ENOMEM if there
+ * was not enough space to escape the buffer
+ */
+ssize_t json_escape(char *str, size_t *len, size_t buf_size);
+
+/**
+ * @brief Calculates the JSON-escaped string length
+ *
+ * @param str The string to analyze
+ *
+ * @param len String size
+ *
+ * @return The length str would have if it were escaped
+ */
+size_t json_calc_escaped_len(const char *str, size_t len);
+
+#endif /* __JSON_H */