From f38b84ea2b6b61d309c604faedd41ab3b0fcf16b Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 4 Nov 2009 14:55:53 -0800 Subject: [PATCH] Implement JSON parsing and serialization. This will be used by the upcoming Open vSwitch configuration database. --- configure.ac | 2 + lib/automake.mk | 4 + lib/dynamic-string.c | 25 + lib/dynamic-string.h | 1 + lib/json.c | 1569 ++++++++++++++++++++++++++++++++++++++++++ lib/json.h | 122 ++++ lib/unicode.c | 38 + lib/unicode.h | 53 ++ lib/util.c | 55 ++ lib/util.h | 4 + tests/.gitignore | 1 + tests/automake.mk | 5 + tests/json.at | 297 ++++++++ tests/test-json.c | 160 +++++ tests/testsuite.at | 1 + 15 files changed, 2337 insertions(+) create mode 100644 lib/json.c create mode 100644 lib/json.h create mode 100644 lib/unicode.c create mode 100644 lib/unicode.h create mode 100644 tests/json.at create mode 100644 tests/test-json.c diff --git a/configure.ac b/configure.ac index c8eed7c9..fe676d92 100644 --- a/configure.ac +++ b/configure.ac @@ -38,6 +38,8 @@ AC_USE_SYSTEM_EXTENSIONS AC_C_BIGENDIAN AC_SYS_LARGEFILE +AC_SEARCH_LIBS([pow], [m]) + OVS_CHECK_COVERAGE OVS_CHECK_NDEBUG OVS_CHECK_NETLINK diff --git a/lib/automake.mk b/lib/automake.mk index 825395ac..9c540428 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -49,6 +49,8 @@ lib_libopenvswitch_a_SOURCES = \ lib/hash.h \ lib/hmap.c \ lib/hmap.h \ + lib/json.c \ + lib/json.h \ lib/leak-checker.c \ lib/leak-checker.h \ lib/learning-switch.c \ @@ -107,6 +109,8 @@ lib_libopenvswitch_a_SOURCES = \ lib/type-props.h \ lib/unixctl.c \ lib/unixctl.h \ + lib/unicode.c \ + lib/unicode.h \ lib/util.c \ lib/util.h \ lib/valgrind.h \ diff --git a/lib/dynamic-string.c b/lib/dynamic-string.c index 9684ffad..531e53bb 100644 --- a/lib/dynamic-string.c +++ b/lib/dynamic-string.c @@ -71,6 +71,31 @@ ds_put_char(struct ds *ds, char c) *ds_put_uninit(ds, 1) = c; } +/* Appends unicode code point 'uc' to 'ds' in UTF-8 encoding. 
*/ +void +ds_put_utf8(struct ds *ds, int uc) +{ + if (uc <= 0x7f) { + ds_put_char(ds, uc); + } else if (uc <= 0x7ff) { + ds_put_char(ds, 0xc0 | (uc >> 6)); + ds_put_char(ds, 0x80 | (uc & 0x3f)); + } else if (uc <= 0xffff) { + ds_put_char(ds, 0xe0 | (uc >> 12)); + ds_put_char(ds, 0x80 | ((uc >> 6) & 0x3f)); + ds_put_char(ds, 0x80 | (uc & 0x3f)); + } else if (uc <= 0x10ffff) { + ds_put_char(ds, 0xf0 | (uc >> 18)); + ds_put_char(ds, 0x80 | ((uc >> 12) & 0x3f)); + ds_put_char(ds, 0x80 | ((uc >> 6) & 0x3f)); + ds_put_char(ds, 0x80 | (uc & 0x3f)); + } else { + /* Invalid code point. Insert the Unicode general substitute + * REPLACEMENT CHARACTER. */ + ds_put_utf8(ds, 0xfffd); + } +} + void ds_put_char_multiple(struct ds *ds, char c, size_t n) { diff --git a/lib/dynamic-string.h b/lib/dynamic-string.h index a44e0b30..01b93c4e 100644 --- a/lib/dynamic-string.h +++ b/lib/dynamic-string.h @@ -40,6 +40,7 @@ void ds_truncate(struct ds *, size_t new_length); void ds_reserve(struct ds *, size_t min_length); char *ds_put_uninit(struct ds *, size_t n); void ds_put_char(struct ds *, char); +void ds_put_utf8(struct ds *, int uc); void ds_put_char_multiple(struct ds *, char, size_t n); void ds_put_buffer(struct ds *, const char *, size_t n); void ds_put_cstr(struct ds *, const char *); diff --git a/lib/json.c b/lib/json.c new file mode 100644 index 00000000..dfd6b845 --- /dev/null +++ b/lib/json.c @@ -0,0 +1,1569 @@ +/* + * Copyright (c) 2009 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "json.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "dynamic-string.h" +#include "hash.h" +#include "shash.h" +#include "unicode.h" +#include "util.h" + +/* The type of a JSON token. */ +enum json_token_type { + T_EOF = 0, + T_BEGIN_ARRAY = '[', + T_END_ARRAY = ']', + T_BEGIN_OBJECT = '{', + T_END_OBJECT = '}', + T_NAME_SEPARATOR = ':', + T_VALUE_SEPARATOR = ',', + T_FALSE = UCHAR_MAX + 1, + T_NULL, + T_TRUE, + T_INTEGER, + T_REAL, + T_STRING +}; + +/* A JSON token. + * + * RFC 4627 doesn't define a lexical structure for JSON but I believe this to + * be compliant with the standard. + */ +struct json_token { + enum json_token_type type; + union { + double real; + long long int integer; + const char *string; + } u; +}; + +enum json_lex_state { + JSON_LEX_START, /* Not inside a token. */ + JSON_LEX_NUMBER, /* Reading a number. */ + JSON_LEX_KEYWORD, /* Reading a keyword. */ + JSON_LEX_STRING, /* Reading a quoted string. */ + JSON_LEX_ESCAPE /* In a quoted string just after a "\". */ +}; + +enum json_parse_state { + JSON_PARSE_START, /* Beginning of input. */ + JSON_PARSE_END, /* End of input. */ + + /* Objects. */ + JSON_PARSE_OBJECT_INIT, /* Expecting '}' or an object name. */ + JSON_PARSE_OBJECT_NAME, /* Expecting an object name. */ + JSON_PARSE_OBJECT_COLON, /* Expecting ':'. */ + JSON_PARSE_OBJECT_VALUE, /* Expecting an object value. */ + JSON_PARSE_OBJECT_NEXT, /* Expecting ',' or '}'. */ + + /* Arrays. */ + JSON_PARSE_ARRAY_INIT, /* Expecting ']' or a value. */ + JSON_PARSE_ARRAY_VALUE, /* Expecting a value. */ + JSON_PARSE_ARRAY_NEXT /* Expecting ',' or ']'. */ +}; + +struct json_parser_node { + struct json *json; +}; + +/* A JSON parser. */ +struct json_parser { + int flags; + + /* Lexical analysis. 
*/ + enum json_lex_state lex_state; + struct ds buffer; /* Buffer for accumulating token text. */ + + /* Parsing. */ + enum json_parse_state parse_state; +#define JSON_MAX_HEIGHT 1000 + struct json_parser_node *stack; + size_t height, allocated_height; + char *member_name; + + /* Parse status. */ + bool done; + char *error; /* Error message, if any, null if none yet. */ +}; + +static struct json *json_create(enum json_type type); +static void json_parser_input(struct json_parser *, struct json_token *); + +static void json_error(struct json_parser *p, const char *format, ...) + PRINTF_FORMAT(2, 3); + +const char * +json_type_to_string(enum json_type type) +{ + switch (type) { + case JSON_NULL: + return "null"; + + case JSON_FALSE: + return "false"; + + case JSON_TRUE: + return "true"; + + case JSON_OBJECT: + return "object"; + + case JSON_ARRAY: + return "array"; + + case JSON_INTEGER: + case JSON_REAL: + return "number"; + + case JSON_STRING: + return "string"; + + case JSON_N_TYPES: + default: + return ""; + } +} + +/* Functions for manipulating struct json. */ + +struct json * +json_null_create(void) +{ + return json_create(JSON_NULL); +} + +struct json * +json_boolean_create(bool b) +{ + return json_create(b ? 
JSON_TRUE : JSON_FALSE); +} + +struct json * +json_string_create_nocopy(char *s) +{ + struct json *json = json_create(JSON_STRING); + json->u.string = s; + return json; +} + +struct json * +json_string_create(const char *s) +{ + return json_string_create_nocopy(xstrdup(s)); +} + +struct json * +json_array_create_empty(void) +{ + struct json *json = json_create(JSON_ARRAY); + json->u.array.elems = NULL; + json->u.array.n = 0; + json->u.array.n_allocated = 0; + return json; +} + +void +json_array_add(struct json *array_, struct json *element) +{ + struct json_array *array = json_array(array_); + if (array->n >= array->n_allocated) { + array->elems = x2nrealloc(array->elems, &array->n_allocated, + sizeof *array->elems); + } + array->elems[array->n++] = element; +} + +void +json_array_trim(struct json *array_) +{ + struct json_array *array = json_array(array_); + if (array->n < array->n_allocated){ + array->n_allocated = array->n; + array->elems = xrealloc(array->elems, array->n * sizeof *array->elems); + } +} + +struct json * +json_array_create(struct json **elements, size_t n) +{ + struct json *json = json_create(JSON_ARRAY); + json->u.array.elems = elements; + json->u.array.n = n; + json->u.array.n_allocated = n; + return json; +} + +struct json * +json_array_create_2(struct json *elem0, struct json *elem1) +{ + struct json **elems = xmalloc(2 * sizeof *elems); + elems[0] = elem0; + elems[1] = elem1; + return json_array_create(elems, 2); +} + +struct json * +json_array_create_3(struct json *elem0, struct json *elem1, struct json *elem2) +{ + struct json **elems = xmalloc(3 * sizeof *elems); + elems[0] = elem0; + elems[1] = elem1; + elems[2] = elem2; + return json_array_create(elems, 3); +} + +struct json * +json_object_create(void) +{ + struct json *json = json_create(JSON_OBJECT); + json->u.object = xmalloc(sizeof *json->u.object); + shash_init(json->u.object); + return json; +} + +struct json * +json_integer_create(long long int integer) +{ + struct json *json = 
json_create(JSON_INTEGER); + json->u.integer = integer; + return json; +} + +struct json * +json_real_create(double real) +{ + struct json *json = json_create(JSON_REAL); + json->u.real = real; + return json; +} + +void +json_object_put(struct json *json, const char *name, struct json *value) +{ + shash_add(json->u.object, name, value); +} + +void +json_object_put_string(struct json *json, const char *name, const char *value) +{ + json_object_put(json, name, json_string_create(value)); +} + +const char * +json_string(const struct json *json) +{ + assert(json->type == JSON_STRING); + return json->u.string; +} + +struct json_array * +json_array(const struct json *json) +{ + assert(json->type == JSON_ARRAY); + return (struct json_array *) &json->u.array; +} + +struct shash * +json_object(const struct json *json) +{ + assert(json->type == JSON_OBJECT); + return (struct shash *) json->u.object; +} + +bool +json_boolean(const struct json *json) +{ + assert(json->type == JSON_TRUE || json->type == JSON_FALSE); + return json->type == JSON_TRUE; +} + +double +json_real(const struct json *json) +{ + assert(json->type == JSON_REAL || json->type == JSON_INTEGER); + return json->type == JSON_REAL ? json->u.real : json->u.integer; +} + +int64_t +json_integer(const struct json *json) +{ + assert(json->type == JSON_INTEGER); + return json->u.integer; +} + +static void json_destroy_object(struct shash *object); +static void json_destroy_array(struct json_array *array); + +/* Frees 'json' and everything it points to, recursively. 
*/ +void +json_destroy(struct json *json) +{ + if (json) { + switch (json->type) { + case JSON_OBJECT: + json_destroy_object(json->u.object); + break; + + case JSON_ARRAY: + json_destroy_array(&json->u.array); + break; + + case JSON_STRING: + free(json->u.string); + break; + + case JSON_NULL: + case JSON_FALSE: + case JSON_TRUE: + case JSON_INTEGER: + case JSON_REAL: + break; + + case JSON_N_TYPES: + NOT_REACHED(); + } + free(json); + } +} + +static void +json_destroy_object(struct shash *object) +{ + struct shash_node *node, *next; + + SHASH_FOR_EACH_SAFE (node, next, object) { + struct json *value = node->data; + + json_destroy(value); + shash_delete(object, node); + } + shash_destroy(object); + free(object); +} + +static void +json_destroy_array(struct json_array *array) +{ + size_t i; + + for (i = 0; i < array->n; i++) { + json_destroy(array->elems[i]); + } + free(array->elems); +} + +static struct json *json_clone_object(const struct shash *object); +static struct json *json_clone_array(const struct json_array *array); + +/* Returns a deep copy of 'json'. 
*/ +struct json * +json_clone(const struct json *json) +{ + switch (json->type) { + case JSON_OBJECT: + return json_clone_object(json->u.object); + + case JSON_ARRAY: + return json_clone_array(&json->u.array); + + case JSON_STRING: + return json_string_create(json->u.string); + + case JSON_NULL: + case JSON_FALSE: + case JSON_TRUE: + return json_create(json->type); + + case JSON_INTEGER: + return json_integer_create(json->u.integer); + + case JSON_REAL: + return json_real_create(json->u.real); + + case JSON_N_TYPES: + default: + NOT_REACHED(); + } +} + +static struct json * +json_clone_object(const struct shash *object) +{ + struct shash_node *node; + struct json *json; + + json = json_object_create(); + SHASH_FOR_EACH (node, object) { + struct json *value = node->data; + json_object_put(json, node->name, json_clone(value)); + } + return json; +} + +static struct json * +json_clone_array(const struct json_array *array) +{ + struct json **elems; + size_t i; + + elems = xmalloc(array->n * sizeof *elems); + for (i = 0; i < array->n; i++) { + elems[i] = json_clone(array->elems[i]); + } + return json_array_create(elems, array->n); +} + +static size_t +json_hash_object(const struct shash *object, size_t basis) +{ + const struct shash_node **nodes; + size_t n, i; + + nodes = shash_sort(object); + n = shash_count(object); + for (i = 0; i < n; i++) { + const struct shash_node *node = nodes[i]; + basis = hash_string(node->name, basis); + basis = json_hash(node->data, basis); + } + return basis; +} + +static size_t +json_hash_array(const struct json_array *array, size_t basis) +{ + size_t i; + + basis = hash_int(array->n, basis); + for (i = 0; i < array->n; i++) { + basis = json_hash(array->elems[i], basis); + } + return basis; +} + +size_t +json_hash(const struct json *json, size_t basis) +{ + switch (json->type) { + case JSON_OBJECT: + return json_hash_object(json->u.object, basis); + + case JSON_ARRAY: + return json_hash_array(&json->u.array, basis); + + case JSON_STRING: 
+ return hash_string(json->u.string, basis); + + case JSON_NULL: + case JSON_FALSE: + case JSON_TRUE: + return hash_int(json->type << 8, basis); + + case JSON_INTEGER: + return hash_int(json->u.integer, basis); + + case JSON_REAL: + return hash_double(json->u.real, basis); + + case JSON_N_TYPES: + default: + NOT_REACHED(); + } +} + +static bool +json_equal_object(const struct shash *a, const struct shash *b) +{ + struct shash_node *node; + + if (shash_count(a) != shash_count(b)) { + return false; + } + + SHASH_FOR_EACH (node, a) { + if (!shash_find(b, node->name)) { + return false; + } + } + + return true; +} + +static bool +json_equal_array(const struct json_array *a, const struct json_array *b) +{ + size_t i; + + if (a->n != b->n) { + return false; + } + + for (i = 0; i < a->n; i++) { + if (!json_equal(a->elems[i], b->elems[i])) { + return false; + } + } + + return true; +} + +bool +json_equal(const struct json *a, const struct json *b) +{ + if (a->type != b->type) { + return false; + } + + switch (a->type) { + case JSON_OBJECT: + return json_equal_object(a->u.object, b->u.object); + + case JSON_ARRAY: + return json_equal_array(&a->u.array, &b->u.array); + + case JSON_STRING: + return !strcmp(a->u.string, b->u.string); + + case JSON_NULL: + case JSON_FALSE: + case JSON_TRUE: + return true; + + case JSON_INTEGER: + return a->u.integer == b->u.integer; + + case JSON_REAL: + return a->u.real == b->u.real; + + case JSON_N_TYPES: + default: + NOT_REACHED(); + } +} + +/* Lexical analysis. 
*/ + +static void +json_lex_keyword(struct json_parser *p) +{ + struct json_token token; + const char *s; + + s = ds_cstr(&p->buffer); + if (!strcmp(s, "false")) { + token.type = T_FALSE; + } else if (!strcmp(s, "true")) { + token.type = T_TRUE; + } else if (!strcmp(s, "null")) { + token.type = T_NULL; + } else { + json_error(p, "invalid keyword '%s'", s); + return; + } + json_parser_input(p, &token); +} + +static void +json_lex_number(struct json_parser *p) +{ + const char *cp = ds_cstr(&p->buffer); + unsigned long long int significand = 0; + int sig_digits = 0; + bool imprecise = false; + bool negative = false; + int pow10 = 0; + + /* Leading minus sign. */ + if (*cp == '-') { + negative = true; + cp++; + } + + /* At least one integer digit, but 0 may not be used as a leading digit for + * a longer number. */ + significand = 0; + sig_digits = 0; + if (*cp == '0') { + cp++; + if (isdigit(*cp)) { + json_error(p, "leading zeros not allowed"); + return; + } + } else if (isdigit(*cp)) { + do { + if (significand <= ULLONG_MAX / 10) { + significand = significand * 10 + (*cp - '0'); + sig_digits++; + } else { + pow10++; + if (*cp != '0') { + imprecise = true; + } + } + cp++; + } while (isdigit(*cp)); + } else { + json_error(p, "'-' must be followed by digit"); + return; + } + + /* Optional fraction. */ + if (*cp == '.') { + cp++; + if (!isdigit(*cp)) { + json_error(p, "decimal point must be followed by digit"); + return; + } + do { + if (significand <= ULLONG_MAX / 10) { + significand = significand * 10 + (*cp - '0'); + sig_digits++; + pow10--; + } else if (*cp != '0') { + imprecise = true; + } + cp++; + } while (isdigit(*cp)); + } + + /* Optional exponent. 
*/ + if (*cp == 'e' || *cp == 'E') { + bool negative_exponent = false; + int exponent; + + cp++; + if (*cp == '+') { + cp++; + } else if (*cp == '-') { + negative_exponent = true; + cp++; + } + + if (!isdigit(*cp)) { + json_error(p, "exponent must contain at least one digit"); + return; + } + + exponent = 0; + do { + if (exponent >= INT_MAX / 10) { + json_error(p, "exponent outside valid range"); + return; + } + exponent = exponent * 10 + (*cp - '0'); + cp++; + } while (isdigit(*cp)); + + if (negative_exponent) { + pow10 -= exponent; + } else { + pow10 += exponent; + } + } + + if (*cp != '\0') { + json_error(p, "syntax error in number"); + return; + } + + /* Figure out number. + * + * We suppress negative zeros as a matter of policy. */ + if (!significand) { + struct json_token token; + token.type = T_INTEGER; + token.u.integer = 0; + json_parser_input(p, &token); + return; + } + + if (!imprecise) { + while (pow10 > 0 && significand < ULLONG_MAX / 10) { + significand *= 10; + sig_digits++; + pow10--; + } + while (pow10 < 0 && significand % 10 == 0) { + significand /= 10; + sig_digits--; + pow10++; + } + if (pow10 == 0 + && significand <= (negative + ? (unsigned long long int) LLONG_MAX + 1 + : LLONG_MAX)) { + struct json_token token; + token.type = T_INTEGER; + token.u.integer = negative ? 
-significand : significand; + json_parser_input(p, &token); + return; + } + } + + if (pow10 + sig_digits <= DBL_MAX_10_EXP) { + struct json_token token; + token.type = T_REAL; + token.u.real = significand * pow(10.0, pow10); + if (token.u.real <= DBL_MAX) { + if (negative && token.u.real) { + token.u.real = -token.u.real; + } + json_parser_input(p, &token); + return; + } + } + json_error(p, "number outside valid range"); +} + +static bool +json_lex_4hex(struct json_parser *p, const char *cp, int *valuep) +{ + int value, i; + + value = 0; + for (i = 0; i < 4; i++) { + unsigned char c = *cp++; + if (!isxdigit(c)) { + json_error(p, "malformed \\u escape"); + return false; + } + value = (value << 4) | hexit_value(c); + } + if (!value) { + json_error(p, "null bytes not supported in quoted strings"); + return false; + } + *valuep = value; + return true; +} + +static const char * +json_lex_unicode(struct json_parser *p, const char *cp, struct ds *s) +{ + int c0, c1; + + if (!json_lex_4hex(p, cp, &c0)) { + return NULL; + } + cp += 4; + if (!uc_is_leading_surrogate(c0)) { + ds_put_utf8(s, c0); + return cp; + } + + if (*cp++ != '\\' || *cp++ != 'u') { + json_error(p, "malformed escaped surrogate pair"); + return NULL; + } + + if (!json_lex_4hex(p, cp, &c1)) { + return NULL; + } + cp += 4; + if (!uc_is_trailing_surrogate(c1)) { + json_error(p, "second half of escaped surrogate pair is not " + "trailing surrogate"); + return NULL; + } + + ds_put_utf8(s, utf16_decode_surrogate_pair(c0, c1)); + return cp; +} + +static void +json_lex_string(struct json_parser *p) +{ + struct json_token token; + const char *cp; + struct ds s; + + cp = ds_cstr(&p->buffer); + if (!strchr(cp, '\\')) { + token.type = T_STRING; + token.u.string = cp; + json_parser_input(p, &token); + return; + } + + ds_init(&s); + ds_reserve(&s, strlen(cp)); + while (*cp != '\0') { + if (*cp != '\\') { + ds_put_char(&s, *cp++); + continue; + } + + cp++; + switch (*cp++) { + case '"': case '\\': case '/': + 
ds_put_char(&s, cp[-1]); + break; + + case 'b': + ds_put_char(&s, '\b'); + break; + + case 'f': + ds_put_char(&s, '\f'); + break; + + case 'n': + ds_put_char(&s, '\n'); + break; + + case 'r': + ds_put_char(&s, '\r'); + break; + + case 't': + ds_put_char(&s, '\t'); + break; + + case 'u': + cp = json_lex_unicode(p, cp, &s); + if (!cp) { + goto exit; + } + break; + + default: + json_error(p, "bad escape \\%c", cp[-1]); + goto exit; + } + } + + token.type = T_STRING; + token.u.string = ds_cstr(&s); + json_parser_input(p, &token); + +exit: + ds_destroy(&s); + return; +} + +static bool +json_lex_input(struct json_parser *p, int c) +{ + struct json_token token; + + switch (p->lex_state) { + case JSON_LEX_START: + switch (c) { + case ' ': case '\t': case '\n': case '\r': + /* Nothing to do. */ + return true; + + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + p->lex_state = JSON_LEX_KEYWORD; + break; + + case '[': case '{': case ']': case '}': case ':': case ',': + token.type = c; + json_parser_input(p, &token); + return true; + + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + p->lex_state = JSON_LEX_NUMBER; + break; + + case '"': + p->lex_state = JSON_LEX_STRING; + return true; + + default: + if (isprint(c)) { + json_error(p, "invalid character '%c'", c); + } else { + json_error(p, "invalid character U+%04x", c); + } + return true; + } + break; + + case JSON_LEX_KEYWORD: + if (!isalpha((unsigned char) c)) { + json_lex_keyword(p); + return false; + } + break; + + case JSON_LEX_NUMBER: + if (!strchr(".0123456789eE-+", c)) { + json_lex_number(p); + return false; + } + break; + + case JSON_LEX_STRING: + if (c == '\\') { + p->lex_state = JSON_LEX_ESCAPE; + } else if (c == '"') { + 
json_lex_string(p); + return true; + } else if (c < 0x20) { + json_error(p, "U+%04X must be escaped in quoted string", c); + return true; + } + break; + + case JSON_LEX_ESCAPE: + p->lex_state = JSON_LEX_STRING; + break; + + default: + abort(); + } + ds_put_char(&p->buffer, c); + return true; +} + +/* Parsing. */ + +/* Parses 'string' as a JSON object or array and returns a newly allocated + * 'struct json'. The caller must free the returned structure with + * json_destroy() when it is no longer needed. + * + * 'string' must be encoded in UTF-8. + * + * If 'string' is valid JSON, then the returned 'struct json' will be either an + * object (JSON_OBJECT) or an array (JSON_ARRAY). + * + * If 'string' is not valid JSON, then the returned 'struct json' will be a + * string (JSON_STRING) that describes the particular error encountered during + * parsing. (This is an acceptable means of error reporting because at its top + * level JSON must be either an object or an array; a bare string is not + * valid.) */ +struct json * +json_from_string(const char *string) +{ + struct json_parser *p = json_parser_create(JSPF_TRAILER); + json_parser_feed(p, string, strlen(string)); + return json_parser_finish(p); +} + +/* Reads the file named 'file_name', parses its contents as a JSON object or + * array, and returns a newly allocated 'struct json'. The caller must free + * the returned structure with json_destroy() when it is no longer needed. + * + * The file must be encoded in UTF-8. + * + * See json_from_string() for return value semantics. + */ +struct json * +json_from_file(const char *file_name) +{ + struct json_parser *p; + struct json *json; + FILE *stream; + + /* Open file. */ + stream = fopen(file_name, "r"); + if (!stream) { + return json_string_create_nocopy( + xasprintf("error opening \"%s\": %s", file_name, strerror(errno))); + } + + /* Read and parse file. 
*/ + p = json_parser_create(JSPF_TRAILER); + for (;;) { + char buffer[BUFSIZ]; + size_t n; + + n = fread(buffer, 1, sizeof buffer, stream); + if (!n || json_parser_feed(p, buffer, n) != n) { + break; + } + } + json = json_parser_finish(p); + + /* Close file and check for I/O errors. */ + if (ferror(stream)) { + json_destroy(json); + json = json_string_create_nocopy( + xasprintf("error reading \"%s\": %s", file_name, strerror(errno))); + } + fclose(stream); + + return json; +} + +struct json_parser * +json_parser_create(int flags) +{ + struct json_parser *p = xzalloc(sizeof *p); + p->flags = flags; + return p; +} + +size_t +json_parser_feed(struct json_parser *p, const char *input, size_t n) +{ + size_t i; + for (i = 0; !p->done && i < n; ) { + if (json_lex_input(p, input[i])) { + i++; + } + } + return i; +} + +bool +json_parser_is_done(const struct json_parser *p) +{ + return p->done; +} + +struct json * +json_parser_finish(struct json_parser *p) +{ + struct json *json; + + switch (p->lex_state) { + case JSON_LEX_START: + break; + + case JSON_LEX_STRING: + case JSON_LEX_ESCAPE: + json_error(p, "unexpected end of input in quoted string"); + break; + + case JSON_LEX_NUMBER: + case JSON_LEX_KEYWORD: + json_lex_input(p, ' '); + break; + } + + if (p->parse_state == JSON_PARSE_START) { + json_error(p, "empty input stream"); + } else if (p->parse_state != JSON_PARSE_END) { + json_error(p, "unexpected end of input"); + } + + if (!p->error) { + assert(p->height == 1); + assert(p->stack[0].json != NULL); + json = p->stack[--p->height].json; + } else { + json = json_string_create_nocopy(p->error); + p->error = NULL; + } + + json_parser_abort(p); + + return json; +} + +void +json_parser_abort(struct json_parser *p) +{ + if (p) { + ds_destroy(&p->buffer); + if (p->height) { + json_destroy(p->stack[0].json); + } + free(p->stack); + free(p->member_name); + free(p->error); + free(p); + } +} + +static struct json_parser_node * +json_parser_top(struct json_parser *p) +{ + return 
&p->stack[p->height - 1]; +} + +static void +json_parser_put_value(struct json_parser *p, struct json *value) +{ + struct json_parser_node *node = json_parser_top(p); + if (node->json->type == JSON_OBJECT) { + json_object_put(node->json, p->member_name, value); + free(p->member_name); + p->member_name = NULL; + } else if (node->json->type == JSON_ARRAY) { + json_array_add(node->json, value); + } else { + NOT_REACHED(); + } +} + +static struct json_parser_node * +json_parser_push(struct json_parser *p, + struct json *new_json, enum json_parse_state new_state) +{ + if (p->height < JSON_MAX_HEIGHT) { + struct json_parser_node *node; + + if (p->height >= p->allocated_height) { + p->stack = x2nrealloc(p->stack, &p->allocated_height, + sizeof *p->stack); + } + + if (p->height > 0) { + json_parser_put_value(p, new_json); + } + + node = &p->stack[p->height++]; + node->json = new_json; + p->parse_state = new_state; + return node; + } else { + json_error(p, "input exceeds maximum nesting depth %d", + JSON_MAX_HEIGHT); + return NULL; + } +} + +static void +json_parser_push_object(struct json_parser *p) +{ + json_parser_push(p, json_object_create(), JSON_PARSE_OBJECT_INIT); +} + +static void +json_parser_push_array(struct json_parser *p) +{ + json_parser_push(p, json_array_create_empty(), JSON_PARSE_ARRAY_INIT); +} + +static void +json_parse_value(struct json_parser *p, struct json_token *token, + enum json_parse_state next_state) +{ + struct json *value; + + switch (token->type) { + case T_FALSE: + value = json_boolean_create(false); + break; + + case T_NULL: + value = json_null_create(); + break; + + case T_TRUE: + value = json_boolean_create(true); + break; + + case '{': + json_parser_push_object(p); + return; + + case '[': + json_parser_push_array(p); + return; + + case T_INTEGER: + value = json_integer_create(token->u.integer); + break; + + case T_REAL: + value = json_real_create(token->u.real); + break; + + case T_STRING: + value = json_string_create(token->u.string); + 
break; + + case T_EOF: + case '}': + case ']': + case ':': + case ',': + default: + json_error(p, "syntax error expecting value"); + return; + } + + json_parser_put_value(p, value); + p->parse_state = next_state; +} + +static void +json_parser_pop(struct json_parser *p) +{ + struct json_parser_node *node; + + /* Conserve memory. */ + node = json_parser_top(p); + if (node->json->type == JSON_ARRAY) { + json_array_trim(node->json); + } + + /* Pop off the top-of-stack. */ + if (p->height == 1) { + p->parse_state = JSON_PARSE_END; + if (!(p->flags & JSPF_TRAILER)) { + p->done = true; + } + } else { + p->height--; + node = json_parser_top(p); + if (node->json->type == JSON_ARRAY) { + p->parse_state = JSON_PARSE_ARRAY_NEXT; + } else if (node->json->type == JSON_OBJECT) { + p->parse_state = JSON_PARSE_OBJECT_NEXT; + } else { + NOT_REACHED(); + } + } +} + +static void +json_parser_input(struct json_parser *p, struct json_token *token) +{ + switch (p->parse_state) { + case JSON_PARSE_START: + if (token->type == '{') { + json_parser_push_object(p); + } else if (token->type == '[') { + json_parser_push_array(p); + } else { + json_error(p, "syntax error at beginning of input"); + } + break; + + case JSON_PARSE_END: + json_error(p, "trailing garbage at end of input"); + break; + + case JSON_PARSE_OBJECT_INIT: + if (token->type == '}') { + json_parser_pop(p); + break; + } + /* Fall through. 
*/
+    case JSON_PARSE_OBJECT_NAME:
+        if (token->type == T_STRING) {
+            p->member_name = xstrdup(token->u.string);
+            p->parse_state = JSON_PARSE_OBJECT_COLON;
+        } else {
+            json_error(p, "syntax error parsing object expecting string");
+        }
+        break;
+
+    case JSON_PARSE_OBJECT_COLON:
+        if (token->type == ':') {
+            p->parse_state = JSON_PARSE_OBJECT_VALUE;
+        } else {
+            json_error(p, "syntax error parsing object expecting ':'");
+        }
+        break;
+
+    case JSON_PARSE_OBJECT_VALUE:
+        json_parse_value(p, token, JSON_PARSE_OBJECT_NEXT);
+        break;
+
+    case JSON_PARSE_OBJECT_NEXT:
+        if (token->type == ',') {
+            p->parse_state = JSON_PARSE_OBJECT_NAME;
+        } else if (token->type == '}') {
+            json_parser_pop(p);
+        } else {
+            json_error(p, "syntax error expecting '}' or ','");
+        }
+        break;
+
+    case JSON_PARSE_ARRAY_INIT:
+        if (token->type == ']') {
+            json_parser_pop(p);
+            break;
+        }
+        /* Fall through. */
+    case JSON_PARSE_ARRAY_VALUE:
+        json_parse_value(p, token, JSON_PARSE_ARRAY_NEXT);
+        break;
+
+    case JSON_PARSE_ARRAY_NEXT:
+        if (token->type == ',') {
+            p->parse_state = JSON_PARSE_ARRAY_VALUE;
+        } else if (token->type == ']') {
+            json_parser_pop(p);
+        } else {
+            json_error(p, "syntax error expecting ']' or ','");
+        }
+        break;
+
+    default:
+        abort();
+    }
+
+    p->lex_state = JSON_LEX_START;      /* Ready for the next token. */
+    ds_clear(&p->buffer);
+}
+
+static struct json *
+json_create(enum json_type type)
+{
+    struct json *json = xmalloc(sizeof *json);
+    json->type = type;                  /* Only 'type' is set here. */
+    return json;
+}
+
+static void
+json_error(struct json_parser *p, const char *format, ...)
+{
+    if (!p->error) {                    /* Keep only the first error. */
+        va_list args;
+
+        va_start(args, format);
+        p->error = xvasprintf(format, args);
+        va_end(args);
+
+        p->done = true;                 /* Mark the parser as finished. */
+    }
+}
+
+#define SPACES_PER_LEVEL 2              /* Indent width used by indent_line(). */
+
+struct json_serializer {
+    struct ds ds;                       /* Output accumulated so far. */
+    int depth;                          /* Current nesting level. */
+    int flags;                          /* JSSF_* flags from the caller. */
+};
+
+static void json_to_ds(const struct json *, struct json_serializer *);
+static void json_object_to_ds(const struct shash *object,
+                              struct json_serializer *);
+static void json_array_to_ds(const struct json_array *,
+                             struct json_serializer *);
+static void json_string_to_ds(const char *string, struct ds *);
+
+/* Converts 'json' to a string in JSON format, encoded in UTF-8, and returns
+ * that string. The caller is responsible for freeing the returned string,
+ * with free(), when it is no longer needed.
+ *
+ * If 'flags' contains JSSF_PRETTY, the output is pretty-printed with each
+ * nesting level introducing an additional indentation. Otherwise, the
+ * returned string does not contain any new-line characters.
+ *
+ * If 'flags' contains JSSF_SORT, members of objects in the output are sorted
+ * in bytewise lexicographic order for reproducibility. Otherwise, members of
+ * objects are output in an indeterminate order.
+ *
+ * The returned string is valid JSON only if 'json' represents an array or an
+ * object, since a bare literal does not satisfy the JSON grammar. */
+char *
+json_to_string(const struct json *json, int flags)
+{
+    struct json_serializer s;
+    ds_init(&s.ds);
+    s.depth = 0;
+    s.flags = flags;
+    json_to_ds(json, &s);
+    return ds_steal_cstr(&s.ds);
+}
+
+static void
+json_to_ds(const struct json *json, struct json_serializer *s)
+{
+    struct ds *ds = &s->ds;
+
+    switch (json->type) {               /* One serializer per value type. */
+    case JSON_NULL:
+        ds_put_cstr(ds, "null");
+        break;
+
+    case JSON_FALSE:
+        ds_put_cstr(ds, "false");
+        break;
+
+    case JSON_TRUE:
+        ds_put_cstr(ds, "true");
+        break;
+
+    case JSON_OBJECT:
+        json_object_to_ds(json->u.object, s);
+        break;
+
+    case JSON_ARRAY:
+        json_array_to_ds(&json->u.array, s);
+        break;
+
+    case JSON_INTEGER:
+        ds_put_format(ds, "%lld", json->u.integer);
+        break;
+
+    case JSON_REAL:
+        ds_put_format(ds, "%.*g", DBL_DIG, json->u.real);
+        break;
+
+    case JSON_STRING:
+        json_string_to_ds(json->u.string, ds);
+        break;
+
+    case JSON_N_TYPES:
+    default:
+        NOT_REACHED();
+    }
+}
+
+static void
+indent_line(struct json_serializer *s)
+{
+    if (s->flags & JSSF_PRETTY) {       /* No-op unless pretty-printing. */
+        ds_put_char(&s->ds, '\n');
+        ds_put_char_multiple(&s->ds, ' ', SPACES_PER_LEVEL * s->depth);
+    }
+}
+
+static void
+json_object_member_to_ds(size_t i, const struct shash_node *node,
+                         struct json_serializer *s)
+{
+    struct ds *ds = &s->ds;
+
+    if (i) {                            /* Separator before all but member 0. */
+        ds_put_char(ds, ',');
+        indent_line(s);
+    }
+
+    json_string_to_ds(node->name, ds);
+    ds_put_char(ds, ':');
+    if (s->flags & JSSF_PRETTY) {
+        ds_put_char(ds, ' ');
+    }
+    json_to_ds(node->data, s);
+}
+
+static void
+json_object_to_ds(const struct shash *object, struct json_serializer *s)
+{
+    struct ds *ds = &s->ds;
+
+    ds_put_char(ds, '{');
+
+    s->depth++;
+    indent_line(s);                     /* Done even for an empty object. */
+
+    if (s->flags & JSSF_SORT) {
+        const struct shash_node **nodes;
+        size_t n, i;
+
+        nodes = shash_sort(object);
+        n = shash_count(object);
+        for (i = 0; i < n; i++) {
+            json_object_member_to_ds(i, nodes[i], s);
+        }
+        free(nodes);
+    } else {
+        struct shash_node *node;
+        size_t i;
+
+        i = 0;
+        SHASH_FOR_EACH (node, object) { /* Iteration order of the shash. */
+            json_object_member_to_ds(i++, node, s);
+        }
+    }
+
+    ds_put_char(ds, '}');
+    s->depth--;
+}
+
+static void
+json_array_to_ds(const struct json_array *array, struct json_serializer *s)
+{
+    struct ds *ds = &s->ds;
+    size_t i;
+
+    ds_put_char(ds, '[');
+    s->depth++;
+
+    if (array->n > 0) {                 /* An empty array prints as "[]". */
+        indent_line(s);
+
+        for (i = 0; i < array->n; i++) {
+            if (i) {
+                ds_put_char(ds, ',');
+                indent_line(s);
+            }
+            json_to_ds(array->elems[i], s);
+        }
+    }
+
+    s->depth--;
+    ds_put_char(ds, ']');
+}
+
+static void
+json_string_to_ds(const char *string, struct ds *ds)
+{
+    uint8_t c;
+
+    ds_put_char(ds, '"');
+    while ((c = *string++) != '\0') {
+        switch (c) {
+        case '"':
+            ds_put_cstr(ds, "\\\"");
+            break;
+
+        case '\\':
+            ds_put_cstr(ds, "\\\\");
+            break;
+
+        case '\b':
+            ds_put_cstr(ds, "\\b");
+            break;
+
+        case '\f':
+            ds_put_cstr(ds, "\\f");
+            break;
+
+        case '\n':
+            ds_put_cstr(ds, "\\n");
+            break;
+
+        case '\r':
+            ds_put_cstr(ds, "\\r");
+            break;
+
+        case '\t':
+            ds_put_cstr(ds, "\\t");
+            break;
+
+        default:
+            if (c >= 32) {              /* Bytes >= 32 are copied verbatim. */
+                ds_put_char(ds, c);
+            } else {                    /* Other control bytes: \u00XX. */
+                ds_put_format(ds, "\\u%04x", c);
+            }
+            break;
+        }
+    }
+    ds_put_char(ds, '"');
+}
diff --git a/lib/json.h b/lib/json.h
new file mode 100644
index 00000000..76740153
--- /dev/null
+++ b/lib/json.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2009 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef JSON_H
+#define JSON_H 1
+
+/* This is an implementation of JavaScript Object Notation (JSON) as specified
+ * by RFC 4627. It is intended to fully comply with RFC 4627, with the
+ * following known exceptions and clarifications:
+ *
+ *     - Null bytes (\u0000) are not allowed in strings.
+ *
+ *     - Only UTF-8 encoding is supported (RFC 4627 allows for other Unicode
+ *       encodings).
+ *
+ *     - Names within an object must be unique (RFC 4627 says that they
+ *       "should" be unique).
+ */
+
+#include "shash.h"
+
+/* Type of a JSON value. */
+enum json_type {
+    JSON_NULL,                  /* null */
+    JSON_FALSE,                 /* false */
+    JSON_TRUE,                  /* true */
+    JSON_OBJECT,                /* {"a": b, "c": d, ...} */
+    JSON_ARRAY,                 /* [1, 2, 3, ...] */
+    JSON_INTEGER,               /* 123. */
+    JSON_REAL,                  /* 123.456. */
+    JSON_STRING,                /* "..." */
+    JSON_N_TYPES
+};
+
+const char *json_type_to_string(enum json_type);
+
+/* A JSON array. */
+struct json_array {
+    size_t n, n_allocated;
+    struct json **elems;
+};
+
+/* A JSON value. */
+struct json {
+    enum json_type type;
+    union {
+        struct shash *object;   /* Contains "struct json *"s. */
+        struct json_array array;
+        long long int integer;
+        double real;
+        char *string;
+    } u;
+};
+
+struct json *json_null_create(void);
+struct json *json_boolean_create(bool);
+struct json *json_string_create(const char *);
+struct json *json_string_create_nocopy(char *);
+struct json *json_integer_create(long long int);
+struct json *json_real_create(double);
+
+struct json *json_array_create_empty(void);
+void json_array_add(struct json *, struct json *element);
+void json_array_trim(struct json *);
+struct json *json_array_create(struct json **, size_t n);
+struct json *json_array_create_2(struct json *, struct json *);
+struct json *json_array_create_3(struct json *, struct json *, struct json *);
+
+struct json *json_object_create(void);
+void json_object_put(struct json *, const char *name, struct json *value);
+void json_object_put_string(struct json *,
+                            const char *name, const char *value);
+
+const char *json_string(const struct json *);
+struct json_array *json_array(const struct json *);
+struct shash *json_object(const struct json *);
+bool json_boolean(const struct json *);
+double json_real(const struct json *);
+int64_t json_integer(const struct json *);
+
+struct json *json_clone(const struct json *);
+void json_destroy(struct json *);
+
+size_t json_hash(const struct json *, size_t basis);
+bool json_equal(const struct json *, const struct json *);
+
+/* Parsing JSON. */
+enum {
+    JSPF_TRAILER = 1 << 0       /* Check for garbage following input.  */
+};
+
+struct json_parser *json_parser_create(int flags);
+size_t json_parser_feed(struct json_parser *, const char *, size_t);
+bool json_parser_is_done(const struct json_parser *);
+struct json *json_parser_finish(struct json_parser *);
+void json_parser_abort(struct json_parser *);
+
+struct json *json_from_string(const char *string);
+struct json *json_from_file(const char *file_name);
+
+/* Serializing JSON. */
+
+enum {
+    JSSF_PRETTY = 1 << 0,       /* Multiple lines with indentation, if true. */
+    JSSF_SORT = 1 << 1          /* Object members in sorted order, if true. */
+};
+char *json_to_string(const struct json *, int flags);
+
+#endif /* json.h */
diff --git a/lib/unicode.c b/lib/unicode.c
new file mode 100644
index 00000000..69ebcfc9
--- /dev/null
+++ b/lib/unicode.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2009 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "unicode.h"
+
+/* Returns the unicode code point corresponding to leading surrogate 'leading'
+ * and trailing surrogate 'trailing'. The return value will not make any
+ * sense if 'leading' or 'trailing' are not in the correct ranges for leading
+ * or trailing surrogates. */
+int
+utf16_decode_surrogate_pair(int leading, int trailing)
+{
+    /*
+     *  Leading surrogate:         110110wwwwxxxxxx
+     * Trailing surrogate:         110111xxxxxxxxxx
+     *         Code point: 000uuuuuxxxxxxxxxxxxxxxx
+     */
+    int w = (leading >> 6) & 0xf;       /* Bits 6..9 of 'leading'. */
+    int u = w + 1;                      /* uuuuu is stored as wwww + 1. */
+    int x0 = leading & 0x3f;            /* Upper 6 bits of the x field. */
+    int x1 = trailing & 0x3ff;          /* Lower 10 bits of the x field. */
+    return (u << 16) | (x0 << 10) | x1;
+}
diff --git a/lib/unicode.h b/lib/unicode.h
new file mode 100644
index 00000000..0f20bdc7
--- /dev/null
+++ b/lib/unicode.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2009 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef UNICODE_H
+#define UNICODE_H 1
+
+#include <stdbool.h>
+
+/* Returns true if 'c' is a Unicode code point, otherwise false. */
+static inline bool
+uc_is_code_point(int c)
+{
+    return c >= 0 && c <= 0x10ffff;
+}
+
+/* Returns true if 'c' is a Unicode code point for a leading surrogate. */
+static inline bool
+uc_is_leading_surrogate(int c)
+{
+    return c >= 0xd800 && c <= 0xdbff;
+}
+
+/* Returns true if 'c' is a Unicode code point for a trailing surrogate. */
+static inline bool
+uc_is_trailing_surrogate(int c)
+{
+    return c >= 0xdc00 && c <= 0xdfff;
+}
+
+/* Returns true if 'c' is a Unicode code point for a leading or trailing
+ * surrogate. */
+static inline bool
+uc_is_surrogate(int c)
+{
+    return c >= 0xd800 && c <= 0xdfff;
+}
+
+int utf16_decode_surrogate_pair(int leading, int trailing);
+
+#endif /* unicode.h */
diff --git a/lib/util.c b/lib/util.c
index cecd5825..c9ef8b15 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -300,3 +300,58 @@ str_to_ullong(const char *s, int base, unsigned long long *ull)
 {
     return str_to_llong(s, base, (long long *) ull);
 }
+
+/* Converts floating-point string 's' into a double. If successful, stores
+ * the double in '*d' and returns true; on failure, stores 0 in '*d' and
+ * returns false.
+ *
+ * Underflow (e.g. "1e-9999") is not considered an error, but overflow
+ * (e.g. "1e9999") is. */
+bool
+str_to_double(const char *s, double *d)
+{
+    int save_errno = errno;             /* Restored on both exit paths. */
+    char *tail;
+    errno = 0;
+    *d = strtod(s, &tail);
+    if (errno == EINVAL || (errno == ERANGE && *d != 0)
+        || tail == s || *tail != '\0') {
+        errno = save_errno;
+        *d = 0;
+        return false;
+    } else {
+        errno = save_errno;
+        return true;
+    }
+}
+
+/* Returns the value (0 to 15) of 'c', which must be a hexadecimal digit. */
+int
+hexit_value(int c)
+{
+    switch (c) {
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9':
+        return c - '0';
+
+    case 'a': case 'A':
+        return 0xa;
+
+    case 'b': case 'B':
+        return 0xb;
+
+    case 'c': case 'C':
+        return 0xc;
+
+    case 'd': case 'D':
+        return 0xd;
+
+    case 'e': case 'E':
+        return 0xe;
+
+    case 'f': case 'F':
+        return 0xf;
+    }
+
+    NOT_REACHED();                      /* 'c' was not a hex digit. */
+}
diff --git a/lib/util.h b/lib/util.h
index 1290d33e..afa43f62 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -121,6 +121,10 @@ bool str_to_uint(const char *, int base, unsigned int *);
 bool str_to_ulong(const char *, int base, unsigned long *);
 bool str_to_ullong(const char *, int base, unsigned long long *);
 
+bool str_to_double(const char *, double *);
+
+int hexit_value(int c);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/tests/.gitignore b/tests/.gitignore
index 706aa147..caa2db65 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -5,6 +5,7 @@
 /test-flows
 /test-hash
 /test-hmap
+/test-json
 /test-list
 /test-stp
 /test-type-props
diff --git a/tests/automake.mk b/tests/automake.mk
index 1d236a47..59abe9af 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -8,6 +8,7 @@ TESTSUITE_AT = \
 	tests/testsuite.at \
 	tests/lcov-pre.at \
 	tests/library.at \
+	tests/json.at \
 	tests/timeval.at \
 	tests/lockfile.at \
 	tests/stp.at \
@@ -60,6 +61,10 @@ noinst_PROGRAMS += tests/test-hmap
 tests_test_hmap_SOURCES = tests/test-hmap.c
 tests_test_hmap_LDADD = lib/libopenvswitch.a
 
+noinst_PROGRAMS += tests/test-json
+tests_test_json_SOURCES = tests/test-json.c
+tests_test_json_LDADD =
lib/libopenvswitch.a + noinst_PROGRAMS += tests/test-list tests_test_list_SOURCES = tests/test-list.c tests_test_list_LDADD = lib/libopenvswitch.a diff --git a/tests/json.at b/tests/json.at new file mode 100644 index 00000000..54d4f8cf --- /dev/null +++ b/tests/json.at @@ -0,0 +1,297 @@ +m4_define([JSON_CHECK_POSITIVE], + [AT_SETUP([$1]) + AT_KEYWORDS([json positive]) + AT_CHECK([printf %s "AS_ESCAPE([$2])" > input]) + OVS_CHECK_LCOV([test-json $4 input], [0], [$3 +], []) + AT_CLEANUP]) + +m4_define([JSON_CHECK_NEGATIVE], + [AT_SETUP([$1]) + AT_KEYWORDS([json negative]) + AT_CHECK([printf %s "AS_ESCAPE([$2])" > input]) + OVS_CHECK_LCOV([test-json $4 input], [1], [$3 +]) + AT_CLEANUP]) + +AT_BANNER([JSON -- arrays]) + +JSON_CHECK_POSITIVE([empty array], [[ [ ] ]], [[[]]]) +JSON_CHECK_POSITIVE([single-element array], [[ [ 1 ] ]], [[[1]]]) +JSON_CHECK_POSITIVE([2-element array], [[ [ 1, 2 ] ]], [[[1,2]]]) +JSON_CHECK_POSITIVE([many-element array], + [[ [ 1, 2, 3, 4, 5 ] ]], + [[[1,2,3,4,5]]]) +JSON_CHECK_NEGATIVE([missing comma], [[ [ 1, 2, 3 4, 5 ] ]], + [error: syntax error expecting '@:>@' or ',']) +JSON_CHECK_NEGATIVE([trailing comma not allowed], + [[[1,2,]]], [error: syntax error expecting value]) +JSON_CHECK_NEGATIVE([doubled comma not allowed], + [[[1,,2]]], [error: syntax error expecting value]) + +AT_BANNER([JSON -- strings]) + +JSON_CHECK_POSITIVE([empty string], [[[ "" ]]], [[[""]]]) +JSON_CHECK_POSITIVE([1-character strings], + [[[ "a", "b", "c" ]]], + [[["a","b","c"]]]) +JSON_CHECK_POSITIVE([escape sequences], + [[[ " \" \\ \/ \b \f \n \r \t" ]]], + [[[" \" \\ / \b \f \n \r \t"]]]) +JSON_CHECK_POSITIVE([Unicode escape sequences], + [[[ " \u0022 \u005c \u002F \u0008 \u000c \u000A \u000d \u0009" ]]], + [[[" \" \\ / \b \f \n \r \t"]]]) +JSON_CHECK_POSITIVE([surrogate pairs], + [[["\ud834\udd1e"]]], + [[["𝄞"]]]) +JSON_CHECK_NEGATIVE([a string by itself is not valid JSON], ["xxx"], + [error: syntax error at beginning of input]) +JSON_CHECK_NEGATIVE([end of 
line in quoted string], + [[["xxx +"]]], + [error: U+000A must be escaped in quoted string]) +JSON_CHECK_NEGATIVE([formfeed in quoted string], + [[["xxx "]]], + [error: U+000C must be escaped in quoted string]) +JSON_CHECK_NEGATIVE([bad escape in quoted string], + [[["\x12"]]], + [error: bad escape \x]) +JSON_CHECK_NEGATIVE([\u must be followed by 4 hex digits], + [[["\u1x"]]], + [error: malformed \u escape]) +JSON_CHECK_NEGATIVE([isolated leading surrogate not allowed], + [[["\ud834xxx"]]], + [error: malformed escaped surrogate pair]) +JSON_CHECK_NEGATIVE([surrogatess must paired properly], + [[["\ud834\u1234"]]], + [error: second half of escaped surrogate pair is not trailing surrogate]) +JSON_CHECK_NEGATIVE([null bytes not allowed], + [[["\u0000"]]], + [error: null bytes not supported in quoted strings]) + +AT_SETUP([end of input in quoted string]) +AT_KEYWORDS([json negative]) +AT_CHECK([printf '\"xxx' | test-json -], [1], + [error: unexpected end of input in quoted string +]) +AT_CLEANUP + +AT_BANNER([JSON -- objects]) + +JSON_CHECK_POSITIVE([empty object], [[{ }]], [[{}]]) +JSON_CHECK_POSITIVE([simple object], + [[{"b": 2, "a": 1, "c": 3}]], + [[{"a":1,"b":2,"c":3}]]) +JSON_CHECK_NEGATIVE([bad value], [[{"a": }, "b": 2]], + [error: syntax error expecting value]) +JSON_CHECK_NEGATIVE([missing colon], [[{"b": 2, "a" 1, "c": 3}]], + [error: syntax error parsing object expecting ':']) +JSON_CHECK_NEGATIVE([missing comma], [[{"b": 2 "a" 1, "c": 3}]], + [error: syntax error expecting '}' or ',']) +JSON_CHECK_NEGATIVE([trailing comma not allowed], + [[{"b": 2, "a": 1, "c": 3, }]], + [[error: syntax error parsing object expecting string]]) +JSON_CHECK_NEGATIVE([doubled comma not allowed], + [[{"b": 2, "a": 1,, "c": 3}]], + [[error: syntax error parsing object expecting string]]) +JSON_CHECK_NEGATIVE([names must be strings], + [[{1: 2}]], + [[error: syntax error parsing object expecting string]]) + +AT_BANNER([JSON -- literal names]) + +JSON_CHECK_POSITIVE([null], [[[ 
null ]]], [[[null]]]) +JSON_CHECK_POSITIVE([false], [[[ false ]]], [[[false]]]) +JSON_CHECK_POSITIVE([true], [[[ true ]]], [[[true]]]) +JSON_CHECK_NEGATIVE([a literal by itself is not valid JSON], [null], + [error: syntax error at beginning of input]) +JSON_CHECK_NEGATIVE([nullify is invalid], [[[ nullify ]]], + [error: invalid keyword 'nullify']) +JSON_CHECK_NEGATIVE([nubs is invalid], [[[ nubs ]]], + [error: invalid keyword 'nubs']) +JSON_CHECK_NEGATIVE([xxx is invalid], [[[ xxx ]]], + [error: invalid keyword 'xxx']) + +AT_BANNER([JSON -- numbers]) + +JSON_CHECK_POSITIVE( + [integers expressed as reals], + [[[1.0000000000, + 2.00000000000000000000000000000000000, + 2e5, + 2.1234e4, + 2.1230e3, + 0e-10000, + 0e10000]]], + [[[1,2,200000,21234,2123,0,0]]]) +JSON_CHECK_POSITIVE( + [large integers], + [[[9223372036854775807, -9223372036854775808]]], + [[[9223372036854775807,-9223372036854775808]]]) +JSON_CHECK_POSITIVE( + [large integers expressed as reals], + [[[9223372036854775807.0, -9223372036854775808.0, + 92233720.36854775807e11, -9.223372036854775808e18]]], + [[[9223372036854775807,-9223372036854775808,9223372036854775807,-9223372036854775808]]]) +# It seems likely that the following test will fail on some system that +# rounds slightly differently in arithmetic or in printf, but I'd like +# to keep it this way until we run into such a system. 
+JSON_CHECK_POSITIVE( + [large integers that overflow to reals], + [[[9223372036854775807000, -92233720368547758080000]]], + [[[9.22337203685478e+21,-9.22337203685478e+22]]]) + +JSON_CHECK_POSITIVE( + [negative zero], + [[[-0, -0.0, 1e-9999, -1e-9999]]], + [[[0,0,0,0]]]) + +JSON_CHECK_POSITIVE( + [reals], + [[[0.0, 1.0, 2.0, 3.0, 3.5, 81.250]]], + [[[0,1,2,3,3.5,81.25]]]) +JSON_CHECK_POSITIVE( + [scientific notation], + [[[1e3, 1E3, 2.5E2, 1e+3, 125e-3, 3.125e-2, 3125e-05, 1.525878906e-5]]], + [[[1000,1000,250,1000,0.125,0.03125,0.03125,1.525878906e-05]]]) +JSON_CHECK_POSITIVE( + [negative reals], + [[[-0, -1.0, -2.0, -3.0, -3.5, -8.1250]]], + [[[0,-1,-2,-3,-3.5,-8.125]]]) +JSON_CHECK_POSITIVE( + [negative scientific notation], + [[[-1e3, -1E3, -2.5E2, -1e+3, -125e-3, -3.125e-2, -3125e-05, -1.525878906e-5]]], + [[[-1000,-1000,-250,-1000,-0.125,-0.03125,-0.03125,-1.525878906e-05]]]) +JSON_CHECK_POSITIVE( + [1e-9999 underflows to 0], + [[[1e-9999]]], + [[[0]]]) +JSON_CHECK_NEGATIVE([a number by itself is not valid JSON], [1], + [error: syntax error at beginning of input]) +JSON_CHECK_NEGATIVE( + [leading zeros not allowed], + [[[0123]]], + [error: leading zeros not allowed]) +JSON_CHECK_NEGATIVE( + [1e9999 is too big], + [[[1e9999]]], + [error: number outside valid range]) +JSON_CHECK_NEGATIVE( + [exponent bigger than INT_MAX], + [[[1e9999999999999999999]]], + [error: exponent outside valid range]) +JSON_CHECK_NEGATIVE( + [decimal point must be followed by digit], + [[[1.]]], + [error: decimal point must be followed by digit]) +JSON_CHECK_NEGATIVE( + [exponent must contain at least one digit (1)], + [[[1e]]], + [error: exponent must contain at least one digit]) +JSON_CHECK_NEGATIVE( + [exponent must contain at least one digit (2)], + [[[1e+]]], + [error: exponent must contain at least one digit]) +JSON_CHECK_NEGATIVE( + [exponent must contain at least one digit (3)], + [[[1e-]]], + [error: exponent must contain at least one digit]) + +AT_BANNER([JSON -- RFC 4627 
examples]) + +JSON_CHECK_POSITIVE([RFC 4267 object example], +[[{ + "Image": { + "Width": 800, + "Height": 600, + "Title": "View from 15th Floor", + "Thumbnail": { + "Url": "http://www.example.com/image/481989943", + "Height": 125, + "Width": "100" + }, + "IDs": [116, 943, 234, 38793] + } +}]], +[[{"Image":{"Height":600,"IDs":[116,943,234,38793],"Thumbnail":{"Height":125,"Url":"http://www.example.com/image/481989943","Width":"100"},"Title":"View from 15th Floor","Width":800}}]]) + +JSON_CHECK_POSITIVE([RFC 4267 array example], +[[[ + { + "precision": "zip", + "Latitude": 37.7668, + "Longitude": -122.3959, + "Address": "", + "City": "SAN FRANCISCO", + "State": "CA", + "Zip": "94107", + "Country": "US" + }, + { + "precision": "zip", + "Latitude": 37.371991, + "Longitude": -122.026020, + "Address": "", + "City": "SUNNYVALE", + "State": "CA", + "Zip": "94085", + "Country": "US" + } +]]], +[[[{"Address":"","City":"SAN FRANCISCO","Country":"US","Latitude":37.7668,"Longitude":-122.3959,"State":"CA","Zip":"94107","precision":"zip"},{"Address":"","City":"SUNNYVALE","Country":"US","Latitude":37.371991,"Longitude":-122.02602,"State":"CA","Zip":"94085","precision":"zip"}]]]) + +AT_BANNER([JSON -- pathological cases]) + +JSON_CHECK_NEGATIVE([trailing garbage], [[[1]null]], + [error: trailing garbage at end of input]) +JSON_CHECK_NEGATIVE([formfeeds are not valid white space], + [[[ ]]], [error: invalid character U+000c]) +JSON_CHECK_NEGATIVE([';' is not a valid token], + [;], [error: invalid character ';']) +JSON_CHECK_NEGATIVE([arrays nesting too deep], + [m4_for([i], [0], [1002], [1], [@<:@])dnl + m4_for([i], [0], [1002], [1], [@:>@])], + [error: input exceeds maximum nesting depth 1000]) +JSON_CHECK_NEGATIVE([objects nesting too deep], + [m4_for([i], [0], [1002], [1], [{"x":])dnl + m4_for([i], [0], [1002], [1], [}])], + [error: input exceeds maximum nesting depth 1000]) + +AT_SETUP([input may not be empty]) +AT_KEYWORDS([json negative]) +AT_CHECK([test-json /dev/null], [1], 
[error: empty input stream
+])
+AT_CLEANUP
+
+AT_BANNER([JSON -- multiple inputs])
+
+JSON_CHECK_POSITIVE([multiple adjacent objects], [[{}{}{}]], [[{}
+{}
+{}]],
+  [--multiple])
+
+JSON_CHECK_POSITIVE([multiple space-separated objects], [[{} {} {}]], [[{}
+{}
+{}]],
+  [--multiple])
+
+JSON_CHECK_POSITIVE([multiple objects on separate lines], [[{}
+{}
+{}]], [[{}
+{}
+{}]],
+  [--multiple])
+
+JSON_CHECK_POSITIVE([multiple objects and arrays], [[{}[]{}[]]], [[{}
+[]
+{}
+[]]],
+  [--multiple])
+
+JSON_CHECK_NEGATIVE([garbage between multiple objects], [[{}x{}]], [[{}
+error: invalid keyword 'x'
+{}]], [--multiple])
+
+JSON_CHECK_NEGATIVE([garbage after multiple objects], [[{}{}x]], [[{}
+{}
+error: invalid keyword 'x']], [--multiple])
diff --git a/tests/test-json.c b/tests/test-json.c
new file mode 100644
index 00000000..bb9fadb0
--- /dev/null
+++ b/tests/test-json.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2009 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "json.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+
+#include "util.h"
+
+/* --pretty: If set, the JSON output is pretty-printed, instead of printed as
+ * compactly as possible. */
+static int pretty = 0;
+
+/* --multiple: If set, the input is a sequence of JSON objects or arrays,
+ * instead of exactly one object or array. */
+static int multiple = 0;
+
+/* Prints 'json' on stdout and destroys it. A JSON_STRING is printed as an
+ * "error: ..." line; any other value is serialized with json_to_string().
+ * Returns false if 'json' was a JSON_STRING, otherwise true. */
+static bool
+print_and_free_json(struct json *json)
+{
+    bool ok;
+    if (json->type == JSON_STRING) {
+        printf("error: %s\n", json->u.string);
+        ok = false;
+    } else {
+        char *s = json_to_string(json, JSSF_SORT | (pretty ? JSSF_PRETTY : 0));
+        puts(s);
+        free(s);
+        ok = true;
+    }
+    json_destroy(json);
+    return ok;
+}
+
+/* Reads up to 'buffer_size' bytes from 'file' into 'buffer', stores the
+ * number of bytes read in '*n', and resets '*used' to 0. Returns true if any
+ * bytes were read, otherwise false. Calls ovs_fatal() on a read error. */
+static bool
+refill(FILE *file, void *buffer, size_t buffer_size, size_t *n, size_t *used)
+{
+    *used = 0;
+    if (feof(file)) {
+        *n = 0;
+        return false;
+    } else {
+        *n = fread(buffer, 1, buffer_size, file);
+        if (ferror(file)) {
+            ovs_fatal(errno, "Error reading input file");
+        }
+        return *n > 0;
+    }
+}
+
+/* Feeds the contents of 'input_file' to a series of JSON parsers, skipping
+ * white space between values and printing each result with
+ * print_and_free_json(). Returns true only if every result printed as OK. */
+static bool
+parse_multiple(const char *input_file)
+{
+    struct json_parser *parser;
+    char buffer[BUFSIZ];
+    size_t n, used;
+    FILE *file;
+    bool ok;
+
+    file = fopen(input_file, "r");
+    if (!file) {
+        ovs_fatal(errno, "Cannot open \"%s\"", input_file);
+    }
+
+    parser = NULL;
+    n = used = 0;
+    ok = true;
+    while (used < n || refill(file, buffer, sizeof buffer, &n, &used)) {
+        if (!parser && isspace((unsigned char) buffer[used])) {
+            /* Skip white space. */
+            used++;
+        } else {
+            if (!parser) {
+                parser = json_parser_create(0);
+            }
+
+            used = n - json_parser_feed(parser, &buffer[used], n - used);
+            if (used < n) {             /* Parser stopped before buffer end. */
+                if (!print_and_free_json(json_parser_finish(parser))) {
+                    ok = false;
+                }
+                parser = NULL;
+            }
+        }
+    }
+    if (parser) {                       /* Input ended inside a value. */
+        if (!print_and_free_json(json_parser_finish(parser))) {
+            ok = false;
+        }
+    }
+    fclose(file);
+    return ok;
+}
+
+int
+main(int argc, char *argv[])
+{
+    const char *input_file;
+    bool ok;
+
+    set_program_name(argv[0]);
+
+    for (;;) {
+        static const struct option options[] = {
+            {"pretty", no_argument, &pretty, 1},
+            {"multiple", no_argument, &multiple, 1},
+            {0, 0, 0, 0},           /* Terminator required by getopt_long(). */
+        };
+        int option_index = 0;
+        int c = getopt_long (argc, argv, "", options, &option_index);
+
+        if (c == -1) {
+            break;
+        }
+        switch (c) {
+        case 0:                     /* Flag already stored by getopt_long(). */
+            break;
+
+        case '?':
+            exit(1);
+
+        default:
+            abort();
+        }
+    }
+
+    if (argc - optind != 1) {
+        ovs_fatal(0, "usage: %s [--pretty] [--multiple] INPUT.json",
+                  program_name);
+    }
+
+    input_file = argv[optind];
+    if (!strcmp(input_file, "-")) {
+        input_file = "/dev/stdin";
+    }
+
+    if (multiple) {
+        ok = parse_multiple(input_file);
+    } else {
+        ok = print_and_free_json(json_from_file(input_file));
+    }
+
+    return !ok;
+}
diff --git a/tests/testsuite.at b/tests/testsuite.at
index b6083493..84f72939 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -19,6 +19,7 @@ AT_TESTED([ovs-vsctl])
 
 m4_include([tests/lcov-pre.at])
 m4_include([tests/library.at])
+m4_include([tests/json.at])
 m4_include([tests/timeval.at])
 m4_include([tests/lockfile.at])
 m4_include([tests/stp.at])
-- 
2.30.2