From 03308f6c2746a756a8404d00caa20f8f35248167 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Mon, 17 May 2010 17:50:01 -0300 Subject: [PATCH 01/10] json-lexer: Initialize 'x' and 'y' The 'lexer' variable is passed by the caller, so it can contain anything (e.g. garbage). Signed-off-by: Luiz Capitulino --- json-lexer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/json-lexer.c b/json-lexer.c index 9d649205a7..0b145d125c 100644 --- a/json-lexer.c +++ b/json-lexer.c @@ -275,6 +275,7 @@ void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func) lexer->emit = func; lexer->state = IN_START; lexer->token = qstring_new(); + lexer->x = lexer->y = 0; } static int json_lexer_feed_char(JSONLexer *lexer, char ch) From 1041ba7a14260b490f3062f428b014b415a23f38 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Wed, 19 May 2010 16:57:28 -0300 Subject: [PATCH 02/10] json-lexer: Handle missing escapes The JSON escape sequences "\/" and "\\" are valid and should be handled. Signed-off-by: Luiz Capitulino --- json-lexer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/json-lexer.c b/json-lexer.c index 0b145d125c..5cc7e6c9a9 100644 --- a/json-lexer.c +++ b/json-lexer.c @@ -97,6 +97,8 @@ static const uint8_t json_lexer[][256] = { ['n'] = IN_DQ_STRING, ['r'] = IN_DQ_STRING, ['t'] = IN_DQ_STRING, + ['/'] = IN_DQ_STRING, + ['\\'] = IN_DQ_STRING, ['\''] = IN_DQ_STRING, ['\"'] = IN_DQ_STRING, ['u'] = IN_DQ_UCODE0, @@ -134,6 +136,8 @@ static const uint8_t json_lexer[][256] = { ['n'] = IN_SQ_STRING, ['r'] = IN_SQ_STRING, ['t'] = IN_SQ_STRING, + ['/'] = IN_DQ_STRING, + ['\\'] = IN_DQ_STRING, ['\''] = IN_SQ_STRING, ['\"'] = IN_SQ_STRING, ['u'] = IN_SQ_UCODE0, From bd0326950f99faa8e50cf52499dd1af42829aa93 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Wed, 19 May 2010 17:06:15 -0300 Subject: [PATCH 03/10] qjson: Handle "\f" It's valid JSON and should be handled. 
Signed-off-by: Luiz Capitulino --- json-parser.c | 4 ++++ qjson.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/json-parser.c b/json-parser.c index b55d76373e..83212bc65b 100644 --- a/json-parser.c +++ b/json-parser.c @@ -206,6 +206,10 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token qstring_append(str, "\b"); ptr++; break; + case 'f': + qstring_append(str, "\f"); + ptr++; + break; case 'n': qstring_append(str, "\n"); ptr++; diff --git a/qjson.c b/qjson.c index 483c6675db..e4ee433760 100644 --- a/qjson.c +++ b/qjson.c @@ -158,6 +158,9 @@ static void to_json(const QObject *obj, QString *str) case '\b': qstring_append(str, "\\b"); break; + case '\f': + qstring_append(str, "\\f"); + break; case '\n': qstring_append(str, "\\n"); break; From d22b0bd7fc85f991275ffc60a550ed42f4c1b04c Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Wed, 19 May 2010 17:08:37 -0300 Subject: [PATCH 04/10] check-qjson: Add more escape tests While there make the fail_unless() calls print error messages. IMPORTANT: The test for "\/" is failing, don't know why. 
Signed-off-by: Luiz Capitulino --- check-qjson.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/check-qjson.c b/check-qjson.c index 109e77753e..d3657998f7 100644 --- a/check-qjson.c +++ b/check-qjson.c @@ -29,6 +29,13 @@ START_TEST(escaped_string) const char *decoded; int skip; } test_cases[] = { + { "\"\\b\"", "\b" }, + { "\"\\f\"", "\f" }, + { "\"\\n\"", "\n" }, + { "\"\\r\"", "\r" }, + { "\"\\t\"", "\t" }, + { "\"\\/\"", "\\/" }, + { "\"\\\\\"", "\\" }, { "\"\\\"\"", "\"" }, { "\"hello world \\\"embedded string\\\"\"", "hello world \"embedded string\"" }, @@ -49,11 +56,14 @@ START_TEST(escaped_string) fail_unless(qobject_type(obj) == QTYPE_QSTRING); str = qobject_to_qstring(obj); - fail_unless(strcmp(qstring_get_str(str), test_cases[i].decoded) == 0); + fail_unless(strcmp(qstring_get_str(str), test_cases[i].decoded) == 0, + "%s != %s\n", qstring_get_str(str), test_cases[i].decoded); if (test_cases[i].skip == 0) { str = qobject_to_json(obj); - fail_unless(strcmp(qstring_get_str(str), test_cases[i].encoded) == 0); + fail_unless(strcmp(qstring_get_str(str),test_cases[i].encoded) == 0, + "%s != %s\n", qstring_get_str(str), + test_cases[i].encoded); qobject_decref(obj); } From ecb50f5fefe7e1360818bd199218a295d87df042 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Mon, 17 May 2010 17:59:00 -0300 Subject: [PATCH 05/10] json-lexer: Drop 'buf' QString supports adding a single char, 'buf' is unneeded. 
Signed-off-by: Luiz Capitulino --- json-lexer.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/json-lexer.c b/json-lexer.c index 5cc7e6c9a9..1d9b81fef4 100644 --- a/json-lexer.c +++ b/json-lexer.c @@ -284,8 +284,6 @@ void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func) static int json_lexer_feed_char(JSONLexer *lexer, char ch) { - char buf[2]; - lexer->x++; if (ch == '\n') { lexer->x = 0; @@ -313,10 +311,7 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch) break; } - buf[0] = ch; - buf[1] = 0; - - qstring_append(lexer->token, buf); + qstring_append_chr(lexer->token, ch); return 0; } From 2e89c0688993447ed62a1832dcd97c19c878a382 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Wed, 19 May 2010 17:17:05 -0300 Subject: [PATCH 06/10] json-streamer: Don't use qdict_put_obj() It's not needed; use qdict_put() instead and get cleaner code. Signed-off-by: Luiz Capitulino --- json-streamer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/json-streamer.c b/json-streamer.c index 610ffea6db..f7e7a68d40 100644 --- a/json-streamer.c +++ b/json-streamer.c @@ -43,11 +43,11 @@ static void json_message_process_token(JSONLexer *lexer, QString *token, JSONTok } dict = qdict_new(); - qdict_put_obj(dict, "type", QOBJECT(qint_from_int(type))); + qdict_put(dict, "type", qint_from_int(type)); QINCREF(token); - qdict_put_obj(dict, "token", QOBJECT(token)); - qdict_put_obj(dict, "x", QOBJECT(qint_from_int(x))); - qdict_put_obj(dict, "y", QOBJECT(qint_from_int(y))); + qdict_put(dict, "token", token); + qdict_put(dict, "x", qint_from_int(x)); + qdict_put(dict, "y", qint_from_int(y)); qlist_append(parser->tokens, dict); From 7f8fca7c8add770d6533c44d2d001c0442ed0371 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 24 May 2010 09:39:51 +0200 Subject: [PATCH 07/10] add some tests for invalid JSON Signed-off-by: Paolo Bonzini Signed-off-by: Luiz Capitulino --- check-qjson.c | 98 
++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/check-qjson.c b/check-qjson.c index d3657998f7..2e52450d12 100644 --- a/check-qjson.c +++ b/check-qjson.c @@ -638,11 +638,90 @@ START_TEST(simple_varargs) } END_TEST +START_TEST(empty_input) +{ + QObject *obj = qobject_from_json(""); + fail_unless(obj == NULL); +} +END_TEST + +START_TEST(unterminated_string) +{ + QObject *obj = qobject_from_json("\"abc"); + fail_unless(obj == NULL); +} +END_TEST + +START_TEST(unterminated_sq_string) +{ + QObject *obj = qobject_from_json("'abc"); + fail_unless(obj == NULL); +} +END_TEST + +START_TEST(unterminated_escape) +{ + QObject *obj = qobject_from_json("\"abc\\\""); + fail_unless(obj == NULL); +} +END_TEST + +START_TEST(unterminated_array) +{ + QObject *obj = qobject_from_json("[32"); + fail_unless(obj == NULL); +} +END_TEST + +START_TEST(unterminated_array_comma) +{ + QObject *obj = qobject_from_json("[32,"); + fail_unless(obj == NULL); +} +END_TEST + +START_TEST(invalid_array_comma) +{ + QObject *obj = qobject_from_json("[32,}"); + fail_unless(obj == NULL); +} +END_TEST + +START_TEST(unterminated_dict) +{ + QObject *obj = qobject_from_json("{'abc':32"); + fail_unless(obj == NULL); +} +END_TEST + +START_TEST(unterminated_dict_comma) +{ + QObject *obj = qobject_from_json("{'abc':32,"); + fail_unless(obj == NULL); +} +END_TEST + +#if 0 +START_TEST(invalid_dict_comma) +{ + QObject *obj = qobject_from_json("{'abc':32,}"); + fail_unless(obj == NULL); +} +END_TEST + +START_TEST(unterminated_literal) +{ + QObject *obj = qobject_from_json("nul"); + fail_unless(obj == NULL); +} +END_TEST +#endif + static Suite *qjson_suite(void) { Suite *suite; TCase *string_literals, *number_literals, *keyword_literals; - TCase *dicts, *lists, *whitespace, *varargs; + TCase *dicts, *lists, *whitespace, *varargs, *errors; string_literals = tcase_create("String Literals"); tcase_add_test(string_literals, simple_string); @@ -668,6 +747,22 @@ 
static Suite *qjson_suite(void) varargs = tcase_create("Varargs"); tcase_add_test(varargs, simple_varargs); + errors = tcase_create("Invalid JSON"); + tcase_add_test(errors, empty_input); + tcase_add_test(errors, unterminated_string); + tcase_add_test(errors, unterminated_escape); + tcase_add_test(errors, unterminated_sq_string); + tcase_add_test(errors, unterminated_array); + tcase_add_test(errors, unterminated_array_comma); + tcase_add_test(errors, invalid_array_comma); + tcase_add_test(errors, unterminated_dict); + tcase_add_test(errors, unterminated_dict_comma); +#if 0 + /* FIXME: this print parse error messages on stderr. */ + tcase_add_test(errors, invalid_dict_comma); + tcase_add_test(errors, unterminated_literal); +#endif + suite = suite_create("QJSON test-suite"); suite_add_tcase(suite, string_literals); suite_add_tcase(suite, number_literals); @@ -676,6 +771,7 @@ static Suite *qjson_suite(void) suite_add_tcase(suite, lists); suite_add_tcase(suite, whitespace); suite_add_tcase(suite, varargs); + suite_add_tcase(suite, errors); return suite; } From f7c052747e4b139df16e1d5b7851f4729acc2bb7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 24 May 2010 09:39:52 +0200 Subject: [PATCH 08/10] implement optional lookahead in json lexer Not requiring one extra character when lookahead is not necessary ensures that clients behave properly even if they, for example, send QMP requests without a trailing newline. Signed-off-by: Paolo Bonzini Signed-off-by: Luiz Capitulino --- json-lexer.c | 58 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/json-lexer.c b/json-lexer.c index 1d9b81fef4..5ea64a75a7 100644 --- a/json-lexer.c +++ b/json-lexer.c @@ -65,6 +65,12 @@ enum json_lexer_state { #define TERMINAL(state) [0 ... 0x7F] = (state) +/* Return whether TERMINAL is a terminal state and the transition to it + from OLD_STATE required lookahead. 
This happens whenever the table + below uses the TERMINAL macro. */ +#define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \ + (json_lexer[(old_state)][0] == (terminal)) + static const uint8_t json_lexer[][256] = { [IN_DONE_STRING] = { TERMINAL(JSON_STRING), @@ -284,35 +290,41 @@ void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func) static int json_lexer_feed_char(JSONLexer *lexer, char ch) { + int char_consumed, new_state; + lexer->x++; if (ch == '\n') { lexer->x = 0; lexer->y++; } - lexer->state = json_lexer[lexer->state][(uint8_t)ch]; - - switch (lexer->state) { - case JSON_OPERATOR: - case JSON_ESCAPE: - case JSON_INTEGER: - case JSON_FLOAT: - case JSON_KEYWORD: - case JSON_STRING: - lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y); - case JSON_SKIP: - lexer->state = json_lexer[IN_START][(uint8_t)ch]; - QDECREF(lexer->token); - lexer->token = qstring_new(); - break; - case ERROR: - return -EINVAL; - default: - break; - } - - qstring_append_chr(lexer->token, ch); + do { + new_state = json_lexer[lexer->state][(uint8_t)ch]; + char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state); + if (char_consumed) { + qstring_append_chr(lexer->token, ch); + } + switch (new_state) { + case JSON_OPERATOR: + case JSON_ESCAPE: + case JSON_INTEGER: + case JSON_FLOAT: + case JSON_KEYWORD: + case JSON_STRING: + lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y); + case JSON_SKIP: + QDECREF(lexer->token); + lexer->token = qstring_new(); + new_state = IN_START; + break; + case ERROR: + return -EINVAL; + default: + break; + } + lexer->state = new_state; + } while (!char_consumed); return 0; } @@ -334,7 +346,7 @@ int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size) int json_lexer_flush(JSONLexer *lexer) { - return json_lexer_feed_char(lexer, 0); + return lexer->state == IN_START ? 
0 : json_lexer_feed_char(lexer, 0); } void json_lexer_destroy(JSONLexer *lexer) From 28e91a681a284b02b18cdbeee011430e5d061533 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 24 May 2010 09:39:53 +0200 Subject: [PATCH 09/10] remove unnecessary lookaheads Signed-off-by: Paolo Bonzini Signed-off-by: Luiz Capitulino --- json-lexer.c | 48 ++++++++++++++++-------------------------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/json-lexer.c b/json-lexer.c index 5ea64a75a7..c736f42900 100644 --- a/json-lexer.c +++ b/json-lexer.c @@ -29,7 +29,6 @@ enum json_lexer_state { ERROR = 0, - IN_DONE_STRING, IN_DQ_UCODE3, IN_DQ_UCODE2, IN_DQ_UCODE1, @@ -57,9 +56,7 @@ enum json_lexer_state { IN_ESCAPE_I, IN_ESCAPE_I6, IN_ESCAPE_I64, - IN_ESCAPE_DONE, IN_WHITESPACE, - IN_OPERATOR_DONE, IN_START, }; @@ -72,10 +69,6 @@ enum json_lexer_state { (json_lexer[(old_state)][0] == (terminal)) static const uint8_t json_lexer[][256] = { - [IN_DONE_STRING] = { - TERMINAL(JSON_STRING), - }, - /* double quote string */ [IN_DQ_UCODE3] = { ['0' ... '9'] = IN_DQ_STRING, @@ -112,7 +105,7 @@ static const uint8_t json_lexer[][256] = { [IN_DQ_STRING] = { [1 ... 0xFF] = IN_DQ_STRING, ['\\'] = IN_DQ_STRING_ESCAPE, - ['"'] = IN_DONE_STRING, + ['"'] = JSON_STRING, }, /* single quote string */ @@ -151,7 +144,7 @@ static const uint8_t json_lexer[][256] = { [IN_SQ_STRING] = { [1 ... 
0xFF] = IN_SQ_STRING, ['\\'] = IN_SQ_STRING_ESCAPE, - ['\''] = IN_DONE_STRING, + ['\''] = JSON_STRING, }, /* Zero */ @@ -217,27 +210,18 @@ static const uint8_t json_lexer[][256] = { ['\n'] = IN_WHITESPACE, }, - /* operator */ - [IN_OPERATOR_DONE] = { - TERMINAL(JSON_OPERATOR), - }, - /* escape */ - [IN_ESCAPE_DONE] = { - TERMINAL(JSON_ESCAPE), - }, - [IN_ESCAPE_LL] = { - ['d'] = IN_ESCAPE_DONE, + ['d'] = JSON_ESCAPE, }, [IN_ESCAPE_L] = { - ['d'] = IN_ESCAPE_DONE, + ['d'] = JSON_ESCAPE, ['l'] = IN_ESCAPE_LL, }, [IN_ESCAPE_I64] = { - ['d'] = IN_ESCAPE_DONE, + ['d'] = JSON_ESCAPE, }, [IN_ESCAPE_I6] = { @@ -249,11 +233,11 @@ static const uint8_t json_lexer[][256] = { }, [IN_ESCAPE] = { - ['d'] = IN_ESCAPE_DONE, - ['i'] = IN_ESCAPE_DONE, - ['p'] = IN_ESCAPE_DONE, - ['s'] = IN_ESCAPE_DONE, - ['f'] = IN_ESCAPE_DONE, + ['d'] = JSON_ESCAPE, + ['i'] = JSON_ESCAPE, + ['p'] = JSON_ESCAPE, + ['s'] = JSON_ESCAPE, + ['f'] = JSON_ESCAPE, ['l'] = IN_ESCAPE_L, ['I'] = IN_ESCAPE_I, }, @@ -265,12 +249,12 @@ static const uint8_t json_lexer[][256] = { ['0'] = IN_ZERO, ['1' ... '9'] = IN_NONZERO_NUMBER, ['-'] = IN_NEG_NONZERO_NUMBER, - ['{'] = IN_OPERATOR_DONE, - ['}'] = IN_OPERATOR_DONE, - ['['] = IN_OPERATOR_DONE, - [']'] = IN_OPERATOR_DONE, - [','] = IN_OPERATOR_DONE, - [':'] = IN_OPERATOR_DONE, + ['{'] = JSON_OPERATOR, + ['}'] = JSON_OPERATOR, + ['['] = JSON_OPERATOR, + [']'] = JSON_OPERATOR, + [','] = JSON_OPERATOR, + [':'] = JSON_OPERATOR, ['a' ... 
'z'] = IN_KEYWORD, ['%'] = IN_ESCAPE, [' '] = IN_WHITESPACE, From 02e95918b82efde24db1d759300ec3c61bd694b3 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 2 Jun 2010 09:06:03 +0200 Subject: [PATCH 10/10] hxtool: Fix line number reporting on SQMP/EQMP errors Signed-off-by: Jan Kiszka Signed-off-by: Luiz Capitulino --- hxtool | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hxtool b/hxtool index d499dc08ab..7ca83ed1ff 100644 --- a/hxtool +++ b/hxtool @@ -59,6 +59,7 @@ hxtoqmp() { IFS= flag=0 + line=1 while read -r str; do case "$str" in HXCOMM*) @@ -87,6 +88,7 @@ hxtoqmp() test $flag -eq 1 && echo "$str" ;; esac + line=$((line+1)) done }