Do not copy the source string in the JSON parser. (#1481)

The JSON parser previously required a zero-terminated, writable
copy of the original string. This requirement has been removed
to reduce peak memory consumption.

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
Zoltan Herczeg
2016-12-13 08:36:01 +01:00
committed by GitHub
parent fb2818c137
commit 6904b9bd65
2 changed files with 191 additions and 92 deletions
@@ -29,6 +29,7 @@
#include "jrt.h"
#include "jrt-libc-includes.h"
#include "lit-char-helpers.h"
#include "lit-globals.h"
#ifndef CONFIG_DISABLE_JSON_BUILTIN
@@ -74,15 +75,15 @@ typedef enum
typedef struct
{
ecma_json_token_type_t type; /**< type of the current token */
lit_utf8_byte_t *current_p; /**< current position of the string processed by the parser */
const lit_utf8_byte_t *current_p; /**< current position of the string processed by the parser */
const lit_utf8_byte_t *end_p; /**< end of the string processed by the parser */
/**
* Fields depending on type.
*/
union
{
struct
{
const lit_utf8_byte_t *start_p; /**< when type is string_token, it contains the start of the string */
lit_utf8_size_t size; /**< when type is string_token, it contains the size of the string */
} string;
ecma_string_t *string_p; /**< when type is string_token it contains the string */
ecma_number_t number; /**< when type is number_token, it contains the value of the number */
} u;
} ecma_json_token_t;
@@ -93,19 +94,21 @@ typedef struct
* @return true if the match is successful
*/
static bool
ecma_builtin_json_check_id (lit_utf8_byte_t *string_p, /**< start position */
const char *id_p) /**< string identifier */
ecma_builtin_json_check_id (const lit_utf8_byte_t *string_p, /**< start position */
const lit_utf8_byte_t *end_p, /**< input end */
const char *string_id_p) /**< string identifier */
{
/*
* String comparison must not depend on lit_utf8_byte_t definition.
*/
JERRY_ASSERT (*string_p == *id_p);
JERRY_ASSERT (*string_p == *string_id_p);
do
string_p++;
string_id_p++;
while (string_p < end_p)
{
string_p++;
id_p++;
if (*id_p == LIT_CHAR_NULL)
if (*string_id_p == LIT_CHAR_NULL)
{
/* JSON lexer accepts input strings such as falsenull and
* returns with multiple tokens (false and null in this case).
@@ -116,10 +119,17 @@ ecma_builtin_json_check_id (lit_utf8_byte_t *string_p, /**< start position */
* type. */
return true;
}
}
while (*string_p == *id_p);
return false;
if (*string_p != *string_id_p)
{
return false;
}
string_p++;
string_id_p++;
}
return (*string_id_p == LIT_CHAR_NULL);
} /* ecma_builtin_json_check_id */
/**
@@ -128,51 +138,40 @@ ecma_builtin_json_check_id (lit_utf8_byte_t *string_p, /**< start position */
static void
ecma_builtin_json_parse_string (ecma_json_token_t *token_p) /**< token argument */
{
lit_utf8_byte_t *current_p = token_p->current_p;
lit_utf8_byte_t *write_p = current_p;
const lit_utf8_byte_t *current_p = token_p->current_p;
const lit_utf8_byte_t *end_p = token_p->end_p;
bool has_escape_sequence = false;
lit_utf8_size_t buffer_size = 0;
token_p->u.string.start_p = current_p;
while (*current_p != LIT_CHAR_DOUBLE_QUOTE)
/* First step: syntax checking. */
while (true)
{
if (*current_p <= 0x1f)
if (current_p >= end_p || *current_p <= 0x1f)
{
return;
}
if (*current_p == LIT_CHAR_DOUBLE_QUOTE)
{
break;
}
if (*current_p == LIT_CHAR_BACKSLASH)
{
current_p++;
has_escape_sequence = true;
switch (*current_p)
{
case LIT_CHAR_DOUBLE_QUOTE:
case LIT_CHAR_SLASH:
case LIT_CHAR_BACKSLASH:
{
break;
}
case LIT_CHAR_LOWERCASE_B:
{
*current_p = LIT_CHAR_BS;
break;
}
case LIT_CHAR_LOWERCASE_F:
{
*current_p = LIT_CHAR_FF;
break;
}
case LIT_CHAR_LOWERCASE_N:
{
*current_p = LIT_CHAR_LF;
break;
}
case LIT_CHAR_LOWERCASE_R:
{
*current_p = LIT_CHAR_CR;
break;
}
case LIT_CHAR_LOWERCASE_T:
{
*current_p = LIT_CHAR_TAB;
break;
}
case LIT_CHAR_LOWERCASE_U:
@@ -185,7 +184,9 @@ ecma_builtin_json_parse_string (ecma_json_token_t *token_p) /**< token argument
}
current_p += 5;
write_p += lit_code_unit_to_utf8 (code_unit, write_p);
lit_utf8_byte_t char_buffer[LIT_UTF8_MAX_BYTES_IN_CODE_UNIT];
buffer_size += lit_code_unit_to_utf8 (code_unit, char_buffer);
continue;
}
default:
@@ -194,12 +195,92 @@ ecma_builtin_json_parse_string (ecma_json_token_t *token_p) /**< token argument
}
}
}
buffer_size++;
current_p++;
}
token_p->type = string_token;
if (!has_escape_sequence)
{
token_p->u.string_p = ecma_new_ecma_string_from_utf8 (token_p->current_p, buffer_size);
token_p->current_p = current_p + 1;
return;
}
JMEM_DEFINE_LOCAL_ARRAY (buffer_p, buffer_size, lit_utf8_byte_t);
lit_utf8_byte_t *write_p = buffer_p;
current_p = token_p->current_p;
while (*current_p != LIT_CHAR_DOUBLE_QUOTE)
{
if (*current_p == LIT_CHAR_BACKSLASH)
{
current_p++;
lit_utf8_byte_t special_character;
switch (*current_p)
{
case LIT_CHAR_LOWERCASE_B:
{
special_character = LIT_CHAR_BS;
break;
}
case LIT_CHAR_LOWERCASE_F:
{
special_character = LIT_CHAR_FF;
break;
}
case LIT_CHAR_LOWERCASE_N:
{
special_character = LIT_CHAR_LF;
break;
}
case LIT_CHAR_LOWERCASE_R:
{
special_character = LIT_CHAR_CR;
break;
}
case LIT_CHAR_LOWERCASE_T:
{
special_character = LIT_CHAR_TAB;
break;
}
case LIT_CHAR_LOWERCASE_U:
{
ecma_char_t code_unit;
lit_read_code_unit_from_hex (current_p + 1, 4, &code_unit);
current_p += 5;
write_p += lit_code_unit_to_utf8 (code_unit, write_p);
continue;
}
default:
{
special_character = *current_p;
break;
}
}
*write_p++ = special_character;
current_p++;
continue;
}
*write_p++ = *current_p++;
}
token_p->u.string.size = (lit_utf8_size_t) (write_p - token_p->u.string.start_p);
JERRY_ASSERT (write_p == buffer_p + buffer_size);
token_p->u.string_p = ecma_new_ecma_string_from_utf8 (buffer_p, buffer_size);
JMEM_FINALIZE_LOCAL_ARRAY (buffer_p);
token_p->current_p = current_p + 1;
token_p->type = string_token;
} /* ecma_builtin_json_parse_string */
/**
@@ -208,18 +289,27 @@ ecma_builtin_json_parse_string (ecma_json_token_t *token_p) /**< token argument
static void
ecma_builtin_json_parse_number (ecma_json_token_t *token_p) /**< token argument */
{
lit_utf8_byte_t *current_p = token_p->current_p;
lit_utf8_byte_t *start_p = current_p;
const lit_utf8_byte_t *current_p = token_p->current_p;
const lit_utf8_byte_t *end_p = token_p->end_p;
const lit_utf8_byte_t *start_p = current_p;
JERRY_ASSERT (current_p < end_p);
if (*current_p == LIT_CHAR_MINUS)
{
current_p++;
}
if (current_p >= end_p)
{
return;
}
if (*current_p == LIT_CHAR_0)
{
current_p++;
if (lit_char_is_decimal_digit (*current_p))
if (current_p < end_p && lit_char_is_decimal_digit (*current_p))
{
return;
}
@@ -230,13 +320,14 @@ ecma_builtin_json_parse_number (ecma_json_token_t *token_p) /**< token argument
{
current_p++;
}
while (lit_char_is_decimal_digit (*current_p));
while (current_p < end_p && lit_char_is_decimal_digit (*current_p));
}
if (*current_p == LIT_CHAR_DOT)
if (current_p < end_p && *current_p == LIT_CHAR_DOT)
{
current_p++;
if (!lit_char_is_decimal_digit (*current_p))
if (current_p >= end_p || !lit_char_is_decimal_digit (*current_p))
{
return;
}
@@ -245,18 +336,19 @@ ecma_builtin_json_parse_number (ecma_json_token_t *token_p) /**< token argument
{
current_p++;
}
while (lit_char_is_decimal_digit (*current_p));
while (current_p < end_p && lit_char_is_decimal_digit (*current_p));
}
if (*current_p == LIT_CHAR_LOWERCASE_E || *current_p == LIT_CHAR_UPPERCASE_E)
if (current_p < end_p && (*current_p == LIT_CHAR_LOWERCASE_E || *current_p == LIT_CHAR_UPPERCASE_E))
{
current_p++;
if (*current_p == LIT_CHAR_PLUS || *current_p == LIT_CHAR_MINUS)
if (current_p < end_p && (*current_p == LIT_CHAR_PLUS || *current_p == LIT_CHAR_MINUS))
{
current_p++;
}
if (!lit_char_is_decimal_digit (*current_p))
if (current_p >= end_p || !lit_char_is_decimal_digit (*current_p))
{
return;
}
@@ -265,8 +357,9 @@ ecma_builtin_json_parse_number (ecma_json_token_t *token_p) /**< token argument
{
current_p++;
}
while (lit_char_is_decimal_digit (*current_p));
while (current_p < end_p && lit_char_is_decimal_digit (*current_p));
}
token_p->type = number_token;
token_p->u.number = ecma_utf8_string_to_number (start_p, (lit_utf8_size_t) (current_p - start_p));
@@ -280,12 +373,14 @@ ecma_builtin_json_parse_number (ecma_json_token_t *token_p) /**< token argument
* argument and advances the string pointer.
*/
static void
ecma_builtin_json_parse_next_token (ecma_json_token_t *token_p) /**< token argument */
ecma_builtin_json_parse_next_token (ecma_json_token_t *token_p, /**< token argument */
bool parse_string) /**< strings are allowed to parse */
{
lit_utf8_byte_t *current_p = token_p->current_p;
const lit_utf8_byte_t *current_p = token_p->current_p;
const lit_utf8_byte_t *end_p = token_p->end_p;
token_p->type = invalid_token;
while (current_p < token_p->end_p
while (current_p < end_p
&& (*current_p == LIT_CHAR_SP
|| *current_p == LIT_CHAR_CR
|| *current_p == LIT_CHAR_LF
@@ -294,7 +389,7 @@ ecma_builtin_json_parse_next_token (ecma_json_token_t *token_p) /**< token argum
current_p++;
}
if (current_p == token_p->end_p)
if (current_p == end_p)
{
token_p->type = end_token;
return;
@@ -334,13 +429,16 @@ ecma_builtin_json_parse_next_token (ecma_json_token_t *token_p) /**< token argum
}
case LIT_CHAR_DOUBLE_QUOTE:
{
token_p->current_p = current_p + 1;
ecma_builtin_json_parse_string (token_p);
if (parse_string)
{
token_p->current_p = current_p + 1;
ecma_builtin_json_parse_string (token_p);
}
return;
}
case LIT_CHAR_LOWERCASE_N:
{
if (ecma_builtin_json_check_id (current_p, "null"))
if (ecma_builtin_json_check_id (current_p, token_p->end_p, "null"))
{
token_p->type = null_token;
token_p->current_p = current_p + 4;
@@ -350,7 +448,7 @@ ecma_builtin_json_parse_next_token (ecma_json_token_t *token_p) /**< token argum
}
case LIT_CHAR_LOWERCASE_T:
{
if (ecma_builtin_json_check_id (current_p, "true"))
if (ecma_builtin_json_check_id (current_p, token_p->end_p, "true"))
{
token_p->type = true_token;
token_p->current_p = current_p + 4;
@@ -360,7 +458,7 @@ ecma_builtin_json_parse_next_token (ecma_json_token_t *token_p) /**< token argum
}
case LIT_CHAR_LOWERCASE_F:
{
if (ecma_builtin_json_check_id (current_p, "false"))
if (ecma_builtin_json_check_id (current_p, token_p->end_p, "false"))
{
token_p->type = false_token;
token_p->current_p = current_p + 5;
@@ -391,24 +489,29 @@ ecma_builtin_json_parse_next_token (ecma_json_token_t *token_p) /**< token argum
static bool
ecma_builtin_json_check_right_square_token (ecma_json_token_t *token_p) /**< token argument */
{
lit_utf8_byte_t *current_p = token_p->current_p;
const lit_utf8_byte_t *current_p = token_p->current_p;
const lit_utf8_byte_t *end_p = token_p->end_p;
/*
* No need for end check since the string is zero terminated.
*/
while (*current_p == LIT_CHAR_SP || *current_p == LIT_CHAR_CR
|| *current_p == LIT_CHAR_LF || *current_p == LIT_CHAR_TAB)
while (current_p < end_p
&& (*current_p == LIT_CHAR_SP
|| *current_p == LIT_CHAR_CR
|| *current_p == LIT_CHAR_LF
|| *current_p == LIT_CHAR_TAB))
{
current_p++;
}
token_p->current_p = current_p;
if (*current_p == LIT_CHAR_RIGHT_SQUARE)
if (current_p < end_p && *current_p == LIT_CHAR_RIGHT_SQUARE)
{
token_p->current_p = current_p + 1;
return true;
}
return false;
} /* ecma_builtin_json_check_right_square_token */
@@ -444,7 +547,7 @@ ecma_builtin_json_define_value_property (ecma_object_t *obj_p, /**< this object
static ecma_value_t
ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument */
{
ecma_builtin_json_parse_next_token (token_p);
ecma_builtin_json_parse_next_token (token_p, true);
switch (token_p->type)
{
@@ -454,8 +557,7 @@ ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument *
}
case string_token:
{
ecma_string_t *string_p = ecma_new_ecma_string_from_utf8 (token_p->u.string.start_p, token_p->u.string.size);
return ecma_make_string_value (string_p);
return ecma_make_string_value (token_p->u.string_p);
}
case null_token:
{
@@ -476,7 +578,7 @@ ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument *
while (true)
{
ecma_builtin_json_parse_next_token (token_p);
ecma_builtin_json_parse_next_token (token_p, !parse_comma);
if (token_p->type == right_brace_token)
{
@@ -489,7 +591,8 @@ ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument *
{
break;
}
ecma_builtin_json_parse_next_token (token_p);
ecma_builtin_json_parse_next_token (token_p, true);
}
if (token_p->type != string_token)
@@ -497,12 +600,13 @@ ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument *
break;
}
const lit_utf8_byte_t *string_start_p = token_p->u.string.start_p;
lit_utf8_size_t string_size = token_p->u.string.size;
ecma_builtin_json_parse_next_token (token_p);
ecma_string_t *name_p = token_p->u.string_p;
ecma_builtin_json_parse_next_token (token_p, false);
if (token_p->type != colon_token)
{
ecma_deref_ecma_string (name_p);
break;
}
@@ -510,13 +614,14 @@ ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument *
if (ecma_is_value_undefined (value))
{
ecma_deref_ecma_string (name_p);
break;
}
ecma_string_t *name_p = ecma_new_ecma_string_from_utf8 (string_start_p, string_size);
ecma_builtin_json_define_value_property (object_p, name_p, value);
ecma_deref_ecma_string (name_p);
ecma_free_value (value);
parse_comma = true;
}
@@ -545,7 +650,8 @@ ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument *
if (parse_comma)
{
ecma_builtin_json_parse_next_token (token_p);
ecma_builtin_json_parse_next_token (token_p, false);
if (token_p->type != comma_token)
{
break;
@@ -707,15 +813,8 @@ ecma_builtin_json_parse (ecma_value_t this_arg, /**< 'this' argument */
ret_value);
const ecma_string_t *string_p = ecma_get_string_from_value (string);
const ecma_length_t string_size = (ecma_length_t) ecma_string_get_size (string_p);
const lit_utf8_size_t buffer_size = sizeof (lit_utf8_byte_t) * (string_size + 1);
JMEM_DEFINE_LOCAL_ARRAY (str_start_p, buffer_size, lit_utf8_byte_t);
const lit_utf8_size_t sz = ecma_string_copy_to_utf8_buffer (string_p, str_start_p, buffer_size);
JERRY_ASSERT (sz == string_size);
str_start_p[string_size] = LIT_BYTE_NULL;
ECMA_STRING_TO_UTF8_STRING (string_p, str_start_p, string_size);
ecma_json_token_t token;
token.current_p = str_start_p;
@@ -725,7 +824,7 @@ ecma_builtin_json_parse (ecma_value_t this_arg, /**< 'this' argument */
if (!ecma_is_value_undefined (final_result))
{
ecma_builtin_json_parse_next_token (&token);
ecma_builtin_json_parse_next_token (&token, false);
if (token.type != end_token)
{
@@ -736,7 +835,7 @@ ecma_builtin_json_parse (ecma_value_t this_arg, /**< 'this' argument */
if (ecma_is_value_undefined (final_result))
{
ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("Could not parse JSON string."));
ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("JSON string parse error."));
}
else
{
@@ -766,7 +865,7 @@ ecma_builtin_json_parse (ecma_value_t this_arg, /**< 'this' argument */
}
}
JMEM_FINALIZE_LOCAL_ARRAY (str_start_p);
ECMA_FINALIZE_UTF8_STRING (str_start_p, string_size);
ECMA_FINALIZE (string);
return ret_value;