Do not copy the source string in the JSON parser. (#1481)

The JSON parser previously required a zero-terminated, writable copy of
the original string. This requirement has been removed to reduce peak
memory consumption.

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
Zoltan Herczeg
2016-12-13 08:36:01 +01:00
committed by GitHub
parent fb2818c137
commit 6904b9bd65
2 changed files with 191 additions and 92 deletions
@@ -29,6 +29,7 @@
#include "jrt.h" #include "jrt.h"
#include "jrt-libc-includes.h" #include "jrt-libc-includes.h"
#include "lit-char-helpers.h" #include "lit-char-helpers.h"
#include "lit-globals.h"
#ifndef CONFIG_DISABLE_JSON_BUILTIN #ifndef CONFIG_DISABLE_JSON_BUILTIN
@@ -74,15 +75,15 @@ typedef enum
typedef struct typedef struct
{ {
ecma_json_token_type_t type; /**< type of the current token */ ecma_json_token_type_t type; /**< type of the current token */
lit_utf8_byte_t *current_p; /**< current position of the string processed by the parser */ const lit_utf8_byte_t *current_p; /**< current position of the string processed by the parser */
const lit_utf8_byte_t *end_p; /**< end of the string processed by the parser */ const lit_utf8_byte_t *end_p; /**< end of the string processed by the parser */
/**
* Fields depending on type.
*/
union union
{ {
struct ecma_string_t *string_p; /**< when type is string_token it contains the string */
{
const lit_utf8_byte_t *start_p; /**< when type is string_token, it contains the start of the string */
lit_utf8_size_t size; /**< when type is string_token, it contains the size of the string */
} string;
ecma_number_t number; /**< when type is number_token, it contains the value of the number */ ecma_number_t number; /**< when type is number_token, it contains the value of the number */
} u; } u;
} ecma_json_token_t; } ecma_json_token_t;
@@ -93,19 +94,21 @@ typedef struct
* @return true if the match is successful * @return true if the match is successful
*/ */
static bool static bool
ecma_builtin_json_check_id (lit_utf8_byte_t *string_p, /**< start position */ ecma_builtin_json_check_id (const lit_utf8_byte_t *string_p, /**< start position */
const char *id_p) /**< string identifier */ const lit_utf8_byte_t *end_p, /**< input end */
const char *string_id_p) /**< string identifier */
{ {
/* /*
* String comparison must not depend on lit_utf8_byte_t definition. * String comparison must not depend on lit_utf8_byte_t definition.
*/ */
JERRY_ASSERT (*string_p == *id_p); JERRY_ASSERT (*string_p == *string_id_p);
do string_p++;
string_id_p++;
while (string_p < end_p)
{ {
string_p++; if (*string_id_p == LIT_CHAR_NULL)
id_p++;
if (*id_p == LIT_CHAR_NULL)
{ {
/* JSON lexer accepts input strings such as falsenull and /* JSON lexer accepts input strings such as falsenull and
* returns with multiple tokens (false and null in this case). * returns with multiple tokens (false and null in this case).
@@ -116,10 +119,17 @@ ecma_builtin_json_check_id (lit_utf8_byte_t *string_p, /**< start position */
* type. */ * type. */
return true; return true;
} }
}
while (*string_p == *id_p);
return false; if (*string_p != *string_id_p)
{
return false;
}
string_p++;
string_id_p++;
}
return (*string_id_p == LIT_CHAR_NULL);
} /* ecma_builtin_json_check_id */ } /* ecma_builtin_json_check_id */
/** /**
@@ -128,51 +138,40 @@ ecma_builtin_json_check_id (lit_utf8_byte_t *string_p, /**< start position */
static void static void
ecma_builtin_json_parse_string (ecma_json_token_t *token_p) /**< token argument */ ecma_builtin_json_parse_string (ecma_json_token_t *token_p) /**< token argument */
{ {
lit_utf8_byte_t *current_p = token_p->current_p; const lit_utf8_byte_t *current_p = token_p->current_p;
lit_utf8_byte_t *write_p = current_p; const lit_utf8_byte_t *end_p = token_p->end_p;
bool has_escape_sequence = false;
lit_utf8_size_t buffer_size = 0;
token_p->u.string.start_p = current_p; /* First step: syntax checking. */
while (true)
while (*current_p != LIT_CHAR_DOUBLE_QUOTE)
{ {
if (*current_p <= 0x1f) if (current_p >= end_p || *current_p <= 0x1f)
{ {
return; return;
} }
if (*current_p == LIT_CHAR_DOUBLE_QUOTE)
{
break;
}
if (*current_p == LIT_CHAR_BACKSLASH) if (*current_p == LIT_CHAR_BACKSLASH)
{ {
current_p++; current_p++;
has_escape_sequence = true;
switch (*current_p) switch (*current_p)
{ {
case LIT_CHAR_DOUBLE_QUOTE: case LIT_CHAR_DOUBLE_QUOTE:
case LIT_CHAR_SLASH: case LIT_CHAR_SLASH:
case LIT_CHAR_BACKSLASH: case LIT_CHAR_BACKSLASH:
{
break;
}
case LIT_CHAR_LOWERCASE_B: case LIT_CHAR_LOWERCASE_B:
{
*current_p = LIT_CHAR_BS;
break;
}
case LIT_CHAR_LOWERCASE_F: case LIT_CHAR_LOWERCASE_F:
{
*current_p = LIT_CHAR_FF;
break;
}
case LIT_CHAR_LOWERCASE_N: case LIT_CHAR_LOWERCASE_N:
{
*current_p = LIT_CHAR_LF;
break;
}
case LIT_CHAR_LOWERCASE_R: case LIT_CHAR_LOWERCASE_R:
{
*current_p = LIT_CHAR_CR;
break;
}
case LIT_CHAR_LOWERCASE_T: case LIT_CHAR_LOWERCASE_T:
{ {
*current_p = LIT_CHAR_TAB;
break; break;
} }
case LIT_CHAR_LOWERCASE_U: case LIT_CHAR_LOWERCASE_U:
@@ -185,7 +184,9 @@ ecma_builtin_json_parse_string (ecma_json_token_t *token_p) /**< token argument
} }
current_p += 5; current_p += 5;
write_p += lit_code_unit_to_utf8 (code_unit, write_p);
lit_utf8_byte_t char_buffer[LIT_UTF8_MAX_BYTES_IN_CODE_UNIT];
buffer_size += lit_code_unit_to_utf8 (code_unit, char_buffer);
continue; continue;
} }
default: default:
@@ -194,12 +195,92 @@ ecma_builtin_json_parse_string (ecma_json_token_t *token_p) /**< token argument
} }
} }
} }
buffer_size++;
current_p++;
}
token_p->type = string_token;
if (!has_escape_sequence)
{
token_p->u.string_p = ecma_new_ecma_string_from_utf8 (token_p->current_p, buffer_size);
token_p->current_p = current_p + 1;
return;
}
JMEM_DEFINE_LOCAL_ARRAY (buffer_p, buffer_size, lit_utf8_byte_t);
lit_utf8_byte_t *write_p = buffer_p;
current_p = token_p->current_p;
while (*current_p != LIT_CHAR_DOUBLE_QUOTE)
{
if (*current_p == LIT_CHAR_BACKSLASH)
{
current_p++;
lit_utf8_byte_t special_character;
switch (*current_p)
{
case LIT_CHAR_LOWERCASE_B:
{
special_character = LIT_CHAR_BS;
break;
}
case LIT_CHAR_LOWERCASE_F:
{
special_character = LIT_CHAR_FF;
break;
}
case LIT_CHAR_LOWERCASE_N:
{
special_character = LIT_CHAR_LF;
break;
}
case LIT_CHAR_LOWERCASE_R:
{
special_character = LIT_CHAR_CR;
break;
}
case LIT_CHAR_LOWERCASE_T:
{
special_character = LIT_CHAR_TAB;
break;
}
case LIT_CHAR_LOWERCASE_U:
{
ecma_char_t code_unit;
lit_read_code_unit_from_hex (current_p + 1, 4, &code_unit);
current_p += 5;
write_p += lit_code_unit_to_utf8 (code_unit, write_p);
continue;
}
default:
{
special_character = *current_p;
break;
}
}
*write_p++ = special_character;
current_p++;
continue;
}
*write_p++ = *current_p++; *write_p++ = *current_p++;
} }
token_p->u.string.size = (lit_utf8_size_t) (write_p - token_p->u.string.start_p); JERRY_ASSERT (write_p == buffer_p + buffer_size);
token_p->u.string_p = ecma_new_ecma_string_from_utf8 (buffer_p, buffer_size);
JMEM_FINALIZE_LOCAL_ARRAY (buffer_p);
token_p->current_p = current_p + 1; token_p->current_p = current_p + 1;
token_p->type = string_token;
} /* ecma_builtin_json_parse_string */ } /* ecma_builtin_json_parse_string */
/** /**
@@ -208,18 +289,27 @@ ecma_builtin_json_parse_string (ecma_json_token_t *token_p) /**< token argument
static void static void
ecma_builtin_json_parse_number (ecma_json_token_t *token_p) /**< token argument */ ecma_builtin_json_parse_number (ecma_json_token_t *token_p) /**< token argument */
{ {
lit_utf8_byte_t *current_p = token_p->current_p; const lit_utf8_byte_t *current_p = token_p->current_p;
lit_utf8_byte_t *start_p = current_p; const lit_utf8_byte_t *end_p = token_p->end_p;
const lit_utf8_byte_t *start_p = current_p;
JERRY_ASSERT (current_p < end_p);
if (*current_p == LIT_CHAR_MINUS) if (*current_p == LIT_CHAR_MINUS)
{ {
current_p++; current_p++;
} }
if (current_p >= end_p)
{
return;
}
if (*current_p == LIT_CHAR_0) if (*current_p == LIT_CHAR_0)
{ {
current_p++; current_p++;
if (lit_char_is_decimal_digit (*current_p))
if (current_p < end_p && lit_char_is_decimal_digit (*current_p))
{ {
return; return;
} }
@@ -230,13 +320,14 @@ ecma_builtin_json_parse_number (ecma_json_token_t *token_p) /**< token argument
{ {
current_p++; current_p++;
} }
while (lit_char_is_decimal_digit (*current_p)); while (current_p < end_p && lit_char_is_decimal_digit (*current_p));
} }
if (*current_p == LIT_CHAR_DOT) if (current_p < end_p && *current_p == LIT_CHAR_DOT)
{ {
current_p++; current_p++;
if (!lit_char_is_decimal_digit (*current_p))
if (current_p >= end_p || !lit_char_is_decimal_digit (*current_p))
{ {
return; return;
} }
@@ -245,18 +336,19 @@ ecma_builtin_json_parse_number (ecma_json_token_t *token_p) /**< token argument
{ {
current_p++; current_p++;
} }
while (lit_char_is_decimal_digit (*current_p)); while (current_p < end_p && lit_char_is_decimal_digit (*current_p));
} }
if (*current_p == LIT_CHAR_LOWERCASE_E || *current_p == LIT_CHAR_UPPERCASE_E) if (current_p < end_p && (*current_p == LIT_CHAR_LOWERCASE_E || *current_p == LIT_CHAR_UPPERCASE_E))
{ {
current_p++; current_p++;
if (*current_p == LIT_CHAR_PLUS || *current_p == LIT_CHAR_MINUS)
if (current_p < end_p && (*current_p == LIT_CHAR_PLUS || *current_p == LIT_CHAR_MINUS))
{ {
current_p++; current_p++;
} }
if (!lit_char_is_decimal_digit (*current_p)) if (current_p >= end_p || !lit_char_is_decimal_digit (*current_p))
{ {
return; return;
} }
@@ -265,8 +357,9 @@ ecma_builtin_json_parse_number (ecma_json_token_t *token_p) /**< token argument
{ {
current_p++; current_p++;
} }
while (lit_char_is_decimal_digit (*current_p)); while (current_p < end_p && lit_char_is_decimal_digit (*current_p));
} }
token_p->type = number_token; token_p->type = number_token;
token_p->u.number = ecma_utf8_string_to_number (start_p, (lit_utf8_size_t) (current_p - start_p)); token_p->u.number = ecma_utf8_string_to_number (start_p, (lit_utf8_size_t) (current_p - start_p));
@@ -280,12 +373,14 @@ ecma_builtin_json_parse_number (ecma_json_token_t *token_p) /**< token argument
* argument and advances the string pointer. * argument and advances the string pointer.
*/ */
static void static void
ecma_builtin_json_parse_next_token (ecma_json_token_t *token_p) /**< token argument */ ecma_builtin_json_parse_next_token (ecma_json_token_t *token_p, /**< token argument */
bool parse_string) /**< strings are allowed to parse */
{ {
lit_utf8_byte_t *current_p = token_p->current_p; const lit_utf8_byte_t *current_p = token_p->current_p;
const lit_utf8_byte_t *end_p = token_p->end_p;
token_p->type = invalid_token; token_p->type = invalid_token;
while (current_p < token_p->end_p while (current_p < end_p
&& (*current_p == LIT_CHAR_SP && (*current_p == LIT_CHAR_SP
|| *current_p == LIT_CHAR_CR || *current_p == LIT_CHAR_CR
|| *current_p == LIT_CHAR_LF || *current_p == LIT_CHAR_LF
@@ -294,7 +389,7 @@ ecma_builtin_json_parse_next_token (ecma_json_token_t *token_p) /**< token argum
current_p++; current_p++;
} }
if (current_p == token_p->end_p) if (current_p == end_p)
{ {
token_p->type = end_token; token_p->type = end_token;
return; return;
@@ -334,13 +429,16 @@ ecma_builtin_json_parse_next_token (ecma_json_token_t *token_p) /**< token argum
} }
case LIT_CHAR_DOUBLE_QUOTE: case LIT_CHAR_DOUBLE_QUOTE:
{ {
token_p->current_p = current_p + 1; if (parse_string)
ecma_builtin_json_parse_string (token_p); {
token_p->current_p = current_p + 1;
ecma_builtin_json_parse_string (token_p);
}
return; return;
} }
case LIT_CHAR_LOWERCASE_N: case LIT_CHAR_LOWERCASE_N:
{ {
if (ecma_builtin_json_check_id (current_p, "null")) if (ecma_builtin_json_check_id (current_p, token_p->end_p, "null"))
{ {
token_p->type = null_token; token_p->type = null_token;
token_p->current_p = current_p + 4; token_p->current_p = current_p + 4;
@@ -350,7 +448,7 @@ ecma_builtin_json_parse_next_token (ecma_json_token_t *token_p) /**< token argum
} }
case LIT_CHAR_LOWERCASE_T: case LIT_CHAR_LOWERCASE_T:
{ {
if (ecma_builtin_json_check_id (current_p, "true")) if (ecma_builtin_json_check_id (current_p, token_p->end_p, "true"))
{ {
token_p->type = true_token; token_p->type = true_token;
token_p->current_p = current_p + 4; token_p->current_p = current_p + 4;
@@ -360,7 +458,7 @@ ecma_builtin_json_parse_next_token (ecma_json_token_t *token_p) /**< token argum
} }
case LIT_CHAR_LOWERCASE_F: case LIT_CHAR_LOWERCASE_F:
{ {
if (ecma_builtin_json_check_id (current_p, "false")) if (ecma_builtin_json_check_id (current_p, token_p->end_p, "false"))
{ {
token_p->type = false_token; token_p->type = false_token;
token_p->current_p = current_p + 5; token_p->current_p = current_p + 5;
@@ -391,24 +489,29 @@ ecma_builtin_json_parse_next_token (ecma_json_token_t *token_p) /**< token argum
static bool static bool
ecma_builtin_json_check_right_square_token (ecma_json_token_t *token_p) /**< token argument */ ecma_builtin_json_check_right_square_token (ecma_json_token_t *token_p) /**< token argument */
{ {
lit_utf8_byte_t *current_p = token_p->current_p; const lit_utf8_byte_t *current_p = token_p->current_p;
const lit_utf8_byte_t *end_p = token_p->end_p;
/* /*
* No need for end check since the string is zero terminated. * No need for end check since the string is zero terminated.
*/ */
while (*current_p == LIT_CHAR_SP || *current_p == LIT_CHAR_CR while (current_p < end_p
|| *current_p == LIT_CHAR_LF || *current_p == LIT_CHAR_TAB) && (*current_p == LIT_CHAR_SP
|| *current_p == LIT_CHAR_CR
|| *current_p == LIT_CHAR_LF
|| *current_p == LIT_CHAR_TAB))
{ {
current_p++; current_p++;
} }
token_p->current_p = current_p; token_p->current_p = current_p;
if (*current_p == LIT_CHAR_RIGHT_SQUARE) if (current_p < end_p && *current_p == LIT_CHAR_RIGHT_SQUARE)
{ {
token_p->current_p = current_p + 1; token_p->current_p = current_p + 1;
return true; return true;
} }
return false; return false;
} /* ecma_builtin_json_check_right_square_token */ } /* ecma_builtin_json_check_right_square_token */
@@ -444,7 +547,7 @@ ecma_builtin_json_define_value_property (ecma_object_t *obj_p, /**< this object
static ecma_value_t static ecma_value_t
ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument */ ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument */
{ {
ecma_builtin_json_parse_next_token (token_p); ecma_builtin_json_parse_next_token (token_p, true);
switch (token_p->type) switch (token_p->type)
{ {
@@ -454,8 +557,7 @@ ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument *
} }
case string_token: case string_token:
{ {
ecma_string_t *string_p = ecma_new_ecma_string_from_utf8 (token_p->u.string.start_p, token_p->u.string.size); return ecma_make_string_value (token_p->u.string_p);
return ecma_make_string_value (string_p);
} }
case null_token: case null_token:
{ {
@@ -476,7 +578,7 @@ ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument *
while (true) while (true)
{ {
ecma_builtin_json_parse_next_token (token_p); ecma_builtin_json_parse_next_token (token_p, !parse_comma);
if (token_p->type == right_brace_token) if (token_p->type == right_brace_token)
{ {
@@ -489,7 +591,8 @@ ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument *
{ {
break; break;
} }
ecma_builtin_json_parse_next_token (token_p);
ecma_builtin_json_parse_next_token (token_p, true);
} }
if (token_p->type != string_token) if (token_p->type != string_token)
@@ -497,12 +600,13 @@ ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument *
break; break;
} }
const lit_utf8_byte_t *string_start_p = token_p->u.string.start_p; ecma_string_t *name_p = token_p->u.string_p;
lit_utf8_size_t string_size = token_p->u.string.size;
ecma_builtin_json_parse_next_token (token_p); ecma_builtin_json_parse_next_token (token_p, false);
if (token_p->type != colon_token) if (token_p->type != colon_token)
{ {
ecma_deref_ecma_string (name_p);
break; break;
} }
@@ -510,13 +614,14 @@ ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument *
if (ecma_is_value_undefined (value)) if (ecma_is_value_undefined (value))
{ {
ecma_deref_ecma_string (name_p);
break; break;
} }
ecma_string_t *name_p = ecma_new_ecma_string_from_utf8 (string_start_p, string_size);
ecma_builtin_json_define_value_property (object_p, name_p, value); ecma_builtin_json_define_value_property (object_p, name_p, value);
ecma_deref_ecma_string (name_p); ecma_deref_ecma_string (name_p);
ecma_free_value (value); ecma_free_value (value);
parse_comma = true; parse_comma = true;
} }
@@ -545,7 +650,8 @@ ecma_builtin_json_parse_value (ecma_json_token_t *token_p) /**< token argument *
if (parse_comma) if (parse_comma)
{ {
ecma_builtin_json_parse_next_token (token_p); ecma_builtin_json_parse_next_token (token_p, false);
if (token_p->type != comma_token) if (token_p->type != comma_token)
{ {
break; break;
@@ -707,15 +813,8 @@ ecma_builtin_json_parse (ecma_value_t this_arg, /**< 'this' argument */
ret_value); ret_value);
const ecma_string_t *string_p = ecma_get_string_from_value (string); const ecma_string_t *string_p = ecma_get_string_from_value (string);
const ecma_length_t string_size = (ecma_length_t) ecma_string_get_size (string_p);
const lit_utf8_size_t buffer_size = sizeof (lit_utf8_byte_t) * (string_size + 1);
JMEM_DEFINE_LOCAL_ARRAY (str_start_p, buffer_size, lit_utf8_byte_t); ECMA_STRING_TO_UTF8_STRING (string_p, str_start_p, string_size);
const lit_utf8_size_t sz = ecma_string_copy_to_utf8_buffer (string_p, str_start_p, buffer_size);
JERRY_ASSERT (sz == string_size);
str_start_p[string_size] = LIT_BYTE_NULL;
ecma_json_token_t token; ecma_json_token_t token;
token.current_p = str_start_p; token.current_p = str_start_p;
@@ -725,7 +824,7 @@ ecma_builtin_json_parse (ecma_value_t this_arg, /**< 'this' argument */
if (!ecma_is_value_undefined (final_result)) if (!ecma_is_value_undefined (final_result))
{ {
ecma_builtin_json_parse_next_token (&token); ecma_builtin_json_parse_next_token (&token, false);
if (token.type != end_token) if (token.type != end_token)
{ {
@@ -736,7 +835,7 @@ ecma_builtin_json_parse (ecma_value_t this_arg, /**< 'this' argument */
if (ecma_is_value_undefined (final_result)) if (ecma_is_value_undefined (final_result))
{ {
ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("Could not parse JSON string.")); ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("JSON string parse error."));
} }
else else
{ {
@@ -766,7 +865,7 @@ ecma_builtin_json_parse (ecma_value_t this_arg, /**< 'this' argument */
} }
} }
JMEM_FINALIZE_LOCAL_ARRAY (str_start_p); ECMA_FINALIZE_UTF8_STRING (str_start_p, string_size);
ECMA_FINALIZE (string); ECMA_FINALIZE (string);
return ret_value; return ret_value;
+2 -2
View File
@@ -646,8 +646,8 @@ lit_get_unicode_char_size_by_utf8_first_byte (const lit_utf8_byte_t first_byte)
*/ */
lit_utf8_size_t lit_utf8_size_t
lit_code_unit_to_utf8 (ecma_char_t code_unit, /**< code unit */ lit_code_unit_to_utf8 (ecma_char_t code_unit, /**< code unit */
lit_utf8_byte_t *buf_p) /**< buffer where to store the result, lit_utf8_byte_t *buf_p) /**< buffer where to store the result and its size
* its size should be at least MAX_BYTES_IN_CODE_UNIT */ * should be at least LIT_UTF8_MAX_BYTES_IN_CODE_UNIT */
{ {
if (code_unit <= LIT_UTF8_1_BYTE_CODE_POINT_MAX) if (code_unit <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
{ {