Implement \u{hex} support. (#3447)

A large rework because surrogate pairs must be combined.

Currently only the 0x10C80..0x10CF2 is accepted as valid identifier character from the non-basic plane.

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
Zoltan Herczeg
2019-12-16 11:26:02 +01:00
committed by Dániel Bátyai
parent 1db16c3a1c
commit 40d930d62c
22 changed files with 765 additions and 370 deletions
+40 -12
View File
@@ -1553,31 +1553,59 @@ jerry_append_number_to_buffer (uint8_t *buffer_p, /**< buffer */
static bool
ecma_string_is_valid_identifier (const ecma_string_t *string_p)
{
bool result = false;
ECMA_STRING_TO_UTF8_STRING (string_p, str_buffer_p, str_buffer_size);
if (lit_char_is_identifier_start (str_buffer_p))
const uint8_t *str_p = str_buffer_p;
const uint8_t *str_end_p = str_buffer_p + str_buffer_size;
while (str_p < str_end_p)
{
const uint8_t *str_start_p = str_buffer_p;
const uint8_t *str_end_p = str_buffer_p + str_buffer_size;
lit_code_point_t code_point = *str_p;
lit_utf8_size_t utf8_length = 1;
result = true;
while (str_start_p < str_end_p)
if (JERRY_UNLIKELY (code_point >= LIT_UTF8_2_BYTE_MARKER))
{
if (!lit_char_is_identifier_part (str_start_p))
utf8_length = lit_read_code_point_from_utf8 (str_p,
(lit_utf8_size_t) (str_end_p - str_p),
&code_point);
#if ENABLED (JERRY_ES2015)
if ((code_point >= LIT_UTF16_HIGH_SURROGATE_MIN && code_point <= LIT_UTF16_HIGH_SURROGATE_MAX)
&& str_p + 3 < str_end_p)
{
lit_code_point_t low_surrogate;
lit_read_code_point_from_utf8 (str_p + 3,
(lit_utf8_size_t) (str_end_p - (str_p + 3)),
&low_surrogate);
if (low_surrogate >= LIT_UTF16_LOW_SURROGATE_MIN && low_surrogate <= LIT_UTF16_LOW_SURROGATE_MAX)
{
code_point = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) code_point,
(ecma_char_t) low_surrogate);
utf8_length = 2 * 3;
}
}
#endif /* ENABLED (JERRY_ES2015) */
}
if (str_p == str_buffer_p)
{
if (!lit_code_point_is_identifier_start (code_point))
{
result = false;
break;
}
lit_utf8_incr (&str_start_p);
}
else if (!lit_code_point_is_identifier_part (code_point))
{
break;
}
str_p += utf8_length;
}
ECMA_FINALIZE_UTF8_STRING (str_buffer_p, str_buffer_size);
return result;
return str_p == str_end_p;
} /* ecma_string_is_valid_identifier */
#endif /* ENABLED (JERRY_SNAPSHOT_SAVE) */