Correctly handle celestial plane codepoints in ES5.1. (#3510)
Fixes #3498.

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
committed by
Dániel Bátyai
parent
d6070a9fed
commit
0d7b461185
@@ -223,6 +223,8 @@ lit_code_point_is_identifier_start (lit_code_point_t code_point) /**< code point
|
|||||||
/* TODO: detect these ranges correctly. */
|
/* TODO: detect these ranges correctly. */
|
||||||
return (code_point >= 0x10C80 && code_point <= 0x10CF2);
|
return (code_point >= 0x10C80 && code_point <= 0x10CF2);
|
||||||
}
|
}
|
||||||
|
#else /* !ENABLED (JERRY_ES2015) */
|
||||||
|
JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN);
|
||||||
#endif /* ENABLED (JERRY_ES2015) */
|
#endif /* ENABLED (JERRY_ES2015) */
|
||||||
|
|
||||||
return lit_char_is_unicode_letter ((ecma_char_t) code_point);
|
return lit_char_is_unicode_letter ((ecma_char_t) code_point);
|
||||||
@@ -252,6 +254,8 @@ lit_code_point_is_identifier_part (lit_code_point_t code_point) /**< code point
|
|||||||
/* TODO: detect these ranges correctly. */
|
/* TODO: detect these ranges correctly. */
|
||||||
return (code_point >= 0x10C80 && code_point <= 0x10CF2);
|
return (code_point >= 0x10C80 && code_point <= 0x10CF2);
|
||||||
}
|
}
|
||||||
|
#else /* !ENABLED (JERRY_ES2015) */
|
||||||
|
JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN);
|
||||||
#endif /* ENABLED (JERRY_ES2015) */
|
#endif /* ENABLED (JERRY_ES2015) */
|
||||||
|
|
||||||
return (lit_char_is_unicode_letter ((ecma_char_t) code_point)
|
return (lit_char_is_unicode_letter ((ecma_char_t) code_point)
|
||||||
|
|||||||
@@ -693,12 +693,12 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */
|
|||||||
|
|
||||||
if (JERRY_UNLIKELY (code_point >= LIT_UTF8_2_BYTE_MARKER))
|
if (JERRY_UNLIKELY (code_point >= LIT_UTF8_2_BYTE_MARKER))
|
||||||
{
|
{
|
||||||
|
#if ENABLED (JERRY_ES2015)
|
||||||
utf8_length = lit_read_code_point_from_utf8 (source_p,
|
utf8_length = lit_read_code_point_from_utf8 (source_p,
|
||||||
(lit_utf8_size_t) (source_end_p - source_p),
|
(lit_utf8_size_t) (source_end_p - source_p),
|
||||||
&code_point);
|
&code_point);
|
||||||
decoded_length = utf8_length;
|
decoded_length = utf8_length;
|
||||||
|
|
||||||
#if ENABLED (JERRY_ES2015)
|
|
||||||
/* Only ES2015 supports code points outside of the basic plane which can be part of an identifier. */
|
/* Only ES2015 supports code points outside of the basic plane which can be part of an identifier. */
|
||||||
if ((code_point >= LIT_UTF16_HIGH_SURROGATE_MIN && code_point <= LIT_UTF16_HIGH_SURROGATE_MAX)
|
if ((code_point >= LIT_UTF16_HIGH_SURROGATE_MIN && code_point <= LIT_UTF16_HIGH_SURROGATE_MAX)
|
||||||
&& source_p + 3 < source_end_p)
|
&& source_p + 3 < source_end_p)
|
||||||
@@ -717,11 +717,23 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */
|
|||||||
char_count = 2;
|
char_count = 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (source_p[0] >= LEXER_UTF8_4BYTE_START)
|
else if (source_p[0] >= LIT_UTF8_4_BYTE_MARKER)
|
||||||
{
|
{
|
||||||
decoded_length = 2 * 3;
|
decoded_length = 2 * 3;
|
||||||
has_escape = true;
|
has_escape = true;
|
||||||
}
|
}
|
||||||
|
#else /* !ENABLED (JERRY_ES2015) */
|
||||||
|
if (code_point < LIT_UTF8_4_BYTE_MARKER)
|
||||||
|
{
|
||||||
|
utf8_length = lit_read_code_point_from_utf8 (source_p,
|
||||||
|
(lit_utf8_size_t) (source_end_p - source_p),
|
||||||
|
&code_point);
|
||||||
|
decoded_length = utf8_length;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
code_point = 0;
|
||||||
|
}
|
||||||
#endif /* ENABLED (JERRY_ES2015) */
|
#endif /* ENABLED (JERRY_ES2015) */
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1091,7 +1103,7 @@ lexer_parse_string (parser_context_t *context_p, /**< context */
|
|||||||
}
|
}
|
||||||
#endif /* ENABLED (JERRY_ES2015) */
|
#endif /* ENABLED (JERRY_ES2015) */
|
||||||
|
|
||||||
if (*source_p >= LEXER_UTF8_4BYTE_START)
|
if (*source_p >= LIT_UTF8_4_BYTE_MARKER)
|
||||||
{
|
{
|
||||||
/* Processing 4 byte unicode sequence (even if it is
|
/* Processing 4 byte unicode sequence (even if it is
|
||||||
* after a backslash). Always converted to two 3 byte
|
* after a backslash). Always converted to two 3 byte
|
||||||
@@ -1893,7 +1905,7 @@ lexer_convert_ident_to_cesu8 (uint8_t *destination_p, /**< destination string */
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if ENABLED (JERRY_ES2015)
|
#if ENABLED (JERRY_ES2015)
|
||||||
if (*source_p >= LEXER_UTF8_4BYTE_START)
|
if (*source_p >= LIT_UTF8_4_BYTE_MARKER)
|
||||||
{
|
{
|
||||||
lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);
|
lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);
|
||||||
|
|
||||||
@@ -2113,7 +2125,7 @@ lexer_convert_literal_to_chars (parser_context_t *context_p, /**< context */
|
|||||||
}
|
}
|
||||||
#endif /* ENABLED (JERRY_ES2015) */
|
#endif /* ENABLED (JERRY_ES2015) */
|
||||||
|
|
||||||
if (*source_p >= LEXER_UTF8_4BYTE_START)
|
if (*source_p >= LIT_UTF8_4_BYTE_MARKER)
|
||||||
{
|
{
|
||||||
/* Processing 4 byte unicode sequence (even if it is
|
/* Processing 4 byte unicode sequence (even if it is
|
||||||
* after a backslash). Always converted to two 3 byte
|
* after a backslash). Always converted to two 3 byte
|
||||||
@@ -3028,7 +3040,7 @@ lexer_compare_identifier_to_chars (const uint8_t *left_p, /**< left identifier *
|
|||||||
|
|
||||||
escape_size = lit_code_point_to_cesu8_bytes (utf8_buf, code_point);
|
escape_size = lit_code_point_to_cesu8_bytes (utf8_buf, code_point);
|
||||||
}
|
}
|
||||||
else if (*left_p >= LEXER_UTF8_4BYTE_START)
|
else if (*left_p >= LIT_UTF8_4_BYTE_MARKER)
|
||||||
{
|
{
|
||||||
lit_four_byte_utf8_char_to_cesu8 (utf8_buf, left_p);
|
lit_four_byte_utf8_char_to_cesu8 (utf8_buf, left_p);
|
||||||
escape_size = 3 * 2;
|
escape_size = 3 * 2;
|
||||||
|
|||||||
@@ -201,7 +201,6 @@ typedef enum
|
|||||||
#define LEXER_NEWLINE_LS_PS_BYTE_1 0xe2
|
#define LEXER_NEWLINE_LS_PS_BYTE_1 0xe2
|
||||||
#define LEXER_NEWLINE_LS_PS_BYTE_23(source) \
|
#define LEXER_NEWLINE_LS_PS_BYTE_23(source) \
|
||||||
((source)[1] == LIT_UTF8_2_BYTE_CODE_POINT_MIN && ((source)[2] | 0x1) == 0xa9)
|
((source)[1] == LIT_UTF8_2_BYTE_CODE_POINT_MIN && ((source)[2] | 0x1) == 0xa9)
|
||||||
#define LEXER_UTF8_4BYTE_START 0xf0
|
|
||||||
|
|
||||||
#define LEXER_IS_LEFT_BRACKET(type) \
|
#define LEXER_IS_LEFT_BRACKET(type) \
|
||||||
((type) == LEXER_LEFT_BRACE || (type) == LEXER_LEFT_PAREN || (type) == LEXER_LEFT_SQUARE)
|
((type) == LEXER_LEFT_BRACE || (type) == LEXER_LEFT_PAREN || (type) == LEXER_LEFT_SQUARE)
|
||||||
|
|||||||
@@ -62,5 +62,17 @@ main (void)
|
|||||||
jerry_release_value (test_values[idx]);
|
jerry_release_value (test_values[idx]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char test_source[] = "\xF0\x9D\x84\x9E";
|
||||||
|
|
||||||
|
jerry_value_t result = jerry_parse (NULL,
|
||||||
|
0,
|
||||||
|
(const jerry_char_t *) test_source,
|
||||||
|
sizeof (test_source) - 1,
|
||||||
|
JERRY_PARSE_NO_OPTS);
|
||||||
|
TEST_ASSERT (jerry_value_is_error (result));
|
||||||
|
TEST_ASSERT (jerry_get_error_type (result) == JERRY_ERROR_SYNTAX);
|
||||||
|
|
||||||
|
jerry_release_value (result);
|
||||||
|
|
||||||
jerry_cleanup ();
|
jerry_cleanup ();
|
||||||
} /* main */
|
} /* main */
|
||||||
|
|||||||
Reference in New Issue
Block a user