Implement \u{hex} support. (#3447)

This is a large rework because surrogate pairs must be combined.

Currently, only the range 0x10C80..0x10CF2 is accepted as valid identifier characters from outside the basic plane.

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
Zoltan Herczeg
2019-12-16 11:26:02 +01:00
committed by Dániel Bátyai
parent 1db16c3a1c
commit 40d930d62c
22 changed files with 765 additions and 370 deletions
+5 -6
View File
@@ -75,10 +75,8 @@ bool lit_char_is_line_terminator (ecma_char_t c);
#define LIT_CHAR_UNDERSCORE ((ecma_char_t) '_') /* low line (underscore) */
/* LIT_CHAR_BACKSLASH defined above */
bool lit_char_is_identifier_start (const uint8_t *src_p);
bool lit_char_is_identifier_part (const uint8_t *src_p);
bool lit_char_is_identifier_start_character (ecma_char_t chr);
bool lit_char_is_identifier_part_character (ecma_char_t chr);
bool lit_code_point_is_identifier_start (lit_code_point_t code_point);
bool lit_code_point_is_identifier_part (lit_code_point_t code_point);
/*
* Punctuator characters (ECMA-262 v5, 7.7)
@@ -215,8 +213,9 @@ bool lit_char_is_octal_digit (ecma_char_t c);
bool lit_char_is_decimal_digit (ecma_char_t c);
bool lit_char_is_hex_digit (ecma_char_t c);
uint32_t lit_char_hex_to_int (ecma_char_t c);
size_t lit_char_to_utf8_bytes (uint8_t *dst_p, ecma_char_t chr);
size_t lit_char_get_utf8_length (ecma_char_t chr);
size_t lit_code_point_to_cesu8_bytes (uint8_t *dst_p, lit_code_point_t code_point);
size_t lit_code_point_get_cesu8_length (lit_code_point_t code_point);
void lit_four_byte_utf8_char_to_cesu8 (uint8_t *dst_p, const uint8_t *source_p);
/* read a hex encoded code point from a zero terminated buffer */
bool lit_read_code_unit_from_hex (const lit_utf8_byte_t *buf_p, lit_utf8_size_t number_of_characters,