Implement \u{hex} support. (#3447)

A large rework because surrogate pairs must be combined. Currently only the range 0x10C80..0x10CF2 is accepted as valid identifier characters from outside the basic plane. JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
2019-12-16 11:26:02 +01:00
parent 1db16c3a1c
commit 40d930d62c
22 changed files with 765 additions and 370 deletions
@@ -75,10 +75,8 @@ bool lit_char_is_line_terminator (ecma_char_t c);
 #define LIT_CHAR_UNDERSCORE  ((ecma_char_t) '_')  /* low line (underscore) */
 /* LIT_CHAR_BACKSLASH defined above */

-bool lit_char_is_identifier_start (const uint8_t *src_p);
-bool lit_char_is_identifier_part (const uint8_t *src_p);
-bool lit_char_is_identifier_start_character (ecma_char_t chr);
-bool lit_char_is_identifier_part_character (ecma_char_t chr);
+bool lit_code_point_is_identifier_start (lit_code_point_t code_point);
+bool lit_code_point_is_identifier_part (lit_code_point_t code_point);

 /*
 * Punctuator characters (ECMA-262 v5, 7.7)
@@ -215,8 +213,9 @@ bool lit_char_is_octal_digit (ecma_char_t c);
 bool lit_char_is_decimal_digit (ecma_char_t c);
 bool lit_char_is_hex_digit (ecma_char_t c);
 uint32_t lit_char_hex_to_int (ecma_char_t c);
-size_t lit_char_to_utf8_bytes (uint8_t *dst_p, ecma_char_t chr);
-size_t lit_char_get_utf8_length (ecma_char_t chr);
+size_t lit_code_point_to_cesu8_bytes (uint8_t *dst_p, lit_code_point_t code_point);
+size_t lit_code_point_get_cesu8_length (lit_code_point_t code_point);
+void lit_four_byte_utf8_char_to_cesu8 (uint8_t *dst_p, const uint8_t *source_p);

 /* read a hex encoded code point from a zero terminated buffer */
 bool lit_read_code_unit_from_hex (const lit_utf8_byte_t *buf_p, lit_utf8_size_t number_of_characters,