diff --git a/jerry-core/ecma/base/ecma-helpers-string.c b/jerry-core/ecma/base/ecma-helpers-string.c index d034c105a..0fefdc1e7 100644 --- a/jerry-core/ecma/base/ecma-helpers-string.c +++ b/jerry-core/ecma/base/ecma-helpers-string.c @@ -335,6 +335,33 @@ ecma_find_special_string (const lit_utf8_byte_t *string_p, /**< utf8 string */ return NULL; } /* ecma_find_special_string */ +/** + * Allocate new ecma-string and fill it with characters from ascii characters + * + * @return pointer to ecma-string descriptor + */ +ecma_string_t * +ecma_new_ecma_string_from_ascii (const lit_utf8_byte_t *string_p, /**< ascii string */ + lit_utf8_size_t string_size) /**< string size */ +{ + JERRY_ASSERT (string_p != NULL || string_size == 0); + + ecma_string_t *string_desc_p = ecma_find_special_string (string_p, string_size); + + if (string_desc_p != NULL) + { + return string_desc_p; + } + + lit_utf8_byte_t *data_p; + string_desc_p = ecma_new_ecma_string_from_utf8_buffer (string_size, string_size, &data_p); + + string_desc_p->u.hash = lit_utf8_string_calc_hash (string_p, string_size); + memcpy (data_p, string_p, string_size); + + return string_desc_p; +} /* ecma_new_ecma_string_from_ascii */ + /** * Allocate new ecma-string and fill it with characters from the utf8 string * @@ -2449,8 +2476,7 @@ ecma_string_substr (const ecma_string_t *string_p, /**< pointer to an ecma strin if (string_length == buffer_size) { - ecma_string_p = ecma_new_ecma_string_from_utf8 (start_p + start_pos, - (lit_utf8_size_t) end_pos); + ecma_string_p = ecma_new_ecma_string_from_utf8 (start_p + start_pos, (lit_utf8_size_t) end_pos); } else { diff --git a/jerry-core/ecma/base/ecma-helpers.h b/jerry-core/ecma/base/ecma-helpers.h index ec189759e..a4349536b 100644 --- a/jerry-core/ecma/base/ecma-helpers.h +++ b/jerry-core/ecma/base/ecma-helpers.h @@ -299,7 +299,10 @@ ecma_length_t ecma_op_advance_string_index (ecma_string_t *str_p, ecma_length_t ecma_string_t *ecma_new_map_key_string (ecma_value_t value); bool 
ecma_prop_name_is_map_key (ecma_string_t *string_p); #endif /* JERRY_BUILTIN_CONTAINER */ -ecma_string_t *ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, lit_utf8_size_t string_size); +ecma_string_t *ecma_new_ecma_string_from_ascii (const lit_utf8_byte_t *string_p, + lit_utf8_size_t string_size); +ecma_string_t *ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, + lit_utf8_size_t string_size); ecma_string_t *ecma_new_ecma_string_from_utf8_converted_to_cesu8 (const lit_utf8_byte_t *string_p, lit_utf8_size_t string_size); ecma_string_t *ecma_new_ecma_external_string_from_cesu8 (const lit_utf8_byte_t *string_p, lit_utf8_size_t string_size, diff --git a/jerry-core/ecma/base/ecma-literal-storage.c b/jerry-core/ecma/base/ecma-literal-storage.c index 934a35de5..37fd9bb49 100644 --- a/jerry-core/ecma/base/ecma-literal-storage.c +++ b/jerry-core/ecma/base/ecma-literal-storage.c @@ -165,9 +165,11 @@ ecma_finalize_lit_storage (void) */ ecma_value_t ecma_find_or_create_literal_string (const lit_utf8_byte_t *chars_p, /**< string to be searched */ - lit_utf8_size_t size) /**< size of the string */ + lit_utf8_size_t size, /**< size of the string */ + bool is_ascii) /**< encode of the string */ { - ecma_string_t *string_p = ecma_new_ecma_string_from_utf8 (chars_p, size); + ecma_string_t *string_p = (is_ascii ? 
ecma_new_ecma_string_from_ascii (chars_p, size) + : ecma_new_ecma_string_from_utf8 (chars_p, size)); if (ECMA_IS_DIRECT_STRING (string_p)) { @@ -702,7 +704,7 @@ ecma_snapshot_get_literal (const uint8_t *literal_base_p, /**< literal start */ uint16_t length = *(const uint16_t *) literal_p; - return ecma_find_or_create_literal_string (literal_p + sizeof (uint16_t), length); + return ecma_find_or_create_literal_string (literal_p + sizeof (uint16_t), length, false); } /* ecma_snapshot_get_literal */ /** diff --git a/jerry-core/ecma/base/ecma-literal-storage.h b/jerry-core/ecma/base/ecma-literal-storage.h index 4080e51f2..d9cf6b28d 100644 --- a/jerry-core/ecma/base/ecma-literal-storage.h +++ b/jerry-core/ecma/base/ecma-literal-storage.h @@ -40,7 +40,7 @@ typedef struct void ecma_finalize_lit_storage (void); -ecma_value_t ecma_find_or_create_literal_string (const lit_utf8_byte_t *chars_p, lit_utf8_size_t size); +ecma_value_t ecma_find_or_create_literal_string (const lit_utf8_byte_t *chars_p, lit_utf8_size_t size, bool is_ascii); ecma_value_t ecma_find_or_create_literal_number (ecma_number_t number_arg); #if JERRY_BUILTIN_BIGINT ecma_value_t ecma_find_or_create_literal_bigint (ecma_value_t bigint); diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-helpers-date.c b/jerry-core/ecma/builtin-objects/ecma-builtin-helpers-date.c index 9d3a1529c..36a96923b 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-helpers-date.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-helpers-date.c @@ -680,7 +680,7 @@ ecma_date_to_string_format (ecma_number_t datetime_number, /**< datetime */ JERRY_ASSERT (dest_p <= date_buffer + date_buffer_length); - return ecma_make_string_value (ecma_new_ecma_string_from_utf8 (date_buffer, + return ecma_make_string_value (ecma_new_ecma_string_from_ascii (date_buffer, (lit_utf8_size_t) (dest_p - date_buffer))); } /* ecma_date_to_string_format */ diff --git a/jerry-core/ecma/operations/ecma-bigint.c b/jerry-core/ecma/operations/ecma-bigint.c 
index 6d2a2e275..de11b8159 100644 --- a/jerry-core/ecma/operations/ecma-bigint.c +++ b/jerry-core/ecma/operations/ecma-bigint.c @@ -250,7 +250,7 @@ ecma_bigint_to_string (ecma_value_t value, /**< BigInt value */ } ecma_string_t *string_p; - string_p = ecma_new_ecma_string_from_utf8 (string_buffer_p + char_start_p, char_size_p - char_start_p); + string_p = ecma_new_ecma_string_from_ascii (string_buffer_p + char_start_p, char_size_p - char_start_p); jmem_heap_free_block (string_buffer_p, char_size_p); return string_p; diff --git a/jerry-core/parser/js/common.h b/jerry-core/parser/js/common.h index 9baffab97..05775f048 100644 --- a/jerry-core/parser/js/common.h +++ b/jerry-core/parser/js/common.h @@ -74,8 +74,9 @@ typedef enum LEXER_FLAG_SOURCE_PTR = (1 << 2), /**< the literal is directly referenced in the source code * (no need to allocate memory) */ LEXER_FLAG_LATE_INIT = (1 << 3), /**< initialize this variable after the byte code is freed */ + LEXER_FLAG_ASCII = (1 << 4), /**< the literal contains only ascii characters */ #if JERRY_ESNEXT - LEXER_FLAG_GLOBAL = (1 << 4), /**< this local identifier is not a let or const declaration */ + LEXER_FLAG_GLOBAL = (1 << 5), /**< this local identifier is not a let or const declaration */ #endif /* JERRY_ESNEXT */ } lexer_literal_status_flags_t; diff --git a/jerry-core/parser/js/js-lexer.c b/jerry-core/parser/js/js-lexer.c index ef1a0c8b6..fe8d5a747 100644 --- a/jerry-core/parser/js/js-lexer.c +++ b/jerry-core/parser/js/js-lexer.c @@ -642,7 +642,7 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */ parser_line_counter_t column = context_p->column; const uint8_t *source_end_p = context_p->source_end_p; size_t length = 0; - uint8_t has_escape = false; + lexer_lit_location_flags_t status_flags = LEXER_LIT_LOCATION_IS_ASCII; do { @@ -657,7 +657,7 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */ return true; } - has_escape = true; + status_flags = LEXER_LIT_LOCATION_HAS_ESCAPE; #if 
JERRY_ESNEXT if (source_p + 5 <= source_end_p && source_p[1] == LIT_CHAR_LOWERCASE_U) @@ -711,6 +711,8 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */ if (JERRY_UNLIKELY (code_point >= LIT_UTF8_2_BYTE_MARKER)) { + status_flags &= (uint32_t) ~LEXER_LIT_LOCATION_IS_ASCII; + #if JERRY_ESNEXT utf8_length = lit_read_code_point_from_utf8 (source_p, (lit_utf8_size_t) (source_end_p - source_p), @@ -738,7 +740,7 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */ else if (source_p[0] >= LIT_UTF8_4_BYTE_MARKER) { decoded_length = 2 * 3; - has_escape = true; + status_flags = LEXER_LIT_LOCATION_HAS_ESCAPE; } #else /* !JERRY_ESNEXT */ if (code_point < LIT_UTF8_4_BYTE_MARKER) @@ -789,7 +791,7 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */ context_p->token.type = LEXER_LITERAL; context_p->token.lit_location.type = LEXER_IDENT_LITERAL; - context_p->token.lit_location.has_escape = has_escape; + context_p->token.lit_location.status_flags = (uint8_t) status_flags; context_p->token.column = context_p->column; context_p->token.lit_location.char_p = context_p->source_p; @@ -807,7 +809,7 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */ const uint8_t *ident_start_p = context_p->source_p; uint8_t buffer_p[LEXER_KEYWORD_MAX_LENGTH]; - if (JERRY_UNLIKELY (context_p->token.lit_location.has_escape)) + if (JERRY_UNLIKELY (context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE)) { lexer_convert_ident_to_cesu8 (buffer_p, ident_start_p, (prop_length_t) length); ident_start_p = buffer_p; @@ -953,7 +955,7 @@ lexer_parse_string (parser_context_t *context_p, /**< context */ parser_line_counter_t original_line = line; parser_line_counter_t original_column = column; size_t length = 0; - uint8_t has_escape = false; + lexer_lit_location_flags_t status_flags = LEXER_LIT_LOCATION_NO_OPTS; #if JERRY_ESNEXT if (str_end_character == LIT_CHAR_RIGHT_BRACE) @@ -986,7 +988,7 @@ lexer_parse_string 
(parser_context_t *context_p, /**< context */ continue; } - has_escape = true; + status_flags = LEXER_LIT_LOCATION_HAS_ESCAPE; /* Newline is ignored. */ if (*source_p == LIT_CHAR_CR) @@ -1163,7 +1165,7 @@ lexer_parse_string (parser_context_t *context_p, /**< context */ * after a backslash). Always converted to two 3 byte * long sequence. */ length += 2 * 3; - has_escape = true; + status_flags = LEXER_LIT_LOCATION_HAS_ESCAPE; source_p += 4; #if JERRY_ESNEXT raw_length_adjust += 2; @@ -1192,7 +1194,7 @@ lexer_parse_string (parser_context_t *context_p, /**< context */ Note: ECMAScript v6, 11.8.6.1 or are both normalized to */ if (*source_p == LIT_CHAR_CR) { - has_escape = true; + status_flags = LEXER_LIT_LOCATION_HAS_ESCAPE; source_p++; length++; if (source_p < source_end_p @@ -1261,7 +1263,7 @@ lexer_parse_string (parser_context_t *context_p, /**< context */ context_p->token.lit_location.char_p = string_start_p; context_p->token.lit_location.length = (prop_length_t) length; context_p->token.lit_location.type = LEXER_STRING_LITERAL; - context_p->token.lit_location.has_escape = has_escape; + context_p->token.lit_location.status_flags = (uint8_t) status_flags; context_p->source_p = source_p + 1; context_p->line = line; @@ -1328,7 +1330,7 @@ lexer_parse_number (parser_context_t *context_p) /**< context */ context_p->token.extra_value = LEXER_NUMBER_DECIMAL; context_p->token.lit_location.char_p = source_p; context_p->token.lit_location.type = LEXER_NUMBER_LITERAL; - context_p->token.lit_location.has_escape = false; + context_p->token.lit_location.status_flags = LEXER_LIT_LOCATION_IS_ASCII; if (source_p[0] == LIT_CHAR_0 && source_p + 1 < source_end_p) @@ -2240,7 +2242,7 @@ lexer_convert_literal_to_chars (parser_context_t *context_p, /**< context */ { JERRY_ASSERT (context_p->u.allocated_buffer_p == NULL); - if (!literal_p->has_escape) + if (!(literal_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE)) { return literal_p->char_p; } @@ -2601,6 +2603,11 @@ 
lexer_construct_literal_object (parser_context_t *context_p, /**< context */ status_flags |= LEXER_FLAG_USED; } + if (lit_location_p->status_flags & LEXER_LIT_LOCATION_IS_ASCII) + { + status_flags |= LEXER_FLAG_ASCII; + } + literal_p->status_flags = status_flags; context_p->lit_object.literal_p = literal_p; @@ -3490,7 +3497,7 @@ lexer_compare_identifier_to_string (const lexer_lit_location_t *left_p, /**< lef return false; } - if (!left_p->has_escape) + if (!(left_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE)) { return memcmp (left_p->char_p, right_p, size) == 0; } @@ -3518,12 +3525,12 @@ lexer_compare_identifiers (parser_context_t *context_p, /**< context */ return false; } - if (!left_p->has_escape) + if (!(left_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE)) { return lexer_compare_identifier_to_chars (right_p->char_p, left_p->char_p, length); } - if (!right_p->has_escape) + if (!(right_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE)) { return lexer_compare_identifier_to_chars (left_p->char_p, right_p->char_p, length); } @@ -3568,7 +3575,7 @@ lexer_current_is_literal (parser_context_t *context_p, /**< context */ return false; } - if (!left_ident_p->has_escape && !right_ident_p->has_escape) + if (!((left_ident_p->status_flags | right_ident_p->status_flags) & LEXER_LIT_LOCATION_HAS_ESCAPE)) { return memcmp (left_ident_p->char_p, right_ident_p->char_p, left_ident_p->length) == 0; } @@ -3591,7 +3598,7 @@ lexer_string_is_use_strict (parser_context_t *context_p) /**< context */ && context_p->token.lit_location.type == LEXER_STRING_LITERAL); return (context_p->token.lit_location.length == 10 - && !context_p->token.lit_location.has_escape + && !(context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE) && memcmp (context_p->token.lit_location.char_p, "use strict", 10) == 0); } /* lexer_string_is_use_strict */ @@ -3649,7 +3656,7 @@ lexer_token_is_let (parser_context_t *context_p) /**< context */ JERRY_ASSERT (context_p->token.type == 
LEXER_LITERAL); return (context_p->token.keyword_type == LEXER_KEYW_LET - && !context_p->token.lit_location.has_escape); + && !(context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE)); } /* lexer_token_is_let */ /** @@ -3667,7 +3674,7 @@ lexer_token_is_async (parser_context_t *context_p) /**< context */ || context_p->token.type == LEXER_TEMPLATE_LITERAL); return (context_p->token.keyword_type == LEXER_KEYW_ASYNC - && !context_p->token.lit_location.has_escape); + && !(context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE)); } /* lexer_token_is_async */ #endif /* JERRY_ESNEXT */ diff --git a/jerry-core/parser/js/js-lexer.h b/jerry-core/parser/js/js-lexer.h index 85c87f28e..e9f948cb5 100644 --- a/jerry-core/parser/js/js-lexer.h +++ b/jerry-core/parser/js/js-lexer.h @@ -305,6 +305,16 @@ typedef enum #endif /* JERRY_BUILTIN_BIGINT */ } lexer_number_type_t; +/** + * Lexer literal flags. + **/ +typedef enum +{ + LEXER_LIT_LOCATION_NO_OPTS = 0, /**< no options */ + LEXER_LIT_LOCATION_HAS_ESCAPE = (1 << 0), /**< binding has escape */ + LEXER_LIT_LOCATION_IS_ASCII = (1 << 1), /**< all characters are ascii characters */ +} lexer_lit_location_flags_t; + /** * Lexer character (string / identifier) literal data. 
*/ @@ -313,7 +323,7 @@ typedef struct const uint8_t *char_p; /**< start of identifier or string token */ prop_length_t length; /**< length or index of a literal */ uint8_t type; /**< type of the current literal */ - uint8_t has_escape; /**< has escape sequences */ + uint8_t status_flags; /**< any combination of lexer_lit_location_flags_t status bits */ } lexer_lit_location_t; /** diff --git a/jerry-core/parser/js/js-parser-expr.c b/jerry-core/parser/js/js-parser-expr.c index e6570b835..0088b20fe 100644 --- a/jerry-core/parser/js/js-parser-expr.c +++ b/jerry-core/parser/js/js-parser-expr.c @@ -1922,7 +1922,7 @@ parser_parse_unary_expression (parser_context_t *context_p, /**< context */ } #endif /* JERRY_MODULE_SYSTEM */ - if (JERRY_UNLIKELY (context_p->token.lit_location.has_escape)) + if (JERRY_UNLIKELY (context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE)) { parser_raise_error (context_p, PARSER_ERR_INVALID_KEYWORD); } @@ -2281,7 +2281,7 @@ parser_parse_unary_expression (parser_context_t *context_p, /**< context */ JERRY_ASSERT ((context_p->status_flags & PARSER_IS_GENERATOR_FUNCTION) && !(context_p->status_flags & PARSER_DISALLOW_AWAIT_YIELD)); - if (context_p->token.lit_location.has_escape) + if (context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE) { parser_raise_error (context_p, PARSER_ERR_INVALID_KEYWORD); } diff --git a/jerry-core/parser/js/js-parser-internal.h b/jerry-core/parser/js/js-parser-internal.h index be9d99b6c..237051d82 100644 --- a/jerry-core/parser/js/js-parser-internal.h +++ b/jerry-core/parser/js/js-parser-internal.h @@ -724,6 +724,7 @@ void parser_emit_cbc_forward_branch (parser_context_t *context_p, uint16_t opcod parser_branch_node_t *parser_emit_cbc_forward_branch_item (parser_context_t *context_p, uint16_t opcode, parser_branch_node_t *next_p); void parser_emit_cbc_backward_branch (parser_context_t *context_p, uint16_t opcode, uint32_t offset); +ecma_string_t 
*parser_new_ecma_string_from_literal (lexer_literal_t *literal_p); void parser_set_branch_to_current_position (parser_context_t *context_p, parser_branch_t *branch_p); void parser_set_breaks_to_current_position (parser_context_t *context_p, parser_branch_node_t *current_p); void parser_set_continues_to_current_position (parser_context_t *context_p, parser_branch_node_t *current_p); diff --git a/jerry-core/parser/js/js-parser-module.c b/jerry-core/parser/js/js-parser-module.c index 31ccffb4b..3a7189e44 100644 --- a/jerry-core/parser/js/js-parser-module.c +++ b/jerry-core/parser/js/js-parser-module.c @@ -30,7 +30,7 @@ */ const lexer_lit_location_t lexer_default_literal = { - (const uint8_t *) "*default*", 9, LEXER_IDENT_LITERAL, false + (const uint8_t *) "*default*", 9, LEXER_IDENT_LITERAL, LEXER_LIT_LOCATION_IS_ASCII }; /** @@ -89,9 +89,7 @@ parser_module_append_export_name (parser_context_t *context_p) /**< parser conte } context_p->module_identifier_lit_p = context_p->lit_object.literal_p; - - ecma_string_t *name_p = ecma_new_ecma_string_from_utf8 (context_p->lit_object.literal_p->u.char_p, - context_p->lit_object.literal_p->prop.length); + ecma_string_t *name_p = parser_new_ecma_string_from_literal (context_p->lit_object.literal_p); if (parser_module_check_duplicate_export (context_p, name_p)) { @@ -250,13 +248,11 @@ parser_module_parse_export_clause (parser_context_t *context_p) /**< parser cont lexer_next_token (context_p); } - lexer_literal_t *literal_p = PARSER_GET_LITERAL (local_name_index); - local_name_p = ecma_new_ecma_string_from_utf8 (literal_p->u.char_p, literal_p->prop.length); + local_name_p = parser_new_ecma_string_from_literal (PARSER_GET_LITERAL (local_name_index)); if (export_name_index != PARSER_MAXIMUM_NUMBER_OF_LITERALS) { - lexer_literal_t *as_literal_p = PARSER_GET_LITERAL (export_name_index); - export_name_p = ecma_new_ecma_string_from_utf8 (as_literal_p->u.char_p, as_literal_p->prop.length); + export_name_p = 
parser_new_ecma_string_from_literal (PARSER_GET_LITERAL (export_name_index)); } else { @@ -353,13 +349,11 @@ parser_module_parse_import_clause (parser_context_t *context_p) /**< parser cont lexer_next_token (context_p); } - lexer_literal_t *literal_p = PARSER_GET_LITERAL (import_name_index); - import_name_p = ecma_new_ecma_string_from_utf8 (literal_p->u.char_p, literal_p->prop.length); + import_name_p = parser_new_ecma_string_from_literal (PARSER_GET_LITERAL (import_name_index)); if (local_name_index != PARSER_MAXIMUM_NUMBER_OF_LITERALS) { - lexer_literal_t *as_literal_p = PARSER_GET_LITERAL (local_name_index); - local_name_p = ecma_new_ecma_string_from_utf8 (as_literal_p->u.char_p, as_literal_p->prop.length); + local_name_p = parser_new_ecma_string_from_literal (PARSER_GET_LITERAL (local_name_index)); } else { @@ -460,7 +454,7 @@ parser_module_handle_module_specifier (parser_context_t *context_p, /**< parser lexer_next_token (context_p); /* The lexer_next_token may throw an error, so the path is constructed after its call. 
*/ - ecma_string_t *path_string_p = ecma_new_ecma_string_from_utf8 (path_p->u.char_p, path_p->prop.length); + ecma_string_t *path_string_p = parser_new_ecma_string_from_literal (path_p); ecma_module_node_t *node_p = JERRY_CONTEXT (module_current_p)->imports_p; ecma_module_node_t *last_node_p = NULL; diff --git a/jerry-core/parser/js/js-parser-statm.c b/jerry-core/parser/js/js-parser-statm.c index ccc789fc0..bd69b6966 100644 --- a/jerry-core/parser/js/js-parser-statm.c +++ b/jerry-core/parser/js/js-parser-statm.c @@ -1174,7 +1174,7 @@ parser_parse_for_statement_start (parser_context_t *context_p) /**< context */ if (context_p->token.type == LEXER_KEYW_AWAIT) { - if (JERRY_UNLIKELY (context_p->token.lit_location.has_escape)) + if (JERRY_UNLIKELY (context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE)) { parser_raise_error (context_p, PARSER_ERR_INVALID_KEYWORD); } @@ -1188,7 +1188,7 @@ parser_parse_for_statement_start (parser_context_t *context_p) /**< context */ #if JERRY_ESNEXT if (context_p->token.type == LEXER_LITERAL && context_p->token.keyword_type == LEXER_KEYW_AWAIT - && !context_p->token.lit_location.has_escape) + && !(context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE)) { parser_raise_error (context_p, PARSER_ERR_FOR_AWAIT_NO_ASYNC); } @@ -2447,8 +2447,7 @@ parser_parse_import_statement (parser_context_t *context_p) /**< parser context /* Handle ImportedDefaultBinding */ lexer_construct_literal_object (context_p, &context_p->token.lit_location, LEXER_IDENT_LITERAL); - ecma_string_t *local_name_p = ecma_new_ecma_string_from_utf8 (context_p->lit_object.literal_p->u.char_p, - context_p->lit_object.literal_p->prop.length); + ecma_string_t *local_name_p = parser_new_ecma_string_from_literal (context_p->lit_object.literal_p); if (parser_module_check_duplicate_import (context_p, local_name_p)) { @@ -2496,8 +2495,7 @@ parser_parse_import_statement (parser_context_t *context_p) /**< parser context 
lexer_construct_literal_object (context_p, &context_p->token.lit_location, LEXER_IDENT_LITERAL); - ecma_string_t *local_name_p = ecma_new_ecma_string_from_utf8 (context_p->lit_object.literal_p->u.char_p, - context_p->lit_object.literal_p->prop.length); + ecma_string_t *local_name_p = parser_new_ecma_string_from_literal (context_p->lit_object.literal_p); if (parser_module_check_duplicate_import (context_p, local_name_p)) { @@ -2598,8 +2596,8 @@ parser_parse_export_statement (parser_context_t *context_p) /**< context */ parser_parse_expression_statement (context_p, PARSE_EXPR_NO_COMMA | PARSE_EXPR_HAS_LITERAL); } - ecma_string_t *name_p = ecma_new_ecma_string_from_utf8 (context_p->module_identifier_lit_p->u.char_p, - context_p->module_identifier_lit_p->prop.length); + ecma_string_t *name_p = parser_new_ecma_string_from_literal (context_p->module_identifier_lit_p); + ecma_string_t *export_name_p = ecma_get_magic_string (LIT_MAGIC_STRING_DEFAULT); if (parser_module_check_duplicate_export (context_p, export_name_p)) @@ -2637,8 +2635,7 @@ parser_parse_export_statement (parser_context_t *context_p) /**< context */ lexer_construct_literal_object (context_p, &context_p->token.lit_location, LEXER_NEW_IDENT_LITERAL); lexer_literal_t *literal_p = PARSER_GET_LITERAL (context_p->lit_object.index); - ecma_string_t *export_name_p = ecma_new_ecma_string_from_utf8 (literal_p->u.char_p, - literal_p->prop.length); + ecma_string_t *export_name_p = parser_new_ecma_string_from_literal (literal_p); if (parser_module_check_duplicate_export (context_p, export_name_p)) { diff --git a/jerry-core/parser/js/js-parser-tagged-template-literal.c b/jerry-core/parser/js/js-parser-tagged-template-literal.c index 439818f3b..560a3e581 100644 --- a/jerry-core/parser/js/js-parser-tagged-template-literal.c +++ b/jerry-core/parser/js/js-parser-tagged-template-literal.c @@ -43,7 +43,7 @@ parser_tagged_template_literal_append_strings (parser_context_t *context_p, /**< { lexer_lit_location_t *lit_loc_p = 
&context_p->token.lit_location; - if (lit_loc_p->length == 0 && !lit_loc_p->has_escape) + if (lit_loc_p->length == 0 && !(lit_loc_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE)) { ecma_builtin_helper_def_prop_by_index (template_obj_p, prop_idx, @@ -64,10 +64,13 @@ parser_tagged_template_literal_append_strings (parser_context_t *context_p, /**< LEXER_STRING_NO_OPTS); ecma_string_t *raw_str_p; - ecma_string_t *cooked_str_p = ecma_new_ecma_string_from_utf8 (source_p, lit_loc_p->length); + ecma_string_t *cooked_str_p = ((lit_loc_p->status_flags & LEXER_FLAG_ASCII) + ? ecma_new_ecma_string_from_ascii (source_p, lit_loc_p->length) + : ecma_new_ecma_string_from_utf8 (source_p, lit_loc_p->length)); + parser_free_allocated_buffer (context_p); - if (lit_loc_p->has_escape) + if (lit_loc_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE) { context_p->source_p = context_p->token.lit_location.char_p - 1; lexer_parse_string (context_p, LEXER_STRING_RAW); @@ -76,7 +79,10 @@ parser_tagged_template_literal_append_strings (parser_context_t *context_p, /**< local_byte_array, LEXER_STRING_RAW); - raw_str_p = ecma_new_ecma_string_from_utf8 (source_p, lit_loc_p->length); + raw_str_p = ((lit_loc_p->status_flags & LEXER_FLAG_ASCII) + ? ecma_new_ecma_string_from_ascii (source_p, lit_loc_p->length) + : ecma_new_ecma_string_from_utf8 (source_p, lit_loc_p->length)); + parser_free_allocated_buffer (context_p); } else diff --git a/jerry-core/parser/js/js-parser-util.c b/jerry-core/parser/js/js-parser-util.c index ce0819cd4..3b590f0e9 100644 --- a/jerry-core/parser/js/js-parser-util.c +++ b/jerry-core/parser/js/js-parser-util.c @@ -14,6 +14,7 @@ */ #include "js-parser-internal.h" +#include "ecma-helpers.h" #if JERRY_PARSER @@ -650,6 +651,30 @@ parser_emit_cbc_backward_branch (parser_context_t *context_p, /**< context */ #undef PARSER_CHECK_LAST_POSITION #undef PARSER_APPEND_TO_BYTE_CODE +/** + * Helper function for parser. + * + * @return a new string based on encode. 
+ */ +ecma_string_t * +parser_new_ecma_string_from_literal (lexer_literal_t *literal_p) /**< literal */ +{ + JERRY_ASSERT (literal_p != NULL); + + ecma_string_t *new_string = NULL; + + if (literal_p->status_flags & LEXER_FLAG_ASCII) + { + new_string = ecma_new_ecma_string_from_ascii (literal_p->u.char_p, literal_p->prop.length); + } + else + { + new_string = ecma_new_ecma_string_from_utf8 (literal_p->u.char_p, literal_p->prop.length); + } + + return new_string; +} /* parser_new_ecma_string_from_literal */ + /** * Set a branch to the current byte code position */ diff --git a/jerry-core/parser/js/js-parser.c b/jerry-core/parser/js/js-parser.c index 4ba85031a..5fa044800 100644 --- a/jerry-core/parser/js/js-parser.c +++ b/jerry-core/parser/js/js-parser.c @@ -130,7 +130,8 @@ parser_compute_indicies (parser_context_t *context_p, /**< context */ if (char_p != NULL) { literal_p->u.value = ecma_find_or_create_literal_string (char_p, - literal_p->prop.length); + literal_p->prop.length, + (literal_p->status_flags & LEXER_FLAG_ASCII) != 0); if (!(literal_p->status_flags & LEXER_FLAG_SOURCE_PTR)) { @@ -1274,7 +1275,8 @@ parser_post_processing (parser_context_t *context_p) /**< context */ uint32_t source_data = literal_p->u.source_data; const uint8_t *char_p = context_p->source_end_p - (source_data & 0xfffff); ecma_value_t lit_value = ecma_find_or_create_literal_string (char_p, - source_data >> 20); + source_data >> 20, + (literal_p->status_flags & LEXER_FLAG_ASCII) != 0); literal_pool_p[literal_p->prop.index] = lit_value; } } @@ -2896,7 +2898,9 @@ parser_compiled_code_set_function_name (parser_context_t *context_p, /**< contex memcpy (name_buffer_p + 4, name_lit_p->u.char_p, name_lit_p->prop.length); } - *func_name_start_p = ecma_find_or_create_literal_string (name_buffer_p, name_length); + *func_name_start_p = ecma_find_or_create_literal_string (name_buffer_p, + name_length, + (name_lit_p->status_flags & LEXER_FLAG_ASCII) != 0); if (name_buffer_p != name_lit_p->u.char_p) { diff --git 
a/jerry-core/parser/js/js-scanner-util.c b/jerry-core/parser/js/js-scanner-util.c index dd1b5e61e..b3df249f5 100644 --- a/jerry-core/parser/js/js-scanner-util.c +++ b/jerry-core/parser/js/js-scanner-util.c @@ -406,9 +406,9 @@ scanner_scope_find_lexical_declaration (parser_context_t *context_p, /**< contex return false; } - if (JERRY_LIKELY (!literal_p->has_escape)) + if (JERRY_LIKELY (!(literal_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE))) { name_p = ecma_new_ecma_string_from_utf8 (literal_p->char_p, literal_p->length); } else { @@ -416,7 +416,8 @@ scanner_scope_find_lexical_declaration (parser_context_t *context_p, /**< contex lexer_convert_ident_to_cesu8 (destination_p, literal_p->char_p, literal_p->length); name_p = ecma_new_ecma_string_from_utf8 (destination_p, literal_p->length); + scanner_free (destination_p, literal_p->length); } @@ -1114,7 +1115,7 @@ scanner_pop_literal_pool (parser_context_t *context_p, /**< context */ } #endif /* JERRY_ESNEXT */ - if (literal_p->has_escape) + if (literal_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE) { type |= SCANNER_STREAM_HAS_ESCAPE; } @@ -1384,13 +1385,13 @@ scanner_add_custom_literal (parser_context_t *context_p, /**< context */ const uint8_t *char_p = literal_location_p->char_p; prop_length_t length = literal_location_p->length; - if (JERRY_LIKELY (!literal_location_p->has_escape)) + if (JERRY_LIKELY (!(literal_location_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE))) { while ((literal_p = (lexer_lit_location_t *) parser_list_iterator_next (&literal_iterator)) != NULL) { if (literal_p->length == length) { - if (JERRY_LIKELY (!literal_p->has_escape)) + if (JERRY_LIKELY (!(literal_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE))) { if (memcmp (literal_p->char_p, char_p, length) == 0) { @@ -1401,7 +1402,7 @@ scanner_add_custom_literal 
(parser_context_t *context_p, /**< context */ { /* The non-escaped version is preferred. */ literal_p->char_p = char_p; - literal_p->has_escape = 0; + literal_p->status_flags = LEXER_LIT_LOCATION_NO_OPTS; return literal_p; } } @@ -1494,13 +1495,13 @@ scanner_append_argument (parser_context_t *context_p, /**< context */ JERRY_ASSERT (SCANNER_LITERAL_POOL_MAY_HAVE_ARGUMENTS (literal_pool_p->status_flags)); - if (JERRY_LIKELY (!context_p->token.lit_location.has_escape)) + if (JERRY_LIKELY (!(context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE))) { while ((literal_p = (lexer_lit_location_t *) parser_list_iterator_next (&literal_iterator)) != NULL) { if (literal_p->length == length) { - if (JERRY_LIKELY (!literal_p->has_escape)) + if (JERRY_LIKELY (!(literal_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE))) { if (memcmp (literal_p->char_p, char_p, length) == 0) { @@ -1603,7 +1604,7 @@ scanner_detect_invalid_var (parser_context_t *context_p, /**< context */ parser_list_iterator_init (&literal_pool_p->literal_pool, &literal_iterator); lexer_lit_location_t *literal_p; - if (JERRY_LIKELY (!context_p->token.lit_location.has_escape)) + if (JERRY_LIKELY (!(context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE))) { while ((literal_p = (lexer_lit_location_t *) parser_list_iterator_next (&literal_iterator)) != NULL) { @@ -1614,7 +1615,7 @@ scanner_detect_invalid_var (parser_context_t *context_p, /**< context */ && (literal_p->type & SCANNER_LITERAL_IS_LOCAL) != SCANNER_LITERAL_IS_LOCAL && literal_p->length == length) { - if (JERRY_LIKELY (!literal_p->has_escape)) + if (JERRY_LIKELY (!(literal_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE))) { if (memcmp (literal_p->char_p, char_p, length) == 0) { @@ -1862,7 +1863,7 @@ scanner_append_hole (parser_context_t *context_p, scanner_context_t *scanner_con literal_p->char_p = NULL; literal_p->length = 0; literal_p->type = SCANNER_LITERAL_IS_ARG; - literal_p->has_escape = 0; + 
literal_p->status_flags = LEXER_LIT_LOCATION_NO_OPTS; } /* scanner_append_hole */ #endif /* JERRY_ESNEXT */ @@ -2203,7 +2204,7 @@ scanner_try_scan_new_target (parser_context_t *context_p) /**< parser/scanner co */ const lexer_lit_location_t lexer_arguments_literal = { - (const uint8_t *) "arguments", 9, LEXER_IDENT_LITERAL, false + (const uint8_t *) "arguments", 9, LEXER_IDENT_LITERAL, LEXER_LIT_LOCATION_IS_ASCII }; /** @@ -2279,7 +2280,8 @@ scanner_check_variables (parser_context_t *context_p) /**< context */ literal.length = data_p[1]; literal.type = LEXER_IDENT_LITERAL; - literal.has_escape = (data_p[0] & SCANNER_STREAM_HAS_ESCAPE) ? 1 : 0; + literal.status_flags = ((data_p[0] & SCANNER_STREAM_HAS_ESCAPE) ? LEXER_LIT_LOCATION_HAS_ESCAPE + : LEXER_LIT_LOCATION_NO_OPTS); lexer_construct_literal_object (context_p, &literal, LEXER_NEW_IDENT_LITERAL); literal.char_p += data_p[1]; @@ -2524,7 +2526,8 @@ scanner_create_variables (parser_context_t *context_p, /**< context */ { literal.length = data_p[1]; literal.type = LEXER_IDENT_LITERAL; - literal.has_escape = (data_p[0] & SCANNER_STREAM_HAS_ESCAPE) ? 1 : 0; + literal.status_flags = ((data_p[0] & SCANNER_STREAM_HAS_ESCAPE) ? LEXER_LIT_LOCATION_HAS_ESCAPE + : LEXER_LIT_LOCATION_NO_OPTS); /* Literal must be exists. */ lexer_construct_literal_object (context_p, &literal, LEXER_IDENT_LITERAL); @@ -2549,7 +2552,8 @@ scanner_create_variables (parser_context_t *context_p, /**< context */ literal.length = data_p[1]; literal.type = LEXER_IDENT_LITERAL; - literal.has_escape = (data_p[0] & SCANNER_STREAM_HAS_ESCAPE) ? 1 : 0; + literal.status_flags = ((data_p[0] & SCANNER_STREAM_HAS_ESCAPE) ? 
LEXER_LIT_LOCATION_HAS_ESCAPE + : LEXER_LIT_LOCATION_NO_OPTS); lexer_construct_literal_object (context_p, &literal, LEXER_NEW_IDENT_LITERAL); literal.char_p += data_p[1]; diff --git a/tests/unit-core/test-literal-storage.c b/tests/unit-core/test-literal-storage.c index c471c03f6..2d3ff0b34 100644 --- a/tests/unit-core/test-literal-storage.c +++ b/tests/unit-core/test-literal-storage.c @@ -78,7 +78,7 @@ main (void) { lengths[j] = (lit_utf8_size_t) (rand () % max_characters_in_string + 1); generate_string (strings[j], lengths[j]); - ecma_find_or_create_literal_string (strings[j], lengths[j]); + ecma_find_or_create_literal_string (strings[j], lengths[j], false); strings[j][lengths[j]] = '\0'; ptrs[j] = strings[j]; TEST_ASSERT (ptrs[j]); @@ -89,7 +89,7 @@ main (void) ptrs[j] = lit_get_magic_string_utf8 (msi); TEST_ASSERT (ptrs[j]); lengths[j] = (lit_utf8_size_t) lit_zt_utf8_string_size (ptrs[j]); - ecma_find_or_create_literal_string (ptrs[j], lengths[j]); + ecma_find_or_create_literal_string (ptrs[j], lengths[j], false); } else { @@ -100,7 +100,7 @@ main (void) } /* Add empty string. */ - ecma_find_or_create_literal_string (NULL, 0); + ecma_find_or_create_literal_string (NULL, 0, false); for (uint32_t j = 0; j < test_sub_iters; j++) { @@ -108,8 +108,8 @@ main (void) ecma_value_t lit2; if (ptrs[j]) { - lit1 = ecma_find_or_create_literal_string (ptrs[j], lengths[j]); - lit2 = ecma_find_or_create_literal_string (ptrs[j], lengths[j]); + lit1 = ecma_find_or_create_literal_string (ptrs[j], lengths[j], false); + lit2 = ecma_find_or_create_literal_string (ptrs[j], lengths[j], false); TEST_ASSERT (ecma_is_value_string (lit1)); TEST_ASSERT (ecma_is_value_string (lit2)); TEST_ASSERT (lit1 == lit2); @@ -125,7 +125,7 @@ main (void) } /* Check empty string exists. 
*/ - TEST_ASSERT (ecma_find_or_create_literal_string (NULL, 0) != JMEM_CP_NULL); + TEST_ASSERT (ecma_find_or_create_literal_string (NULL, 0, false) != JMEM_CP_NULL); } ecma_finalize_lit_storage (); diff --git a/tests/unit-core/test-stringbuilder.c b/tests/unit-core/test-stringbuilder.c index 7aa4eab30..99a1defb3 100644 --- a/tests/unit-core/test-stringbuilder.c +++ b/tests/unit-core/test-stringbuilder.c @@ -35,7 +35,7 @@ main (void) ecma_stringbuilder_append_raw (&builder, string_data, sizeof (string_data) - 1); ecma_string_t *result_p = ecma_stringbuilder_finalize (&builder); - ecma_string_t *str_p = ecma_new_ecma_string_from_utf8 (string_data, sizeof (string_data) - 1); + ecma_string_t *str_p = ecma_new_ecma_string_from_ascii (string_data, sizeof (string_data) - 1); TEST_ASSERT (ecma_compare_ecma_strings (result_p, str_p)); ecma_deref_ecma_string (result_p); ecma_deref_ecma_string (str_p); @@ -57,7 +57,7 @@ main (void) ecma_stringbuilder_append_char (&builder, LIT_CHAR_LOWERCASE_A); ecma_string_t *result_p = ecma_stringbuilder_finalize (&builder); - ecma_string_t *str_p = ecma_new_ecma_string_from_utf8 (string_data, sizeof (string_data) - 1); + ecma_string_t *str_p = ecma_new_ecma_string_from_ascii (string_data, sizeof (string_data) - 1); TEST_ASSERT (ecma_compare_ecma_strings (result_p, str_p)); ecma_deref_ecma_string (result_p); ecma_deref_ecma_string (str_p); @@ -65,7 +65,7 @@ main (void) { static const lit_utf8_byte_t string_data[] = "A simple string"; - ecma_string_t *str_p = ecma_new_ecma_string_from_utf8 (string_data, sizeof (string_data) - 1); + ecma_string_t *str_p = ecma_new_ecma_string_from_ascii (string_data, sizeof (string_data) - 1); ecma_stringbuilder_t builder = ecma_stringbuilder_create (); ecma_stringbuilder_append (&builder, str_p); @@ -94,7 +94,7 @@ main (void) ecma_stringbuilder_append_char (&builder, LIT_CHAR_LOWERCASE_C); ecma_string_t *result_p = ecma_stringbuilder_finalize (&builder); - ecma_string_t *str_p = 
ecma_new_ecma_string_from_utf8 (string_data, sizeof (string_data) - 1); + ecma_string_t *str_p = ecma_new_ecma_string_from_ascii (string_data, sizeof (string_data) - 1); TEST_ASSERT (ecma_compare_ecma_strings (result_p, str_p)); ecma_deref_ecma_string (result_p); ecma_deref_ecma_string (str_p); @@ -125,7 +125,7 @@ main (void) ecma_string_t *result_p = ecma_stringbuilder_finalize (&builder); static const lit_utf8_byte_t expected_data[] = "1abc234string"; - ecma_string_t *str_p = ecma_new_ecma_string_from_utf8 (expected_data, sizeof (expected_data) - 1); + ecma_string_t *str_p = ecma_new_ecma_string_from_ascii (expected_data, sizeof (expected_data) - 1); TEST_ASSERT (ecma_compare_ecma_strings (result_p, str_p)); ecma_deref_ecma_string (result_p); ecma_deref_ecma_string (str_p); @@ -177,14 +177,14 @@ main (void) ecma_stringbuilder_append_char (&builder, LIT_CHAR_1); ecma_stringbuilder_append_raw (&builder, string_data, sizeof (string_data) - 1); - ecma_string_t *another_string = ecma_new_ecma_string_from_utf8 (string_data, sizeof (string_data) - 1); + ecma_string_t *another_string = ecma_new_ecma_string_from_ascii (string_data, sizeof (string_data) - 1); ecma_stringbuilder_append (&builder, uint_str_p); ecma_stringbuilder_append_magic (&builder, LIT_MAGIC_STRING_STRING); ecma_string_t *result_p = ecma_stringbuilder_finalize (&builder); static const lit_utf8_byte_t expected_data[] = "1abc234string"; - ecma_string_t *str_p = ecma_new_ecma_string_from_utf8 (expected_data, sizeof (expected_data) - 1); + ecma_string_t *str_p = ecma_new_ecma_string_from_ascii (expected_data, sizeof (expected_data) - 1); TEST_ASSERT (ecma_compare_ecma_strings (result_p, str_p)); ecma_deref_ecma_string (result_p); ecma_deref_ecma_string (str_p); @@ -201,7 +201,7 @@ main (void) ecma_string_t *result_p = ecma_stringbuilder_finalize (&builder); static const lit_utf8_byte_t expected_data[] = "234abcstring"; - ecma_string_t *str_p = ecma_new_ecma_string_from_utf8 (expected_data, sizeof 
(expected_data) - 1); + ecma_string_t *str_p = ecma_new_ecma_string_from_ascii (expected_data, sizeof (expected_data) - 1); TEST_ASSERT (ecma_compare_ecma_strings (result_p, str_p)); ecma_deref_ecma_string (result_p); ecma_deref_ecma_string (str_p);