diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.c b/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.c index 4dc768072..b6c25f9c9 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.c @@ -27,6 +27,7 @@ #include "ecma-objects.h" #include "ecma-try-catch-macro.h" #include "lit-magic-strings.h" +#include "lit-char-helpers.h" /** \addtogroup ecma ECMA * @{ @@ -839,6 +840,197 @@ ecma_builtin_helper_def_prop (ecma_object_t *obj_p, /**< object */ &prop_desc); } /* ecma_builtin_helper_def_prop */ +/** + * GetSubstitution abstract operation + * + * Appends the replacement string of the context to the context's string builder, + * expanding the "$$", "$&", "$`", "$'" and "$n" / "$nn" patterns on the way. + * Any other "$" sequence is copied unchanged. + * + * See: + * ECMA-262 v6.0 21.1.3.14.1 + */ +void +ecma_builtin_replace_substitute (ecma_replace_context_t *ctx_p) /**< replace context */ +{ + JERRY_ASSERT (ctx_p->string_p != NULL); + JERRY_ASSERT (ctx_p->matched_p == NULL + || (ctx_p->matched_p >= ctx_p->string_p + && ctx_p->matched_p <= ctx_p->string_p + ctx_p->string_size)); + + lit_utf8_size_t replace_size; + uint8_t replace_flags = ECMA_STRING_FLAG_IS_ASCII; + const lit_utf8_byte_t *replace_buf_p = ecma_string_get_chars (ctx_p->replace_str_p, + &replace_size, + NULL, + NULL, + &replace_flags); + + const lit_utf8_byte_t *const replace_end_p = replace_buf_p + replace_size; + const lit_utf8_byte_t *curr_p = replace_buf_p; + const lit_utf8_byte_t *last_inserted_end_p = replace_buf_p; + + while (curr_p < replace_end_p) + { + if (*curr_p++ == LIT_CHAR_DOLLAR_SIGN) + { + ecma_stringbuilder_append_raw (&(ctx_p->builder), + last_inserted_end_p, + (lit_utf8_size_t) (curr_p - last_inserted_end_p - 1)); + /* A trailing '$' is copied literally by the append that follows the loop. */ + if (curr_p >= replace_end_p) + { + last_inserted_end_p = curr_p - 1; + break; + } + + const lit_utf8_byte_t c = *curr_p++; + + switch (c) + { + case LIT_CHAR_DOLLAR_SIGN: + { + ecma_stringbuilder_append_byte (&(ctx_p->builder), LIT_CHAR_DOLLAR_SIGN); + break; + } + case LIT_CHAR_AMPERSAND: + { +#if ENABLED (JERRY_ES2015) + if (JERRY_UNLIKELY (ctx_p->matched_p == NULL)) + { + JERRY_ASSERT 
(ctx_p->capture_count == 0); + JERRY_ASSERT (ctx_p->u.collection_p != NULL); + JERRY_ASSERT (ctx_p->u.collection_p->item_count > 0); + const ecma_value_t match_value = ctx_p->u.collection_p->buffer_p[0]; + + JERRY_ASSERT (ecma_is_value_string (match_value)); + ecma_stringbuilder_append (&(ctx_p->builder), ecma_get_string_from_value (match_value)); + break; + } +#endif /* ENABLED (JERRY_ES2015) */ + + JERRY_ASSERT (ctx_p->matched_p != NULL); + ecma_stringbuilder_append_raw (&(ctx_p->builder), ctx_p->matched_p, ctx_p->matched_size); + break; + } + case LIT_CHAR_GRAVE_ACCENT: + { + ecma_stringbuilder_append_raw (&(ctx_p->builder), ctx_p->string_p, ctx_p->match_byte_pos); + break; + } + case LIT_CHAR_SINGLE_QUOTE: + { +#if ENABLED (JERRY_ES2015) + if (JERRY_UNLIKELY (ctx_p->matched_p == NULL)) + { + JERRY_ASSERT (ctx_p->capture_count == 0); + JERRY_ASSERT (ctx_p->u.collection_p != NULL); + JERRY_ASSERT (ctx_p->u.collection_p->item_count > 0); + const ecma_value_t match_value = ctx_p->u.collection_p->buffer_p[0]; + + JERRY_ASSERT (ecma_is_value_string (match_value)); + const ecma_string_t *const matched_p = ecma_get_string_from_value (match_value); + const lit_utf8_size_t match_size = ecma_string_get_size (matched_p); + /* The matched string is taken from the result collection, so it is not guaranteed to fit into + * the source string: a match ending at or beyond the end of the source has nothing after it. */ + if (ctx_p->match_byte_pos <= ctx_p->string_size + && match_size < ctx_p->string_size - ctx_p->match_byte_pos) + { + const lit_utf8_byte_t *const begin_p = ctx_p->string_p + ctx_p->match_byte_pos + match_size; + + ecma_stringbuilder_append_raw (&(ctx_p->builder), + begin_p, + (lit_utf8_size_t) (ctx_p->string_p + ctx_p->string_size - begin_p)); + } + break; + } +#endif /* ENABLED (JERRY_ES2015) */ + + JERRY_ASSERT (ctx_p->matched_p != NULL); + ecma_stringbuilder_append_raw (&(ctx_p->builder), + ctx_p->matched_p + ctx_p->matched_size, + ctx_p->string_size - ctx_p->match_byte_pos - ctx_p->matched_size); + break; + } + default: + { + const lit_utf8_byte_t *const number_begin_p = curr_p - 1; + + if (lit_char_is_decimal_digit (c)) + { + uint32_t capture_count = ctx_p->capture_count; +#if ENABLED (JERRY_ES2015) + if (capture_count == 0 && ctx_p->u.collection_p != NULL) + { + 
capture_count = ctx_p->u.collection_p->item_count; + } +#endif /* ENABLED (JERRY_ES2015) */ + + uint8_t idx = (uint8_t) (c - LIT_CHAR_0); + if (curr_p < replace_end_p && lit_char_is_decimal_digit (*(curr_p))) + { + uint8_t two_digit_index = (uint8_t) (idx * 10 + (uint8_t) (*(curr_p) - LIT_CHAR_0)); + if (two_digit_index < capture_count) + { + idx = two_digit_index; + curr_p++; + } + } + + if (idx > 0 && idx < capture_count) + { + if (ctx_p->capture_count > 0) + { +#if ENABLED (JERRY_BUILTIN_REGEXP) + JERRY_ASSERT (ctx_p->u.captures_p != NULL); + const ecma_regexp_capture_t *const capture_p = ctx_p->u.captures_p + idx; + + if (ECMA_RE_IS_CAPTURE_DEFINED (capture_p)) + { + ecma_stringbuilder_append_raw (&(ctx_p->builder), + capture_p->begin_p, + (lit_utf8_size_t) (capture_p->end_p - capture_p->begin_p)); + } + + break; +#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */ + } +#if ENABLED (JERRY_ES2015) + else if (ctx_p->u.collection_p != NULL) + { + const ecma_value_t capture_value = ctx_p->u.collection_p->buffer_p[idx]; + if (!ecma_is_value_undefined (capture_value)) + { + ecma_stringbuilder_append (&(ctx_p->builder), ecma_get_string_from_value (capture_value)); + } + + break; + } +#endif /* ENABLED (JERRY_ES2015) */ + } + } + + /* Not a substitution pattern: keep the '$' and rescan from the character that follows it. */ + ecma_stringbuilder_append_byte (&(ctx_p->builder), LIT_CHAR_DOLLAR_SIGN); + curr_p = number_begin_p; + break; + } + } + + last_inserted_end_p = curr_p; + } + } + + ecma_stringbuilder_append_raw (&(ctx_p->builder), + last_inserted_end_p, + (lit_utf8_size_t) (replace_end_p - last_inserted_end_p)); + + if (replace_flags & ECMA_STRING_FLAG_MUST_BE_FREED) + { + jmem_heap_free_block ((void *) replace_buf_p, replace_size); + } +} /* ecma_builtin_replace_substitute */ + /** * @} * @} diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.h b/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.h index 5e8327396..10c7ab960 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.h +++ 
b/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.h @@ -18,6 +18,8 @@ #include "ecma-globals.h" #include "ecma-exceptions.h" +#include "ecma-helpers.h" +#include "ecma-regexp-object.h" /** \addtogroup ecma ECMA * @{ @@ -63,6 +65,41 @@ ecma_builtin_helper_def_prop (ecma_object_t *obj_p, ecma_string_t *index_p, ecma ecma_value_t ecma_builtin_helper_def_prop_by_index (ecma_object_t *obj_p, uint32_t index, ecma_value_t value, uint32_t opts); +/** + * Context for replace substitutions + */ +typedef struct +{ + ecma_stringbuilder_t builder; /**< result string builder */ + const lit_utf8_byte_t *string_p; /**< source string */ + lit_utf8_size_t string_size; /**< source string size */ + const lit_utf8_byte_t *matched_p; /**< matched string, NULL if the match is stored in the collection */ + lit_utf8_size_t matched_size; /**< matched string size */ + lit_utf8_size_t match_byte_pos; /**< byte position of the match in the source string */ + + /** + * Capture results + */ + union + { +#if ENABLED (JERRY_BUILTIN_REGEXP) + const ecma_regexp_capture_t *captures_p; /**< array of regexp capturing groups */ +#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */ + const ecma_collection_t *collection_p; /**< collection of captured substrings */ + } u; + + uint32_t capture_count; /**< number of captures in the capturing group array */ + ecma_string_t *replace_str_p; /**< replacement string */ +} ecma_replace_context_t; + +void +ecma_builtin_replace_substitute (ecma_replace_context_t *ctx_p); + +#if ENABLED (JERRY_ES2015) +bool +ecma_builtin_is_regexp_exec (ecma_extended_object_t *obj_p); +#endif /* ENABLED (JERRY_ES2015) */ + #if ENABLED (JERRY_BUILTIN_DATE) /** diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c index d5ca38fdb..eb6c8b07c 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c @@ -16,11 +16,15 @@ #include "ecma-alloc.h" #include 
"ecma-array-object.h" #include "ecma-builtins.h" +#include "ecma-builtin-helpers.h" #include "ecma-conversion.h" #include "ecma-exceptions.h" +#include "ecma-function-object.h" #include "ecma-globals.h" +#include "ecma-gc.h" #include "ecma-helpers.h" #include "ecma-objects.h" +#include "ecma-regexp-object.h" #include "ecma-try-catch-macro.h" #include "lit-char-helpers.h" @@ -469,6 +473,38 @@ ecma_builtin_regexp_prototype_to_string (ecma_value_t this_arg) /**< this argume return ecma_make_string_value (ecma_stringbuilder_finalize (&result)); } /* ecma_builtin_regexp_prototype_to_string */ +#if ENABLED (JERRY_ES2015) +/** + * Helper function to determine if method is the builtin exec method + * + * @return true, if the object is a builtin whose routine id is that of the builtin exec method + * false, otherwise + */ +inline bool JERRY_ATTR_ALWAYS_INLINE +ecma_builtin_is_regexp_exec (ecma_extended_object_t *obj_p) /**< object to check */ +{ + return (ecma_get_object_is_builtin ((ecma_object_t *) obj_p) + && obj_p->u.built_in.routine_id == ECMA_ROUTINE_LIT_MAGIC_STRING_EXECecma_builtin_regexp_prototype_exec); +} /* ecma_builtin_is_regexp_exec */ + +/** + * The RegExp.prototype object's '@@replace' routine + * + * See also: + * ECMA-262 v6.0, 21.2.5.8 + * + * @return ecma value + * Returned value must be freed with ecma_free_value. 
+ */ +static ecma_value_t +ecma_builtin_regexp_prototype_symbol_replace (ecma_value_t this_arg, /**< this argument */ + ecma_value_t string_arg, /**< source string */ + ecma_value_t replace_arg) /**< replace value, forwarded unchanged to the helper */ +{ + return ecma_regexp_replace_helper (this_arg, string_arg, replace_arg); +} /* ecma_builtin_regexp_prototype_symbol_replace */ +#endif /* ENABLED (JERRY_ES2015) */ + /** * @} * @} diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h index 33a1375bf..05f822d96 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h @@ -46,6 +46,8 @@ ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_IGNORECASE_UL, ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_MULTILINE, ecma_builtin_regexp_prototype_get_multiline, ECMA_PROPERTY_FIXED) + +ROUTINE (LIT_GLOBAL_SYMBOL_REPLACE, ecma_builtin_regexp_prototype_symbol_replace, 2, 2) #else /* !ENABLED (JERRY_ES2015) */ /* ECMA-262 v5, 15.10.7.1 */ STRING_VALUE (LIT_MAGIC_STRING_SOURCE, diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.c b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.c index cfa6df617..403593e84 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.c @@ -492,653 +492,196 @@ cleanup: return ret_value; } /* ecma_builtin_string_prototype_object_match */ -/** - * This structure is the context which represents - * the state of the ongoing string replace. - */ -typedef struct -{ - /* General part. 
*/ - bool is_regexp; /**< whether we search a regexp or string */ - bool is_global; /**< global search or not */ - bool is_replace_callable; /**< replace part is callable or not */ - ecma_value_t input_string; /**< input string */ - ecma_length_t input_length; /**< input string length */ - ecma_value_t regexp_or_search_string; /**< regular expression or search string - * depending on the value of is_regexp */ - ecma_length_t match_start; /**< starting position of the match */ - ecma_length_t match_end; /**< end position of the match */ - - /* Replace value callable part. */ - ecma_object_t *replace_function_p; /**< replace function */ - - /* Replace value string part. */ - ecma_string_t *replace_string_p; /**< replace string */ - lit_utf8_byte_t *replace_str_curr_p; /**< replace string iterator */ -} ecma_builtin_replace_search_ctx_t; - -/** - * Generic helper function to append a substring at the end of a base string - * - * @return the constructed string - */ -static ecma_string_t * -ecma_builtin_string_prototype_object_replace_append_substr (ecma_string_t *base_string_p, /**< base string */ - ecma_string_t *appended_string_p, /**< appended string */ - ecma_length_t start, /**< start position */ - ecma_length_t end) /**< end position */ -{ - JERRY_ASSERT (start <= end); - JERRY_ASSERT (end <= ecma_string_get_length (appended_string_p)); - - if (start < end) - { - ecma_string_t *substring_p = ecma_string_substr (appended_string_p, start, end); - - base_string_p = ecma_concat_ecma_strings (base_string_p, substring_p); - - ecma_deref_ecma_string (substring_p); - } - - return base_string_p; -} /* ecma_builtin_string_prototype_object_replace_append_substr */ - -/** - * Generic helper function to perform the find the next match - * - * @return ecma value - * Returned value must be freed with ecma_free_value. 
- */ -static ecma_value_t -ecma_builtin_string_prototype_object_replace_match (ecma_builtin_replace_search_ctx_t *context_p) /**< search - * context */ -{ - context_p->match_start = 0; - context_p->match_end = 0; - - if (context_p->is_regexp) - { - ecma_value_t match_value = ecma_regexp_exec_helper (context_p->regexp_or_search_string, - context_p->input_string, - false); - - if (ECMA_IS_VALUE_ERROR (match_value)) - { - return match_value; - } - - if (ecma_is_value_null (match_value)) - { - return match_value; - } - - JERRY_ASSERT (ecma_is_value_object (match_value)); - - ecma_object_t *match_object_p = ecma_get_object_from_value (match_value); - - ecma_value_t index_value = ecma_op_object_get_by_magic_id (match_object_p, LIT_MAGIC_STRING_INDEX); - - if (ECMA_IS_VALUE_ERROR (index_value)) - { - ecma_deref_object (match_object_p); - return index_value; - } - - JERRY_ASSERT (ecma_is_value_number (index_value)); - ecma_value_t result_string_value = ecma_op_object_get_by_uint32_index (match_object_p, 0); - - if (ECMA_IS_VALUE_ERROR (result_string_value)) - { - ecma_free_number (index_value); - ecma_deref_object (match_object_p); - return result_string_value; - } - - /* We directly call the built-in exec, so - * we can trust in the returned value. */ - - JERRY_ASSERT (ecma_is_value_string (result_string_value)); - - /* We use the length of the result string to determine the - * match end. This works regardless the global flag is set. 
*/ - ecma_string_t *result_string_p = ecma_get_string_from_value (result_string_value); - ecma_number_t index_number = ecma_get_number_from_value (index_value); - - context_p->match_start = (ecma_length_t) (index_number); - context_p->match_end = context_p->match_start + (ecma_length_t) ecma_string_get_length (result_string_p); - - JERRY_ASSERT ((ecma_length_t) ecma_number_to_uint32 (index_number) == context_p->match_start); - - ecma_deref_ecma_string (result_string_p); - ecma_free_number (index_value); - - return match_value; - } - - JERRY_ASSERT (!context_p->is_global); - - ecma_string_t *search_string_p = ecma_get_string_from_value (context_p->regexp_or_search_string); - ecma_string_t *input_string_p = ecma_get_string_from_value (context_p->input_string); - - ecma_length_t index_of = 0; - if (ecma_builtin_helper_string_find_index (input_string_p, search_string_p, true, 0, &index_of)) - { - ecma_value_t arguments_list_p[1] = { context_p->regexp_or_search_string }; - - context_p->match_start = index_of; - context_p->match_end = index_of + ecma_string_get_length (search_string_p); - - return ecma_op_create_array_object (arguments_list_p, 1, false); - } - - return ECMA_VALUE_NULL; -} /* ecma_builtin_string_prototype_object_replace_match */ - -/** - * Generic helper function to construct the string which replaces the matched part - * - * @return ecma value - * Returned value must be freed with ecma_free_value. 
- */ -static ecma_value_t -ecma_builtin_string_prototype_object_replace_get_string (ecma_builtin_replace_search_ctx_t *context_p, /**< search - * context */ - ecma_value_t match_value) /**< returned match value */ -{ - ecma_object_t *match_object_p = ecma_get_object_from_value (match_value); - JERRY_ASSERT (ecma_get_object_type (match_object_p) == ECMA_OBJECT_TYPE_ARRAY); - - ecma_length_t match_length = ecma_array_get_length (match_object_p); - - JERRY_ASSERT (match_length >= 1); - - if (context_p->is_replace_callable) - { - ecma_value_t ret_value = ECMA_VALUE_ERROR; - JMEM_DEFINE_LOCAL_ARRAY (arguments_list, - match_length + 2, - ecma_value_t); - - /* An error might occure during the array copy and - * uninitalized elements must not be freed. */ - ecma_length_t values_copied = 0; - - for (ecma_length_t i = 0; i < match_length ;i++) - { - ecma_value_t current_value = ecma_op_object_get_by_uint32_index (match_object_p, i); - - if (ECMA_IS_VALUE_ERROR (current_value)) - { - goto cleanup; - } - - arguments_list[i] = current_value; - values_copied++; - } - - arguments_list[match_length] = ecma_make_uint32_value (context_p->match_start); - arguments_list[match_length + 1] = ecma_copy_value (context_p->input_string); - - ecma_value_t result_value = ecma_op_function_call (context_p->replace_function_p, - ECMA_VALUE_UNDEFINED, - arguments_list, - match_length + 2); - - ecma_free_value (arguments_list[match_length]); - ecma_free_value (arguments_list[match_length + 1]); - - if (ECMA_IS_VALUE_ERROR (result_value)) - { - goto cleanup; - } - - ecma_string_t *to_string_p = ecma_op_to_string (result_value); - ecma_free_value (result_value); - - if (JERRY_LIKELY (to_string_p != NULL)) - { - ret_value = ecma_make_string_value (to_string_p); - } - -cleanup: - for (ecma_length_t i = 0; i < values_copied; i++) - { - ecma_free_value (arguments_list[i]); - } - - JMEM_FINALIZE_LOCAL_ARRAY (arguments_list); - - return ret_value; - } - - /* Although the ECMA standard does not specify how 
$nn (n is a decimal - * number) captures should be replaced if nn is greater than the maximum - * capture index, we follow the test-262 expected behaviour: - * - * if maximum capture index is < 10 - * we replace only those $n and $0n captures, where n < maximum capture index - * otherwise - * we replace only those $nn captures, where nn < maximum capture index - * - * other $n $nn sequences left unchanged - * - * example: "".replace(/(x)y/, "$1,$2,$01,$12") === "" - */ - - ecma_string_t *result_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY); - - ecma_length_t previous_start = 0; - ecma_length_t current_position = 0; - - lit_utf8_byte_t *replace_str_curr_p = context_p->replace_str_curr_p; - lit_utf8_byte_t *replace_str_end_p = replace_str_curr_p + ecma_string_get_size (context_p->replace_string_p); - - while (replace_str_curr_p < replace_str_end_p) - { - ecma_char_t action = LIT_CHAR_NULL; - - if (*replace_str_curr_p != LIT_CHAR_DOLLAR_SIGN) - { - /* if not a continuation byte */ - if ((*replace_str_curr_p & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER) - { - current_position++; - } - replace_str_curr_p++; - continue; - } - - replace_str_curr_p++; - - if (replace_str_curr_p < replace_str_end_p) - { - action = *replace_str_curr_p; - - if (action == LIT_CHAR_DOLLAR_SIGN) - { - current_position++; - } - else if (action >= LIT_CHAR_0 && action <= LIT_CHAR_9) - { - uint32_t index = 0; - - index = (uint32_t) (action - LIT_CHAR_0); - - if (index >= match_length) - { - action = LIT_CHAR_NULL; - } - else if (index == 0 || match_length > 10) - { - replace_str_curr_p++; - - if (replace_str_curr_p < replace_str_end_p) - { - ecma_char_t next_character = *replace_str_curr_p; - - if (next_character >= LIT_CHAR_0 && next_character <= LIT_CHAR_9) - { - uint32_t full_index = index * 10 + (uint32_t) (next_character - LIT_CHAR_0); - if (full_index > 0 && full_index < match_length) - { - index = match_length; - } - } - } - - replace_str_curr_p--; - - if (index == 
0) - { - action = LIT_CHAR_NULL; - } - } - } - else if (action != LIT_CHAR_AMPERSAND - && action != LIT_CHAR_GRAVE_ACCENT - && action != LIT_CHAR_SINGLE_QUOTE) - { - action = LIT_CHAR_NULL; - } - } - - if (action != LIT_CHAR_NULL) - { - result_string_p = ecma_builtin_string_prototype_object_replace_append_substr (result_string_p, - context_p->replace_string_p, - previous_start, - current_position); - replace_str_curr_p++; - current_position++; - - if (action == LIT_CHAR_DOLLAR_SIGN) - { - current_position--; - } - else if (action == LIT_CHAR_GRAVE_ACCENT) - { - ecma_string_t *input_string_p = ecma_get_string_from_value (context_p->input_string); - result_string_p = ecma_builtin_string_prototype_object_replace_append_substr (result_string_p, - input_string_p, - 0, - context_p->match_start); - } - else if (action == LIT_CHAR_SINGLE_QUOTE) - { - ecma_string_t *input_string_p = ecma_get_string_from_value (context_p->input_string); - result_string_p = ecma_builtin_string_prototype_object_replace_append_substr (result_string_p, - input_string_p, - context_p->match_end, - context_p->input_length); - } - else - { - /* Everything else is submatch reading. 
*/ - uint32_t index = 0; - - JERRY_ASSERT (action == LIT_CHAR_AMPERSAND || (action >= LIT_CHAR_0 && action <= LIT_CHAR_9)); - - if (action >= LIT_CHAR_0 && action <= LIT_CHAR_9) - { - index = (uint32_t) (action - LIT_CHAR_0); - - if ((match_length > 10 || index == 0) - && replace_str_curr_p < replace_str_end_p) - { - action = *replace_str_curr_p; - if (action >= LIT_CHAR_0 && action <= LIT_CHAR_9) - { - uint32_t full_index = index * 10 + (uint32_t) (action - LIT_CHAR_0); - if (full_index < match_length) - { - index = full_index; - replace_str_curr_p++; - current_position++; - } - } - } - JERRY_ASSERT (index > 0 && index < match_length); - } - - ecma_value_t submatch_value = ecma_op_object_get_by_uint32_index (match_object_p, index); - - if (ECMA_IS_VALUE_ERROR (submatch_value)) - { - return submatch_value; - } - - /* Undefined values are converted to empty string. */ - if (!ecma_is_value_undefined (submatch_value)) - { - JERRY_ASSERT (ecma_is_value_string (submatch_value)); - ecma_string_t *submatch_string_p = ecma_get_string_from_value (submatch_value); - - result_string_p = ecma_concat_ecma_strings (result_string_p, submatch_string_p); - ecma_free_value (submatch_value); - } - } - - current_position++; - previous_start = current_position; - } - else - { - current_position++; - } - } - - result_string_p = ecma_builtin_string_prototype_object_replace_append_substr (result_string_p, - context_p->replace_string_p, - previous_start, - current_position); - - return ecma_make_string_value (result_string_p); -} /* ecma_builtin_string_prototype_object_replace_get_string */ - -/** - * Generic helper function to do the string replace - * - * @return ecma value - * Returned value must be freed with ecma_free_value. 
- */ -static ecma_value_t -ecma_builtin_string_prototype_object_replace_loop (ecma_builtin_replace_search_ctx_t *context_p) /**< search - * context */ -{ - ecma_length_t previous_start = 0; - - ecma_string_t *result_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY); - ecma_string_t *input_string_p = ecma_get_string_from_value (context_p->input_string); - - while (true) - { - ecma_value_t match_value = ecma_builtin_string_prototype_object_replace_match (context_p); - - if (ECMA_IS_VALUE_ERROR (match_value)) - { - break; - } - - if (!ecma_is_value_null (match_value)) - { - result_string_p = ecma_builtin_string_prototype_object_replace_append_substr (result_string_p, - input_string_p, - previous_start, - context_p->match_start); - - ecma_value_t string_value = ecma_builtin_string_prototype_object_replace_get_string (context_p, match_value); - - if (ECMA_IS_VALUE_ERROR (string_value)) - { - ecma_free_value (match_value); - break; - } - - JERRY_ASSERT (ecma_is_value_string (string_value)); - - ecma_string_t *string_p = ecma_get_string_from_value (string_value); - - result_string_p = ecma_concat_ecma_strings (result_string_p, string_p); - - ecma_deref_ecma_string (string_p); - - previous_start = context_p->match_end; - - if (context_p->is_global - && context_p->match_start == context_p->match_end) - { - JERRY_ASSERT (context_p->is_regexp); - - if (context_p->match_end == context_p->input_length) - { - /* Aborts the match. 
*/ - context_p->is_global = false; - } - else - { - ecma_object_t *regexp_obj_p = ecma_get_object_from_value (context_p->regexp_or_search_string); - - ecma_value_t put_value = ecma_op_object_put (regexp_obj_p, - ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), - ecma_make_uint32_value (context_p->match_end + 1), - true); - - JERRY_ASSERT (ecma_is_value_boolean (put_value) - || ecma_is_value_empty (put_value) - || ECMA_IS_VALUE_ERROR (put_value)); - - if (ECMA_IS_VALUE_ERROR (put_value)) - { - ecma_free_value (match_value); - break; - } - } - } - } - - ecma_free_value (match_value); - - if (!context_p->is_global || ecma_is_value_null (match_value)) - { - /* No more matches */ - ecma_string_t *appended_string_p; - - appended_string_p = ecma_builtin_string_prototype_object_replace_append_substr (result_string_p, - input_string_p, - previous_start, - context_p->input_length); - return ecma_make_string_value (appended_string_p); - } - } - - ecma_deref_ecma_string (result_string_p); - - return ECMA_VALUE_ERROR; -} /* ecma_builtin_string_prototype_object_replace_loop */ - -/** - * Generic helper function to check whether the search value is callable. - * If it is not, the function converts the search value to string. The - * appropriate fields of the context were filled as well and the search - * loop is run afterwards. - * - * @return ecma value - * Returned value must be freed with ecma_free_value. 
- */ -static ecma_value_t -ecma_builtin_string_prototype_object_replace_main (ecma_builtin_replace_search_ctx_t *context_p, /**< search - * context */ - ecma_value_t replace_value) /**< replacement for a match */ -{ - if (ecma_op_is_callable (replace_value)) - { - context_p->is_replace_callable = true; - context_p->replace_function_p = ecma_get_object_from_value (replace_value); - - return ecma_builtin_string_prototype_object_replace_loop (context_p); - } - - - context_p->is_replace_callable = false; - - ecma_string_t *replace_string_p = ecma_op_to_string (replace_value); - - if (JERRY_UNLIKELY (replace_string_p == NULL)) - { - return ECMA_VALUE_ERROR; - } - - ecma_value_t ret_value; - - ECMA_STRING_TO_UTF8_STRING (replace_string_p, replace_start_p, replace_start_size); - - context_p->replace_string_p = replace_string_p; - context_p->replace_str_curr_p = (lit_utf8_byte_t *) replace_start_p; - - ret_value = ecma_builtin_string_prototype_object_replace_loop (context_p); - - ECMA_FINALIZE_UTF8_STRING (replace_start_p, replace_start_size); - ecma_deref_ecma_string (replace_string_p); - - return ret_value; -} /* ecma_builtin_string_prototype_object_replace_main */ - /** * The String.prototype object's 'replace' routine * - * The replace algorithm is splitted into several helper functions. - * - * To share data between these helper functions, we created a - * structure called ecma_builtin_replace_search_ctx_t, which - * represents the current state of the replace. 
- * - * The helper functions are called in the following order: - * - * 1) ecma_builtin_string_prototype_object_replace is called - * it initialise the context depending on search_value (regexp or string) - * 2) ecma_builtin_string_prototype_object_replace_main is called - * it initialise the context depending on replace_value (callable or string) - * 3) ecma_builtin_string_prototype_object_replace_loop is called - * this function has a loop which repeatedly calls - * - ecma_builtin_string_prototype_object_replace_match - * which performs a match - * - ecma_builtin_string_prototype_object_replace_get_string - * which computes the replacement string - * - * The final string is created from several string fragments appended - * together by ecma_builtin_string_prototype_object_replace_append_substr. - * * See also: * ECMA-262 v5, 15.5.4.11 + * ECMA-262 v6, 21.1.3.14 * * @return ecma value * Returned value must be freed with ecma_free_value. */ static ecma_value_t -ecma_builtin_string_prototype_object_replace (ecma_value_t to_string_value, /**< this argument */ +ecma_builtin_string_prototype_object_replace (ecma_value_t this_value, /**< this argument */ ecma_value_t search_value, /**< routine's first argument */ ecma_value_t replace_value) /**< routine's second argument */ { - ecma_builtin_replace_search_ctx_t context; +#if ENABLED (JERRY_ES2015) + if (!(ecma_is_value_undefined (search_value) || ecma_is_value_null (search_value))) + { + ecma_object_t *obj_p = ecma_get_object_from_value (ecma_op_to_object (search_value)); + ecma_value_t replace_symbol = ecma_op_object_get_by_symbol_id (obj_p, LIT_MAGIC_STRING_REPLACE); + ecma_deref_object (obj_p); + if (ECMA_IS_VALUE_ERROR (replace_symbol)) + { + return replace_symbol; + } + + if (!ecma_is_value_undefined (replace_symbol) && !ecma_is_value_null (replace_symbol)) + { + if (!ecma_op_is_callable (replace_symbol)) + { + ecma_free_value (replace_symbol); + return ecma_raise_type_error (ECMA_ERR_MSG ("@@replace is not 
callable")); + } + + ecma_object_t *replace_method = ecma_get_object_from_value (replace_symbol); + + ecma_value_t arguments[] = { this_value, replace_value }; + ecma_value_t replace_result = ecma_op_function_call (replace_method, search_value, arguments, 2); + + ecma_deref_object (replace_method); + return replace_result; + } + } +#else /* !ENABLED (JERRY_ES2015) */ if (ecma_is_value_object (search_value) && ecma_object_class_is (ecma_get_object_from_value (search_value), LIT_MAGIC_STRING_REGEXP_UL)) { - ecma_object_t *regexp_obj_p = ecma_get_object_from_value (search_value); + return ecma_regexp_replace_helper (search_value, this_value, replace_value); + } +#endif /* ENABLED (JERRY_ES2015) */ - ecma_value_t global_value = ecma_op_object_get_by_magic_id (regexp_obj_p, LIT_MAGIC_STRING_GLOBAL); + ecma_string_t *input_str_p = ecma_get_string_from_value (this_value); - if (ECMA_IS_VALUE_ERROR (global_value)) + ecma_value_t result = ECMA_VALUE_ERROR; + + ecma_string_t *search_str_p = ecma_op_to_string (search_value); + if (search_str_p == NULL) + { + return result; + } + + ecma_replace_context_t replace_ctx; + replace_ctx.capture_count = 0; + replace_ctx.u.collection_p = NULL; /* the union member that is declared in every configuration */ + + replace_ctx.replace_str_p = NULL; /* stays NULL when the replace value is callable */ + if (!ecma_op_is_callable (replace_value)) + { + replace_ctx.replace_str_p = ecma_op_to_string (replace_value); + if (replace_ctx.replace_str_p == NULL) { - return global_value; + goto cleanup_search; } + } - JERRY_ASSERT (ecma_is_value_boolean (global_value)); + uint8_t input_flags = ECMA_STRING_FLAG_IS_ASCII; + replace_ctx.string_p = ecma_string_get_chars (input_str_p, + &(replace_ctx.string_size), + NULL, + NULL, + &input_flags); - context.is_regexp = true; - context.is_global = ecma_is_value_true (global_value); - context.input_string = to_string_value; - context.input_length = ecma_string_get_length (ecma_get_string_from_value (to_string_value)); - context.regexp_or_search_string = search_value; + lit_utf8_size_t search_size; + uint8_t search_flags = 
ECMA_STRING_FLAG_IS_ASCII; + const lit_utf8_byte_t *search_buf_p = ecma_string_get_chars (search_str_p, + &search_size, + NULL, + NULL, + &search_flags); - if (context.is_global) + ecma_string_t *result_string_p = NULL; + + if (replace_ctx.string_size >= search_size) + { + replace_ctx.matched_size = search_size; + const lit_utf8_byte_t *const input_end_p = replace_ctx.string_p + replace_ctx.string_size; + const lit_utf8_byte_t *const loop_end_p = input_end_p - search_size; + + uint32_t pos = 0; /* match position passed to a callable replace value */ + for (const lit_utf8_byte_t *curr_p = replace_ctx.string_p; + curr_p <= loop_end_p; + lit_utf8_incr (&curr_p), pos++) { - ecma_value_t put_value = ecma_op_object_put (regexp_obj_p, - ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), - ecma_make_integer_value (0), - true); - - JERRY_ASSERT (ecma_is_value_boolean (put_value) - || ecma_is_value_empty (put_value) - || ECMA_IS_VALUE_ERROR (put_value)); - - if (ECMA_IS_VALUE_ERROR (put_value)) + if (!memcmp (curr_p, search_buf_p, search_size)) { - return put_value; + replace_ctx.builder = ecma_stringbuilder_create (); + const lit_utf8_size_t byte_offset = (lit_utf8_size_t) (curr_p - replace_ctx.string_p); + ecma_stringbuilder_append_raw (&replace_ctx.builder, replace_ctx.string_p, byte_offset); + + if (replace_ctx.replace_str_p == NULL) + { + ecma_object_t *function_p = ecma_get_object_from_value (replace_value); + + ecma_value_t args[] = + { + ecma_make_string_value (search_str_p), + ecma_make_uint32_value (pos), + ecma_make_string_value (input_str_p) + }; + + result = ecma_op_function_call (function_p, + ECMA_VALUE_UNDEFINED, + args, + 3); + + if (ECMA_IS_VALUE_ERROR (result)) + { + ecma_stringbuilder_destroy (&replace_ctx.builder); + goto cleanup_replace; /* the string buffers acquired above must be released as well */ + } + + ecma_string_t *const result_str_p = ecma_op_to_string (result); + ecma_free_value (result); + + if (result_str_p == NULL) + { + ecma_stringbuilder_destroy (&replace_ctx.builder); + result = ECMA_VALUE_ERROR; + goto cleanup_replace; + } + + 
ecma_stringbuilder_append (&replace_ctx.builder, result_str_p); + ecma_deref_ecma_string (result_str_p); + } + else + { + replace_ctx.matched_p = curr_p; + replace_ctx.match_byte_pos = byte_offset; + + ecma_builtin_replace_substitute (&replace_ctx); + } + + const lit_utf8_byte_t *const match_end_p = curr_p + search_size; + ecma_stringbuilder_append_raw (&replace_ctx.builder, + match_end_p, + (lit_utf8_size_t) (input_end_p - match_end_p)); + result_string_p = ecma_stringbuilder_finalize (&replace_ctx.builder); + break; } } - - return ecma_builtin_string_prototype_object_replace_main (&context, replace_value); } - ecma_string_t *to_string_search_p = ecma_op_to_string (search_value); - - if (JERRY_UNLIKELY (to_string_search_p == NULL)) + if (result_string_p == NULL) /* no match: the result is the input string itself */ { - return ECMA_VALUE_ERROR; + ecma_ref_ecma_string (input_str_p); + result_string_p = input_str_p; } - context.is_regexp = false; - context.is_global = false; - context.input_string = to_string_value; - context.input_length = ecma_string_get_length (ecma_get_string_from_value (to_string_value)); - context.regexp_or_search_string = ecma_make_string_value (to_string_search_p); + result = ecma_make_string_value (result_string_p); - ecma_value_t ret_value = ecma_builtin_string_prototype_object_replace_main (&context, replace_value); +cleanup_replace: + if (input_flags & ECMA_STRING_FLAG_MUST_BE_FREED) + { + jmem_heap_free_block ((void *) replace_ctx.string_p, replace_ctx.string_size); + } - ecma_deref_ecma_string (to_string_search_p); + if (search_flags & ECMA_STRING_FLAG_MUST_BE_FREED) + { + jmem_heap_free_block ((void *) search_buf_p, search_size); + } - return ret_value; + if (replace_ctx.replace_str_p != NULL) + { + ecma_deref_ecma_string (replace_ctx.replace_str_p); + } + +cleanup_search: + ecma_deref_ecma_string (search_str_p); + return result; } /* ecma_builtin_string_prototype_object_replace */ /** diff --git a/jerry-core/ecma/operations/ecma-regexp-object.c 
b/jerry-core/ecma/operations/ecma-regexp-object.c index d2debe616..803323161 100644 --- a/jerry-core/ecma/operations/ecma-regexp-object.c +++ b/jerry-core/ecma/operations/ecma-regexp-object.c @@ -15,11 +15,13 @@ #include "ecma-alloc.h" #include "ecma-array-object.h" +#include "ecma-builtins.h" #include "ecma-builtin-helpers.h" #include "ecma-exceptions.h" #include "ecma-gc.h" #include "ecma-globals.h" #include "ecma-objects.h" +#include "ecma-function-object.h" #include "ecma-regexp-object.h" #include "ecma-try-catch-macro.h" #include "jcontext.h" @@ -1016,36 +1018,46 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */ } } /* ecma_regexp_match */ +/* + * Helper function to get the result of a capture + * + * @return string value, if capture is defined + * undefined, otherwise + */ +ecma_value_t +ecma_regexp_get_capture_value (const ecma_regexp_capture_t *const capture_p) /**< capture */ +{ + if (ECMA_RE_IS_CAPTURE_DEFINED (capture_p)) + { + const lit_utf8_size_t capture_size = (lit_utf8_size_t) (capture_p->end_p - capture_p->begin_p); + ecma_string_t *const capture_str_p = ecma_new_ecma_string_from_utf8 (capture_p->begin_p, capture_size); + return ecma_make_string_value (capture_str_p); + } + + return ECMA_VALUE_UNDEFINED; +} /* ecma_regexp_get_capture_value */ + +/** + * Helper function to create a result array from the captures in a regexp context + * + * @return ecma value containing the created array object + */ static ecma_value_t -ecma_regexp_create_result_object (ecma_regexp_ctx_t *re_ctx_p, - ecma_string_t *input_string_p, - uint32_t index) +ecma_regexp_create_result_object (ecma_regexp_ctx_t *re_ctx_p, /**< regexp context */ + ecma_string_t *input_string_p, /**< input ecma string */ + uint32_t index) /**< match index */ { ecma_value_t result_array = ecma_op_create_array_object (0, 0, false); ecma_object_t *result_p = ecma_get_object_from_value (result_array); for (uint32_t i = 0; i < re_ctx_p->captures_count; i++) { - const 
ecma_regexp_capture_t capture = re_ctx_p->captures_p[i]; - - if (capture.begin_p != NULL && capture.end_p >= capture.begin_p) - { - const lit_utf8_size_t capture_size = (lit_utf8_size_t) (capture.end_p - capture.begin_p); - ecma_string_t *const capture_str_p = ecma_new_ecma_string_from_utf8 (capture.begin_p, capture_size); - const ecma_value_t capture_value = ecma_make_string_value (capture_str_p); - ecma_builtin_helper_def_prop_by_index (result_p, - i, - capture_value, - ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE); - ecma_deref_ecma_string (capture_str_p); - } - else - { - ecma_builtin_helper_def_prop_by_index (result_p, - i, - ECMA_VALUE_UNDEFINED, - ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE); - } + ecma_value_t capture_value = ecma_regexp_get_capture_value (re_ctx_p->captures_p + i); + ecma_builtin_helper_def_prop_by_index (result_p, + i, + capture_value, + ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE); + ecma_free_value (capture_value); } ecma_builtin_helper_def_prop (result_p, @@ -1061,6 +1073,55 @@ ecma_regexp_create_result_object (ecma_regexp_ctx_t *re_ctx_p, return result_array; } /* ecma_regexp_create_result_object */ +/** + * Helper function to initialize a regexp match context + */ +static void +ecma_regexp_initialize_context (ecma_regexp_ctx_t *ctx_p, /**< regexp context */ + const re_compiled_code_t *bc_p, /**< regexp bytecode */ + const lit_utf8_byte_t *input_start_p, /**< pointer to input string */ + const lit_utf8_byte_t *input_end_p) /**< pointer to end of input string */ +{ + JERRY_ASSERT (ctx_p != NULL); + JERRY_ASSERT (bc_p != NULL); + JERRY_ASSERT (input_start_p != NULL); + JERRY_ASSERT (input_end_p >= input_start_p); + + ctx_p->input_start_p = input_start_p; + ctx_p->input_end_p = input_end_p; + + ctx_p->captures_count = bc_p->captures_count; + ctx_p->captures_p = jmem_heap_alloc_block (ctx_p->captures_count * sizeof (ecma_regexp_capture_t)); + memset (ctx_p->captures_p, 0, ctx_p->captures_count * sizeof (ecma_regexp_capture_t)); + 
+ ctx_p->non_captures_count = bc_p->non_captures_count; + ctx_p->non_captures_p = jmem_heap_alloc_block (ctx_p->non_captures_count * sizeof (ecma_regexp_non_capture_t)); + memset (ctx_p->non_captures_p, 0, ctx_p->non_captures_count * sizeof (ecma_regexp_non_capture_t)); + + const uint32_t iters_length = ctx_p->captures_count + ctx_p->non_captures_count - 1; + ctx_p->iterations_p = jmem_heap_alloc_block (iters_length * sizeof (uint32_t)); + memset (ctx_p->iterations_p, 0, iters_length * sizeof (uint32_t)); +} /* ecma_regexp_initialize_context */ + +/** + * Helper function to clean up a regexp context + */ +static void +ecma_regexp_cleanup_context (ecma_regexp_ctx_t *ctx_p) /**< regexp context */ +{ + JERRY_ASSERT (ctx_p != NULL); + jmem_heap_free_block (ctx_p->captures_p, ctx_p->captures_count * sizeof (ecma_regexp_capture_t)); + if (ctx_p->non_captures_p != NULL) + { + jmem_heap_free_block (ctx_p->non_captures_p, ctx_p->non_captures_count * sizeof (ecma_regexp_non_capture_t)); + } + if (ctx_p->iterations_p != NULL) + { + const uint32_t iters_length = ctx_p->captures_count + ctx_p->non_captures_count - 1; + jmem_heap_free_block (ctx_p->iterations_p, iters_length * sizeof (uint32_t)); + } +} /* ecma_regexp_cleanup_context */ + /** * RegExp helper function to start the recursive matching algorithm * and create the result Array object @@ -1191,31 +1252,21 @@ ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */ } } - re_ctx.input_start_p = input_buffer_p; - const lit_utf8_byte_t *input_end_p = re_ctx.input_start_p + input_size; - re_ctx.input_end_p = input_end_p; + const lit_utf8_byte_t *input_end_p = input_buffer_p + input_size; + ecma_regexp_initialize_context (&re_ctx, + bc_p, + input_buffer_p, + input_end_p); + + /* 2. 
Try to match */ + uint8_t *bc_start_p = (uint8_t *) (bc_p + 1); + const lit_utf8_byte_t *matched_p = NULL; JERRY_TRACE_MSG ("Exec with flags [global: %d, ignoreCase: %d, multiline: %d]\n", re_ctx.flags & RE_FLAG_GLOBAL, re_ctx.flags & RE_FLAG_IGNORE_CASE, re_ctx.flags & RE_FLAG_MULTILINE); - re_ctx.captures_count = bc_p->captures_count; - re_ctx.captures_p = jmem_heap_alloc_block (re_ctx.captures_count * sizeof (ecma_regexp_capture_t)); - memset (re_ctx.captures_p, 0, re_ctx.captures_count * sizeof (ecma_regexp_capture_t)); - - re_ctx.non_captures_count = bc_p->non_captures_count; - re_ctx.non_captures_p = jmem_heap_alloc_block (re_ctx.non_captures_count * sizeof (ecma_regexp_non_capture_t)); - memset (re_ctx.non_captures_p, 0, re_ctx.non_captures_count * sizeof (ecma_regexp_non_capture_t)); - - const uint32_t iters_length = re_ctx.captures_count + re_ctx.non_captures_count - 1; - re_ctx.iterations_p = jmem_heap_alloc_block (iters_length * sizeof (uint32_t)); - memset (re_ctx.iterations_p, 0, iters_length * sizeof (uint32_t)); - - /* 2. 
Try to match */ - uint8_t *bc_start_p = (uint8_t *) (bc_p + 1); - const lit_utf8_byte_t *matched_p = NULL; - JERRY_ASSERT (index <= input_length); while (true) { @@ -1295,15 +1346,7 @@ ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */ ret_value = ecma_regexp_create_result_object (&re_ctx, input_string_p, index); cleanup_context: - jmem_heap_free_block (re_ctx.captures_p, re_ctx.captures_count * sizeof (ecma_regexp_capture_t)); - if (re_ctx.non_captures_p != NULL) - { - jmem_heap_free_block (re_ctx.non_captures_p, re_ctx.non_captures_count * sizeof (ecma_regexp_non_capture_t)); - } - if (re_ctx.iterations_p != NULL) - { - jmem_heap_free_block (re_ctx.iterations_p, iters_length * sizeof (uint32_t)); - } + ecma_regexp_cleanup_context (&re_ctx); cleanup_string: if (input_flags & ECMA_STRING_FLAG_MUST_BE_FREED) @@ -1339,6 +1382,627 @@ ecma_regexp_read_pattern_str_helper (ecma_value_t pattern_arg) /**< the RegExp p return ecma_get_magic_string (LIT_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP); } /* ecma_regexp_read_pattern_str_helper */ +/** + * Fast path for RegExp based replace operation + * + * This method assumes the following: + * - The RegExp object is a built-in RegExp + * - The 'exec' method of the RegExp object is the built-in 'exec' method + * - The 'lastIndex' property is writable + * + * The standard would normally require us to first execute the regexp and collect the results, + * and after that iterate over the collected results and replace them. + * The assumptions above guarantee that during the matching phase there will be no exceptions thrown, + * which means we can do the match/replace in a single loop, without collecting the results. 
+ *
+ * @return string value if successful
+ *         thrown value otherwise
+ */
+static ecma_value_t
+ecma_regexp_replace_helper_fast (ecma_replace_context_t *ctx_p, /**< replace context */
+                                 const re_compiled_code_t *bc_p, /**< regexp bytecode */
+                                 ecma_string_t *string_p, /**< source string */
+                                 ecma_value_t replace_arg) /**< replace argument, called as a function
+                                                            *   when ctx_p->replace_str_p is NULL */
+{
+  /* NOTE(review): the parameter list above and the declarations up to the 'flags' assignment were
+   * restored from the call sites and from how the names are used below - confirm against upstream. */
+
+  /* Every exit path assigns 'result'; the initial value is only a safe default. */
+  ecma_value_t result = ECMA_VALUE_ERROR;
+
+  ecma_regexp_ctx_t re_ctx;
+  re_ctx.flags = bc_p->header.status_flags;
+
+  const lit_utf8_byte_t *const string_end_p = ctx_p->string_p + ctx_p->string_size;
+  ecma_regexp_initialize_context (&re_ctx,
+                                  bc_p,
+                                  ctx_p->string_p,
+                                  string_end_p);
+
+  /* The substitution helper reads the captures directly from the regexp context. */
+  ctx_p->builder = ecma_stringbuilder_create ();
+  ctx_p->capture_count = re_ctx.captures_count;
+  ctx_p->u.captures_p = re_ctx.captures_p;
+
+  const uint8_t *const bc_start_p = (const uint8_t *) (bc_p + 1);
+  const lit_utf8_byte_t *matched_p = NULL;
+  const lit_utf8_byte_t *current_p = ctx_p->string_p;
+  const lit_utf8_byte_t *last_append_p = current_p;
+  uint32_t index = 0;
+
+  while (true)
+  {
+    /* Reset every capture record before each attempt. The size is given in bytes, so it has to be
+     * scaled by the element size; clearing only 'captures_count' bytes leaves stale captures behind. */
+    memset (re_ctx.captures_p, 0, re_ctx.captures_count * sizeof (ecma_regexp_capture_t));
+    matched_p = ecma_regexp_match (&re_ctx, bc_start_p, current_p);
+
+    if (matched_p != NULL)
+    {
+      if (ECMA_RE_STACK_LIMIT_REACHED (matched_p))
+      {
+        result = ecma_raise_range_error (ECMA_ERR_MSG ("Stack limit exceeded."));
+        /* The builder has been created but not finalized, so it must be destroyed here as well. */
+        goto cleanup_builder;
+      }
+
+      /* Copy the unmatched part of the input between the previous match and this one. */
+      const lit_utf8_size_t remaining_size = (lit_utf8_size_t) (current_p - last_append_p);
+      ecma_stringbuilder_append_raw (&(ctx_p->builder), last_append_p, remaining_size);
+
+      if (ctx_p->replace_str_p != NULL)
+      {
+        ctx_p->matched_p = current_p;
+        const ecma_regexp_capture_t *const global_capture_p = re_ctx.captures_p;
+        ctx_p->matched_size = (lit_utf8_size_t) (global_capture_p->end_p - global_capture_p->begin_p);
+        ctx_p->match_byte_pos = (lit_utf8_size_t) (current_p - re_ctx.input_start_p);
+
+        ecma_builtin_replace_substitute (ctx_p);
+      }
+      else
+      {
+        /* Replacer function arguments: all captures, then the match index, then the source string. */
+        ecma_collection_t *arguments_p = ecma_new_collection ();
+
+        for (uint32_t i = 0; i < re_ctx.captures_count; i++)
+        {
+          ecma_value_t capture = ecma_regexp_get_capture_value (re_ctx.captures_p + i);
+          ecma_collection_push_back (arguments_p, capture);
+        }
+
+        ecma_collection_push_back (arguments_p, ecma_make_uint32_value (index));
+        ecma_ref_ecma_string (string_p);
+        ecma_collection_push_back (arguments_p, ecma_make_string_value (string_p));
+        ecma_object_t *function_p = ecma_get_object_from_value (replace_arg);
+
+        result = ecma_op_function_call (function_p,
+                                        ECMA_VALUE_UNDEFINED,
+                                        arguments_p->buffer_p,
+                                        arguments_p->item_count);
+
+        ecma_collection_free (arguments_p);
+
+        if (ECMA_IS_VALUE_ERROR (result))
+        {
+          goto cleanup_builder;
+        }
+
+        /* 16.m.v */
+        ecma_string_t *const replace_result_p = ecma_op_to_string (result);
+        ecma_free_value (result);
+
+        if (replace_result_p == NULL)
+        {
+          result = ECMA_VALUE_ERROR;
+          goto cleanup_builder;
+        }
+
+        ecma_stringbuilder_append (&(ctx_p->builder), replace_result_p);
+        ecma_deref_ecma_string (replace_result_p);
+      }
+
+      const ecma_regexp_capture_t *global_capture_p = re_ctx.captures_p;
+      last_append_p = global_capture_p->end_p;
+
+      if (!(re_ctx.flags & RE_FLAG_GLOBAL))
+      {
+        break;
+      }
+
+      /* Matches longer than one byte jump straight to the end of the match; shorter (including
+       * empty) matches fall through and advance by a single character below. */
+      const lit_utf8_size_t matched_size = (lit_utf8_size_t) (global_capture_p->end_p - global_capture_p->begin_p);
+      if (matched_size > 1)
+      {
+        index += lit_utf8_string_length (current_p, matched_size);
+        current_p = last_append_p;
+        continue;
+      }
+    }
+
+    if (current_p >= string_end_p)
+    {
+      break;
+    }
+
+    index++;
+    lit_utf8_incr (&current_p);
+  }
+
+  /* Copy whatever is left of the input after the last replaced match. */
+  const lit_utf8_size_t trailing_size = (lit_utf8_size_t) (string_end_p - last_append_p);
+  ecma_stringbuilder_append_raw (&(ctx_p->builder), last_append_p, trailing_size);
+
+  result = ecma_make_string_value (ecma_stringbuilder_finalize (&(ctx_p->builder)));
+  goto cleanup_context;
+
+cleanup_builder:
+  ecma_stringbuilder_destroy (&(ctx_p->builder));
+
+cleanup_context:
+  ecma_regexp_cleanup_context (&re_ctx);
+
+  return result;
+} /* ecma_regexp_replace_helper_fast */
+
+/**
+ * Helper function for RegExp based replacing
+ *
+ * See also:
+ *          String.prototype.replace
+ *          RegExp.prototype[@@replace]
+ *
+ * @return result string of the replacement, if successful
+ *         error value, otherwise
+ */
+ecma_value_t
+ecma_regexp_replace_helper (ecma_value_t
this_arg, /**< this argument */ + ecma_value_t string_arg, /**< source string */ + ecma_value_t replace_arg) /**< replace string */ +{ + /* 2. */ + if (!ecma_is_value_object (this_arg)) + { + return ecma_raise_type_error (ECMA_ERR_MSG ("'this' is not an object.")); + } + + ecma_object_t *this_obj_p = ecma_get_object_from_value (this_arg); + + ecma_replace_context_t replace_ctx; + + /* 3. */ + ecma_string_t *string_p = ecma_op_to_string (string_arg); + if (string_p == NULL) + { + return ECMA_VALUE_ERROR; + } + + lit_utf8_size_t string_length; + uint8_t string_flags = ECMA_STRING_FLAG_IS_ASCII; + replace_ctx.string_p = ecma_string_get_chars (string_p, + &(replace_ctx.string_size), + &string_length, + NULL, + &string_flags); + + ecma_value_t result = ECMA_VALUE_ERROR; + + /* 6. */ + replace_ctx.replace_str_p = NULL; + if (!ecma_op_is_callable (replace_arg)) + { + replace_ctx.replace_str_p = ecma_op_to_string (replace_arg); + + if (replace_ctx.replace_str_p == NULL) + { + goto cleanup_string; + } + } + + /* 8 */ + result = ecma_op_object_get_by_magic_id (this_obj_p, LIT_MAGIC_STRING_GLOBAL); + if (ECMA_IS_VALUE_ERROR (result)) + { + goto cleanup_replace; + } + + const bool global = ecma_op_to_boolean (result); + ecma_free_value (result); + + /* 10. */ + if (global) + { + result = ecma_op_object_put (this_obj_p, + ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), + ecma_make_uint32_value (0), + true); + if (ECMA_IS_VALUE_ERROR (result)) + { + goto cleanup_replace; + } + + JERRY_ASSERT (ecma_is_value_boolean (result)); + } + +#if !ENABLED (JERRY_ES2015) + ecma_extended_object_t *re_obj_p = (ecma_extended_object_t *) this_obj_p; + const re_compiled_code_t *bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t, + re_obj_p->u.class_prop.u.value); + /* In ES5.1 the RegExp prototype object is a valid regexp, but we don't store bytecode for it to save memory. + * Handling this would be very awkward, so we temporarily compile bytecode for it. 
*/ + if (ecma_builtin_is (this_obj_p, ECMA_BUILTIN_ID_REGEXP_PROTOTYPE)) + { + JERRY_ASSERT (bc_p == NULL); + ecma_value_t compile = re_compile_bytecode (&bc_p, + ecma_get_magic_string (LIT_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP), + RE_FLAG_EMPTY); + JERRY_ASSERT (ecma_is_value_empty (compile)); + } + + result = ecma_regexp_replace_helper_fast (&replace_ctx, + bc_p, + string_p, + replace_arg); + + if (ecma_builtin_is (this_obj_p, ECMA_BUILTIN_ID_REGEXP_PROTOTYPE)) + { + ecma_bytecode_deref ((ecma_compiled_code_t *) bc_p); + } + + goto cleanup_replace; +#else /* ENABLED (JERRY_ES2015) */ + result = ecma_op_object_get_by_magic_id (this_obj_p, LIT_MAGIC_STRING_EXEC); + + if (ECMA_IS_VALUE_ERROR (result)) + { + goto cleanup_replace; + } + + /* Check for fast path. */ + if (ecma_op_is_callable (result)) + { + ecma_extended_object_t *function_p = (ecma_extended_object_t *) ecma_get_object_from_value (result); + if (ecma_object_class_is (this_obj_p, LIT_MAGIC_STRING_REGEXP_UL) + && !ecma_builtin_is (this_obj_p, ECMA_BUILTIN_ID_REGEXP_PROTOTYPE) + && ecma_builtin_is_regexp_exec (function_p)) + { + ecma_extended_object_t *re_obj_p = (ecma_extended_object_t *) this_obj_p; + const re_compiled_code_t *bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t, + re_obj_p->u.class_prop.u.value); + + result = ecma_regexp_replace_helper_fast (&replace_ctx, + bc_p, + string_p, + replace_arg); + + ecma_deref_object ((ecma_object_t *) function_p); + goto cleanup_replace; + } + } + + ecma_collection_t *results_p = ecma_new_collection (); + + while (true) + { + /* 13.a */ + if (ecma_op_is_callable (result)) + { + ecma_object_t *const function_p = ecma_get_object_from_value (result); + + ecma_value_t arguments[] = { ecma_make_string_value (string_p) }; + result = ecma_op_function_call (function_p, this_arg, arguments, 1); + + ecma_deref_object (function_p); + + if (ECMA_IS_VALUE_ERROR (result)) + { + goto cleanup_results; + } + + if (!ecma_is_value_object (result) && 
!ecma_is_value_null (result)) + { + ecma_free_value (result); + result = ecma_raise_type_error (ECMA_ERR_MSG ("Return value of 'exec' must be an Object or Null")); + goto cleanup_results; + } + } + else + { + ecma_free_value (result); + + if (!ecma_object_class_is (this_obj_p, LIT_MAGIC_STRING_REGEXP_UL)) + { + result = ecma_raise_type_error (ECMA_ERR_MSG ("'this' is not a valid RegExp object")); + goto cleanup_results; + } + + result = ecma_regexp_exec_helper (this_arg, ecma_make_string_value (string_p), false); + } + + /* 13.c */ + if (ecma_is_value_null (result)) + { + break; + } + + /* 13.d.i */ + ecma_collection_push_back (results_p, result); + + if (!global) + { + break; + } + + /* 13.d.iii.1 */ + result = ecma_op_object_get_by_uint32_index (ecma_get_object_from_value (result), 0); + if (ECMA_IS_VALUE_ERROR (result)) + { + goto cleanup_results; + } + + ecma_string_t *match_str_p = ecma_op_to_string (result); + ecma_free_value (result); + + if (match_str_p == NULL) + { + result = ECMA_VALUE_ERROR; + goto cleanup_results; + } + + const bool is_empty = ecma_string_is_empty (match_str_p); + ecma_deref_ecma_string (match_str_p); + + /* 13.d.iii.3 */ + if (is_empty) + { + result = ecma_op_object_get_by_magic_id (this_obj_p, LIT_MAGIC_STRING_LASTINDEX_UL); + if (ECMA_IS_VALUE_ERROR (result)) + { + goto cleanup_results; + } + + uint32_t length; + if (ECMA_IS_VALUE_ERROR (ecma_op_to_length (result, &length))) + { + ecma_free_value (result); + result = ECMA_VALUE_ERROR; + goto cleanup_results; + } + + ecma_free_value (result); + + /* 10.d.iii.3.c */ + result = ecma_op_object_put (this_obj_p, + ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), + ecma_make_uint32_value (length + 1), + true); + + if (ECMA_IS_VALUE_ERROR (result)) + { + goto cleanup_results; + } + + JERRY_ASSERT (ecma_is_value_boolean (result)); + } + + result = ecma_op_object_get_by_magic_id (this_obj_p, LIT_MAGIC_STRING_EXEC); + + if (ECMA_IS_VALUE_ERROR (result)) + { + goto cleanup_results; + } + } 
+ + /* 14. */ + replace_ctx.builder = ecma_stringbuilder_create (); + replace_ctx.matched_p = NULL; + replace_ctx.capture_count = 0; + + /* 15. */ + const lit_utf8_byte_t *source_position_p = replace_ctx.string_p; + lit_utf8_size_t source_index = 0; + + const lit_utf8_byte_t *const string_end_p = replace_ctx.string_p + replace_ctx.string_size; + + /* 16. */ + for (ecma_value_t *current_p = results_p->buffer_p; + current_p < results_p->buffer_p + results_p->item_count; + current_p++) + { + /* 16.a */ + ecma_object_t *current_object_p = ecma_get_object_from_value (*current_p); + + uint32_t capture_count; + result = ecma_op_object_get_length (current_object_p, &capture_count); + if (ECMA_IS_VALUE_ERROR (result)) + { + goto cleanup_builder; + } + + /* 16.c */ + capture_count = (capture_count > 0) ? capture_count - 1 : capture_count; + + /* 16.d */ + result = ecma_op_object_get_by_uint32_index (current_object_p, 0); + if (ECMA_IS_VALUE_ERROR (result)) + { + goto cleanup_builder; + } + + ecma_string_t *matched_str_p = ecma_op_to_string (result); + ecma_free_value (result); + + /* 16.e */ + if (matched_str_p == NULL) + { + result = ECMA_VALUE_ERROR; + goto cleanup_builder; + } + + /* 16.g */ + result = ecma_op_object_get_by_magic_id (current_object_p, LIT_MAGIC_STRING_INDEX); + if (ECMA_IS_VALUE_ERROR (result)) + { + ecma_deref_ecma_string (matched_str_p); + goto cleanup_builder; + } + + const ecma_value_t index_value = result; + + ecma_number_t position_num; + result = ecma_op_to_integer (index_value, &position_num); + ecma_free_value (index_value); + + if (ECMA_IS_VALUE_ERROR (result)) + { + ecma_deref_ecma_string (matched_str_p); + goto cleanup_builder; + } + + /* 16.i */ + lit_utf8_size_t position = JERRY_MIN ((lit_utf8_size_t) JERRY_MAX (position_num, 0.0f), string_length); + + /* 16.k */ + ecma_collection_t *arguments_p = ecma_new_collection (); + ecma_collection_push_back (arguments_p, ecma_make_string_value (matched_str_p)); + + /* 16.j, l */ + uint32_t n = 1; + 
while (n <= capture_count) + { + result = ecma_op_object_get_by_uint32_index (current_object_p, n); + if (ECMA_IS_VALUE_ERROR (result)) + { + ecma_collection_free (arguments_p); + goto cleanup_builder; + } + + /* 16.l.iii */ + if (!ecma_is_value_undefined (result)) + { + ecma_string_t *capture_str_p = ecma_op_to_string (result); + ecma_free_value (result); + + if (capture_str_p == NULL) + { + ecma_collection_free (arguments_p); + result = ECMA_VALUE_ERROR; + goto cleanup_builder; + } + + result = ecma_make_string_value (capture_str_p); + } + + /* 16.l.iv */ + ecma_collection_push_back (arguments_p, result); + n++; + } + + const bool should_replace = (position >= source_index); + /* 16.p */ + if (should_replace) + { + const lit_utf8_byte_t *match_position_p; + const lit_utf8_size_t matched_str_size = ecma_string_get_size (matched_str_p); + const lit_utf8_size_t matched_str_length = ecma_string_get_length (matched_str_p); + + if (string_flags & ECMA_STRING_FLAG_IS_ASCII) + { + match_position_p = replace_ctx.string_p + position; + } + else + { + match_position_p = source_position_p; + lit_utf8_size_t distance = position - source_index; + while (distance--) + { + lit_utf8_incr (&match_position_p); + } + } + + ecma_stringbuilder_append_raw (&replace_ctx.builder, + source_position_p, + (lit_utf8_size_t) (match_position_p - source_position_p)); + replace_ctx.match_byte_pos = (lit_utf8_size_t) (match_position_p - replace_ctx.string_p); + + source_position_p = JERRY_MIN (match_position_p + matched_str_size, string_end_p); + source_index = JERRY_MIN (position + matched_str_length, string_length); + } + + /* 16.m */ + if (replace_ctx.replace_str_p == NULL) + { + /* 16.m.i-ii. 
+ * arguments_p already contains the matched string followed by the captures */ + + /* 16.m.iii */ + ecma_collection_push_back (arguments_p, ecma_make_uint32_value (position)); + ecma_ref_ecma_string (string_p); + ecma_collection_push_back (arguments_p, ecma_make_string_value (string_p)); + + result = ecma_op_function_call (ecma_get_object_from_value (replace_arg), + ECMA_VALUE_UNDEFINED, + arguments_p->buffer_p, + arguments_p->item_count); + + ecma_collection_free (arguments_p); + + if (ECMA_IS_VALUE_ERROR (result)) + { + goto cleanup_builder; + } + + /* 16.m.v */ + ecma_string_t *const replace_result_p = ecma_op_to_string (result); + ecma_free_value (result); + + if (replace_result_p == NULL) + { + result = ECMA_VALUE_ERROR; + goto cleanup_builder; + } + + /* 16.m/p */ + if (should_replace) + { + ecma_stringbuilder_append (&replace_ctx.builder, replace_result_p); + } + + ecma_deref_ecma_string (replace_result_p); + } + else + { + /* 16.n/p */ + if (should_replace) + { + replace_ctx.u.collection_p = arguments_p; + ecma_builtin_replace_substitute (&replace_ctx); + } + + ecma_collection_free (arguments_p); + } + } + + /* 18. 
*/ + JERRY_ASSERT (source_index <= string_length); + ecma_stringbuilder_append_raw (&(replace_ctx.builder), + source_position_p, + (lit_utf8_size_t) (string_end_p - source_position_p)); + + result = ecma_make_string_value (ecma_stringbuilder_finalize (&replace_ctx.builder)); + goto cleanup_results; + +cleanup_builder: + ecma_stringbuilder_destroy (&replace_ctx.builder); + +cleanup_results: + ecma_collection_free (results_p); +#endif /* !ENABLED (JERRY_ES2015) */ + +cleanup_replace: + if (replace_ctx.replace_str_p != NULL) + { + ecma_deref_ecma_string (replace_ctx.replace_str_p); + } + +cleanup_string: + if (string_flags & ECMA_STRING_FLAG_MUST_BE_FREED) + { + jmem_heap_free_block ((void *) replace_ctx.string_p, replace_ctx.string_size); + } + + ecma_deref_ecma_string (string_p); + + return result; +} /* ecma_regexp_replace_helper */ + /** * @} * @} diff --git a/jerry-core/ecma/operations/ecma-regexp-object.h b/jerry-core/ecma/operations/ecma-regexp-object.h index a32d26ec4..a4d507d82 100644 --- a/jerry-core/ecma/operations/ecma-regexp-object.h +++ b/jerry-core/ecma/operations/ecma-regexp-object.h @@ -50,6 +50,14 @@ typedef struct const lit_utf8_byte_t *end_p; /**< substring end pointer */ } ecma_regexp_capture_t; +/** + * Check if an ecma_regexp_capture_t contains a defined capture + */ +#define ECMA_RE_IS_CAPTURE_DEFINED(c) ((c)->begin_p != NULL && (c)->end_p >= (c)->begin_p) + +ecma_value_t +ecma_regexp_get_capture_value (const ecma_regexp_capture_t *const capture_p); + /** * Structure for storing non-capturing group results */ @@ -96,6 +104,10 @@ ecma_char_t ecma_regexp_canonicalize_char (ecma_char_t ch); ecma_value_t ecma_regexp_parse_flags (ecma_string_t *flags_str_p, uint16_t *flags_p); void ecma_regexp_initialize_props (ecma_object_t *re_obj_p, ecma_string_t *source_p, uint16_t flags); +ecma_value_t +ecma_regexp_replace_helper (ecma_value_t this_arg, + ecma_value_t string_arg, + ecma_value_t replace_arg); /** * @} * @} diff --git 
a/jerry-core/lit/lit-magic-strings.inc.h b/jerry-core/lit/lit-magic-strings.inc.h index 8b78960d3..9862d9fe0 100644 --- a/jerry-core/lit/lit-magic-strings.inc.h +++ b/jerry-core/lit/lit-magic-strings.inc.h @@ -291,10 +291,7 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FREEZE, "freeze") #if ENABLED (JERRY_BUILTIN_DATE) LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_DAY_UL, "getDay") #endif -#if !ENABLED (JERRY_ES2015) && ENABLED (JERRY_BUILTIN_REGEXP) \ -|| ENABLED (JERRY_BUILTIN_REGEXP) && ENABLED (JERRY_BUILTIN_STRING) \ -|| ENABLED (JERRY_BUILTIN_REGEXP) && ENABLED (JERRY_ES2015) \ -|| ENABLED (JERRY_BUILTIN_REGEXP) && !( ENABLED (JERRY_ES2015)) +#if ENABLED (JERRY_BUILTIN_REGEXP) LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GLOBAL, "global") #endif #if ENABLED (JERRY_ES2015_BUILTIN_TYPEDARRAY) diff --git a/jerry-core/parser/regexp/re-bytecode.c b/jerry-core/parser/regexp/re-bytecode.c index 7d7028aaf..9b547c66c 100644 --- a/jerry-core/parser/regexp/re-bytecode.c +++ b/jerry-core/parser/regexp/re-bytecode.c @@ -33,13 +33,14 @@ * * @return pointer to the RegExp compiled code header */ -#define REGEXP_BYTECODE_BLOCK_SIZE 64UL +#define REGEXP_BYTECODE_BLOCK_SIZE 8UL void re_initialize_regexp_bytecode (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */ { - bc_ctx_p->block_start_p = jmem_heap_alloc_block (REGEXP_BYTECODE_BLOCK_SIZE); - bc_ctx_p->block_end_p = bc_ctx_p->block_start_p + REGEXP_BYTECODE_BLOCK_SIZE; + const size_t initial_size = JERRY_ALIGNUP (REGEXP_BYTECODE_BLOCK_SIZE + sizeof (re_compiled_code_t), JMEM_ALIGNMENT); + bc_ctx_p->block_start_p = jmem_heap_alloc_block (initial_size); + bc_ctx_p->block_end_p = bc_ctx_p->block_start_p + initial_size; bc_ctx_p->current_p = bc_ctx_p->block_start_p + sizeof (re_compiled_code_t); } /* re_initialize_regexp_bytecode */ diff --git a/tests/jerry/es2015/regression-test-issue-3070.js b/tests/jerry/es2015/regression-test-issue-3070.js new file mode 100644 index 000000000..192d54592 --- /dev/null +++ 
b/tests/jerry/es2015/regression-test-issue-3070.js @@ -0,0 +1,17 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var r = new RegExp("([X]{6}|.*)", "g"); +var s = "a"; +s.replace(r, () => r.compile("[PqaCZlWQUT]{0}", "m")); diff --git a/tests/jerry/es2015/symbol-replace.js b/tests/jerry/es2015/symbol-replace.js new file mode 100644 index 000000000..dc9cdce5c --- /dev/null +++ b/tests/jerry/es2015/symbol-replace.js @@ -0,0 +1,662 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+/* Direct reference to the built-in @@replace method, so the tests below can call it on arbitrary receivers. */ +var replace = RegExp.prototype[Symbol.replace]; +/* A primitive receiver (0) is expected to throw a TypeError. */ +try { + replace.call (0, "string", "replace"); + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} +/* An exception thrown by toString of the string argument is expected to propagate. */ +try { + replace.call (new RegExp(), { + toString: () => { + throw "abrupt string" + } + }, "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt string"); +} +/* An exception thrown by toString of the replace value is expected to propagate. */ +try { + replace.call (new RegExp(), "string", { + toString: () => { + throw "abrupt replace" + } + }); + assert (false); +} catch (e) { + assert (e === "abrupt replace"); +} +/* A throwing getter for global on the receiver is expected to propagate. */ +try { + replace.call ({ + get global() { + throw "abrupt global" + } + }, "string", "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt global"); +} +/* With global set, a throwing setter for lastIndex is expected to propagate. */ +try { + replace.call ({ + global: true, + set lastIndex(idx) { + throw "abrupt lastIndex" + } + }, "string", "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt lastIndex"); +} +/* A throwing getter for exec is expected to propagate. */ +try { + replace.call ({ + get exec() { + throw "abrupt exec" + } + }, "string", "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt exec"); +} +/* The built-in exec borrowed by a plain object is expected to throw a TypeError. */ +try { + replace.call ({ + exec: RegExp.prototype.exec + }, "string", "replace"); + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} +/* A non-callable exec (42) is expected to throw a TypeError. */ +try { + replace.call ({ + exec: 42 + }, "string", "replace"); + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} +/* An exception thrown by exec itself is expected to propagate. */ +try { + replace.call ({ + exec: () => { + throw "abrupt exec result" + } + }, "string", "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt exec result"); +} +/* A numeric exec result (1) is expected to throw a TypeError. */ +try { + replace.call ({ + exec: () => { + return 1 + } + }, "string", "replace"); + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} +/* A throwing getter for length on the exec result is expected to propagate. */ +try { + replace.call ({ + exec: () => { + return { + get length() { + throw "abrupt result length" + } + } + } + }, "string", "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt result length"); +} +/* With global set, a throwing getter for element 0 of the exec result is expected to propagate. */ +try { + replace.call ({ + global: true, + exec: () => { + return { + length: 1, + get 0() { + throw 
"abrupt match" + } + } + } + }, + "string", + "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt match"); +} +/* With global set, an exception thrown by toString of the match is expected to propagate. */ +try { + replace.call ({ + global: true, + exec: () => { + return { + length: 1, + get 0() { + return { + toString: () => { + throw "abrupt match toString" + } + } + } + } + } + }, + "string", + "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt match toString"); +} +/* toString returns a string on its first call and swaps itself for a throwing version, so only a later conversion fails. */ +var result_obj = { + toString: () => { + Object.defineProperty (result_obj, 'toString', { + value: () => { + throw "abrupt match toString delayed"; + } + }); + return "str"; + } +} +/* exec reports one match and then null; the exception from the swapped-in toString is expected to propagate. */ +var first = true; +try { + replace.call ({ + global: true, + exec: () => { + if (!first) { + return null; + } + + first = false; + return { + length: 1, + get 0() { + return result_obj; + } + } + } + }, + "string", + "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt match toString delayed"); +} +/* Empty match with global set: a throwing getter for lastIndex is expected to propagate. */ +try { + replace.call ({ + global: true, + get lastIndex() { + throw "abrupt lastIndex get" + }, + set lastIndex(i) {}, + exec: () => { + return { + length: 1, + get 0() { + return { + toString: () => { + return "" + } + } + } + } + } + }, + "string", + "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt lastIndex get"); +} +/* Empty match with global set: an exception thrown by valueOf of the lastIndex value is expected to propagate. */ +try { + replace.call ({ + global: true, + get lastIndex() { + return { + valueOf: () => { + throw "abrupt lastIndex toNumber" + } + } + }, + set lastIndex(i) {}, + exec: () => { + return { + length: 1, + get 0() { + return { + toString: () => { + return "" + } + } + } + } + } + }, + "string", + "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt lastIndex toNumber"); +} +/* Empty-match receiver whose lastIndex getter, on first read, redefines the property with a throwing setter. */ +var o = { + global: true, + exec: () => { + return { + length: 1, + get 0() { + return { + toString: () => { + return "" + } + } + } + } + } +} +Object.defineProperty (o, 'lastIndex', { + configurable: true, + get: () => { + Object.defineProperty (o, 'lastIndex', { + get: () => { + return { + valueOf: () => { + return 42 + } + }; + }, + 
set: (i) => { + throw "abrupt lastIndex put"; + }, + configurable: true + }); + return { + valueOf: () => { + return 24 + } + }; + }, + set: (i) => {} +}); +/* The exception from the swapped-in lastIndex setter is expected to propagate. */ +try { + replace.call (o, + "string", + "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt lastIndex put"); +} +/* Here the lastIndex getter attempts to redefine the property as a non-writable data property. NOTE(review): the outer descriptor omits configurable, so confirm which step is meant to raise the TypeError. */ +o = { + global: true, + exec: () => { + return { + length: 1, + get 0() { + return { + toString: () => { + return "" + } + } + } + } + }, +}; +Object.defineProperty (o, 'lastIndex', { + get: () => { + Object.defineProperty (o, 'lastIndex', { + value: 0, + writable: false + }); + return 0; + }, + set: () => {} +}); +/* A TypeError is expected. */ +try { + replace.call (o, + "string", + "replace"); + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} +/* exec replaces itself with a throwing accessor during its first call. */ +o = { + global: true +}; +Object.defineProperty (o, 'exec', { + configurable: true, + value: () => { + Object.defineProperty (o, 'exec', { + get: () => { + throw "abrupt exec" + }, + set: (v) => {} + }); + return { + length: 1, + 0: "thisisastring" + } + } +}); +/* The exception from the swapped-in exec getter is expected to propagate. */ +try { + replace.call (o, + "string", + "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt exec"); +} +/* A throwing getter for index on the exec result is expected to propagate. */ +try { + replace.call ({ + exec: () => { + return { + length: 1, + 0: "str", + get index() { + throw "abrupt index" + } + } + } + }, "string", "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt index"); +} +/* An exception thrown by valueOf of the index value is expected to propagate. */ +try { + replace.call ({ + exec: () => { + return { + length: 1, + 0: "str", + get index() { + return { + valueOf: () => { + throw "abrupt index toNumber" + } + } + } + } + } + }, "string", "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt index toNumber"); +} +/* A throwing getter for capture 1 on the exec result is expected to propagate. */ +try { + replace.call ({ + exec: () => { + return { + length: 2, + 0: "str", + index: 0, + get 1() { + throw "abrupt capture" + } + } + } + }, "string", "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt capture"); +} +/* An exception thrown by toString of capture 1 is expected to propagate. */ +try { + replace.call ({ + exec: () => { + return { + length: 2, + 0: "str", + index: 0, + 1: { + toString: () => { + throw "abrupt 
capture toString" + } + } + } + } + }, "string", "replace"); + assert (false); +} catch (e) { + assert (e === "abrupt capture toString"); +} +/* An exception thrown by the replace function is expected to propagate. */ +try { + replace.call ({ + exec: () => { + return { + length: 2, + 0: "str", + index: 0, + 1: "st" + } + } + }, "string", () => { + throw "abrupt replace" + }); + assert (false); +} catch (e) { + assert (e === "abrupt replace"); +} +/* An exception thrown by toString of the replace function result is expected to propagate. */ +try { + replace.call ({ + exec: () => { + return { + length: 2, + 0: "str", + index: 0, + 1: "st" + } + } + }, "string", () => { + return { + toString: () => { + throw "abrupt replace toString" + } + } + }); + assert (false); +} catch (e) { + assert (e === "abrupt replace toString"); +} +/* Real RegExp receiver: throwing replace function. */ +try { + replace.call (/abc/, "abc", () => { + throw "fastpath abrupt replace" + }); + assert (false); +} catch (e) { + assert (e === "fastpath abrupt replace"); +} +/* Real RegExp receiver: throwing toString on the replace function result. */ +try { + replace.call (/abc/, "abc", () => { + return { + toString: () => { + throw "fastpath abrupt replace" + } + } + }); + assert (false); +} catch (e) { + assert (e === "fastpath abrupt replace"); +} +/* Substitution patterns in the replace string, checked against a regexp with five capture groups. */ +assert (replace.call (/abc/, "abc", "xyz") === "xyz"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "xyz") === "abxyzfg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$$-") === "ab-$-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$&-") === "ab-cde-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$`-") === "ab-ab-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$'-") === "ab-fg-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$0-") === "ab-$0-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$1-") === "ab-c-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$2-") === "ab-d-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$3-") === "ab-d-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$4-") === "ab--fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$5-") === "ab-e-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", 
"-$6-") === "ab-$6-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$00-") === "ab-$00-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$01-") === "ab-c-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$10-") === "ab-c0-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$99-") === "ab-$99-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "-$$1-") === "ab-$1-fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "$") === "ab$fg"); +assert (replace.call (/(c)((d)|(x))(e)/, "abcdefg", "$@") === "ab$@fg"); +/* A replace function receives the match, the five captures, the match position and the input string. */ +replace.call (/(c)((d)|(x))(e)/, "abcdefg", function () { + assert (arguments[0] === "cde"); + assert (arguments[1] === "c"); + assert (arguments[2] === "d"); + assert (arguments[3] === "d"); + assert (arguments[4] === undefined); + assert (arguments[5] === "e"); + assert (arguments[6] === 2); + assert (arguments[7] === "abcdefg"); +}); +/* A global regexp replaces every match; lastIndex is expected to be 0 afterwards. */ +var re = /ab/g; +assert (replace.call (re, "-ab-ab-ab-ab-", "cd") === "-cd-cd-cd-cd-"); +assert (re.lastIndex === 0); +/* A non-zero lastIndex before the call is expected to give the same result. */ +re.lastIndex = 5; +assert (replace.call (re, "-ab-ab-ab-ab-", "cd") === "-cd-cd-cd-cd-"); +assert (re.lastIndex === 0); +/* An empty global pattern is expected to match at every position. */ +assert (replace.call (/(?:)/g, "string", "Duck") === "DucksDucktDuckrDuckiDucknDuckgDuck"); +/* RegExp-like object whose exec reports a single Duck match at index 39 and then null. */ +class Regexplike { + constructor() { + this.index = 0; + this.global = true; + } + + exec() { + if (this.index > 0) { + return null; + } + + this.index = 39; + var result = { + length: 1, + 0: "Duck", + index: this.index + }; + return result; + } +} + +re = new Regexplike(); + +/* Well-behaved RegExp-like object. 
*/ +assert (replace.call (re, "What have you brought upon this cursed land", "$&") === "What have you brought upon this cursed Duck"); +/* Replace function that counts its calls and returns its first argument. */ +var replace_count = 0; + +function replacer() { + replace_count++; + return arguments[0]; +} +/* exec now reports Duck at indices 0, 1, 2 and 3 on successive calls and then null. */ +re.index = 0; +re.exec = function () { + if (this.index > 3) { + return null; + } + + var result = { + length: 1, + 0: "Duck", + index: this.index++ + }; + return result; +} + +/* Misbehaving RegExp-like object: the replace function is called on each match, but the result is ignored for inconsistent matches. */ +assert (replace.call (re, "Badger", replacer) === "Ducker"); +assert (replace_count === 4); +/* The same receiver with a plain replace string. */ +re.index = 0; +assert (replace.call (re, "Badger", "Ord") === "Order"); +/* RegExp.prototype itself as the receiver is expected to throw a TypeError. */ +try { + replace.call (RegExp.prototype, "string", "replace"); + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} + +/* Object with custom @@replace method */ +var o = {} +o[Symbol.replace] = function () { + return "Duck" +}; +assert ("string".replace (o, "Mallard") === "Duck"); +/* A non-callable @@replace (42) is expected to throw a TypeError. */ +o[Symbol.replace] = 42; +try { + "string".replace (o, "Duck"); + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} +/* A throwing getter for @@replace is expected to propagate. */ +Object.defineProperty (o, Symbol.replace, { + get: () => { + throw "abrupt @@replace get" + }, + set: (v) => {} +}); + +try { + "string".replace (o, "Duck"); + assert (false); +} catch (e) { + assert (e === "abrupt @@replace get"); +} +/* An exception thrown by a custom @@replace method is expected to propagate. */ +o = {}; +o[Symbol.replace] = function () { + throw "abrupt @@replace" +}; +try { + "string".replace (o, "str"); + assert (false); +} catch (e) { + assert (e === "abrupt @@replace") +} diff --git a/tests/jerry/es5.1/string-prototype-replace.js b/tests/jerry/es5.1/string-prototype-replace.js new file mode 100644 index 000000000..5727f2074 --- /dev/null +++ b/tests/jerry/es5.1/string-prototype-replace.js @@ -0,0 +1,18 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Changing exec should not affect replace. +Object.getPrototypeOf(/x/).exec = function () { return 1234; } +assert (/y/.exec("y") === 1234); +assert ("y".replace (/y/, "x") === "x"); diff --git a/tests/jerry/string-prototype-replace.js b/tests/jerry/string-prototype-replace.js index b8d3a0445..872ac408d 100644 --- a/tests/jerry/string-prototype-replace.js +++ b/tests/jerry/string-prototype-replace.js @@ -14,6 +14,8 @@ assert ("abcabc".replace("bc", ":") === "a:abc"); assert ("hello".replace("", ":") === ":hello"); +assert ("hello".replace("h", "") === "ello"); +assert ("".replace("", "h") === "h"); assert ("xabcxabcx".replace (/abc/g, "[$&][$`][$']") === "x[abc][x][xabcx]x[abc][xabcx][x]x"); assert ("abc".replace (/a(b)c|d()/, "[$1][$01][$2][$02][$99][$123][$012]") === "[b][b][][][$99][b23][b2]"); @@ -110,10 +112,23 @@ try { assert (e instanceof TypeError); } -// The real "exec" never returns with a number. -Object.getPrototypeOf(/x/).exec = function () { return 1234; } +try { /* An exception thrown by toString of the search value is expected to propagate. */ + "str".replace ({toString: function () {throw "abrupt search toString"}}, ""); + assert (false); +} catch (e) { + assert (e === "abrupt search toString"); +} -assert (/y/.exec("y") === 1234); +try { /* NOTE(review): the thrower here is the replace value, although the message says search. */ + "str".replace ("str", {toString: function () {throw "abrupt search toString"}}); + assert (false); +} catch (e) { + assert (e === "abrupt search toString"); +} -// Changing exec should not affect replace. 
-assert ("y".replace (/y/, "x") === "x"); +try { /* An exception thrown by toString of the replace function result is expected to propagate. */ + "str".replace ("str", function () {return {toString: function () {throw "abrupt replacer toString"}}}); + assert (false); +} catch (e) { + assert (e === "abrupt replacer toString"); +}