From 74781c28c28b2357459721cb89f39f6b6d04a7e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=C3=A1tyai?= Date: Mon, 20 Jul 2020 12:54:06 +0200 Subject: [PATCH] Fix sticky flag handling in @@replace (#4005) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai daniel.batyai@h-lab.eu --- .../builtin-objects/ecma-builtin-helpers.h | 2 +- .../ecma/operations/ecma-regexp-object.c | 205 ++++++++++-------- jerry-core/lit/lit-magic-strings.inc.h | 3 +- tests/jerry/es.next/symbol-replace.js | 40 +++- tests/jerry/string-prototype-replace.js | 14 ++ tests/test262-es6-excludelist.xml | 22 +- 6 files changed, 182 insertions(+), 104 deletions(-) diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.h b/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.h index 484b3af27..acd5fd4ee 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.h +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.h @@ -76,7 +76,7 @@ typedef struct const lit_utf8_byte_t *matched_p; /**< matched string */ lit_utf8_size_t matched_size; /**< matcehd string size */ lit_utf8_size_t match_byte_pos; /**< byte position of the match in the source string */ - ecma_length_t index; /**< current match index */ + uint16_t flags; /**< replace flags */ /** * Capture results diff --git a/jerry-core/ecma/operations/ecma-regexp-object.c b/jerry-core/ecma/operations/ecma-regexp-object.c index 897e380f4..49c5dfc93 100644 --- a/jerry-core/ecma/operations/ecma-regexp-object.c +++ b/jerry-core/ecma/operations/ecma-regexp-object.c @@ -2525,10 +2525,13 @@ cleanup_string: */ static ecma_value_t ecma_regexp_replace_helper_fast (ecma_replace_context_t *ctx_p, /**u.class_prop.u.value); + JERRY_ASSERT (bc_p != NULL); ecma_value_t result = ECMA_VALUE_EMPTY; @@ -2545,26 +2548,7 @@ ecma_regexp_replace_helper_fast (ecma_replace_context_t *ctx_p, /**string_p; const lit_utf8_byte_t *last_append_p = current_p; - JERRY_ASSERT (ctx_p->index <= string_length); - -#if ENABLED (JERRY_ESNEXT) - /* Global matches always start at index 0, but Sticky matches may have a non-zero lastIndex. */ - if (ctx_p->index > 0) - { - if (string_flags & ECMA_STRING_FLAG_IS_ASCII) - { - current_p += ctx_p->index; - } - else - { - ecma_length_t index = ctx_p->index; - while (index--) - { - lit_utf8_incr (¤t_p); - } - } - } -#endif /* ENABLED (JERRY_ESNEXT) */ + ecma_length_t index; ecma_regexp_ctx_t re_ctx; ecma_regexp_initialize_context (&re_ctx, @@ -2572,6 +2556,57 @@ ecma_regexp_replace_helper_fast (ecma_replace_context_t *ctx_p, /**string_p, string_end_p); + /* lastIndex must be accessed to remain consistent with the standard, even though we may not need the value. */ + ecma_value_t lastindex_value = ecma_op_object_get_by_magic_id ((ecma_object_t *) re_obj_p, + LIT_MAGIC_STRING_LASTINDEX_UL); + result = ecma_op_to_length (lastindex_value, &index); + ecma_free_value (lastindex_value); + + if (ECMA_IS_VALUE_ERROR (result)) + { + goto cleanup_context; + } + +#if ENABLED (JERRY_ESNEXT) + /* Only non-global sticky matches use the lastIndex value, otherwise the starting index is 0. */ + if (JERRY_UNLIKELY ((ctx_p->flags & RE_FLAG_GLOBAL) == 0 && (re_ctx.flags & RE_FLAG_STICKY) != 0)) + { + if (index > string_length) + { + result = ecma_op_object_put ((ecma_object_t *) re_obj_p, + ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), + ecma_make_uint32_value (0), + true); + + if (!ECMA_IS_VALUE_ERROR (result)) + { + JERRY_ASSERT (ecma_is_value_true (result)); + ecma_ref_ecma_string (string_p); + result = ecma_make_string_value (string_p); + } + + goto cleanup_context; + } + + if (string_flags & ECMA_STRING_FLAG_IS_ASCII) + { + current_p += index; + } + else + { + ecma_length_t counter = index; + while (counter--) + { + lit_utf8_incr (¤t_p); + } + } + } + else +#endif /* ENABLED (JERRY_ESNEXT) */ + { + index = 0; + } + ctx_p->builder = ecma_stringbuilder_create (); ctx_p->capture_count = re_ctx.captures_count; ctx_p->u.captures_p = re_ctx.captures_p; @@ -2610,7 +2645,7 @@ ecma_regexp_replace_helper_fast (ecma_replace_context_t *ctx_p, /**index)); + ecma_collection_push_back (arguments_p, ecma_make_uint32_value (index)); ecma_ref_ecma_string (string_p); ecma_collection_push_back (arguments_p, ecma_make_string_value (string_p)); ecma_object_t *function_p = ecma_get_object_from_value (replace_arg); @@ -2644,19 +2679,53 @@ ecma_regexp_replace_helper_fast (ecma_replace_context_t *ctx_p, /**end_p; - if (!(re_ctx.flags & RE_FLAG_GLOBAL)) + const lit_utf8_size_t matched_size = (lit_utf8_size_t) (global_capture_p->end_p - global_capture_p->begin_p); + + const bool is_ascii = (string_flags & ECMA_STRING_FLAG_IS_ASCII) != 0; + index += is_ascii ? matched_size : lit_utf8_string_length (current_p, matched_size); + + if (!(ctx_p->flags & RE_FLAG_GLOBAL)) { +#if ENABLED (JERRY_ESNEXT) + if (JERRY_UNLIKELY ((re_ctx.flags & RE_FLAG_STICKY) != 0)) + { + result = ecma_op_object_put ((ecma_object_t *) re_obj_p, + ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), + ecma_make_uint32_value (index), + true); + + if (ECMA_IS_VALUE_ERROR (result)) + { + goto cleanup_builder; + } + } +#endif /* ENABLED (JERRY_ESNEXT) */ + break; } - const lit_utf8_size_t matched_size = (lit_utf8_size_t) (global_capture_p->end_p - global_capture_p->begin_p); if (matched_size > 0) { - ctx_p->index += lit_utf8_string_length (current_p, matched_size); current_p = last_append_p; continue; } } +#if ENABLED (JERRY_ESNEXT) + else if (JERRY_UNLIKELY ((re_ctx.flags & RE_FLAG_STICKY) != 0)) + { + result = ecma_op_object_put ((ecma_object_t *) re_obj_p, + ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), + ecma_make_uint32_value (0), + true); + + if (ECMA_IS_VALUE_ERROR (result)) + { + goto cleanup_builder; + } + + break; + } +#endif /* ENABLED (JERRY_ESNEXT) */ if (current_p >= string_end_p) { @@ -2664,22 +2733,22 @@ ecma_regexp_replace_helper_fast (ecma_replace_context_t *ctx_p, /**flags & RE_FLAG_UNICODE) != 0) { - ctx_p->index++; + index++; const lit_code_point_t cp = ecma_regexp_unicode_advance (¤t_p, string_end_p); if (cp > LIT_UTF16_CODE_UNIT_MAX) { - ctx_p->index++; + index++; } continue; } #endif /* ENABLED (JERRY_ESNEXT) */ - ctx_p->index++; + index++; lit_utf8_incr (¤t_p); } @@ -2727,7 +2796,7 @@ ecma_regexp_replace_helper (ecma_value_t this_arg, /**< this argument */ ecma_object_t *this_obj_p = ecma_get_object_from_value (this_arg); ecma_replace_context_t replace_ctx; - replace_ctx.index = 0; + replace_ctx.flags = RE_FLAG_EMPTY; /* 3. */ ecma_string_t *string_p = ecma_op_to_string (string_arg); @@ -2757,16 +2826,19 @@ ecma_regexp_replace_helper (ecma_value_t this_arg, /**< this argument */ goto cleanup_replace; } - const bool global = ecma_op_to_boolean (result); + if (ecma_op_to_boolean (result)) + { + replace_ctx.flags |= RE_FLAG_GLOBAL; + } + ecma_free_value (result); #if ENABLED (JERRY_ESNEXT) const lit_utf8_size_t string_length = ecma_string_get_length (string_p); - bool unicode = false; #endif /* ENABLED (JERRY_ESNEXT) */ /* 10. */ - if (global) + if (replace_ctx.flags & RE_FLAG_GLOBAL) { #if ENABLED (JERRY_ESNEXT) result = ecma_op_object_get_by_magic_id (this_obj_p, LIT_MAGIC_STRING_UNICODE); @@ -2775,7 +2847,11 @@ ecma_regexp_replace_helper (ecma_value_t this_arg, /**< this argument */ goto cleanup_replace; } - unicode = ecma_op_to_boolean (result); + if (ecma_op_to_boolean (result)) + { + replace_ctx.flags |= RE_FLAG_UNICODE; + } + ecma_free_value (result); #endif /* ENABLED (JERRY_ESNEXT) */ @@ -2792,12 +2868,8 @@ ecma_regexp_replace_helper (ecma_value_t this_arg, /**< this argument */ } #if !ENABLED (JERRY_ESNEXT) - ecma_extended_object_t *re_obj_p = (ecma_extended_object_t *) this_obj_p; - const re_compiled_code_t *bc_p = ECMA_GET_INTERNAL_VALUE_POINTER (re_compiled_code_t, - re_obj_p->u.class_prop.u.value); - result = ecma_regexp_replace_helper_fast (&replace_ctx, - bc_p, + (ecma_extended_object_t *) this_obj_p, string_p, replace_arg); @@ -2818,47 +2890,9 @@ ecma_regexp_replace_helper (ecma_value_t this_arg, /**< this argument */ && ecma_builtin_is_regexp_exec (function_p)) { ecma_deref_object ((ecma_object_t *) function_p); - result = ecma_op_object_get_by_magic_id (this_obj_p, LIT_MAGIC_STRING_STICKY); - if (ECMA_IS_VALUE_ERROR (result)) - { - goto cleanup_replace; - } - - const bool sticky = ecma_op_to_boolean (result); - ecma_free_value (result); - - if (sticky && !global) - { - ecma_value_t lastindex_value = ecma_op_object_get_by_magic_id (this_obj_p, LIT_MAGIC_STRING_LASTINDEX_UL); - - result = ecma_op_to_length (lastindex_value, &replace_ctx.index); - ecma_free_value (lastindex_value); - - if (ECMA_IS_VALUE_ERROR (result)) - { - goto cleanup_replace; - } - - if (replace_ctx.index > string_length) - { - result = ecma_op_object_put (this_obj_p, - ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), - ecma_make_uint32_value (0), - true); - JERRY_ASSERT (ecma_is_value_true (result)); - - ecma_ref_ecma_string (string_p); - result = ecma_make_string_value (string_p); - goto cleanup_replace; - } - } - - ecma_extended_object_t *re_obj_p = (ecma_extended_object_t *) this_obj_p; - const re_compiled_code_t *bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t, - re_obj_p->u.class_prop.u.value); result = ecma_regexp_replace_helper_fast (&replace_ctx, - bc_p, + (ecma_extended_object_t *) this_obj_p, string_p, replace_arg); @@ -2914,7 +2948,7 @@ ecma_regexp_replace_helper (ecma_value_t this_arg, /**< this argument */ /* 13.d.i */ ecma_collection_push_back (results_p, result); - if (!global) + if ((replace_ctx.flags & RE_FLAG_GLOBAL) == 0) { break; } @@ -2957,7 +2991,7 @@ ecma_regexp_replace_helper (ecma_value_t this_arg, /**< this argument */ ecma_free_value (result); - index = ecma_op_advance_string_index (string_p, index, unicode); + index = ecma_op_advance_string_index (string_p, index, (replace_ctx.flags & RE_FLAG_UNICODE) != 0); /* 10.d.iii.3.c */ result = ecma_op_object_put (this_obj_p, @@ -2992,6 +3026,7 @@ ecma_regexp_replace_helper (ecma_value_t this_arg, /**< this argument */ replace_ctx.builder = ecma_stringbuilder_create (); replace_ctx.matched_p = NULL; replace_ctx.capture_count = 0; + ecma_length_t index = 0; /* 15. */ const lit_utf8_byte_t *source_position_p = replace_ctx.string_p; @@ -3053,7 +3088,7 @@ ecma_regexp_replace_helper (ecma_value_t this_arg, /**< this argument */ } /* 16.i */ - lit_utf8_size_t position = JERRY_MIN ((lit_utf8_size_t) JERRY_MAX (position_num, 0.0f), string_length); + ecma_length_t position = JERRY_MIN ((ecma_length_t) JERRY_MAX (position_num, 0.0f), string_length); /* 16.k */ ecma_collection_t *arguments_p = ecma_new_collection (); @@ -3091,7 +3126,7 @@ ecma_regexp_replace_helper (ecma_value_t this_arg, /**< this argument */ n++; } - const bool should_replace = (position >= replace_ctx.index); + const bool should_replace = (position >= index); /* 16.p */ if (should_replace) { @@ -3106,7 +3141,7 @@ ecma_regexp_replace_helper (ecma_value_t this_arg, /**< this argument */ else { match_position_p = source_position_p; - lit_utf8_size_t distance = position - replace_ctx.index; + lit_utf8_size_t distance = position - index; while (distance--) { lit_utf8_incr (&match_position_p); @@ -3134,7 +3169,7 @@ ecma_regexp_replace_helper (ecma_value_t this_arg, /**< this argument */ source_position_p = match_position_p; } - replace_ctx.index = JERRY_MIN (position + matched_str_length, string_length); + index = JERRY_MIN (position + matched_str_length, string_length); } /* 16.m */ @@ -3192,7 +3227,7 @@ ecma_regexp_replace_helper (ecma_value_t this_arg, /**< this argument */ } /* 18. */ - JERRY_ASSERT (replace_ctx.index <= string_length); + JERRY_ASSERT (index <= string_length); ecma_stringbuilder_append_raw (&(replace_ctx.builder), source_position_p, (lit_utf8_size_t) (string_end_p - source_position_p)); diff --git a/jerry-core/lit/lit-magic-strings.inc.h b/jerry-core/lit/lit-magic-strings.inc.h index 3d86cc2f4..e9614ca46 100644 --- a/jerry-core/lit/lit-magic-strings.inc.h +++ b/jerry-core/lit/lit-magic-strings.inc.h @@ -397,8 +397,7 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SOURCE, "source") #if ENABLED (JERRY_BUILTIN_ARRAY) LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SPLICE, "splice") #endif -#if ENABLED (JERRY_BUILTIN_REGEXP) && ENABLED (JERRY_ESNEXT) \ -|| ENABLED (JERRY_BUILTIN_REGEXP) && !( !ENABLED (JERRY_ESNEXT)) +#if ENABLED (JERRY_BUILTIN_REGEXP) && ENABLED (JERRY_ESNEXT) LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_STICKY, "sticky") #endif LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_STRING, "string") diff --git a/tests/jerry/es.next/symbol-replace.js b/tests/jerry/es.next/symbol-replace.js index e5f47c59b..7bd12e642 100644 --- a/tests/jerry/es.next/symbol-replace.js +++ b/tests/jerry/es.next/symbol-replace.js @@ -680,9 +680,45 @@ Object.defineProperty(abruptStickyRegexp, 'sticky', { } }); +assert(abruptStickyRegexp[Symbol.replace]() === "undefinedndefined"); + +var r = /./; +r.lastIndex = { + valueOf: function() { + throw "abrupt lastIndex" + } +} + try { - abruptStickyRegexp[Symbol.replace](); + r[Symbol.replace]("a", "b"); assert(false); } catch (e) { - assert(e === "abrupt sticky"); + assert(e === "abrupt lastIndex"); } + +var r = /a/y; +r.lastIndex = 3; +assert (r[Symbol.replace]("aaaaa", "b") === "aaaba"); +assert (r.lastIndex === 4); + +assert (r[Symbol.replace]("ccccc", "b") === "ccccc"); +assert (r.lastIndex === 0); + +var r = /a/yg; +r.lastIndex = 3; +assert (r[Symbol.replace]("aaaaa", "b") === "bbbbb"); +assert (r.lastIndex === 0); + +var replaceCalled = false; +var r = /a/ +r.lastIndex = 2; + +assert(r[Symbol.replace]("aaaa", function(match, index) { + replaceCalled = true; + assert (match === "a"); + assert (index === 0); + return "b"; +}) === "baaa"); + +assert (replaceCalled); +assert (r.lastIndex === 2) diff --git a/tests/jerry/string-prototype-replace.js b/tests/jerry/string-prototype-replace.js index 872ac408d..bb257e6fb 100644 --- a/tests/jerry/string-prototype-replace.js +++ b/tests/jerry/string-prototype-replace.js @@ -132,3 +132,17 @@ try { } catch (e) { assert (e === "abrupt replacer toString"); } + +var r = /./; +r.lastIndex = { + valueOf: function() { + throw "abrupt lastIndex" + } +} + +try { + "a".replace(r, "b"); + assert(false); +} catch (e) { + assert(e === "abrupt lastIndex"); +} diff --git a/tests/test262-es6-excludelist.xml b/tests/test262-es6-excludelist.xml index 30ad9c783..6b2de92b8 100644 --- a/tests/test262-es6-excludelist.xml +++ b/tests/test262-es6-excludelist.xml @@ -80,8 +80,8 @@ - - + RegExpBuiltinExec no longer uses accessors to get flags + RegExpBuiltinExec no longer uses accessors to get flags @@ -92,18 +92,12 @@ - - - - - - - - - - - - + RegExpBuiltinExec no longer uses accessors to get flags + RegExpBuiltinExec no longer uses accessors to get flags + RegExpBuiltinExec no longer uses accessors to get flags + RegExpBuiltinExec no longer uses accessors to get flags + RegExpBuiltinExec no longer uses accessors to get flags + RegExpBuiltinExec no longer uses accessors to get flags Test is outdated: ES11, 21.2.5.13 13