Implement Symbol.matchAll (#4082)

The following methods were implemented:
- String.prototype.matchAll based on ECMA-262 v11, 21.1.3.12
- RegExp.prototype[@@matchAll] based on ECMA-262 v11, 21.2.5.8
- RegExp String Iterator Object based on 21.2.7

JerryScript-DCO-1.0-Signed-off-by: Adam Szilagyi aszilagy@inf.u-szeged.hu
This commit is contained in:
Szilagyi Adam
2021-01-18 18:08:35 +01:00
committed by GitHub
parent 1d42d17ab6
commit 6ec4455111
24 changed files with 965 additions and 83 deletions
@@ -68,6 +68,7 @@ enum
ECMA_REGEXP_PROTOTYPE_ROUTINE_SYMBOL_MATCH,
ECMA_REGEXP_PROTOTYPE_ROUTINE_SYMBOL_REPLACE,
ECMA_REGEXP_PROTOTYPE_ROUTINE_SYMBOL_SPLIT,
ECMA_REGEXP_PROTOTYPE_ROUTINE_SYMBOL_MATCH_ALL,
#endif /* ENABLED (JERRY_ESNEXT) */
};
@@ -514,6 +515,147 @@ ecma_builtin_is_regexp_exec (ecma_extended_object_t *obj_p)
} /* ecma_builtin_is_regexp_exec */
#endif /* ENABLED (JERRY_ESNEXT) */
#if ENABLED (JERRY_ESNEXT)
/**
 * The RegExp.prototype object's '@@matchAll' routine
 *
 * Builds a new matcher through the species constructor of the receiver, copies the
 * receiver's lastIndex onto it, and wraps the matcher together with the stringified
 * argument into a RegExp String Iterator object.
 *
 * See also:
 *          ECMA-262 v11, 21.2.5.8
 *
 * Note:
 *      Returned value must be freed with ecma_free_value.
 *
 * @return RegExp String Iterator object, if success
 *         error - otherwise
 */
static ecma_value_t
ecma_builtin_regexp_prototype_match_all (ecma_object_t *regexp_obj_p, /**< this argument */
                                         ecma_value_t string_arg) /**< source string */
{
  /* 3. */
  ecma_string_t *str_p = ecma_op_to_string (string_arg);

  if (JERRY_UNLIKELY (str_p == NULL))
  {
    return ECMA_VALUE_ERROR;
  }

  /* 4. */
  ecma_value_t constructor = ecma_op_species_constructor (regexp_obj_p, ECMA_BUILTIN_ID_REGEXP);

  if (ECMA_IS_VALUE_ERROR (constructor))
  {
    ecma_deref_ecma_string (str_p);
    return constructor;
  }

  /* 5. */
  ecma_value_t get_flag = ecma_op_object_get_by_magic_id (regexp_obj_p, LIT_MAGIC_STRING_FLAGS);

  if (ECMA_IS_VALUE_ERROR (get_flag))
  {
    ecma_deref_ecma_string (str_p);
    ecma_free_value (constructor);
    return get_flag;
  }

  ecma_string_t *flags = ecma_op_to_string (get_flag);
  ecma_free_value (get_flag);

  if (JERRY_UNLIKELY (flags == NULL))
  {
    ecma_deref_ecma_string (str_p);
    ecma_free_value (constructor);
    return ECMA_VALUE_ERROR;
  }

  /* 6. */
  ecma_object_t *constructor_obj_p = ecma_get_object_from_value (constructor);
  ecma_value_t flags_value = ecma_make_string_value (flags);
  ecma_value_t match_args[] = { ecma_make_object_value (regexp_obj_p), flags_value};
  ecma_value_t matcher = ecma_op_function_construct (constructor_obj_p, constructor_obj_p, match_args, 2);

  /* The constructor is not needed past this point, so later error paths do not release it. */
  ecma_deref_object (constructor_obj_p);

  if (ECMA_IS_VALUE_ERROR (matcher))
  {
    ecma_deref_ecma_string (str_p);
    ecma_deref_ecma_string (flags);
    return matcher;
  }

  /* 7. */
  ecma_value_t get_last_index = ecma_op_object_get_by_magic_id (regexp_obj_p, LIT_MAGIC_STRING_LASTINDEX_UL);

  if (ECMA_IS_VALUE_ERROR (get_last_index))
  {
    ecma_deref_ecma_string (str_p);
    ecma_deref_ecma_string (flags);
    ecma_free_value (matcher);
    return get_last_index;
  }

  ecma_length_t last_index;
  ecma_value_t to_len = ecma_op_to_length (get_last_index, &last_index);
  ecma_free_value (get_last_index);

  if (ECMA_IS_VALUE_ERROR (to_len))
  {
    ecma_deref_ecma_string (str_p);
    ecma_deref_ecma_string (flags);
    ecma_free_value (matcher);
    return to_len;
  }

  /* 8. */
  ecma_object_t *matcher_obj_p = ecma_get_object_from_value (matcher);
  ecma_value_t last_index_value = ecma_make_length_value (last_index);
  ecma_value_t set = ecma_op_object_put (matcher_obj_p,
                                         ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
                                         last_index_value,
                                         true);
  ecma_free_value (last_index_value);

  if (ECMA_IS_VALUE_ERROR (set))
  {
    ecma_deref_ecma_string (str_p);
    ecma_deref_ecma_string (flags);
    ecma_deref_object (matcher_obj_p);
    /* Must stop here: everything owned by this function has just been released,
     * continuing would release 'flags' a second time and hand freed objects to the iterator. */
    return set;
  }

  /* 9-12. Only the global and unicode bits of the parsed flags are kept by the iterator. */
  uint16_t parsed_flag;
  ecma_value_t flag_parse = ecma_regexp_parse_flags (flags, &parsed_flag);
  ecma_deref_ecma_string (flags);

  if (ECMA_IS_VALUE_ERROR (flag_parse))
  {
    ecma_deref_ecma_string (str_p);
    ecma_deref_object (matcher_obj_p);
    return flag_parse;
  }

  /* 13. */
  ecma_object_t *proto_p = ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP_STRING_ITERATOR_PROTOTYPE);
  ecma_object_t *result_obj = ecma_create_object (proto_p,
                                                  sizeof (ecma_regexp_string_iterator_t),
                                                  ECMA_OBJECT_TYPE_PSEUDO_ARRAY);

  ecma_extended_object_t *ext_obj_p = (ecma_extended_object_t *) result_obj;
  ext_obj_p->u.pseudo_array.type = (uint8_t) ECMA_PSEUDO_REGEXP_STRING_ITERATOR;
  ext_obj_p->u.pseudo_array.extra_info = (uint8_t) (parsed_flag & (RE_FLAG_GLOBAL | RE_FLAG_UNICODE));

  ecma_regexp_string_iterator_t *regexp_string_iterator_obj = (ecma_regexp_string_iterator_t *) result_obj;

  /* The iterator records the matcher value and takes over the reference held on str_p. */
  regexp_string_iterator_obj->iterating_regexp = matcher;
  regexp_string_iterator_obj->iterated_string = ecma_make_string_value (str_p);

  /* Drop the local reference to the matcher; the iterator keeps the value recorded above. */
  ecma_deref_object (matcher_obj_p);

  return ecma_make_object_value (result_obj);
} /* ecma_builtin_regexp_prototype_match_all */
#endif /* ENABLED (JERRY_ESNEXT) */
/**
* Dispatcher of the Regexp built-in's routines
*
@@ -584,6 +726,10 @@ ecma_builtin_regexp_prototype_dispatch_routine (uint8_t builtin_routine_id, /**<
{
return ecma_regexp_match_helper (this_arg, arguments_list_p[0]);
}
case ECMA_REGEXP_PROTOTYPE_ROUTINE_SYMBOL_MATCH_ALL:
{
return ecma_builtin_regexp_prototype_match_all (obj_p, arguments_list_p[0]);
}
case ECMA_REGEXP_PROTOTYPE_ROUTINE_SYMBOL_REPLACE:
{
return ecma_regexp_replace_helper (this_arg, arguments_list_p[0], arguments_list_p[1]);
@@ -63,6 +63,7 @@ ROUTINE (LIT_GLOBAL_SYMBOL_REPLACE, ECMA_REGEXP_PROTOTYPE_ROUTINE_SYMBOL_REPLACE
ROUTINE (LIT_GLOBAL_SYMBOL_SEARCH, ECMA_REGEXP_PROTOTYPE_ROUTINE_SYMBOL_SEARCH, 1, 1)
ROUTINE (LIT_GLOBAL_SYMBOL_SPLIT, ECMA_REGEXP_PROTOTYPE_ROUTINE_SYMBOL_SPLIT, 2, 2)
ROUTINE (LIT_GLOBAL_SYMBOL_MATCH, ECMA_REGEXP_PROTOTYPE_ROUTINE_SYMBOL_MATCH, 1, 1)
ROUTINE (LIT_GLOBAL_SYMBOL_MATCH_ALL, ECMA_REGEXP_PROTOTYPE_ROUTINE_SYMBOL_MATCH_ALL, 1, 1)
#else /* !ENABLED (JERRY_ESNEXT) */
/* ECMA-262 v5, 15.10.7.1 */
STRING_VALUE (LIT_MAGIC_STRING_SOURCE,
@@ -0,0 +1,192 @@
/* Copyright JS Foundation and other contributors, http://js.foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ecma-builtin-helpers.h"
#include "ecma-builtins.h"
#include "ecma-conversion.h"
#include "ecma-gc.h"
#include "ecma-iterator-object.h"
#include "ecma-objects.h"
#if ENABLED (JERRY_ESNEXT)
#define ECMA_BUILTINS_INTERNAL
#include "ecma-builtins-internal.h"
#define BUILTIN_INC_HEADER_NAME "ecma-builtin-regexp-string-iterator-prototype.inc.h"
#define BUILTIN_UNDERSCORED_ID regexp_string_iterator_prototype
#include "ecma-builtin-internal-routines-template.inc.h"
/** \addtogroup ecma ECMA
* @{
*
* \addtogroup ecmabuiltins
* @{
*
* \addtogroup %regexpstringiteratorprototype% ECMA %RegExpStringIteratorPrototype% object built-in
* @{
*/
/**
 * The %RegExpStringIteratorPrototype% object's 'next' routine
 *
 * Runs the stored matcher against the stored string once. A finished iterator is
 * marked by an empty iterated_string: it is set when the matcher returns null, or
 * after the first result of a matcher without the global flag.
 *
 * See also:
 *          ECMA-262 v11, 21.2.7.1.1
 *
 * Note:
 *      Returned value must be freed with ecma_free_value.
 *
 * @return iterator result object, if success
 *         error - otherwise
 */
static ecma_value_t
ecma_builtin_regexp_string_iterator_prototype_object_next (ecma_value_t this_val) /**< this argument */
{
  /* 2. */
  if (!ecma_is_value_object (this_val))
  {
    return ecma_raise_type_error (ECMA_ERR_MSG ("Argument 'this' is not an object."));
  }

  ecma_object_t *obj_p = ecma_get_object_from_value (this_val);
  ecma_extended_object_t *ext_obj_p = (ecma_extended_object_t *) obj_p;

  /* 3. */
  if (ecma_get_object_type (obj_p) != ECMA_OBJECT_TYPE_PSEUDO_ARRAY
      || ext_obj_p->u.pseudo_array.type != ECMA_PSEUDO_REGEXP_STRING_ITERATOR)
  {
    return ecma_raise_type_error (ECMA_ERR_MSG ("Argument 'this' is not an iterator."));
  }

  ecma_regexp_string_iterator_t *regexp_string_iterator_obj = (ecma_regexp_string_iterator_t *) obj_p;

  /* 4. */
  if (ecma_is_value_empty (regexp_string_iterator_obj->iterated_string))
  {
    return ecma_create_iter_result_object (ECMA_VALUE_UNDEFINED, ECMA_VALUE_TRUE);
  }

  /* 5. */
  ecma_value_t regexp = regexp_string_iterator_obj->iterating_regexp;

  /* 6. */
  ecma_value_t matcher_str_value = regexp_string_iterator_obj->iterated_string;
  ecma_string_t *matcher_str_p = ecma_get_string_from_value (matcher_str_value);

  /* 9. */
  ecma_value_t match = ecma_op_regexp_exec (regexp, matcher_str_p);

  if (ECMA_IS_VALUE_ERROR (match))
  {
    return match;
  }

  /* 10. */
  if (ecma_is_value_null (match))
  {
    ecma_free_value (regexp_string_iterator_obj->iterated_string);
    regexp_string_iterator_obj->iterated_string = ECMA_VALUE_EMPTY;
    return ecma_create_iter_result_object (ECMA_VALUE_UNDEFINED, ECMA_VALUE_TRUE);
  }

  ecma_object_t *match_result_array_p = ecma_get_object_from_value (match);
  ecma_value_t result = ECMA_VALUE_ERROR;

  /* 11. */
  if (regexp_string_iterator_obj->header.u.pseudo_array.extra_info & RE_FLAG_GLOBAL)
  {
    ecma_value_t matched_str_value = ecma_op_object_get_by_index (match_result_array_p, 0);

    if (ECMA_IS_VALUE_ERROR (matched_str_value))
    {
      goto free_variables;
    }

    ecma_string_t *matched_str_p = ecma_op_to_string (matched_str_value);
    ecma_free_value (matched_str_value);

    if (JERRY_UNLIKELY (matched_str_p == NULL))
    {
      /* The conversion produced no string, so there is nothing to release here. */
      goto free_variables;
    }

    /* Only the emptiness of the match is needed, so the string is released immediately;
     * this keeps the reference balanced on every path below, including the error exits. */
    const bool is_empty_match = ecma_string_is_empty (matched_str_p);
    ecma_deref_ecma_string (matched_str_p);

    if (is_empty_match)
    {
      /* An empty match would not move the matcher forward, so its lastIndex is advanced by hand. */
      ecma_object_t *regexp_obj_p = ecma_get_object_from_value (regexp);
      ecma_value_t get_last_index = ecma_op_object_get_by_magic_id (regexp_obj_p, LIT_MAGIC_STRING_LASTINDEX_UL);

      if (ECMA_IS_VALUE_ERROR (get_last_index))
      {
        goto free_variables;
      }

      ecma_length_t this_index;
      ecma_value_t to_len = ecma_op_to_length (get_last_index, &this_index);
      ecma_free_value (get_last_index);

      if (ECMA_IS_VALUE_ERROR (to_len))
      {
        goto free_variables;
      }

      bool full_unicode = (regexp_string_iterator_obj->header.u.pseudo_array.extra_info & RE_FLAG_UNICODE) != 0;
      ecma_length_t next_index = ecma_op_advance_string_index (matcher_str_p,
                                                               this_index,
                                                               full_unicode);

      ecma_value_t next_index_value = ecma_make_length_value (next_index);
      ecma_value_t set = ecma_op_object_put (regexp_obj_p,
                                             ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
                                             next_index_value,
                                             true);
      ecma_free_value (next_index_value);

      if (ECMA_IS_VALUE_ERROR (set))
      {
        goto free_variables;
      }
    }
  }
  else
  {
    /* Without the global flag the iterator yields a single match and is then finished. */
    ecma_free_value (regexp_string_iterator_obj->iterated_string);
    regexp_string_iterator_obj->iterated_string = ECMA_VALUE_EMPTY;
  }

  result = ecma_create_iter_result_object (match, ECMA_VALUE_FALSE);

free_variables:
  /* Release the local reference to the exec result on success and on error alike. */
  ecma_deref_object (match_result_array_p);

  return result;
} /* ecma_builtin_regexp_string_iterator_prototype_object_next */
/**
* @}
* @}
* @}
*/
#endif /* ENABLED (JERRY_ESNEXT) */
@@ -0,0 +1,34 @@
/* Copyright JS Foundation and other contributors, http://js.foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* %RegExpStringIteratorPrototype% built-in description
*/
#include "ecma-builtin-helpers-macro-defines.inc.h"
#if ENABLED (JERRY_ESNEXT)
/* String-valued property keyed by the to-string-tag symbol; it is declared with the
 * configurable property flag. */
STRING_VALUE (LIT_GLOBAL_SYMBOL_TO_STRING_TAG,
LIT_MAGIC_STRING_REGEXP_STRING_ITERATOR_UL,
ECMA_PROPERTY_FLAG_CONFIGURABLE)
/* Routine properties:
* (property name, C routine name, arguments number or NON_FIXED, value of the routine's length property) */
/* The iterator's next routine: declared with 0 arguments and a length property of 0. */
ROUTINE (LIT_MAGIC_STRING_NEXT, ecma_builtin_regexp_string_iterator_prototype_object_next, 0, 0)
#endif /* ENABLED (JERRY_ESNEXT) */
#include "ecma-builtin-helpers-macro-undefs.inc.h"
@@ -91,6 +91,7 @@ enum
ECMA_STRING_PROTOTYPE_ITERATOR,
ECMA_STRING_PROTOTYPE_REPLACE_ALL,
ECMA_STRING_PROTOTYPE_MATCH_ALL,
};
#define BUILTIN_INC_HEADER_NAME "ecma-builtin-string-prototype.inc.h"
@@ -382,37 +383,128 @@ ecma_builtin_string_prototype_object_match (ecma_value_t this_argument, /**< thi
#if ENABLED (JERRY_ESNEXT)
/**
* Helper method to find a specific character in a string
* The String.prototype object's 'matchAll' routine
*
* Used by:
* ecma_builtin_string_prototype_object_replace_helper
* See also:
* ECMA-262 v11, 21.1.3.12
*
* @return true - if the given character is in the string
* false - otherwise
* @return ecma value
* Returned value must be freed with ecma_free_value.
*/
static bool
ecma_find_char_in_string (ecma_string_t *str_p, /**< source string */
lit_utf8_byte_t c) /**< character to find*/
/* String.prototype.matchAll: delegates to the @@matchAll method of the argument when it
 * has one, otherwise builds a RegExp from the argument and invokes @@matchAll on that. */
/* NOTE(review): statements touching str_curr_p, str_end_p, have_char, start_p and 'c', plus the
 * closing "ecma_find_char_in_string" brace, appear to be lines of the removed
 * ecma_find_char_in_string helper shown interleaved with this function - confirm. */
static ecma_value_t
ecma_builtin_string_prototype_object_match_all (ecma_value_t this_argument, /**< this argument */
ecma_value_t regexp_arg) /**< routine's argument */
{
ECMA_STRING_TO_UTF8_STRING (str_p, start_p, start_size);
const lit_utf8_byte_t *str_curr_p = start_p;
const lit_utf8_byte_t *str_end_p = start_p + start_size;
bool have_char = false;
while (str_curr_p < str_end_p)
/* 2. The argument is only inspected when it is neither null nor undefined. */
if (!ecma_is_value_null (regexp_arg) && !ecma_is_value_undefined (regexp_arg))
{
if (*str_curr_p++ == c)
/* 2.a Check whether the argument counts as a RegExp; an error result is returned as is. */
ecma_value_t is_regexp = ecma_op_is_regexp (regexp_arg);
if (ECMA_IS_VALUE_ERROR (is_regexp))
{
have_char = true;
break;
return is_regexp;
}
/* 2.b A RegExp argument must carry the global flag. */
if (ecma_is_value_true (is_regexp))
{
/* 2.b.i Read the flags property of the argument. */
ecma_object_t *regexp_obj_p = ecma_get_object_from_value (regexp_arg);
ecma_value_t get_flags = ecma_op_object_get_by_magic_id (regexp_obj_p, LIT_MAGIC_STRING_FLAGS);
if (ECMA_IS_VALUE_ERROR (get_flags))
{
return get_flags;
}
/* 2.b.ii The flags value has to be object coercible. */
if (!ecma_op_require_object_coercible (get_flags))
{
ecma_free_value (get_flags);
return ECMA_VALUE_ERROR;
}
/* 2.b.iii Convert the flags to a string; the fetched value is released right after the conversion. */
ecma_string_t *flags = ecma_op_to_string (get_flags);
ecma_free_value (get_flags);
if (JERRY_UNLIKELY (flags == NULL))
{
return ECMA_VALUE_ERROR;
}
/* The flags string is only needed for parsing and is released before the result is checked. */
uint16_t parsed_flag;
ecma_value_t flag_parse = ecma_regexp_parse_flags (flags, &parsed_flag);
ecma_deref_ecma_string (flags);
if (ECMA_IS_VALUE_ERROR (flag_parse))
{
return flag_parse;
}
/* Missing global flag: raise a TypeError. */
if (!(parsed_flag & RE_FLAG_GLOBAL))
{
return ecma_raise_type_error (ECMA_ERR_MSG ("RegExp argument should have global flag."));
}
}
/* 2.c Look up the @@matchAll method of the argument. */
ecma_value_t matcher = ecma_op_get_method_by_symbol_id (regexp_arg, LIT_GLOBAL_SYMBOL_MATCH_ALL);
if (ECMA_IS_VALUE_ERROR (matcher))
{
return matcher;
}
/* 2.d A defined method takes over: nothing below this branch runs in that case. */
if (!ecma_is_value_undefined (matcher))
{
/* 2.d.i Call the method with the argument as receiver and this_argument as its only argument. */
ecma_object_t *matcher_method = ecma_get_object_from_value (matcher);
ecma_value_t result = ecma_op_function_call (matcher_method, regexp_arg, &this_argument, 1);
ecma_deref_object (matcher_method);
return result;
}
}
ECMA_FINALIZE_UTF8_STRING (start_p, start_size);
/* 3. Convert the this value to a string. */
ecma_string_t *str_p = ecma_op_to_string (this_argument);
return have_char;
} /* ecma_find_char_in_string */
if (JERRY_UNLIKELY (str_p == NULL))
{
return ECMA_VALUE_ERROR;
}
/* 4. Allocate a RegExp object and initialise it from the argument used as pattern. */
/* NOTE(review): the flags argument passed below is undefined, whereas step 4 of the cited
 * section (ECMA-262 v11, 21.1.3.12) creates the RegExp with the flags "g" - confirm whether
 * a non-RegExp argument can then yield more than the first match. */
ecma_object_t *new_regexp_obj_p = ecma_op_regexp_alloc (NULL);
if (JERRY_UNLIKELY (new_regexp_obj_p == NULL))
{
ecma_deref_ecma_string (str_p);
return ECMA_VALUE_ERROR;
}
ecma_value_t new_regexp = ecma_op_create_regexp_from_pattern (new_regexp_obj_p, regexp_arg, ECMA_VALUE_UNDEFINED);
if (ECMA_IS_VALUE_ERROR (new_regexp))
{
ecma_deref_ecma_string (str_p);
ecma_deref_object (new_regexp_obj_p);
return new_regexp;
}
/* 5. Invoke @@matchAll on the new RegExp with the string; both locals are released afterwards. */
ecma_value_t string_arg = ecma_make_string_value (str_p);
ecma_value_t ret_value = ecma_op_invoke_by_symbol_id (new_regexp, LIT_GLOBAL_SYMBOL_MATCH_ALL, &string_arg, 1);
ecma_deref_ecma_string (str_p);
ecma_free_value (new_regexp);
return ret_value;
} /* ecma_builtin_string_prototype_object_match_all */
#endif /* ENABLED (JERRY_ESNEXT) */
/**
@@ -469,7 +561,7 @@ ecma_builtin_string_prototype_object_replace_helper (ecma_value_t this_value, /*
return ECMA_VALUE_ERROR;
}
bool have_global_flag = ecma_find_char_in_string (flags, LIT_CHAR_LOWERCASE_G);
bool have_global_flag = lit_find_char_in_string (flags, LIT_CHAR_LOWERCASE_G);
ecma_deref_ecma_string (flags);
@@ -1393,6 +1485,13 @@ ecma_builtin_string_prototype_dispatch_routine (uint8_t builtin_routine_id, /**<
{
return ecma_builtin_string_prototype_object_match (this_arg, arg1);
}
#if ENABLED (JERRY_ESNEXT)
if (builtin_routine_id == ECMA_STRING_PROTOTYPE_MATCH_ALL)
{
return ecma_builtin_string_prototype_object_match_all (this_arg, arg1);
}
#endif /* ENABLED (JERRY_ESNEXT) */
#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
if (builtin_routine_id <= ECMA_STRING_PROTOTYPE_CHAR_CODE_AT)
@@ -79,6 +79,7 @@ ROUTINE (LIT_MAGIC_STRING_CODE_POINT_AT, ECMA_STRING_PROTOTYPE_CODE_POINT_AT, 1,
ROUTINE (LIT_MAGIC_STRING_PAD_START, ECMA_STRING_PROTOTYPE_PAD_START, 2, 1)
ROUTINE (LIT_MAGIC_STRING_PAD_END, ECMA_STRING_PROTOTYPE_PAD_END, 2, 1)
ROUTINE (LIT_GLOBAL_SYMBOL_ITERATOR, ECMA_STRING_PROTOTYPE_ITERATOR, 0, 0)
ROUTINE (LIT_MAGIC_STRING_MATCH_ALL, ECMA_STRING_PROTOTYPE_MATCH_ALL, 1, 1)
INTRINSIC_PROPERTY (LIT_MAGIC_STRING_TRIM_START, LIT_MAGIC_STRING_TRIM_START,
ECMA_PROPERTY_CONFIGURABLE_WRITABLE)
@@ -89,6 +89,10 @@ SYMBOL_VALUE (LIT_MAGIC_STRING_TO_STRING_TAG,
SYMBOL_VALUE (LIT_MAGIC_STRING_UNSCOPABLES,
LIT_GLOBAL_SYMBOL_UNSCOPABLES)
/* ECMA-262 v11, 19.4.2.8 */
SYMBOL_VALUE (LIT_MAGIC_STRING_MATCH_ALL,
LIT_GLOBAL_SYMBOL_MATCH_ALL)
/* Routine properties:
* (property name, C routine name, arguments number or NON_FIXED, value of the routine's length property) */
ROUTINE (LIT_MAGIC_STRING_FOR, ecma_builtin_symbol_for, 1, 1)
@@ -639,6 +639,13 @@ BUILTIN (ECMA_BUILTIN_ID_STRING_ITERATOR_PROTOTYPE,
true,
string_iterator_prototype)
/* The %RegExpStringIteratorPrototype% object (ECMA-262 v11, 21.2.7.1) */
BUILTIN (ECMA_BUILTIN_ID_REGEXP_STRING_ITERATOR_PROTOTYPE,
ECMA_OBJECT_TYPE_GENERAL,
ECMA_BUILTIN_ID_ITERATOR_PROTOTYPE,
true,
regexp_string_iterator_prototype)
/* The %AsyncIteratorPrototype% object (ECMA-262 v10, 25.1.3) */
BUILTIN (ECMA_BUILTIN_ID_ASYNC_ITERATOR_PROTOTYPE,
ECMA_OBJECT_TYPE_GENERAL,