Implement RegExp.prototype[@@match] method (#3345)

The algorithm is based on ECMA-262 v6, 21.2.5.6

The following helper methods are also implemented:
  - RegExpExec: ECMA-262 v6, 21.2.5.2.1
  - AdvanceStringIndex: ECMA-262 v6, 21.2.5.2.3

JerryScript-DCO-1.0-Signed-off-by: Adam Szilagyi aszilagy@inf.u-szeged.hu
This commit is contained in:
Szilagyi Adam
2019-11-26 16:05:48 +01:00
committed by Robert Fancsik
parent 9725936848
commit 0c6b306429
8 changed files with 362 additions and 1 deletions
@@ -2799,6 +2799,55 @@ ecma_stringbuilder_destroy (ecma_stringbuilder_t *builder_p) /**< string builder
#endif /* ENABLED (JERRY_MEM_STATS) */
} /* ecma_stringbuilder_destroy */
#if ENABLED (JERRY_ES2015)
/**
 * AdvanceStringIndex operation
 *
 * Computes the index that follows 'index' in the given string. Without the
 * "unicode" flag this is always the next code unit. With the flag set, a high
 * surrogate at 'index' that is directly followed by a low surrogate is stepped
 * over as a single character (two code units).
 *
 * Note:
 *      The result saturates at UINT32_MAX. An index that is too large to be
 *      advanced is not treated as a programming error, since the value may be
 *      derived from a property that script code can set freely, and wrapping
 *      around to a small index would restart the caller's scan.
 *
 * See also:
 *          ECMA-262 v6.0, 21.2.5.2.3
 *
 * @return uint32_t - the proper character index based on the operation
 */
uint32_t
ecma_op_advance_string_index (ecma_string_t *str_p, /**< input string */
                              uint32_t index, /**< given character index */
                              bool is_unicode) /**< true - if regexp object's "unicode" flag is set
                                                    false - otherwise */
{
  /* Saturate instead of overflowing (or asserting) on out-of-range indices. */
  if (index >= UINT32_MAX - 1)
  {
    return UINT32_MAX;
  }

  uint32_t next_index = index + 1;

  if (!is_unicode)
  {
    return next_index;
  }

  ecma_length_t str_len = ecma_string_get_length (str_p);

  /* There is no code unit after 'index', so it cannot start a surrogate pair. */
  if (next_index >= str_len)
  {
    return next_index;
  }

  ecma_char_t first = ecma_string_get_char_at_pos (str_p, index);

  if (first < LIT_UTF16_HIGH_SURROGATE_MIN || first > LIT_UTF16_HIGH_SURROGATE_MAX)
  {
    return next_index;
  }

  ecma_char_t second = ecma_string_get_char_at_pos (str_p, next_index);

  if (second < LIT_UTF16_LOW_SURROGATE_MIN || second > LIT_UTF16_LOW_SURROGATE_MAX)
  {
    return next_index;
  }

  /* A complete surrogate pair occupies two code units. */
  return next_index + 1;
} /* ecma_op_advance_string_index */
#endif /* ENABLED (JERRY_ES2015) */
/**
* @}
* @}
+1
View File
@@ -231,6 +231,7 @@ lit_magic_string_id_t ecma_get_typeof_lit_id (ecma_value_t value);
#if ENABLED (JERRY_ES2015)
ecma_string_t *ecma_new_symbol_from_descriptor_string (ecma_value_t string_desc);
bool ecma_prop_name_is_symbol (ecma_string_t *string_p);
uint32_t ecma_op_advance_string_index (ecma_string_t *str_p, uint32_t index, bool is_unicode);
#endif /* ENABLED (JERRY_ES2015) */
#if ENABLED (JERRY_ES2015_BUILTIN_MAP) || ENABLED (JERRY_ES2015_BUILTIN_SET)
ecma_string_t *ecma_new_map_key_string (ecma_value_t value);
@@ -616,6 +616,179 @@ ecma_builtin_regexp_prototype_symbol_replace (ecma_value_t this_arg, /**< this a
{
return ecma_regexp_replace_helper (this_arg, string_arg, replace_arg);
} /* ecma_builtin_regexp_prototype_symbol_replace */
/**
 * The RegExp.prototype object's '@@match' routine
 *
 * Converts the argument to a string and matches it against the 'this' object.
 * When the object's "global" property is falsy, the result of a single
 * RegExpExec call is returned. Otherwise "lastIndex" is reset to 0 and
 * RegExpExec is called repeatedly: the string form of element 0 of every
 * match result is collected into a new array. An empty match advances
 * "lastIndex" by one character so the loop keeps making progress.
 *
 * See also:
 *          ECMA-262 v6.0, 21.2.5.6
 *
 * @return ECMA_VALUE_ERROR - if 'this' is not an object or any of the operations fails
 *         ECMA_VALUE_NULL - if the "global" property is truthy and nothing matched
 *         array of matched strings - if the "global" property is truthy and there was a match
 *         result of RegExpExec - otherwise
 *         Returned value must be freed with ecma_free_value.
 */
static ecma_value_t
ecma_builtin_regexp_prototype_symbol_match (ecma_value_t this_arg, /**< this argument */
                                            ecma_value_t string_arg) /**< source string */
{
  if (!ecma_is_value_object (this_arg))
  {
    return ecma_raise_type_error (ECMA_ERR_MSG ("TypeError"));
  }

  ecma_string_t *str_p = ecma_op_to_string (string_arg);

  if (JERRY_UNLIKELY (str_p == NULL))
  {
    return ECMA_VALUE_ERROR;
  }

  ecma_object_t *obj_p = ecma_get_object_from_value (this_arg);

  ecma_value_t global_value = ecma_op_object_get_by_magic_id (obj_p, LIT_MAGIC_STRING_GLOBAL);

  if (ECMA_IS_VALUE_ERROR (global_value))
  {
    ecma_deref_ecma_string (str_p);
    return global_value;
  }

  bool global = ecma_op_to_boolean (global_value);
  ecma_free_value (global_value);

  /* Non-global case: a single exec call provides the result. */
  if (!global)
  {
    ecma_value_t result = ecma_op_regexp_exec (this_arg, str_p);
    ecma_deref_ecma_string (str_p);
    return result;
  }

  ecma_value_t full_unicode_value = ecma_op_object_get_by_magic_id (obj_p, LIT_MAGIC_STRING_UNICODE);

  if (ECMA_IS_VALUE_ERROR (full_unicode_value))
  {
    ecma_deref_ecma_string (str_p);
    return full_unicode_value;
  }

  bool full_unicode = ecma_op_to_boolean (full_unicode_value);
  ecma_free_value (full_unicode_value);

  /* Matching always starts from the beginning of the string. */
  ecma_value_t set_status = ecma_op_object_put (obj_p,
                                                ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
                                                ecma_make_uint32_value (0),
                                                true);

  if (ECMA_IS_VALUE_ERROR (set_status))
  {
    ecma_deref_ecma_string (str_p);
    return set_status;
  }

  ecma_value_t ret_value = ECMA_VALUE_ERROR;
  ecma_value_t result_array = ecma_op_create_array_object (0, 0, false);
  ecma_object_t *result_array_p = ecma_get_object_from_value (result_array);
  uint32_t n = 0;

  while (true)
  {
    ecma_value_t result_value = ecma_op_regexp_exec (this_arg, str_p);

    if (ECMA_IS_VALUE_ERROR (result_value))
    {
      goto result_cleanup;
    }

    if (ecma_is_value_null (result_value))
    {
      /* No (more) matches: the result is null if nothing has been collected. */
      if (n == 0)
      {
        ret_value = ECMA_VALUE_NULL;
        goto result_cleanup;
      }

      ecma_deref_ecma_string (str_p);
      return result_array;
    }

    ecma_object_t *result_value_p = ecma_get_object_from_value (result_value);
    ecma_value_t match_value = ecma_op_object_get_by_uint32_index (result_value_p, 0);
    ecma_deref_object (result_value_p);

    if (ECMA_IS_VALUE_ERROR (match_value))
    {
      goto result_cleanup;
    }

    /* The fetched value is only needed for the string conversion. */
    ecma_string_t *match_str_p = ecma_op_to_string (match_value);
    ecma_free_value (match_value);

    if (JERRY_UNLIKELY (match_str_p == NULL))
    {
      goto result_cleanup;
    }

    /* The converted string is stored (matchStr in the standard), not the raw value:
     * a user-defined 'exec' may place a non-string value at index 0 of its result. */
    ecma_value_t new_prop = ecma_builtin_helper_def_prop_by_index (result_array_p,
                                                                   n,
                                                                   ecma_make_string_value (match_str_p),
                                                                   ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);

    JERRY_ASSERT (!ECMA_IS_VALUE_ERROR (new_prop));

    ecma_value_t match_result = ECMA_VALUE_ERROR;

    /* An empty match does not consume input, so "lastIndex" is advanced explicitly. */
    if (ecma_string_is_empty (match_str_p))
    {
      ecma_value_t this_index = ecma_op_object_get_by_magic_id (obj_p, LIT_MAGIC_STRING_LASTINDEX_UL);

      if (ECMA_IS_VALUE_ERROR (this_index))
      {
        goto match_cleanup;
      }

      uint32_t index;
      ecma_value_t length_value = ecma_op_to_length (this_index, &index);
      ecma_free_value (this_index);

      if (ECMA_IS_VALUE_ERROR (length_value))
      {
        goto match_cleanup;
      }

      uint32_t next_index = ecma_op_advance_string_index (str_p, index, full_unicode);

      ecma_value_t next_set_status = ecma_op_object_put (obj_p,
                                                         ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
                                                         ecma_make_uint32_value (next_index),
                                                         true);

      if (ECMA_IS_VALUE_ERROR (next_set_status))
      {
        goto match_cleanup;
      }
    }

    match_result = ECMA_VALUE_EMPTY;

match_cleanup:
    ecma_deref_ecma_string (match_str_p);

    if (ECMA_IS_VALUE_ERROR (match_result))
    {
      goto result_cleanup;
    }

    n++;
  }

result_cleanup:
  /* Reached on errors and on the "no match at all" case: the array is dropped. */
  ecma_deref_ecma_string (str_p);
  ecma_deref_object (result_array_p);
  return ret_value;
} /* ecma_builtin_regexp_prototype_symbol_match */
#endif /* ENABLED (JERRY_ES2015) */
/**
@@ -56,6 +56,7 @@ ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_STICKY,
ECMA_PROPERTY_FIXED)
ROUTINE (LIT_GLOBAL_SYMBOL_REPLACE, ecma_builtin_regexp_prototype_symbol_replace, 2, 2)
ROUTINE (LIT_GLOBAL_SYMBOL_MATCH, ecma_builtin_regexp_prototype_symbol_match, 1, 1)
#else /* !ENABLED (JERRY_ES2015) */
/* ECMA-262 v5, 15.10.7.1 */
STRING_VALUE (LIT_MAGIC_STRING_SOURCE,
+3 -1
View File
@@ -2605,7 +2605,9 @@ ecma_op_is_regexp (ecma_value_t arg) /**< argument */
if (!ecma_is_value_undefined (is_regexp))
{
return ecma_make_boolean_value (ecma_op_to_boolean (is_regexp));
const bool to_bool = ecma_op_to_boolean (is_regexp);
ecma_free_value (is_regexp);
return ecma_make_boolean_value (to_bool);
}
return ecma_make_boolean_value (ecma_object_is_regexp_object (arg));
@@ -2011,6 +2011,65 @@ cleanup_string:
return result;
} /* ecma_regexp_replace_helper */
/**
 * RegExpExec operation
 *
 * With ES2015 enabled the "exec" property of the 'this' object is looked up
 * first: a callable value is invoked with the input string as its only
 * argument, and its result is accepted only if it is an object or null.
 * In every other case 'this' must be a RegExp object, and the built-in
 * matcher is executed on it.
 *
 * See also:
 *          ECMA-262 v6.0, 21.2.5.2.1
 *
 * @return ecma value
 *         Returned value must be freed with ecma_free_value.
 */
ecma_value_t
ecma_op_regexp_exec (ecma_value_t this_arg, /**< this argument */
                     ecma_string_t *str_p) /**< input string */
{
#if ENABLED (JERRY_ES2015)
  ecma_object_t *regexp_obj_p = ecma_get_object_from_value (this_arg);
  ecma_value_t exec_value = ecma_op_object_get_by_magic_id (regexp_obj_p, LIT_MAGIC_STRING_EXEC);

  if (ECMA_IS_VALUE_ERROR (exec_value))
  {
    return exec_value;
  }

  if (ecma_op_is_callable (exec_value))
  {
    ecma_object_t *exec_func_p = ecma_get_object_from_value (exec_value);

    ecma_value_t call_args[1];
    call_args[0] = ecma_make_string_value (str_p);

    ecma_value_t call_result = ecma_op_function_call (exec_func_p, this_arg, call_args, 1);

    /* Drops the reference obtained by the property lookup above. */
    ecma_deref_object (exec_func_p);

    /* Errors are propagated as they are, objects and null are valid results. */
    if (ECMA_IS_VALUE_ERROR (call_result)
        || ecma_is_value_object (call_result)
        || ecma_is_value_null (call_result))
    {
      return call_result;
    }

    ecma_free_value (call_result);
    return ecma_raise_type_error (ECMA_ERR_MSG ("Return value of 'exec' must be an Object or Null"));
  }

  /* Not callable: release the value and fall back to the built-in matcher. */
  ecma_free_value (exec_value);
#endif /* ENABLED (JERRY_ES2015) */

  if (ecma_object_is_regexp_object (this_arg))
  {
    return ecma_regexp_exec_helper (this_arg, ecma_make_string_value (str_p), false);
  }

  return ecma_raise_type_error (ECMA_ERR_MSG ("'this' is not a valid RegExp object"));
} /* ecma_op_regexp_exec */
/**
* @}
* @}
@@ -110,6 +110,8 @@ ecma_value_t
ecma_regexp_replace_helper (ecma_value_t this_arg,
ecma_value_t string_arg,
ecma_value_t replace_arg);
ecma_value_t ecma_op_regexp_exec (ecma_value_t this_arg, ecma_string_t *str_p);
/**
* @}
* @}
@@ -0,0 +1,74 @@
// Copyright JS Foundation and other contributors, http://js.foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
var regexp = /[0-9]+/g;
var str = '2016-01-02';
var num = 12131233;
// Test with string input
var result = regexp[Symbol.match](str);
assert(result.toString() === "2016,01,02");
regexp = /[0-5]+/g;
result = regexp[Symbol.match](str);
assert(result.toString() === "201,01,02");
regexp = /[0-1]+/g;
result = regexp[Symbol.match](str);
assert(result.toString() === "01,01,0");
regexp = /([0-9]+)-([0-9]+)-([0-9]+)/g
result = regexp[Symbol.match](str);
assert(result.toString() === "2016-01-02");
// Test with number input
regexp = /[0-9]+/g;
result = regexp[Symbol.match](num);
assert(result.toString() === "12131233");
// Test with empty string
result = regexp[Symbol.match]('');
assert(result === null);
// Test with undefined
result = regexp[Symbol.match](undefined);
assert(result === null);
// Test when input is not a regexp
regexp = 12;
try {
result = regexp[Symbol.match](str);
assert(false);
} catch (e) {
assert(e instanceof TypeError);
}
// Test with a RegExp subclass that overrides the [Symbol.match] method
class RegExpSub extends RegExp {
  [Symbol.match](str) {
    // Delegate to the built-in implementation and map its outcome to a label:
    // a truthy result means there was a match, null means there was none.
    var matched = RegExp.prototype[Symbol.match].call(this, str);
    return matched ? "VALID" : "INVALID";
  }
}

var regexp1 = new RegExpSub('([0-9]+)-([0-9]+)-([0-9]+)');
result = regexp1[Symbol.match](str);
assert(result === "VALID");