Implement String.prototype.search, and some minor regexp refactors.

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg@inf.u-szeged.hu
This commit is contained in:
Zoltan Herczeg
2015-08-04 05:07:44 -07:00
parent f39a294bc6
commit 0a1b6eb4c7
6 changed files with 209 additions and 65 deletions
@@ -67,30 +67,13 @@ ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
{
ECMA_TRY_CATCH (obj_this, ecma_op_to_object (this_arg), ret_value);
ecma_object_t *obj_p = ecma_get_object_from_value (obj_this);
ecma_property_t *bytecode_prop_p = ecma_get_internal_property (obj_p, ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE);
re_bytecode_t *bytecode_p = ECMA_GET_POINTER (re_bytecode_t, bytecode_prop_p->u.internal_property.value);
ECMA_TRY_CATCH (input_str_value,
ecma_op_to_string (arg),
ret_value);
ecma_string_t *input_str_p = ecma_get_string_from_value (input_str_value);
/* Convert ecma_String_t *to regexp_bytecode_t* */
lit_utf8_size_t input_str_size = ecma_string_get_size (input_str_p);
MEM_DEFINE_LOCAL_ARRAY (input_utf8_buffer_p, input_str_size, lit_utf8_byte_t);
ecma_string_to_utf8_string (input_str_p, input_utf8_buffer_p, (ssize_t) input_str_size);
lit_utf8_iterator_t iter = lit_utf8_iterator_create (input_utf8_buffer_p, input_str_size);
ret_value = ecma_regexp_exec_helper (obj_p, bytecode_p, &iter);
MEM_FINALIZE_LOCAL_ARRAY (input_utf8_buffer_p);
ret_value = ecma_regexp_exec_helper (obj_this, input_str_value, false);
ECMA_FINALIZE (input_str_value);
ECMA_FINALIZE (obj_this);
}
@@ -31,6 +31,10 @@
#include "jrt-libc-includes.h"
#include "lit-char-helpers.h"
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
#include "ecma-regexp-object.h"
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_STRING_BUILTIN
#define ECMA_BUILTINS_INTERNAL
@@ -560,15 +564,10 @@ ecma_builtin_string_prototype_object_match (ecma_value_t this_arg, /**< this arg
JERRY_ASSERT (ecma_is_value_boolean (global_value));
ecma_value_t exec_arguments[1] = { this_to_string_value };
if (!ecma_is_value_true (global_value))
{
/* 7. */
ret_value = ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC,
regexp_value,
exec_arguments,
1);
ret_value = ecma_regexp_exec_helper (regexp_value, this_to_string_value, false);
}
else
{
@@ -608,10 +607,7 @@ ecma_builtin_string_prototype_object_match (ecma_value_t this_arg, /**< this arg
{
/* 8.f.i. */
ECMA_TRY_CATCH (exec_value,
ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC,
regexp_value,
exec_arguments,
1),
ecma_regexp_exec_helper (regexp_value, this_to_string_value, false),
ret_value);
if (ecma_is_value_null (exec_value))
@@ -829,13 +825,10 @@ ecma_builtin_string_prototype_object_replace_match (ecma_builtin_replace_search_
if (context_p->is_regexp)
{
ecma_value_t exec_arguments[1] = { context_p->input_string };
ECMA_TRY_CATCH (match_value,
ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC,
context_p->regexp_or_search_string,
exec_arguments,
1),
ecma_regexp_exec_helper (context_p->regexp_or_search_string,
context_p->input_string,
false),
ret_value);
if (!ecma_is_value_null (match_value))
@@ -1504,7 +1497,6 @@ ecma_builtin_string_prototype_object_replace (ecma_value_t this_arg, /**< this a
return ret_value;
} /* ecma_builtin_string_prototype_object_replace */
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
/**
* The String.prototype object's 'search' routine
@@ -1517,11 +1509,91 @@ ecma_builtin_string_prototype_object_replace (ecma_value_t this_arg, /**< this a
*/
/*
 * NOTE(review): this listing appears to be a diff rendering whose add/remove
 * markers were stripped, so the superseded 'arg' parameter line and the
 * ECMA_BUILTIN_CP_UNIMPLEMENTED stub below sit next to the lines that
 * replace them -- confirm against the committed file before compiling.
 *
 * Summary of the routine body:
 *  - checks that 'this' is object coercible and converts it to a string;
 *  - uses 'regexp_arg' directly when it is an object whose class name is
 *    RegExp, otherwise builds a RegExp from it through
 *    ecma_builtin_regexp_dispatch_construct;
 *  - runs ecma_regexp_exec_helper with its third argument set to true and
 *    returns the 'index' property of the match object as a number,
 *    or -1 when the helper returns null.
 *
 * The numbered step comments presumably follow ECMA-262 v5, 15.5.4.12; the
 * header comment that would name the section is not fully visible here.
 */
static ecma_completion_value_t
ecma_builtin_string_prototype_object_search (ecma_value_t this_arg, /**< this argument */
ecma_value_t arg) /**< routine's argument */
ecma_value_t regexp_arg) /**< routine's argument */
{
ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg, arg);
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
/* 1. */
ECMA_TRY_CATCH (check_coercible_value,
ecma_op_check_object_coercible (this_arg),
ret_value);
/* 2. */
ECMA_TRY_CATCH (to_string_value,
ecma_op_to_string (this_arg),
ret_value);
/* Starts as the EMPTY simple value; each branch below stores a copy made
 * with ecma_copy_value, which is released by the ecma_free_value call
 * inside the 'ret_value is empty' block further down. */
ecma_value_t regexp_value = ecma_make_simple_value (ECMA_SIMPLE_VALUE_EMPTY);
/* 3. */
if (ecma_is_value_object (regexp_arg)
&& ecma_object_get_class_name (ecma_get_object_from_value (regexp_arg)) == LIT_MAGIC_STRING_REGEXP_UL)
{
/* The argument is already a RegExp object: use it as is. */
regexp_value = ecma_copy_value (regexp_arg, true);
}
else
{
/* 4. */
/* Any other argument is handed to the RegExp constructor as the pattern. */
ecma_value_t regexp_arguments[1] = { regexp_arg };
ECMA_TRY_CATCH (new_regexp_value,
ecma_builtin_regexp_dispatch_construct (regexp_arguments, 1),
ret_value);
/* Copy before ECMA_FINALIZE, which presumably releases new_regexp_value
 * -- TODO confirm against the macro definition. */
regexp_value = ecma_copy_value (new_regexp_value, true);
ECMA_FINALIZE (new_regexp_value);
}
/* 5. */
/* Continue only while ret_value is still empty; presumably a failed
 * construction above leaves a non-empty completion in ret_value. */
if (ecma_is_completion_value_empty (ret_value))
{
/* The third argument (true) is passed as the helper's 'ignore global
 * flag' parameter. */
ECMA_TRY_CATCH (match_result,
ecma_regexp_exec_helper (regexp_value, to_string_value, true),
ret_value);
/* Result used when the helper returns null. */
ecma_number_t offset = -1;
if (!ecma_is_value_null (match_result))
{
JERRY_ASSERT (ecma_is_value_object (match_result));
ecma_object_t *match_object_p = ecma_get_object_from_value (match_result);
ecma_string_t *index_string_p = ecma_get_magic_string (LIT_MAGIC_STRING_INDEX);
/* Read the 'index' property of the match object. */
ECMA_TRY_CATCH (index_value,
ecma_op_object_get (match_object_p, index_string_p),
ret_value);
JERRY_ASSERT (ecma_is_value_number (index_value));
offset = *ecma_get_number_from_value (index_value);
ECMA_FINALIZE (index_value);
ecma_deref_ecma_string (index_string_p);
}
/* Build the numeric result only if reading 'index' left ret_value empty. */
if (ecma_is_completion_value_empty (ret_value))
{
ecma_number_t *offset_number_p = ecma_alloc_number ();
*offset_number_p = offset;
ret_value = ecma_make_normal_completion_value (ecma_make_number_value (offset_number_p));
}
ECMA_FINALIZE (match_result);
/* Release the copy stored in regexp_value by step 3 or step 4. */
ecma_free_value (regexp_value, true);
}
ECMA_FINALIZE (to_string_value);
ECMA_FINALIZE (check_coercible_value);
/* 6. */
return ret_value;
} /* ecma_builtin_string_prototype_object_search */
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
/**
* The String.prototype object's 'slice' routine
*
@@ -71,9 +71,9 @@ ROUTINE (LIT_MAGIC_STRING_LOCALE_COMPARE_UL, ecma_builtin_string_prototype_objec
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
ROUTINE (LIT_MAGIC_STRING_MATCH, ecma_builtin_string_prototype_object_match, 1, 1)
ROUTINE (LIT_MAGIC_STRING_REPLACE, ecma_builtin_string_prototype_object_replace, 2, 2)
ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1)
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1)
ROUTINE (LIT_MAGIC_STRING_SPLIT, ecma_builtin_string_prototype_object_split, 2, 2)
ROUTINE (LIT_MAGIC_STRING_SUBSTRING, ecma_builtin_string_prototype_object_substring, 2, 2)
ROUTINE (LIT_MAGIC_STRING_TO_LOWER_CASE_UL, ecma_builtin_string_prototype_object_to_lower_case, 0, 0)
@@ -1176,19 +1176,45 @@ re_set_result_array_properties (ecma_object_t *array_obj_p, /**< result array */
* Returned value must be freed with ecma_free_completion_value
*/
ecma_completion_value_t
ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
re_bytecode_t *bc_p, /**< start of the RegExp bytecode */
lit_utf8_iterator_t *iter_p) /**< input string iterator */
ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
ecma_value_t input_string, /**< input string */
bool ignore_global) /**< ignore global flag */
{
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
JERRY_ASSERT (ecma_is_value_object (regexp_value));
JERRY_ASSERT (ecma_is_value_string (input_string));
ecma_object_t *regexp_object_p = ecma_get_object_from_value (regexp_value);
JERRY_ASSERT (ecma_object_get_class_name (regexp_object_p) == LIT_MAGIC_STRING_REGEXP_UL);
ecma_property_t *bytecode_prop_p = ecma_get_internal_property (regexp_object_p,
ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE);
re_bytecode_t *bc_p = ECMA_GET_POINTER (re_bytecode_t, bytecode_prop_p->u.internal_property.value);
ecma_string_t *input_string_p = ecma_get_string_from_value (input_string);
lit_utf8_size_t input_string_size = ecma_string_get_size (input_string_p);
MEM_DEFINE_LOCAL_ARRAY (input_utf8_buffer_p, input_string_size, lit_utf8_byte_t);
ecma_string_to_utf8_string (input_string_p, input_utf8_buffer_p, (ssize_t) input_string_size);
lit_utf8_iterator_t iterator = lit_utf8_iterator_create (input_utf8_buffer_p, input_string_size);
re_matcher_ctx_t re_ctx;
re_ctx.input_start_p = iter_p->buf_p;
re_ctx.input_end_p = iter_p->buf_p + iter_p->buf_size;
re_ctx.input_start_p = iterator.buf_p;
re_ctx.input_end_p = iterator.buf_p + iterator.buf_size;
re_ctx.match_limit = 0;
re_ctx.recursion_depth = 0;
/* 1. Read bytecode header and init regexp matcher context. */
re_ctx.flags = (uint8_t) re_get_value (&bc_p);
if (ignore_global)
{
re_ctx.flags &= (uint8_t) ~RE_FLAG_GLOBAL;
}
JERRY_DDLOG ("Exec with flags [global: %d, ignoreCase: %d, multiline: %d]\n",
re_ctx.flags & RE_FLAG_GLOBAL,
re_ctx.flags & RE_FLAG_IGNORE_CASE,
@@ -1217,22 +1243,22 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
bool is_match = false;
re_ctx.num_of_iterations_p = num_of_iter_p;
int32_t index = 0;
ecma_length_t input_str_len = lit_utf8_string_length (iter_p->buf_p, iter_p->buf_size);
ecma_length_t input_str_len = lit_utf8_string_length (iterator.buf_p, iterator.buf_size);
if (iter_p->buf_p && re_ctx.flags & RE_FLAG_GLOBAL)
if (iterator.buf_p && (re_ctx.flags & RE_FLAG_GLOBAL))
{
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
ecma_property_t *lastindex_prop_p = ecma_op_object_get_property (obj_p, magic_str_p);
ecma_property_t *lastindex_prop_p = ecma_op_object_get_property (regexp_object_p, magic_str_p);
ECMA_OP_TO_NUMBER_TRY_CATCH (lastindex_num, lastindex_prop_p->u.named_data_property.value, ret_value)
index = ecma_number_to_int32 (lastindex_num);
JERRY_ASSERT (iter_p->buf_pos.offset == 0 && !iter_p->buf_pos.is_non_bmp_middle);
if (!lit_utf8_iterator_is_eos (iter_p)
JERRY_ASSERT (iterator.buf_pos.offset == 0 && !iterator.buf_pos.is_non_bmp_middle);
if (!lit_utf8_iterator_is_eos (&iterator)
&& index <= (int32_t) input_str_len
&& index > 0)
{
lit_utf8_iterator_advance (iter_p, (ecma_length_t) index);
lit_utf8_iterator_advance (&iterator, (ecma_length_t) index);
}
ECMA_OP_TO_NUMBER_FINALIZE (lastindex_num);
ecma_deref_ecma_string (magic_str_p);
@@ -1245,19 +1271,22 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
{
if (index < 0 || index > (int32_t) input_str_len)
{
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
ecma_number_t *lastindex_num_p = ecma_alloc_number ();
*lastindex_num_p = ECMA_NUMBER_ZERO;
ecma_op_object_put (obj_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
ecma_dealloc_number (lastindex_num_p);
ecma_deref_ecma_string (magic_str_p);
if (re_ctx.flags & RE_FLAG_GLOBAL)
{
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
ecma_number_t *lastindex_num_p = ecma_alloc_number ();
*lastindex_num_p = ECMA_NUMBER_ZERO;
ecma_op_object_put (regexp_object_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
ecma_dealloc_number (lastindex_num_p);
ecma_deref_ecma_string (magic_str_p);
}
is_match = false;
break;
}
else
{
ECMA_TRY_CATCH (match_value, re_match_regexp (&re_ctx, bc_p, *iter_p, &sub_iter), ret_value);
ECMA_TRY_CATCH (match_value, re_match_regexp (&re_ctx, bc_p, iterator, &sub_iter), ret_value);
if (ecma_is_value_true (match_value))
{
@@ -1265,9 +1294,9 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
break;
}
if (!lit_utf8_iterator_is_eos (iter_p))
if (!lit_utf8_iterator_is_eos (&iterator))
{
lit_utf8_iterator_advance (iter_p, 1);
lit_utf8_iterator_advance (&iterator, 1);
}
index++;
@@ -1275,12 +1304,12 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
}
}
if (iter_p->buf_p && re_ctx.flags & RE_FLAG_GLOBAL)
if (iterator.buf_p && (re_ctx.flags & RE_FLAG_GLOBAL))
{
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
ecma_number_t *lastindex_num_p = ecma_alloc_number ();
*lastindex_num_p = sub_iter.buf_pos.offset;
ecma_op_object_put (obj_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
ecma_op_object_put (regexp_object_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
ecma_dealloc_number (lastindex_num_p);
ecma_deref_ecma_string (magic_str_p);
}
@@ -1299,9 +1328,9 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
{
ecma_string_t *index_str_p = ecma_new_ecma_string_from_uint32 (i / 2);
/* Note: 'iter_p->buf_p == NULL' means the input is empty string */
/* Note: 'iterator.buf_p == NULL' means the input is empty string */
if (((re_ctx.saved_p[i].buf_p && re_ctx.saved_p[i + 1].buf_p)
|| (!iter_p->buf_p && !re_ctx.saved_p[i].buf_p && !re_ctx.saved_p[i + 1].buf_p))
|| (!iterator.buf_p && !re_ctx.saved_p[i].buf_p && !re_ctx.saved_p[i + 1].buf_p))
&& re_ctx.saved_p[i + 1].buf_pos.offset >= re_ctx.saved_p[i].buf_pos.offset)
{
ecma_length_t capture_str_len;
@@ -1336,8 +1365,10 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
ret_value = ecma_make_normal_completion_value (ecma_make_simple_value (ECMA_SIMPLE_VALUE_NULL));
}
}
MEM_FINALIZE_LOCAL_ARRAY (num_of_iter_p);
MEM_FINALIZE_LOCAL_ARRAY (saved_p);
MEM_FINALIZE_LOCAL_ARRAY (input_utf8_buffer_p);
return ret_value;
} /* ecma_regexp_exec_helper */
@@ -59,9 +59,7 @@ extern ecma_completion_value_t
ecma_op_create_regexp_object (ecma_string_t *pattern_p, ecma_string_t *flags_str_p);
extern ecma_completion_value_t
ecma_regexp_exec_helper (ecma_object_t *obj_p,
re_bytecode_t *bc_p,
lit_utf8_iterator_t *iter_p);
ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool);
/**
* @}
+60
View File
@@ -0,0 +1,60 @@
// Copyright 2015 University of Szeged
// Copyright 2015 Samsung Electronics Co., Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Regression tests for String.prototype.search.

// RegExp literal argument: "abbc" starts at index 3.
assert ("abcabbcd".search (/abb+c/) === 3);
// String argument used as a pattern (groups and alternation): "abbcc" starts at index 2.
assert ("ababbccabd".search ("((?:(ax))|(bx)|ab*c+)") === 2);
// A global RegExp still reports the first match ("bc" at index 5).
assert ("acbaabcabcabc".search (/b+c/g) === 5);
// No match yields -1.
assert ("abcabd".search ("c?a+d") === -1);
// A non-string 'this' is converted to a string first: "[object Object]" has "ect" at index 4.
assert (String.prototype.search.call ({}, "ec+t") === 4);
// A null 'this' must be rejected with a TypeError.
try
{
String.prototype.search.call (null, "u");
assert (false);
}
catch (e)
{
assert (e instanceof TypeError);
}
// search must neither depend on nor modify lastIndex of a global RegExp,
// even when lastIndex holds a non-numeric value.
var regexp = /x/g;
regexp.lastIndex = "index";
assert ("aaxbb".search (regexp) === 2);
assert ("aabb".search (regexp) === -1);
assert (regexp.lastIndex === "index");
// Same checks with a read-only lastIndex: searching must still succeed
// (match and no-match) and leave the property value untouched.
Object.defineProperty(regexp, "lastIndex", {
configurable : false,
enumerable : false,
value : "index2",
writable : false
});
assert ("axb".search (regexp) === 1);
assert ("aabb".search (regexp) === -1);
assert (regexp.lastIndex === "index2");
// Each half of a surrogate pair can be searched for on its own; the expected
// results (2 and 3) count the two halves as separate positions.
assert ("##\ud801\udc00".search ("\ud801") === 2);
assert ("##\ud801\udc00".search ("\udc00") === 3);
// The real "exec" never returns with a number.
Object.getPrototypeOf(/x/).exec = function () { return "???"; }
assert (/y/.exec("y") === "???");
// Changing exec should not affect search.
assert ("ay".search (/y/) === 1);