Use Unicode iterators in the RegExp engine and implement \d, \D, \w, \W, \s, \S, \x, \u matching.

JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com
JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com
This commit is contained in:
László Langó
2015-07-22 09:17:37 +02:00
parent 6567651b6c
commit 3c71daaf84
8 changed files with 559 additions and 402 deletions
@@ -79,14 +79,12 @@ ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
/* Convert ecma_String_t *to regexp_bytecode_t* */
lit_utf8_size_t input_str_size = ecma_string_get_size (input_str_p);
MEM_DEFINE_LOCAL_ARRAY (input_utf8_buffer_p, input_str_size + 1, lit_utf8_byte_t);
MEM_DEFINE_LOCAL_ARRAY (input_utf8_buffer_p, input_str_size, lit_utf8_byte_t);
ecma_string_to_utf8_string (input_str_p, input_utf8_buffer_p, (ssize_t) input_str_size);
lit_utf8_iterator_t iter = lit_utf8_iterator_create (input_utf8_buffer_p, input_str_size);
FIXME ("Update ecma_regexp_exec_helper so that zero symbol is not needed.");
input_utf8_buffer_p[input_str_size] = LIT_BYTE_NULL;
ret_value = ecma_regexp_exec_helper (obj_p, bytecode_p, input_utf8_buffer_p, input_str_size);
ret_value = ecma_regexp_exec_helper (obj_p, bytecode_p, &iter);
MEM_FINALIZE_LOCAL_ARRAY (input_utf8_buffer_p);
@@ -119,7 +119,16 @@ ecma_builtin_regexp_dispatch_construct (const ecma_value_t *arguments_list_p, /*
if (ecma_is_completion_value_empty (ret_value))
{
ret_value = ecma_op_create_regexp_object (pattern_string_p, flags_string_p);
if (ecma_string_get_length (pattern_string_p) == 0)
{
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP);
ret_value = ecma_op_create_regexp_object (magic_str_p, flags_string_p);
ecma_deref_ecma_string (magic_str_p);
}
else
{
ret_value = ecma_op_create_regexp_object (pattern_string_p, flags_string_p);
}
}
if (flags_string_p != NULL)