Rework RegExp engine and add support for proper unicode matching (#3746)
This change includes several bugfixes, general improvements, and support for additional features.

- Added full support for web compatibility syntax defined in Annex B
- Implemented parsing and matching patterns in unicode mode
- Fixed capture results when iterating with nested capturing groups
- Significantly reduced regexp bytecode size
- Reduced stack usage during regexp execution
- Improved matching performance

JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai@inf.u-szeged.hu
This commit is contained in:
@@ -2847,9 +2847,6 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
|
||||
context_p->literal_count++;
|
||||
|
||||
/* Compile the RegExp literal and store the RegExp bytecode pointer */
|
||||
const re_compiled_code_t *re_bytecode_p = NULL;
|
||||
ecma_value_t completion_value;
|
||||
|
||||
ecma_string_t *pattern_str_p = NULL;
|
||||
|
||||
if (lit_is_valid_cesu8_string (regex_start_p, length))
|
||||
@@ -2862,19 +2859,14 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
|
||||
pattern_str_p = ecma_new_ecma_string_from_utf8_converted_to_cesu8 (regex_start_p, length);
|
||||
}
|
||||
|
||||
completion_value = re_compile_bytecode (&re_bytecode_p,
|
||||
pattern_str_p,
|
||||
current_flags);
|
||||
re_compiled_code_t *re_bytecode_p = re_compile_bytecode (pattern_str_p, current_flags);
|
||||
ecma_deref_ecma_string (pattern_str_p);
|
||||
|
||||
if (ECMA_IS_VALUE_ERROR (completion_value))
|
||||
if (JERRY_UNLIKELY (re_bytecode_p == NULL))
|
||||
{
|
||||
jcontext_release_exception ();
|
||||
parser_raise_error (context_p, PARSER_ERR_INVALID_REGEXP);
|
||||
}
|
||||
|
||||
ecma_free_value (completion_value);
|
||||
|
||||
literal_p->type = LEXER_REGEXP_LITERAL;
|
||||
literal_p->u.bytecode_p = (ecma_compiled_code_t *) re_bytecode_p;
|
||||
|
||||
|
||||
@@ -2723,6 +2723,14 @@ parser_parse_script (const uint8_t *arg_list_p, /**< function argument list */
|
||||
jcontext_raise_exception (ECMA_VALUE_NULL);
|
||||
return ECMA_VALUE_ERROR;
|
||||
}
|
||||
|
||||
if (parser_error.error == PARSER_ERR_INVALID_REGEXP)
|
||||
{
|
||||
/* The RegExp compiler has already raised an exception. */
|
||||
JERRY_ASSERT (jcontext_has_pending_exception ());
|
||||
return ECMA_VALUE_ERROR;
|
||||
}
|
||||
|
||||
#if ENABLED (JERRY_ERROR_MESSAGES)
|
||||
const lit_utf8_byte_t *err_bytes_p = (const lit_utf8_byte_t *) parser_error_to_string (parser_error.error);
|
||||
lit_utf8_size_t err_bytes_size = lit_zt_utf8_string_size (err_bytes_p);
|
||||
|
||||
Reference in New Issue
Block a user