Rework RegExp engine and add support for proper Unicode matching (#3746)

This change includes several bugfixes, general improvements, and support
for additional features.
- Added full support for the web compatibility syntax defined in Annex B
- Implemented parsing and matching of patterns in Unicode mode
- Fixed capture results when iterating with nested capturing groups
- Significantly reduced regexp bytecode size
- Reduced stack usage during regexp execution
- Improved matching performance

JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai@inf.u-szeged.hu
This commit is contained in:
Dániel Bátyai
2020-05-26 15:28:54 +02:00
committed by GitHub
parent 908240ba62
commit 8f76a1f382
30 changed files with 3641 additions and 2647 deletions
+2 -5
View File
@@ -559,7 +559,6 @@ snapshot_load_compiled_code (const uint8_t *base_addr_p, /**< base address of th
#if ENABLED (JERRY_BUILTIN_REGEXP)
if (!(bytecode_p->status_flags & CBC_CODE_FLAGS_FUNCTION))
{
const re_compiled_code_t *re_bytecode_p = NULL;
const uint8_t *regex_start_p = ((const uint8_t *) bytecode_p) + sizeof (ecma_compiled_code_t);
@@ -567,10 +566,8 @@ snapshot_load_compiled_code (const uint8_t *base_addr_p, /**< base address of th
ecma_string_t *pattern_str_p = ecma_new_ecma_string_from_utf8 (regex_start_p,
bytecode_p->refs);
re_compile_bytecode (&re_bytecode_p,
pattern_str_p,
bytecode_p->status_flags);
const re_compiled_code_t *re_bytecode_p = re_compile_bytecode (pattern_str_p,
bytecode_p->status_flags);
ecma_deref_ecma_string (pattern_str_p);
return (ecma_compiled_code_t *) re_bytecode_p;