Rework RegExp engine and add support for proper unicode matching (#3746)

This change includes several bugfixes, general improvements, and support for additional features.

- Added full support for web compatibility syntax defined in Annex B
- Implemented parsing and matching patterns in unicode mode
- Fixed capture results when iterating with nested capturing groups
- Significantly reduced regexp bytecode size
- Reduced stack usage during regexp execution
- Improved matching performance

JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai@inf.u-szeged.hu
2020-05-26 15:28:54 +02:00
parent 908240ba62
commit 8f76a1f382
30 changed files with 3641 additions and 2647 deletions
@@ -2362,8 +2362,7 @@ ecma_string_trim_helper (const lit_utf8_byte_t **utf8_str_p, /**< [in, out] curr
  {
    read_size = lit_read_code_unit_from_utf8 (current_p, &ch);

-    if (!lit_char_is_white_space (ch)
-        && !lit_char_is_line_terminator (ch))
+    if (!lit_char_is_white_space (ch))
    {
      nonws_start_p = current_p;
      break;
@@ -2378,8 +2377,7 @@ ecma_string_trim_helper (const lit_utf8_byte_t **utf8_str_p, /**< [in, out] curr
  {
    read_size = lit_read_prev_code_unit_from_utf8 (current_p, &ch);

-    if (!lit_char_is_white_space (ch)
-        && !lit_char_is_line_terminator (ch))
+    if (!lit_char_is_white_space (ch))
    {
      break;
    }