Rework RegExp engine and add support for proper unicode matching (#3746)
This change includes several bugfixes, general improvements, and support for additional features. - Added full support for web compatibility syntax defined in Annex B - Implemented parsing and matching patterns in unicode mode - Fixed capture results when iterating with nested capturing groups - Significantly reduced regexp bytecode size - Reduced stack usage during regexp execution - Improved matching performance JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai@inf.u-szeged.hu
This commit is contained in:
@@ -559,7 +559,6 @@ snapshot_load_compiled_code (const uint8_t *base_addr_p, /**< base address of th
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
if (!(bytecode_p->status_flags & CBC_CODE_FLAGS_FUNCTION))
|
||||
{
|
||||
const re_compiled_code_t *re_bytecode_p = NULL;
|
||||
|
||||
const uint8_t *regex_start_p = ((const uint8_t *) bytecode_p) + sizeof (ecma_compiled_code_t);
|
||||
|
||||
@@ -567,10 +566,8 @@ snapshot_load_compiled_code (const uint8_t *base_addr_p, /**< base address of th
|
||||
ecma_string_t *pattern_str_p = ecma_new_ecma_string_from_utf8 (regex_start_p,
|
||||
bytecode_p->refs);
|
||||
|
||||
re_compile_bytecode (&re_bytecode_p,
|
||||
pattern_str_p,
|
||||
const re_compiled_code_t *re_bytecode_p = re_compile_bytecode (pattern_str_p,
|
||||
bytecode_p->status_flags);
|
||||
|
||||
ecma_deref_ecma_string (pattern_str_p);
|
||||
|
||||
return (ecma_compiled_code_t *) re_bytecode_p;
|
||||
|
||||
@@ -1467,7 +1467,7 @@ ecma_gc_run (void)
|
||||
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
/* Free RegExp bytecodes stored in cache */
|
||||
re_cache_gc_run ();
|
||||
re_cache_gc ();
|
||||
#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
|
||||
} /* ecma_gc_run */
|
||||
|
||||
|
||||
@@ -2362,8 +2362,7 @@ ecma_string_trim_helper (const lit_utf8_byte_t **utf8_str_p, /**< [in, out] curr
|
||||
{
|
||||
read_size = lit_read_code_unit_from_utf8 (current_p, &ch);
|
||||
|
||||
if (!lit_char_is_white_space (ch)
|
||||
&& !lit_char_is_line_terminator (ch))
|
||||
if (!lit_char_is_white_space (ch))
|
||||
{
|
||||
nonws_start_p = current_p;
|
||||
break;
|
||||
@@ -2378,8 +2377,7 @@ ecma_string_trim_helper (const lit_utf8_byte_t **utf8_str_p, /**< [in, out] curr
|
||||
{
|
||||
read_size = lit_read_prev_code_unit_from_utf8 (current_p, &ch);
|
||||
|
||||
if (!lit_char_is_white_space (ch)
|
||||
&& !lit_char_is_line_terminator (ch))
|
||||
if (!lit_char_is_white_space (ch))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -223,13 +223,13 @@ ecma_builtin_global_object_decode_uri_helper (lit_utf8_byte_t *input_start_p, /*
|
||||
continue;
|
||||
}
|
||||
|
||||
ecma_char_t decoded_byte;
|
||||
|
||||
if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &decoded_byte))
|
||||
uint32_t hex_value = lit_char_hex_lookup (input_char_p + 1, input_end_p, 2);
|
||||
if (hex_value == UINT32_MAX)
|
||||
{
|
||||
return ecma_raise_uri_error (ECMA_ERR_MSG ("Invalid hexadecimal value."));
|
||||
}
|
||||
|
||||
ecma_char_t decoded_byte = (ecma_char_t) hex_value;
|
||||
input_char_p += URI_ENCODED_BYTE_SIZE;
|
||||
|
||||
if (decoded_byte <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
@@ -272,20 +272,18 @@ ecma_builtin_global_object_decode_uri_helper (lit_utf8_byte_t *input_start_p, /*
|
||||
/* Input decode. */
|
||||
if (*input_char_p != '%')
|
||||
{
|
||||
*output_char_p = *input_char_p;
|
||||
output_char_p++;
|
||||
input_char_p++;
|
||||
*output_char_p++ = *input_char_p++;
|
||||
continue;
|
||||
}
|
||||
|
||||
ecma_char_t decoded_byte;
|
||||
|
||||
if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &decoded_byte))
|
||||
uint32_t hex_value = lit_char_hex_lookup (input_char_p + 1, input_end_p, 2);
|
||||
if (hex_value == UINT32_MAX)
|
||||
{
|
||||
ret_value = ecma_raise_uri_error (ECMA_ERR_MSG ("Invalid hexadecimal value."));
|
||||
break;
|
||||
}
|
||||
|
||||
ecma_char_t decoded_byte = (ecma_char_t) hex_value;
|
||||
input_char_p += URI_ENCODED_BYTE_SIZE;
|
||||
|
||||
if (decoded_byte <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
@@ -337,17 +335,16 @@ ecma_builtin_global_object_decode_uri_helper (lit_utf8_byte_t *input_start_p, /*
|
||||
}
|
||||
else
|
||||
{
|
||||
ecma_char_t chr;
|
||||
hex_value = lit_char_hex_lookup (input_char_p + 1, input_end_p, 2);
|
||||
|
||||
if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &chr)
|
||||
|| ((chr & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER))
|
||||
if (hex_value == UINT32_MAX || (hex_value & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER)
|
||||
{
|
||||
is_valid = false;
|
||||
break;
|
||||
}
|
||||
|
||||
octets[i] = (lit_utf8_byte_t) chr;
|
||||
input_char_p += URI_ENCODED_BYTE_SIZE;
|
||||
octets[i] = (lit_utf8_byte_t) hex_value;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -174,18 +174,13 @@ ecma_builtin_json_parse_string (ecma_json_token_t *token_p) /**< token argument
|
||||
}
|
||||
case LIT_CHAR_LOWERCASE_U:
|
||||
{
|
||||
if ((end_p - current_p <= ECMA_JSON_HEX_ESCAPE_SEQUENCE_LENGTH))
|
||||
uint32_t hex_value = lit_char_hex_lookup (current_p + 1, end_p, ECMA_JSON_HEX_ESCAPE_SEQUENCE_LENGTH);
|
||||
if (hex_value == UINT32_MAX)
|
||||
{
|
||||
goto invalid_string;
|
||||
}
|
||||
|
||||
ecma_char_t code_unit;
|
||||
if (!(lit_read_code_unit_from_hex (current_p + 1, ECMA_JSON_HEX_ESCAPE_SEQUENCE_LENGTH, &code_unit)))
|
||||
{
|
||||
goto invalid_string;
|
||||
}
|
||||
|
||||
ecma_stringbuilder_append_char (&result_builder, code_unit);
|
||||
ecma_stringbuilder_append_char (&result_builder, (ecma_char_t) hex_value);
|
||||
current_p += ECMA_JSON_HEX_ESCAPE_SEQUENCE_LENGTH + 1;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -505,12 +505,10 @@ ecma_instantiate_builtin (ecma_builtin_id_t obj_builtin_id) /**< built-in id */
|
||||
|
||||
ext_object_p->u.class_prop.class_id = LIT_MAGIC_STRING_REGEXP_UL;
|
||||
|
||||
const re_compiled_code_t *bc_p = NULL;
|
||||
ecma_value_t ret_value = re_compile_bytecode (&bc_p,
|
||||
ecma_get_magic_string (LIT_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP),
|
||||
re_compiled_code_t *bc_p = re_compile_bytecode (ecma_get_magic_string (LIT_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP),
|
||||
RE_FLAG_EMPTY);
|
||||
|
||||
JERRY_ASSERT (ecma_is_value_empty (ret_value));
|
||||
JERRY_ASSERT (bc_p != NULL);
|
||||
|
||||
ECMA_SET_INTERNAL_VALUE_POINTER (ext_object_p->u.class_prop.u.value, bc_p);
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -44,30 +44,73 @@ typedef enum
|
||||
} ecma_regexp_flags_t;
|
||||
|
||||
/**
|
||||
* Structure for storing capturing group results
|
||||
* Class escapes
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
RE_ESCAPE__START, /**< escapes start */
|
||||
RE_ESCAPE_DIGIT = RE_ESCAPE__START, /**< digit */
|
||||
RE_ESCAPE_NOT_DIGIT, /**< not digit */
|
||||
RE_ESCAPE_WORD_CHAR, /**< word char */
|
||||
RE_ESCAPE_NOT_WORD_CHAR, /**< not word char */
|
||||
RE_ESCAPE_WHITESPACE, /**< whitespace */
|
||||
RE_ESCAPE_NOT_WHITESPACE, /**< not whitespace */
|
||||
RE_ESCAPE__COUNT, /**< escape count */
|
||||
} ecma_class_escape_t;
|
||||
|
||||
/**
|
||||
* Character class flags escape count mask size.
|
||||
*/
|
||||
#define RE_CLASS_ESCAPE_COUNT_MASK_SIZE (3u)
|
||||
|
||||
/**
|
||||
* Character class flags escape count mask.
|
||||
*/
|
||||
#define RE_CLASS_ESCAPE_COUNT_MASK ((1 << RE_CLASS_ESCAPE_COUNT_MASK_SIZE) - 1u)
|
||||
|
||||
/**
|
||||
* Character class flags that are present in the upper bits of the class flags byte, while the 3 least significant bits
|
||||
* hold a value that contains the number of class escapes present in the character class.
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
RE_CLASS_HAS_CHARS = (1 << 5), /**< contains individual characters */
|
||||
RE_CLASS_HAS_RANGES = (1 << 6), /**< contains character ranges */
|
||||
RE_CLASS_INVERT = (1 << 7), /**< inverted */
|
||||
} ecma_char_class_flags_t;
|
||||
|
||||
/**
|
||||
* Structure for matching capturing groups and storing their result
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
const lit_utf8_byte_t *begin_p; /**< capture start pointer */
|
||||
const lit_utf8_byte_t *end_p; /**< capture end pointer */
|
||||
const uint8_t *bc_p; /**< group bytecode pointer */
|
||||
uint32_t iterator; /**< iteration counter */
|
||||
uint32_t subcapture_count; /**< number of nested capturing groups */
|
||||
} ecma_regexp_capture_t;
|
||||
|
||||
/**
|
||||
* Structure for matching non-capturing groups
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
const lit_utf8_byte_t *begin_p; /**< substring start pointer */
|
||||
const lit_utf8_byte_t *end_p; /**< substring end pointer */
|
||||
} ecma_regexp_capture_t;
|
||||
const uint8_t *bc_p; /**< group bytecode pointer */
|
||||
uint32_t iterator; /**< iteration counter */
|
||||
uint32_t subcapture_start; /**< first nested capturing group index */
|
||||
uint32_t subcapture_count; /**< number of nested capturing groups */
|
||||
} ecma_regexp_non_capture_t;
|
||||
|
||||
/**
|
||||
* Check if an ecma_regexp_capture_t contains a defined capture
|
||||
*/
|
||||
#define ECMA_RE_IS_CAPTURE_DEFINED(c) ((c)->begin_p != NULL && (c)->end_p >= (c)->begin_p)
|
||||
#define ECMA_RE_IS_CAPTURE_DEFINED(c) ((c)->begin_p != NULL)
|
||||
|
||||
ecma_value_t
|
||||
ecma_regexp_get_capture_value (const ecma_regexp_capture_t *const capture_p);
|
||||
|
||||
/**
|
||||
* Structure for storing non-capturing group results
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
const lit_utf8_byte_t *str_p; /**< string pointer */
|
||||
} ecma_regexp_non_capture_t;
|
||||
|
||||
#if (JERRY_STACK_LIMIT != 0)
|
||||
/**
|
||||
* Value used as result when stack limit is reached
|
||||
@@ -82,27 +125,38 @@ typedef struct
|
||||
#define ECMA_RE_STACK_LIMIT_REACHED(p) (false)
|
||||
#endif /* JERRY_STACK_LIMIT != 0 */
|
||||
|
||||
/**
|
||||
* Offset applied to qmax when encoded into the bytecode.
|
||||
*
|
||||
* It's common for qmax to be Infinity, which is represented as UINT32_MAX. By applying the offset we are able to store
|
||||
* it in a single byte as zero.
|
||||
*/
|
||||
#define RE_QMAX_OFFSET 1
|
||||
|
||||
/**
|
||||
* RegExp executor context
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
const lit_utf8_byte_t *input_end_p; /**< end of input string */
|
||||
const lit_utf8_byte_t *input_start_p; /**< start of input string */
|
||||
const lit_utf8_byte_t *input_end_p; /**< end of input string */
|
||||
uint32_t captures_count; /**< number of capture groups */
|
||||
ecma_regexp_capture_t *captures_p; /**< capturing groups */
|
||||
uint32_t non_captures_count; /**< number of non-capture groups */
|
||||
ecma_regexp_capture_t *captures_p; /**< capturing groups */
|
||||
ecma_regexp_non_capture_t *non_captures_p; /**< non-capturing groups */
|
||||
uint32_t *iterations_p; /**< number of iterations */
|
||||
uint16_t flags; /**< RegExp flags */
|
||||
uint8_t char_size; /**< size of encoded characters */
|
||||
} ecma_regexp_ctx_t;
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
lit_code_point_t ecma_regexp_unicode_advance (const lit_utf8_byte_t **str_p, const lit_utf8_byte_t *end_p);
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
ecma_object_t *ecma_op_regexp_alloc (ecma_object_t *new_target_obj_p);
|
||||
ecma_value_t ecma_regexp_exec_helper (ecma_object_t *regexp_object_p,
|
||||
ecma_string_t *input_string_p);
|
||||
ecma_string_t *ecma_regexp_read_pattern_str_helper (ecma_value_t pattern_arg);
|
||||
lit_code_point_t ecma_regexp_canonicalize (lit_code_point_t ch, bool is_ignorecase);
|
||||
lit_code_point_t ecma_regexp_canonicalize_char (lit_code_point_t ch);
|
||||
lit_code_point_t ecma_regexp_canonicalize_char (lit_code_point_t ch, bool unicode);
|
||||
ecma_value_t ecma_regexp_parse_flags (ecma_string_t *flags_str_p, uint16_t *flags_p);
|
||||
void ecma_regexp_create_and_initialize_props (ecma_object_t *re_object_p,
|
||||
ecma_string_t *source_p,
|
||||
|
||||
@@ -127,7 +127,7 @@ struct jerry_context_t
|
||||
/* Update JERRY_CONTEXT_FIRST_MEMBER if the first non-external member changes */
|
||||
jmem_cpointer_t ecma_builtin_objects[ECMA_BUILTIN_ID__COUNT]; /**< pointer to instances of built-in objects */
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
const re_compiled_code_t *re_cache[RE_CACHE_SIZE]; /**< regex cache */
|
||||
re_compiled_code_t *re_cache[RE_CACHE_SIZE]; /**< regex cache */
|
||||
#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
|
||||
jmem_cpointer_t ecma_gc_objects_cp; /**< List of currently alive objects. */
|
||||
jmem_heap_free_t *jmem_heap_list_skip_p; /**< This is used to speed up deallocation. */
|
||||
|
||||
@@ -103,31 +103,32 @@ search_char_in_interval_array (ecma_char_t c, /**< code unit */
|
||||
} /* search_char_in_interval_array */
|
||||
|
||||
/**
|
||||
* Check if specified character is one of the Whitespace characters including those
|
||||
* that fall into "Space, Separator" ("Zs") Unicode character category.
|
||||
* Check if specified character is one of the Whitespace characters including those that fall into
|
||||
* "Space, Separator" ("Zs") Unicode character category or one of the Line Terminator characters.
|
||||
*
|
||||
* @return true - if the character is one of characters, listed in ECMA-262 v5, Table 2,
|
||||
* false - otherwise
|
||||
*/
|
||||
bool
|
||||
lit_char_is_white_space (ecma_char_t c) /**< code unit */
|
||||
lit_char_is_white_space (lit_code_point_t c) /**< code point */
|
||||
{
|
||||
if (c <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
return (c == LIT_CHAR_TAB
|
||||
|| c == LIT_CHAR_VTAB
|
||||
|| c == LIT_CHAR_FF
|
||||
|| c == LIT_CHAR_SP);
|
||||
return (c == LIT_CHAR_SP || (c >= LIT_CHAR_TAB && c <= LIT_CHAR_CR));
|
||||
}
|
||||
else
|
||||
{
|
||||
return (c == LIT_CHAR_NBSP
|
||||
|| c == LIT_CHAR_BOM
|
||||
|| (c >= lit_unicode_separator_char_interval_sps[0]
|
||||
&& c <= lit_unicode_separator_char_interval_sps[0] + lit_unicode_separator_char_interval_lengths[0])
|
||||
|| search_char_in_char_array (c,
|
||||
if (c == LIT_CHAR_NBSP || c == LIT_CHAR_BOM || c == LIT_CHAR_LS || c == LIT_CHAR_PS)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
return (c <= LIT_UTF16_CODE_UNIT_MAX
|
||||
&& ((c >= lit_unicode_separator_char_interval_sps[0]
|
||||
&& c < lit_unicode_separator_char_interval_sps[0] + lit_unicode_separator_char_interval_lengths[0])
|
||||
|| search_char_in_char_array ((ecma_char_t) c,
|
||||
lit_unicode_separator_chars,
|
||||
NUM_OF_ELEMENTS (lit_unicode_separator_chars)));
|
||||
NUM_OF_ELEMENTS (lit_unicode_separator_chars))));
|
||||
}
|
||||
} /* lit_char_is_white_space */
|
||||
|
||||
@@ -429,51 +430,72 @@ lit_four_byte_utf8_char_to_cesu8 (uint8_t *dst_p, /**< destination buffer */
|
||||
} /* lit_four_byte_utf8_char_to_cesu8 */
|
||||
|
||||
/**
|
||||
* Parse the next number_of_characters hexadecimal character,
|
||||
* and construct a code unit from them. The buffer must
|
||||
* be zero terminated.
|
||||
* Lookup hex digits in a buffer
|
||||
*
|
||||
* @return true if decoding was successful, false otherwise
|
||||
* @return UINT32_MAX - if next 'lookup' number of characters do not form a valid hex number
|
||||
* value of hex number, otherwise
|
||||
*/
|
||||
bool
|
||||
lit_read_code_unit_from_hex (const lit_utf8_byte_t *buf_p, /**< buffer with characters */
|
||||
lit_utf8_size_t number_of_characters, /**< number of characters to be read */
|
||||
ecma_char_t *out_code_unit_p) /**< [out] decoded result */
|
||||
uint32_t
|
||||
lit_char_hex_lookup (const lit_utf8_byte_t *buf_p, /**< buffer */
|
||||
const lit_utf8_byte_t *const buf_end_p, /**< buffer end */
|
||||
uint32_t lookup) /**< size of lookup */
|
||||
{
|
||||
ecma_char_t code_unit = LIT_CHAR_NULL;
|
||||
JERRY_ASSERT (lookup <= 4);
|
||||
|
||||
JERRY_ASSERT (number_of_characters >= 2 && number_of_characters <= 4);
|
||||
|
||||
for (lit_utf8_size_t i = 0; i < number_of_characters; i++)
|
||||
if (JERRY_UNLIKELY (buf_p + lookup > buf_end_p))
|
||||
{
|
||||
code_unit = (ecma_char_t) (code_unit << 4u);
|
||||
|
||||
if (*buf_p >= LIT_CHAR_ASCII_DIGITS_BEGIN
|
||||
&& *buf_p <= LIT_CHAR_ASCII_DIGITS_END)
|
||||
{
|
||||
code_unit |= (ecma_char_t) (*buf_p - LIT_CHAR_ASCII_DIGITS_BEGIN);
|
||||
}
|
||||
else if (*buf_p >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN
|
||||
&& *buf_p <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)
|
||||
{
|
||||
code_unit |= (ecma_char_t) (*buf_p - (LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN - 10));
|
||||
}
|
||||
else if (*buf_p >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN
|
||||
&& *buf_p <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END)
|
||||
{
|
||||
code_unit |= (ecma_char_t) (*buf_p - (LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN - 10));
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
return UINT32_MAX;
|
||||
}
|
||||
|
||||
buf_p++;
|
||||
uint32_t value = 0;
|
||||
|
||||
while (lookup--)
|
||||
{
|
||||
lit_utf8_byte_t ch = *buf_p++;
|
||||
if (!lit_char_is_hex_digit (ch))
|
||||
{
|
||||
return UINT32_MAX;
|
||||
}
|
||||
|
||||
*out_code_unit_p = code_unit;
|
||||
return true;
|
||||
} /* lit_read_code_unit_from_hex */
|
||||
value <<= 4;
|
||||
value += lit_char_hex_to_int (ch);
|
||||
}
|
||||
|
||||
JERRY_ASSERT (value <= LIT_UTF16_CODE_UNIT_MAX);
|
||||
return value;
|
||||
} /* lit_char_hex_lookup */
|
||||
|
||||
/**
|
||||
* Parse a decimal number with the value clamped to UINT32_MAX.
|
||||
*
|
||||
* @returns uint32_t number
|
||||
*/
|
||||
uint32_t
|
||||
lit_parse_decimal (const lit_utf8_byte_t **buffer_p, /**< [in/out] character buffer */
|
||||
const lit_utf8_byte_t *buffer_end_p) /**< buffer end */
|
||||
{
|
||||
const lit_utf8_byte_t *current_p = *buffer_p;
|
||||
JERRY_ASSERT (lit_char_is_decimal_digit (*current_p));
|
||||
|
||||
uint32_t value = (uint32_t) (*current_p++ - LIT_CHAR_0);
|
||||
|
||||
while (current_p < buffer_end_p && lit_char_is_decimal_digit (*current_p))
|
||||
{
|
||||
const uint32_t digit = (uint32_t) (*current_p++ - LIT_CHAR_0);
|
||||
uint32_t new_value = value * 10 + digit;
|
||||
|
||||
if (JERRY_UNLIKELY (value > UINT32_MAX / 10) || JERRY_UNLIKELY (new_value < value))
|
||||
{
|
||||
value = UINT32_MAX;
|
||||
continue;
|
||||
}
|
||||
|
||||
value = new_value;
|
||||
}
|
||||
|
||||
*buffer_p = current_p;
|
||||
return value;
|
||||
} /* lit_parse_decimal */
|
||||
|
||||
/**
|
||||
* Check if specified character is a word character (part of IsWordChar abstract operation)
|
||||
@@ -484,7 +506,7 @@ lit_read_code_unit_from_hex (const lit_utf8_byte_t *buf_p, /**< buffer with char
|
||||
* false - otherwise
|
||||
*/
|
||||
bool
|
||||
lit_char_is_word_char (ecma_char_t c) /**< code unit */
|
||||
lit_char_is_word_char (lit_code_point_t c) /**< code point */
|
||||
{
|
||||
return ((c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)
|
||||
|| (c >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
|
||||
#include "lit-globals.h"
|
||||
|
||||
#define LIT_CHAR_UNDEF ((ecma_char_t) 0xFFFF) /* undefined character */
|
||||
|
||||
/*
|
||||
* Format control characters (ECMA-262 v5, Table 1)
|
||||
*/
|
||||
@@ -37,7 +35,7 @@
|
||||
#define LIT_CHAR_NBSP ((ecma_char_t) 0x00A0) /* no-break space */
|
||||
/* LIT_CHAR_BOM is defined above */
|
||||
|
||||
bool lit_char_is_white_space (ecma_char_t c);
|
||||
bool lit_char_is_white_space (lit_code_point_t c);
|
||||
|
||||
/*
|
||||
* Line terminator characters (ECMA-262 v5, Table 3)
|
||||
@@ -219,10 +217,8 @@ uint32_t lit_char_hex_to_int (ecma_char_t c);
|
||||
size_t lit_code_point_to_cesu8_bytes (uint8_t *dst_p, lit_code_point_t code_point);
|
||||
size_t lit_code_point_get_cesu8_length (lit_code_point_t code_point);
|
||||
void lit_four_byte_utf8_char_to_cesu8 (uint8_t *dst_p, const uint8_t *source_p);
|
||||
|
||||
/* read a hex encoded code point from a zero terminated buffer */
|
||||
bool lit_read_code_unit_from_hex (const lit_utf8_byte_t *buf_p, lit_utf8_size_t number_of_characters,
|
||||
ecma_char_t *out_code_unit_p);
|
||||
uint32_t lit_char_hex_lookup (const lit_utf8_byte_t *buf_p, const lit_utf8_byte_t *const buf_end_p, uint32_t lookup);
|
||||
uint32_t lit_parse_decimal (const lit_utf8_byte_t **buffer_p, const lit_utf8_byte_t *const buffer_end_p);
|
||||
|
||||
/**
|
||||
* Null character
|
||||
@@ -232,7 +228,7 @@ bool lit_read_code_unit_from_hex (const lit_utf8_byte_t *buf_p, lit_utf8_size_t
|
||||
/*
|
||||
* Part of IsWordChar abstract operation (ECMA-262 v5, 15.10.2.6, step 3)
|
||||
*/
|
||||
bool lit_char_is_word_char (ecma_char_t c);
|
||||
bool lit_char_is_word_char (lit_code_point_t c);
|
||||
|
||||
/*
|
||||
* Utility functions for uppercasing / lowercasing
|
||||
|
||||
@@ -513,7 +513,7 @@ lit_cesu8_read_prev (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with ch
|
||||
*
|
||||
* @return next code unit
|
||||
*/
|
||||
ecma_char_t
|
||||
ecma_char_t JERRY_ATTR_NOINLINE
|
||||
lit_cesu8_peek_next (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with characters */
|
||||
{
|
||||
JERRY_ASSERT (buf_p != NULL);
|
||||
@@ -529,7 +529,7 @@ lit_cesu8_peek_next (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with cha
|
||||
*
|
||||
* @return previous code unit
|
||||
*/
|
||||
ecma_char_t
|
||||
ecma_char_t JERRY_ATTR_NOINLINE
|
||||
lit_cesu8_peek_prev (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with characters */
|
||||
{
|
||||
JERRY_ASSERT (buf_p != NULL);
|
||||
@@ -543,7 +543,7 @@ lit_cesu8_peek_prev (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with cha
|
||||
/**
|
||||
* Increase cesu-8 encoded string pointer by one code unit.
|
||||
*/
|
||||
void
|
||||
inline void JERRY_ATTR_ALWAYS_INLINE
|
||||
lit_utf8_incr (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
|
||||
{
|
||||
JERRY_ASSERT (*buf_p);
|
||||
|
||||
@@ -2847,9 +2847,6 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
|
||||
context_p->literal_count++;
|
||||
|
||||
/* Compile the RegExp literal and store the RegExp bytecode pointer */
|
||||
const re_compiled_code_t *re_bytecode_p = NULL;
|
||||
ecma_value_t completion_value;
|
||||
|
||||
ecma_string_t *pattern_str_p = NULL;
|
||||
|
||||
if (lit_is_valid_cesu8_string (regex_start_p, length))
|
||||
@@ -2862,19 +2859,14 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
|
||||
pattern_str_p = ecma_new_ecma_string_from_utf8_converted_to_cesu8 (regex_start_p, length);
|
||||
}
|
||||
|
||||
completion_value = re_compile_bytecode (&re_bytecode_p,
|
||||
pattern_str_p,
|
||||
current_flags);
|
||||
re_compiled_code_t *re_bytecode_p = re_compile_bytecode (pattern_str_p, current_flags);
|
||||
ecma_deref_ecma_string (pattern_str_p);
|
||||
|
||||
if (ECMA_IS_VALUE_ERROR (completion_value))
|
||||
if (JERRY_UNLIKELY (re_bytecode_p == NULL))
|
||||
{
|
||||
jcontext_release_exception ();
|
||||
parser_raise_error (context_p, PARSER_ERR_INVALID_REGEXP);
|
||||
}
|
||||
|
||||
ecma_free_value (completion_value);
|
||||
|
||||
literal_p->type = LEXER_REGEXP_LITERAL;
|
||||
literal_p->u.bytecode_p = (ecma_compiled_code_t *) re_bytecode_p;
|
||||
|
||||
|
||||
@@ -2723,6 +2723,14 @@ parser_parse_script (const uint8_t *arg_list_p, /**< function argument list */
|
||||
jcontext_raise_exception (ECMA_VALUE_NULL);
|
||||
return ECMA_VALUE_ERROR;
|
||||
}
|
||||
|
||||
if (parser_error.error == PARSER_ERR_INVALID_REGEXP)
|
||||
{
|
||||
/* The RegExp compiler has already raised an exception. */
|
||||
JERRY_ASSERT (jcontext_has_pending_exception ());
|
||||
return ECMA_VALUE_ERROR;
|
||||
}
|
||||
|
||||
#if ENABLED (JERRY_ERROR_MESSAGES)
|
||||
const lit_utf8_byte_t *err_bytes_p = (const lit_utf8_byte_t *) parser_error_to_string (parser_error.error);
|
||||
lit_utf8_size_t err_bytes_size = lit_zt_utf8_string_size (err_bytes_p);
|
||||
|
||||
@@ -14,8 +14,9 @@
|
||||
*/
|
||||
|
||||
#include "ecma-globals.h"
|
||||
#include "re-bytecode.h"
|
||||
#include "ecma-regexp-object.h"
|
||||
#include "lit-strings.h"
|
||||
#include "re-bytecode.h"
|
||||
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
|
||||
@@ -29,135 +30,103 @@
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Size of block of RegExp bytecode. Used for allocation
|
||||
*
|
||||
* @return pointer to the RegExp compiled code header
|
||||
*/
|
||||
#define REGEXP_BYTECODE_BLOCK_SIZE 8UL
|
||||
|
||||
void
|
||||
re_initialize_regexp_bytecode (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
|
||||
re_initialize_regexp_bytecode (re_compiler_ctx_t *re_ctx_p) /**< RegExp bytecode context */
|
||||
{
|
||||
const size_t initial_size = JERRY_ALIGNUP (REGEXP_BYTECODE_BLOCK_SIZE + sizeof (re_compiled_code_t), JMEM_ALIGNMENT);
|
||||
bc_ctx_p->block_start_p = jmem_heap_alloc_block (initial_size);
|
||||
bc_ctx_p->block_end_p = bc_ctx_p->block_start_p + initial_size;
|
||||
bc_ctx_p->current_p = bc_ctx_p->block_start_p + sizeof (re_compiled_code_t);
|
||||
const size_t initial_size = sizeof (re_compiled_code_t);
|
||||
re_ctx_p->bytecode_start_p = jmem_heap_alloc_block (initial_size);
|
||||
re_ctx_p->bytecode_size = initial_size;
|
||||
} /* re_initialize_regexp_bytecode */
|
||||
|
||||
/**
|
||||
* Realloc the bytecode container
|
||||
*
|
||||
* @return current position in RegExp bytecode
|
||||
*/
|
||||
static uint8_t *
|
||||
re_realloc_regexp_bytecode_block (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
|
||||
inline uint32_t JERRY_ATTR_ALWAYS_INLINE
|
||||
re_bytecode_size (re_compiler_ctx_t *re_ctx_p) /**< RegExp bytecode context */
|
||||
{
|
||||
JERRY_ASSERT (bc_ctx_p->block_end_p >= bc_ctx_p->block_start_p);
|
||||
const size_t old_size = (size_t) (bc_ctx_p->block_end_p - bc_ctx_p->block_start_p);
|
||||
|
||||
/* If one of the members of RegExp bytecode context is NULL, then all member should be NULL
|
||||
* (it means first allocation), otherwise all of the members should be a non NULL pointer. */
|
||||
JERRY_ASSERT ((!bc_ctx_p->current_p && !bc_ctx_p->block_end_p && !bc_ctx_p->block_start_p)
|
||||
|| (bc_ctx_p->current_p && bc_ctx_p->block_end_p && bc_ctx_p->block_start_p));
|
||||
|
||||
const size_t new_size = old_size + REGEXP_BYTECODE_BLOCK_SIZE;
|
||||
JERRY_ASSERT (bc_ctx_p->current_p >= bc_ctx_p->block_start_p);
|
||||
const size_t current_ptr_offset = (size_t) (bc_ctx_p->current_p - bc_ctx_p->block_start_p);
|
||||
|
||||
bc_ctx_p->block_start_p = jmem_heap_realloc_block (bc_ctx_p->block_start_p,
|
||||
old_size,
|
||||
new_size);
|
||||
bc_ctx_p->block_end_p = bc_ctx_p->block_start_p + new_size;
|
||||
bc_ctx_p->current_p = bc_ctx_p->block_start_p + current_ptr_offset;
|
||||
|
||||
return bc_ctx_p->current_p;
|
||||
} /* re_realloc_regexp_bytecode_block */
|
||||
return (uint32_t) re_ctx_p->bytecode_size;
|
||||
} /* re_bytecode_size */
|
||||
|
||||
/**
|
||||
* Append a new bytecode to the end of the bytecode container
|
||||
*/
|
||||
static uint8_t *
|
||||
re_bytecode_reserve (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
re_bytecode_reserve (re_compiler_ctx_t *re_ctx_p, /**< RegExp bytecode context */
|
||||
const size_t size) /**< size */
|
||||
{
|
||||
JERRY_ASSERT (size <= REGEXP_BYTECODE_BLOCK_SIZE);
|
||||
|
||||
uint8_t *current_p = bc_ctx_p->current_p;
|
||||
if (current_p + size > bc_ctx_p->block_end_p)
|
||||
{
|
||||
current_p = re_realloc_regexp_bytecode_block (bc_ctx_p);
|
||||
}
|
||||
|
||||
bc_ctx_p->current_p += size;
|
||||
return current_p;
|
||||
const size_t old_size = re_ctx_p->bytecode_size;
|
||||
const size_t new_size = old_size + size;
|
||||
re_ctx_p->bytecode_start_p = jmem_heap_realloc_block (re_ctx_p->bytecode_start_p, old_size, new_size);
|
||||
re_ctx_p->bytecode_size = new_size;
|
||||
return re_ctx_p->bytecode_start_p + old_size;
|
||||
} /* re_bytecode_reserve */
|
||||
|
||||
/**
|
||||
* Insert a new bytecode to the bytecode container
|
||||
*/
|
||||
static void
|
||||
re_bytecode_insert (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
static uint8_t *
|
||||
re_bytecode_insert (re_compiler_ctx_t *re_ctx_p, /**< RegExp bytecode context */
|
||||
const size_t offset, /**< distance from the start of the container */
|
||||
const size_t size) /**< size */
|
||||
{
|
||||
JERRY_ASSERT (size <= REGEXP_BYTECODE_BLOCK_SIZE);
|
||||
const size_t tail_size = re_ctx_p->bytecode_size - offset;
|
||||
re_bytecode_reserve (re_ctx_p, size);
|
||||
|
||||
uint8_t *current_p = bc_ctx_p->current_p;
|
||||
if (current_p + size > bc_ctx_p->block_end_p)
|
||||
{
|
||||
re_realloc_regexp_bytecode_block (bc_ctx_p);
|
||||
}
|
||||
uint8_t *dest_p = re_ctx_p->bytecode_start_p + offset;
|
||||
memmove (dest_p + size, dest_p, tail_size);
|
||||
|
||||
uint8_t *dest_p = bc_ctx_p->block_start_p + offset;
|
||||
const size_t bytecode_length = re_get_bytecode_length (bc_ctx_p);
|
||||
if (bytecode_length - offset > 0)
|
||||
{
|
||||
memmove (dest_p + size, dest_p, bytecode_length - offset);
|
||||
}
|
||||
|
||||
bc_ctx_p->current_p += size;
|
||||
return dest_p;
|
||||
} /* re_bytecode_insert */
|
||||
|
||||
/**
|
||||
* Encode ecma_char_t into bytecode
|
||||
* Append a byte
|
||||
*/
|
||||
static void
|
||||
re_encode_char (uint8_t *dest_p, /**< destination */
|
||||
const ecma_char_t c) /**< character */
|
||||
void
|
||||
re_append_byte (re_compiler_ctx_t *re_ctx_p, /**< RegExp bytecode context */
|
||||
const uint8_t byte) /**< byte value */
|
||||
{
|
||||
*dest_p++ = (uint8_t) ((c >> 8) & 0xFF);
|
||||
*dest_p = (uint8_t) (c & 0xFF);
|
||||
} /* re_encode_char */
|
||||
uint8_t *dest_p = re_bytecode_reserve (re_ctx_p, sizeof (uint8_t));
|
||||
*dest_p = byte;
|
||||
} /* re_append_byte */
|
||||
|
||||
/**
|
||||
* Encode uint32_t into bytecode
|
||||
* Insert a byte value
|
||||
*/
|
||||
static void
|
||||
re_encode_u32 (uint8_t *dest_p, /**< destination */
|
||||
const uint32_t u) /**< uint32 value */
|
||||
void
|
||||
re_insert_byte (re_compiler_ctx_t *re_ctx_p, /**< RegExp bytecode context */
|
||||
const uint32_t offset, /**< distance from the start of the container */
|
||||
const uint8_t byte) /**< byte value */
|
||||
{
|
||||
*dest_p++ = (uint8_t) ((u >> 24) & 0xFF);
|
||||
*dest_p++ = (uint8_t) ((u >> 16) & 0xFF);
|
||||
*dest_p++ = (uint8_t) ((u >> 8) & 0xFF);
|
||||
*dest_p = (uint8_t) (u & 0xFF);
|
||||
} /* re_encode_u32 */
|
||||
uint8_t *dest_p = re_bytecode_insert (re_ctx_p, offset, sizeof (uint8_t));
|
||||
*dest_p = byte;
|
||||
} /* re_insert_byte */
|
||||
|
||||
/**
|
||||
* Get a character from the RegExp bytecode and increase the bytecode position
|
||||
*
|
||||
* @return ecma character
|
||||
* Get a single byte and increase bytecode position.
|
||||
*/
|
||||
inline ecma_char_t JERRY_ATTR_ALWAYS_INLINE
|
||||
re_get_char (const uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
inline uint8_t JERRY_ATTR_ALWAYS_INLINE
|
||||
re_get_byte (const uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
{
|
||||
const uint8_t *src_p = *bc_p;
|
||||
ecma_char_t chr = (ecma_char_t) *src_p++;
|
||||
chr = (ecma_char_t) (chr << 8);
|
||||
chr = (ecma_char_t) (chr | *src_p);
|
||||
(*bc_p) += sizeof (ecma_char_t);
|
||||
return chr;
|
||||
} /* re_get_char */
|
||||
return *((*bc_p)++);
|
||||
} /* re_get_byte */
|
||||
|
||||
/**
|
||||
* Append a RegExp opcode
|
||||
*/
|
||||
inline void JERRY_ATTR_ALWAYS_INLINE
|
||||
re_append_opcode (re_compiler_ctx_t *re_ctx_p, /**< RegExp bytecode context */
|
||||
const re_opcode_t opcode) /**< input opcode */
|
||||
{
|
||||
re_append_byte (re_ctx_p, (uint8_t) opcode);
|
||||
} /* re_append_opcode */
|
||||
|
||||
/**
|
||||
* Insert a RegExp opcode
|
||||
*/
|
||||
inline void JERRY_ATTR_ALWAYS_INLINE
|
||||
re_insert_opcode (re_compiler_ctx_t *re_ctx_p, /**< RegExp bytecode context */
|
||||
const uint32_t offset, /**< distance from the start of the container */
|
||||
const re_opcode_t opcode) /**< input opcode */
|
||||
{
|
||||
re_insert_byte (re_ctx_p, offset, (uint8_t) opcode);
|
||||
} /* re_insert_opcode */
|
||||
|
||||
/**
|
||||
* Get a RegExp opcode and increase the bytecode position
|
||||
@@ -167,318 +136,497 @@ re_get_char (const uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
inline re_opcode_t JERRY_ATTR_ALWAYS_INLINE
|
||||
re_get_opcode (const uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
{
|
||||
return (re_opcode_t) *((*bc_p)++);
|
||||
return (re_opcode_t) re_get_byte (bc_p);
|
||||
} /* re_get_opcode */
|
||||
|
||||
/**
|
||||
* Get a parameter of a RegExp opcode and increase the bytecode position
|
||||
*
|
||||
* @return opcode parameter
|
||||
* Encode 2 byte unsigned integer into the bytecode
|
||||
*/
|
||||
inline uint32_t JERRY_ATTR_ALWAYS_INLINE
|
||||
re_get_value (const uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
static void
|
||||
re_encode_u16 (uint8_t *dest_p, /**< destination */
|
||||
const uint16_t value) /**< value */
|
||||
{
|
||||
const uint8_t *src_p = *bc_p;
|
||||
uint32_t value = (uint32_t) (*src_p++);
|
||||
value <<= 8;
|
||||
value |= ((uint32_t) (*src_p++));
|
||||
value <<= 8;
|
||||
value |= ((uint32_t) (*src_p++));
|
||||
value <<= 8;
|
||||
value |= ((uint32_t) (*src_p++));
|
||||
*dest_p++ = (uint8_t) ((value >> 8) & 0xFF);
|
||||
*dest_p = (uint8_t) (value & 0xFF);
|
||||
} /* re_encode_u16 */
|
||||
|
||||
(*bc_p) += sizeof (uint32_t);
|
||||
/**
 * Encode 4 byte unsigned integer into the bytecode
 *
 * The value is written most significant byte first into the four bytes
 * starting at dest_p.
 */
static void
re_encode_u32 (uint8_t *dest_p, /**< destination */
               const uint32_t value) /**< value */
{
  dest_p[0] = (uint8_t) ((value >> 24) & 0xFF);
  dest_p[1] = (uint8_t) ((value >> 16) & 0xFF);
  dest_p[2] = (uint8_t) ((value >> 8) & 0xFF);
  dest_p[3] = (uint8_t) (value & 0xFF);
} /* re_encode_u32 */
|
||||
|
||||
/**
 * Decode 2 byte unsigned integer from bytecode
 *
 * The two bytes at src_p are read most significant byte first.
 *
 * @return uint16_t value
 */
static uint16_t
re_decode_u16 (const uint8_t *src_p) /**< source */
{
  const uint16_t high = (uint16_t) (((uint16_t) src_p[0]) << 8);

  /* The low byte occupies bits the shifted high byte left clear. */
  return (uint16_t) (high | src_p[1]);
} /* re_decode_u16 */
|
||||
|
||||
/**
|
||||
* Decode 4 byte unsigned integer from bytecode
|
||||
*
|
||||
* @return uint32_t value
|
||||
*/
|
||||
static uint32_t JERRY_ATTR_NOINLINE
|
||||
re_decode_u32 (const uint8_t *src_p) /**< source */
|
||||
{
|
||||
uint32_t value = (uint32_t) (((uint32_t) *src_p++) << 24);
|
||||
value += (uint32_t) (((uint32_t) *src_p++) << 16);
|
||||
value += (uint32_t) (((uint32_t) *src_p++) << 8);
|
||||
value += (uint32_t) (*src_p++);
|
||||
return value;
|
||||
} /* re_decode_u32 */
|
||||
|
||||
/**
|
||||
* Get the encoded size of an uint32_t value.
|
||||
*
|
||||
* @return encoded value size
|
||||
*/
|
||||
inline static size_t JERRY_ATTR_ALWAYS_INLINE
|
||||
re_get_encoded_value_size (uint32_t value) /**< value */
|
||||
{
|
||||
if (JERRY_LIKELY (value <= RE_VALUE_1BYTE_MAX))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 5;
|
||||
} /* re_get_encoded_value_size */
|
||||
|
||||
/*
|
||||
* Encode a value to the specified position in the bytecode.
|
||||
*/
|
||||
static void
|
||||
re_encode_value (uint8_t *dest_p, /**< position in bytecode */
|
||||
const uint32_t value) /**< value */
|
||||
{
|
||||
if (JERRY_LIKELY (value <= RE_VALUE_1BYTE_MAX))
|
||||
{
|
||||
*dest_p = (uint8_t) value;
|
||||
return;
|
||||
}
|
||||
|
||||
*dest_p++ = (uint8_t) (RE_VALUE_4BYTE_MARKER);
|
||||
re_encode_u32 (dest_p, value);
|
||||
} /* re_encode_value */
|
||||
|
||||
/**
|
||||
* Append a value to the end of the bytecode.
|
||||
*/
|
||||
void
|
||||
re_append_value (re_compiler_ctx_t *re_ctx_p, /**< RegExp bytecode context */
|
||||
const uint32_t value) /**< value */
|
||||
{
|
||||
const size_t size = re_get_encoded_value_size (value);
|
||||
uint8_t *dest_p = re_bytecode_reserve (re_ctx_p, size);
|
||||
re_encode_value (dest_p, value);
|
||||
} /* re_append_value */
|
||||
|
||||
/**
|
||||
* Insert a value into the bytecode at a specific offset.
|
||||
*/
|
||||
void
|
||||
re_insert_value (re_compiler_ctx_t *re_ctx_p, /**< RegExp bytecode context */
|
||||
const uint32_t offset, /**< bytecode offset */
|
||||
const uint32_t value) /**< value */
|
||||
{
|
||||
const size_t size = re_get_encoded_value_size (value);
|
||||
uint8_t *dest_p = re_bytecode_insert (re_ctx_p, offset, size);
|
||||
re_encode_value (dest_p, value);
|
||||
} /* re_insert_value */
|
||||
|
||||
/**
|
||||
* Read an encoded value from the bytecode.
|
||||
*
|
||||
* @return decoded value
|
||||
*/
|
||||
uint32_t JERRY_ATTR_ALWAYS_INLINE
|
||||
re_get_value (const uint8_t **bc_p) /** refence to bytecode pointer */
|
||||
{
|
||||
uint32_t value = *(*bc_p)++;
|
||||
if (JERRY_LIKELY (value <= RE_VALUE_1BYTE_MAX))
|
||||
{
|
||||
return value;
|
||||
}
|
||||
|
||||
value = re_decode_u32 (*bc_p);
|
||||
*bc_p += sizeof (uint32_t);
|
||||
return value;
|
||||
} /* re_get_value */
|
||||
|
||||
/**
|
||||
* Get length of bytecode
|
||||
*
|
||||
* @return bytecode length (unsigned integer)
|
||||
*/
|
||||
inline uint32_t JERRY_ATTR_PURE JERRY_ATTR_ALWAYS_INLINE
|
||||
re_get_bytecode_length (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
|
||||
{
|
||||
return ((uint32_t) (bc_ctx_p->current_p - bc_ctx_p->block_start_p));
|
||||
} /* re_get_bytecode_length */
|
||||
|
||||
/**
|
||||
* Append a RegExp opcode
|
||||
*/
|
||||
void
|
||||
re_append_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
const re_opcode_t opcode) /**< input opcode */
|
||||
{
|
||||
uint8_t *dest_p = re_bytecode_reserve (bc_ctx_p, sizeof (uint8_t));
|
||||
*dest_p = (uint8_t) opcode;
|
||||
} /* re_append_opcode */
|
||||
|
||||
/**
|
||||
* Append a parameter of a RegExp opcode
|
||||
*/
|
||||
void
|
||||
re_append_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
const uint32_t value) /**< input value */
|
||||
{
|
||||
uint8_t *dest_p = re_bytecode_reserve (bc_ctx_p, sizeof (uint32_t));
|
||||
re_encode_u32 (dest_p, value);
|
||||
} /* re_append_u32 */
|
||||
|
||||
/**
|
||||
* Append a character to the RegExp bytecode
|
||||
*/
|
||||
void
|
||||
re_append_char (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
const ecma_char_t input_char) /**< input char */
|
||||
re_append_char (re_compiler_ctx_t *re_ctx_p, /**< RegExp bytecode context */
|
||||
const lit_code_point_t cp) /**< code point */
|
||||
{
|
||||
uint8_t *dest_p = re_bytecode_reserve (bc_ctx_p, sizeof (ecma_char_t));
|
||||
re_encode_char (dest_p, input_char);
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
const size_t size = (re_ctx_p->flags & RE_FLAG_UNICODE) ? sizeof (lit_code_point_t) : sizeof (ecma_char_t);
|
||||
#else /* !ENABLED (JERRY_ES2015) */
|
||||
JERRY_UNUSED (re_ctx_p);
|
||||
const size_t size = sizeof (ecma_char_t);
|
||||
#endif /* !ENABLED (JERRY_ES2015) */
|
||||
|
||||
uint8_t *dest_p = re_bytecode_reserve (re_ctx_p, size);
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
if (re_ctx_p->flags & RE_FLAG_UNICODE)
|
||||
{
|
||||
re_encode_u32 (dest_p, cp);
|
||||
return;
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
JERRY_ASSERT (cp <= LIT_UTF16_CODE_UNIT_MAX);
|
||||
re_encode_u16 (dest_p, (ecma_char_t) cp);
|
||||
} /* re_append_char */
|
||||
|
||||
/**
|
||||
* Append a jump offset parameter of a RegExp opcode
|
||||
 * Insert a character into the RegExp bytecode at a specific offset
|
||||
*/
|
||||
void
|
||||
re_append_jump_offset (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
uint32_t value) /**< input value */
|
||||
re_insert_char (re_compiler_ctx_t *re_ctx_p, /**< RegExp bytecode context */
|
||||
const uint32_t offset, /**< bytecode offset */
|
||||
const lit_code_point_t cp) /**< code point*/
|
||||
{
|
||||
value += (uint32_t) (sizeof (uint32_t));
|
||||
re_append_u32 (bc_ctx_p, value);
|
||||
} /* re_append_jump_offset */
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
const size_t size = (re_ctx_p->flags & RE_FLAG_UNICODE) ? sizeof (lit_code_point_t) : sizeof (ecma_char_t);
|
||||
#else /* !ENABLED (JERRY_ES2015) */
|
||||
JERRY_UNUSED (re_ctx_p);
|
||||
const size_t size = sizeof (ecma_char_t);
|
||||
#endif /* !ENABLED (JERRY_ES2015) */
|
||||
|
||||
uint8_t *dest_p = re_bytecode_insert (re_ctx_p, offset, size);
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
if (re_ctx_p->flags & RE_FLAG_UNICODE)
|
||||
{
|
||||
re_encode_u32 (dest_p, cp);
|
||||
return;
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
JERRY_ASSERT (cp <= LIT_UTF16_CODE_UNIT_MAX);
|
||||
re_encode_u16 (dest_p, (ecma_char_t) cp);
|
||||
} /* re_insert_char */
|
||||
|
||||
/**
|
||||
* Insert a RegExp opcode
|
||||
* Decode a character from the bytecode.
|
||||
*
|
||||
* @return decoded character
|
||||
*/
|
||||
void
|
||||
re_insert_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
const uint32_t offset, /**< distance from the start of the container */
|
||||
const re_opcode_t opcode) /**< input opcode */
|
||||
inline lit_code_point_t JERRY_ATTR_ALWAYS_INLINE
|
||||
re_get_char (const uint8_t **bc_p, /**< reference to bytecode pointer */
|
||||
bool unicode) /**< full unicode mode */
|
||||
{
|
||||
re_bytecode_insert (bc_ctx_p, offset, sizeof (uint8_t));
|
||||
*(bc_ctx_p->block_start_p + offset) = (uint8_t) opcode;
|
||||
} /* re_insert_opcode */
|
||||
lit_code_point_t cp;
|
||||
|
||||
/**
|
||||
* Insert a parameter of a RegExp opcode
|
||||
*/
|
||||
void
|
||||
re_insert_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
uint32_t offset, /**< distance from the start of the container */
|
||||
uint32_t value) /**< input value */
|
||||
{
|
||||
re_bytecode_insert (bc_ctx_p, offset, sizeof (uint32_t));
|
||||
re_encode_u32 (bc_ctx_p->block_start_p + offset, value);
|
||||
} /* re_insert_u32 */
|
||||
#if !ENABLED (JERRY_ES2015)
|
||||
JERRY_UNUSED (unicode);
|
||||
#else /* ENABLED (JERRY_ES2015) */
|
||||
if (unicode)
|
||||
{
|
||||
cp = re_decode_u32 (*bc_p);
|
||||
*bc_p += sizeof (lit_code_point_t);
|
||||
}
|
||||
else
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
{
|
||||
cp = re_decode_u16 (*bc_p);
|
||||
*bc_p += sizeof (ecma_char_t);
|
||||
}
|
||||
|
||||
return cp;
|
||||
} /* re_get_char */
|
||||
|
||||
#if ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE)
|
||||
/**
 * Get the distance in bytes between two bytecode pointers.
 *
 * @return address of current_p minus address of start_p, truncated to uint32_t
 */
static uint32_t
re_get_bytecode_offset (const uint8_t *start_p, /**< bytecode start pointer */
                        const uint8_t *current_p) /**< current bytecode pointer */
{
  const uintptr_t start_address = (uintptr_t) start_p;
  const uintptr_t current_address = (uintptr_t) current_p;

  return (uint32_t) (current_address - start_address);
} /* re_get_bytecode_offset */
|
||||
|
||||
/**
|
||||
* RegExp bytecode dumper
|
||||
*/
|
||||
void
|
||||
re_dump_bytecode (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
|
||||
re_dump_bytecode (re_compiler_ctx_t *re_ctx_p) /**< RegExp bytecode context */
|
||||
{
|
||||
re_compiled_code_t *compiled_code_p = (re_compiled_code_t *) bc_ctx_p->block_start_p;
|
||||
JERRY_DEBUG_MSG ("%d ", compiled_code_p->header.status_flags);
|
||||
JERRY_DEBUG_MSG ("%d ", compiled_code_p->captures_count);
|
||||
JERRY_DEBUG_MSG ("%d | ", compiled_code_p->non_captures_count);
|
||||
static const char escape_chars[] = {'d', 'D', 'w', 'W', 's', 'S'};
|
||||
|
||||
const uint8_t *bytecode_p = (const uint8_t *) (compiled_code_p + 1);
|
||||
re_compiled_code_t *compiled_code_p = (re_compiled_code_t *) re_ctx_p->bytecode_start_p;
|
||||
JERRY_DEBUG_MSG ("Flags: 0x%x ", compiled_code_p->header.status_flags);
|
||||
JERRY_DEBUG_MSG ("Capturing groups: %d ", compiled_code_p->captures_count);
|
||||
JERRY_DEBUG_MSG ("Non-capturing groups: %d\n", compiled_code_p->non_captures_count);
|
||||
|
||||
re_opcode_t op;
|
||||
while ((op = re_get_opcode (&bytecode_p)))
|
||||
const uint8_t *bytecode_start_p = (const uint8_t *) (compiled_code_p + 1);
|
||||
const uint8_t *bytecode_p = bytecode_start_p;
|
||||
|
||||
while (true)
|
||||
{
|
||||
JERRY_DEBUG_MSG ("[%3u] ", (uint32_t) ((uintptr_t) bytecode_p - (uintptr_t) bytecode_start_p));
|
||||
re_opcode_t op = *bytecode_p++;
|
||||
switch (op)
|
||||
{
|
||||
case RE_OP_MATCH:
|
||||
case RE_OP_ALTERNATIVE_START:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("MATCH, ");
|
||||
JERRY_DEBUG_MSG ("ALTERNATIVE_START ");
|
||||
const uint32_t offset = re_get_value (&bytecode_p) + re_get_bytecode_offset (bytecode_start_p, bytecode_p);
|
||||
JERRY_DEBUG_MSG ("tail offset: [%3u]\n", offset);
|
||||
break;
|
||||
}
|
||||
case RE_OP_CHAR:
|
||||
case RE_OP_ALTERNATIVE_NEXT:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("CHAR ");
|
||||
JERRY_DEBUG_MSG ("%c, ", (char) re_get_char (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("ALTERNATIVE_NEXT ");
|
||||
const uint32_t offset = re_get_value (&bytecode_p) + re_get_bytecode_offset (bytecode_start_p, bytecode_p);
|
||||
JERRY_DEBUG_MSG ("tail offset: [%3u]\n", offset);
|
||||
break;
|
||||
}
|
||||
case RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
|
||||
case RE_OP_NO_ALTERNATIVE:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("N");
|
||||
/* FALLTHRU */
|
||||
}
|
||||
case RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("GZ_START ");
|
||||
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("NO_ALTERNATIVES\n");
|
||||
break;
|
||||
}
|
||||
case RE_OP_CAPTURE_GROUP_START:
|
||||
case RE_OP_CAPTURING_GROUP_START:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("START ");
|
||||
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("CAPTURING_GROUP_START ");
|
||||
JERRY_DEBUG_MSG ("idx: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("capture count: %u, ", re_get_value (&bytecode_p));
|
||||
|
||||
const uint32_t qmin = re_get_value (&bytecode_p);
|
||||
JERRY_DEBUG_MSG ("qmin: %u", qmin);
|
||||
if (qmin == 0)
|
||||
{
|
||||
const uint32_t offset = re_get_value (&bytecode_p) + re_get_bytecode_offset (bytecode_start_p, bytecode_p);
|
||||
JERRY_DEBUG_MSG (", tail offset: [%3u]\n", offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
JERRY_DEBUG_MSG ("\n");
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
|
||||
case RE_OP_NON_CAPTURING_GROUP_START:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("N");
|
||||
/* FALLTHRU */
|
||||
JERRY_DEBUG_MSG ("NON_CAPTURING_GROUP_START ");
|
||||
JERRY_DEBUG_MSG ("idx: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("capture start: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("capture count: %u, ", re_get_value (&bytecode_p));
|
||||
|
||||
const uint32_t qmin = re_get_value (&bytecode_p);
|
||||
JERRY_DEBUG_MSG ("qmin: %u", qmin);
|
||||
if (qmin == 0)
|
||||
{
|
||||
const uint32_t offset = re_get_value (&bytecode_p) + re_get_bytecode_offset (bytecode_start_p, bytecode_p);
|
||||
JERRY_DEBUG_MSG (", tail offset: [%3u]\n", offset);
|
||||
}
|
||||
case RE_OP_CAPTURE_GREEDY_GROUP_END:
|
||||
else
|
||||
{
|
||||
JERRY_DEBUG_MSG ("G_END ");
|
||||
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("\n");
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
|
||||
case RE_OP_GREEDY_CAPTURING_GROUP_END:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("N");
|
||||
/* FALLTHRU */
|
||||
}
|
||||
case RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("GZ_NC_START ");
|
||||
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("GREEDY_CAPTURING_GROUP_END ");
|
||||
JERRY_DEBUG_MSG ("idx: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("qmin: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("qmax: %u\n", re_get_value (&bytecode_p) - RE_QMAX_OFFSET);
|
||||
break;
|
||||
}
|
||||
case RE_OP_NON_CAPTURE_GROUP_START:
|
||||
case RE_OP_LAZY_CAPTURING_GROUP_END:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("NC_START ");
|
||||
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("LAZY_CAPTURING_GROUP_END ");
|
||||
JERRY_DEBUG_MSG ("idx: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("qmin: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("qmax: %u\n", re_get_value (&bytecode_p) - RE_QMAX_OFFSET);
|
||||
break;
|
||||
}
|
||||
case RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END:
|
||||
case RE_OP_GREEDY_NON_CAPTURING_GROUP_END:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("N");
|
||||
/* FALLTHRU */
|
||||
}
|
||||
case RE_OP_NON_CAPTURE_GREEDY_GROUP_END:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("G_NC_END ");
|
||||
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("GREEDY_NON_CAPTURING_GROUP_END ");
|
||||
JERRY_DEBUG_MSG ("idx: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("qmin: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("qmax: %u\n", re_get_value (&bytecode_p) - RE_QMAX_OFFSET);
|
||||
break;
|
||||
}
|
||||
case RE_OP_SAVE_AT_START:
|
||||
case RE_OP_LAZY_NON_CAPTURING_GROUP_END:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("RE_START ");
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
break;
|
||||
}
|
||||
case RE_OP_SAVE_AND_MATCH:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("RE_END, ");
|
||||
JERRY_DEBUG_MSG ("LAZY_NON_CAPTURING_GROUP_END ");
|
||||
JERRY_DEBUG_MSG ("idx: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("qmin: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("qmax: %u\n", re_get_value (&bytecode_p) - RE_QMAX_OFFSET);
|
||||
break;
|
||||
}
|
||||
case RE_OP_GREEDY_ITERATOR:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("GREEDY_ITERATOR ");
|
||||
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("qmin: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("qmax: %u, ", re_get_value (&bytecode_p) - RE_QMAX_OFFSET);
|
||||
const uint32_t offset = re_get_value (&bytecode_p) + re_get_bytecode_offset (bytecode_start_p, bytecode_p);
|
||||
JERRY_DEBUG_MSG ("tail offset: [%3u]\n", offset);
|
||||
break;
|
||||
}
|
||||
case RE_OP_NON_GREEDY_ITERATOR:
|
||||
case RE_OP_LAZY_ITERATOR:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("NON_GREEDY_ITERATOR ");
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("LAZY_ITERATOR ");
|
||||
JERRY_DEBUG_MSG ("qmin: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("qmax: %u, ", re_get_value (&bytecode_p) - RE_QMAX_OFFSET);
|
||||
const uint32_t offset = re_get_value (&bytecode_p) + re_get_bytecode_offset (bytecode_start_p, bytecode_p);
|
||||
JERRY_DEBUG_MSG ("tail offset: [%3u]\n", offset);
|
||||
break;
|
||||
}
|
||||
case RE_OP_PERIOD:
|
||||
case RE_OP_ITERATOR_END:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("PERIOD ");
|
||||
break;
|
||||
}
|
||||
case RE_OP_ALTERNATIVE:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("ALTERNATIVE ");
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
break;
|
||||
}
|
||||
case RE_OP_ASSERT_START:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("ASSERT_START ");
|
||||
break;
|
||||
}
|
||||
case RE_OP_ASSERT_END:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("ASSERT_END ");
|
||||
break;
|
||||
}
|
||||
case RE_OP_ASSERT_WORD_BOUNDARY:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("ASSERT_WORD_BOUNDARY ");
|
||||
break;
|
||||
}
|
||||
case RE_OP_ASSERT_NOT_WORD_BOUNDARY:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("ASSERT_NOT_WORD_BOUNDARY ");
|
||||
break;
|
||||
}
|
||||
case RE_OP_LOOKAHEAD_POS:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("LOOKAHEAD_POS ");
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
break;
|
||||
}
|
||||
case RE_OP_LOOKAHEAD_NEG:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("LOOKAHEAD_NEG ");
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("ITERATOR_END\n");
|
||||
break;
|
||||
}
|
||||
case RE_OP_BACKREFERENCE:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("BACKREFERENCE ");
|
||||
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("idx: %d\n", re_get_value (&bytecode_p));
|
||||
break;
|
||||
}
|
||||
case RE_OP_INV_CHAR_CLASS:
|
||||
case RE_OP_ASSERT_LINE_START:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("INV_");
|
||||
/* FALLTHRU */
|
||||
JERRY_DEBUG_MSG ("ASSERT_LINE_START\n");
|
||||
break;
|
||||
}
|
||||
case RE_OP_ASSERT_LINE_END:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("ASSERT_LINE_END\n");
|
||||
break;
|
||||
}
|
||||
case RE_OP_ASSERT_LOOKAHEAD_POS:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("ASSERT_LOOKAHEAD_POS ");
|
||||
JERRY_DEBUG_MSG ("qmin: %u, ", *bytecode_p++);
|
||||
JERRY_DEBUG_MSG ("capture start: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("capture count: %u, ", re_get_value (&bytecode_p));
|
||||
const uint32_t offset = re_get_value (&bytecode_p) + re_get_bytecode_offset (bytecode_start_p, bytecode_p);
|
||||
JERRY_DEBUG_MSG ("tail offset: [%3u]\n", offset);
|
||||
break;
|
||||
}
|
||||
case RE_OP_ASSERT_LOOKAHEAD_NEG:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("ASSERT_LOOKAHEAD_NEG ");
|
||||
JERRY_DEBUG_MSG ("qmin: %u, ", *bytecode_p++);
|
||||
JERRY_DEBUG_MSG ("capture start: %u, ", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("capture count: %u, ", re_get_value (&bytecode_p));
|
||||
const uint32_t offset = re_get_value (&bytecode_p) + re_get_bytecode_offset (bytecode_start_p, bytecode_p);
|
||||
JERRY_DEBUG_MSG ("tail offset: [%3u]\n", offset);
|
||||
break;
|
||||
}
|
||||
case RE_OP_ASSERT_END:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("ASSERT_END\n");
|
||||
break;
|
||||
}
|
||||
case RE_OP_ASSERT_WORD_BOUNDARY:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("ASSERT_WORD_BOUNDARY\n");
|
||||
break;
|
||||
}
|
||||
case RE_OP_ASSERT_NOT_WORD_BOUNDARY:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("ASSERT_NOT_WORD_BOUNDARY\n");
|
||||
break;
|
||||
}
|
||||
case RE_OP_CLASS_ESCAPE:
|
||||
{
|
||||
ecma_class_escape_t escape = (ecma_class_escape_t) *bytecode_p++;
|
||||
JERRY_DEBUG_MSG ("CLASS_ESCAPE \\%c\n", escape_chars[escape]);
|
||||
break;
|
||||
}
|
||||
case RE_OP_CHAR_CLASS:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("CHAR_CLASS ");
|
||||
uint32_t num_of_class = re_get_value (&bytecode_p);
|
||||
JERRY_DEBUG_MSG ("%d", num_of_class);
|
||||
while (num_of_class)
|
||||
uint8_t flags = *bytecode_p++;
|
||||
uint32_t char_count = (flags & RE_CLASS_HAS_CHARS) ? re_get_value (&bytecode_p) : 0;
|
||||
uint32_t range_count = (flags & RE_CLASS_HAS_RANGES) ? re_get_value (&bytecode_p) : 0;
|
||||
|
||||
if (flags & RE_CLASS_INVERT)
|
||||
{
|
||||
if ((compiled_code_p->header.status_flags & RE_FLAG_UNICODE) != 0)
|
||||
JERRY_DEBUG_MSG ("inverted ");
|
||||
}
|
||||
|
||||
JERRY_DEBUG_MSG ("escapes: ");
|
||||
uint8_t escape_count = flags & RE_CLASS_ESCAPE_COUNT_MASK;
|
||||
while (escape_count--)
|
||||
{
|
||||
JERRY_DEBUG_MSG (" %u", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("-%u", re_get_value (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("\\%c, ", escape_chars[*bytecode_p++]);
|
||||
}
|
||||
else
|
||||
|
||||
JERRY_DEBUG_MSG ("chars: ");
|
||||
while (char_count--)
|
||||
{
|
||||
JERRY_DEBUG_MSG (" %u", re_get_char (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("-%u", re_get_char (&bytecode_p));
|
||||
JERRY_DEBUG_MSG ("\\u%04x, ", re_get_char (&bytecode_p, re_ctx_p->flags & RE_FLAG_UNICODE));
|
||||
}
|
||||
num_of_class--;
|
||||
|
||||
JERRY_DEBUG_MSG ("ranges: ");
|
||||
while (range_count--)
|
||||
{
|
||||
const lit_code_point_t begin = re_get_char (&bytecode_p, re_ctx_p->flags & RE_FLAG_UNICODE);
|
||||
const lit_code_point_t end = re_get_char (&bytecode_p, re_ctx_p->flags & RE_FLAG_UNICODE);
|
||||
JERRY_DEBUG_MSG ("\\u%04x-\\u%04x, ", begin, end);
|
||||
}
|
||||
JERRY_DEBUG_MSG (", ");
|
||||
|
||||
JERRY_DEBUG_MSG ("\n");
|
||||
break;
|
||||
}
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
case RE_OP_UNICODE_PERIOD:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("UNICODE_PERIOD\n");
|
||||
break;
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
case RE_OP_PERIOD:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("PERIOD\n");
|
||||
break;
|
||||
}
|
||||
case RE_OP_CHAR:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("CHAR \\u%04x\n", re_get_char (&bytecode_p, re_ctx_p->flags & RE_FLAG_UNICODE));
|
||||
break;
|
||||
}
|
||||
case RE_OP_BYTE:
|
||||
{
|
||||
const uint8_t ch = *bytecode_p++;
|
||||
JERRY_DEBUG_MSG ("BYTE \\u%04x '%c'\n", ch, (char) ch);
|
||||
break;
|
||||
}
|
||||
case RE_OP_EOF:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("EOF\n");
|
||||
return;
|
||||
}
|
||||
default:
|
||||
{
|
||||
JERRY_DEBUG_MSG ("UNKNOWN(%d), ", (uint32_t) op);
|
||||
JERRY_DEBUG_MSG ("UNKNOWN(%d)\n", (uint32_t) op);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
JERRY_DEBUG_MSG ("EOF\n");
|
||||
} /* re_dump_bytecode */
|
||||
#endif /* ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE) */
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
|
||||
#include "ecma-globals.h"
|
||||
#include "re-compiler-context.h"
|
||||
|
||||
/** \addtogroup parser Parser
|
||||
* @{
|
||||
@@ -40,43 +41,57 @@
|
||||
*/
|
||||
#define RE_FLAGS_MASK 0x3F
|
||||
|
||||
/**
|
||||
* Maximum value that can be encoded in the RegExp bytecode as a single byte.
|
||||
*/
|
||||
#define RE_VALUE_1BYTE_MAX 0xFE
|
||||
|
||||
/**
|
||||
 * Marker that signals that the actual value is encoded in the following 4 bytes in the bytecode.
|
||||
*/
|
||||
#define RE_VALUE_4BYTE_MARKER 0xFF
|
||||
|
||||
/**
|
||||
* RegExp opcodes
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
RE_OP_EOF,
|
||||
/* Group opcode order is important, because RE_IS_CAPTURE_GROUP is based on it.
|
||||
* Change it carefully. Capture opcodes should be at first.
|
||||
*/
|
||||
RE_OP_CAPTURE_GROUP_START, /**< group start */
|
||||
RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START, /**< greedy zero group start */
|
||||
RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START, /**< non-greedy zero group start */
|
||||
RE_OP_CAPTURE_GREEDY_GROUP_END, /**< greedy group end */
|
||||
RE_OP_CAPTURE_NON_GREEDY_GROUP_END, /**< non-greedy group end */
|
||||
RE_OP_NON_CAPTURE_GROUP_START, /**< non-capture group start */
|
||||
RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START, /**< non-capture greedy zero group start */
|
||||
RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START, /**< non-capture non-greedy zero group start */
|
||||
RE_OP_NON_CAPTURE_GREEDY_GROUP_END, /**< non-capture greedy group end */
|
||||
RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END, /**< non-capture non-greedy group end */
|
||||
RE_OP_EOF, /**< end of pattern */
|
||||
|
||||
RE_OP_ALTERNATIVE_START, /**< start of alternatives */
|
||||
RE_OP_ALTERNATIVE_NEXT, /**< next alternative */
|
||||
RE_OP_NO_ALTERNATIVE, /**< no alternative */
|
||||
|
||||
RE_OP_CAPTURING_GROUP_START, /**< start of a capturing group */
|
||||
RE_OP_NON_CAPTURING_GROUP_START, /**< start of a non-capturing group */
|
||||
|
||||
RE_OP_GREEDY_CAPTURING_GROUP_END, /**< end of a greedy capturing group */
|
||||
RE_OP_GREEDY_NON_CAPTURING_GROUP_END, /**< end of a greedy non-capturing group */
|
||||
RE_OP_LAZY_CAPTURING_GROUP_END, /**< end of a lazy capturing group */
|
||||
RE_OP_LAZY_NON_CAPTURING_GROUP_END, /**< end of a lazy non-capturing group */
|
||||
|
||||
RE_OP_MATCH, /**< match */
|
||||
RE_OP_CHAR, /**< any character */
|
||||
RE_OP_SAVE_AT_START, /**< save at start */
|
||||
RE_OP_SAVE_AND_MATCH, /**< save and match */
|
||||
RE_OP_PERIOD, /**< "." */
|
||||
RE_OP_ALTERNATIVE, /**< "|" */
|
||||
RE_OP_GREEDY_ITERATOR, /**< greedy iterator */
|
||||
RE_OP_NON_GREEDY_ITERATOR, /**< non-greedy iterator */
|
||||
RE_OP_ASSERT_START, /**< "^" */
|
||||
RE_OP_ASSERT_END, /**< "$" */
|
||||
RE_OP_ASSERT_WORD_BOUNDARY, /**< "\b" */
|
||||
RE_OP_ASSERT_NOT_WORD_BOUNDARY, /**< "\B" */
|
||||
RE_OP_LOOKAHEAD_POS, /**< lookahead pos */
|
||||
RE_OP_LOOKAHEAD_NEG, /**< lookahead neg */
|
||||
RE_OP_BACKREFERENCE, /**< "\[0..9]" */
|
||||
RE_OP_CHAR_CLASS, /**< "[ ]" */
|
||||
RE_OP_INV_CHAR_CLASS /**< "[^ ]" */
|
||||
RE_OP_LAZY_ITERATOR, /**< lazy iterator */
|
||||
RE_OP_ITERATOR_END, /**< end of an iterator */
|
||||
|
||||
RE_OP_BACKREFERENCE, /**< backreference */
|
||||
|
||||
RE_OP_ASSERT_LINE_START, /**< line start assertion */
|
||||
RE_OP_ASSERT_LINE_END, /**< line end assertion */
|
||||
RE_OP_ASSERT_WORD_BOUNDARY, /**< word boundary assertion */
|
||||
RE_OP_ASSERT_NOT_WORD_BOUNDARY, /**< not word boundary assertion */
|
||||
RE_OP_ASSERT_LOOKAHEAD_POS, /**< positive lookahead assertion */
|
||||
RE_OP_ASSERT_LOOKAHEAD_NEG, /**< negative lookahead assertion */
|
||||
RE_OP_ASSERT_END, /**< end of an assertion */
|
||||
|
||||
RE_OP_CLASS_ESCAPE, /**< class escape */
|
||||
RE_OP_CHAR_CLASS, /**< character class */
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
RE_OP_UNICODE_PERIOD, /**< period in full unicode mode */
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
RE_OP_PERIOD, /**< period in non-unicode mode */
|
||||
RE_OP_CHAR, /**< any code point */
|
||||
RE_OP_BYTE, /**< 1-byte utf8 character */
|
||||
} re_opcode_t;
|
||||
|
||||
/**
|
||||
@@ -85,42 +100,31 @@ typedef enum
|
||||
typedef struct
|
||||
{
|
||||
ecma_compiled_code_t header; /**< compiled code header */
|
||||
uint32_t captures_count; /**< number of capturing groups */
|
||||
uint32_t non_captures_count; /**< number of non-capturing groups */
|
||||
ecma_value_t source; /**< original RegExp pattern */
|
||||
uint32_t captures_count; /**< number of capturing brackets */
|
||||
uint32_t non_captures_count; /**< number of non capturing brackets */
|
||||
} re_compiled_code_t;
|
||||
|
||||
/**
 * Context of RegExp bytecode container
 */
typedef struct
{
  uint8_t *block_start_p; /**< start of bytecode block */
  uint8_t *block_end_p; /**< end of bytecode block */
  uint8_t *current_p; /**< current position in bytecode */
} re_bytecode_ctx_t;
|
||||
/*
* NOTE(review): the declarations below mix two API generations. One set takes
* a re_compiler_ctx_t pointer, the other a re_bytecode_ctx_t pointer, and
* re_initialize_regexp_bytecode, re_append_opcode, re_append_char,
* re_insert_opcode, re_get_char and re_dump_bytecode are each declared twice
* with different parameter lists. C has no overloading, so both sets cannot
* be compiled together - confirm which set is current before relying on this
* listing.
*/
void re_initialize_regexp_bytecode (re_compiler_ctx_t *re_ctx_p);
|
||||
uint32_t re_bytecode_size (re_compiler_ctx_t *re_ctx_p);
|
||||
|
||||
void re_append_opcode (re_compiler_ctx_t *re_ctx_p, const re_opcode_t opcode);
|
||||
void re_append_byte (re_compiler_ctx_t *re_ctx_p, const uint8_t byte);
|
||||
void re_append_char (re_compiler_ctx_t *re_ctx_p, const lit_code_point_t cp);
|
||||
void re_append_value (re_compiler_ctx_t *re_ctx_p, const uint32_t value);
|
||||
|
||||
void re_insert_opcode (re_compiler_ctx_t *re_ctx_p, const uint32_t offset, const re_opcode_t opcode);
|
||||
void re_insert_byte (re_compiler_ctx_t *re_ctx_p, const uint32_t offset, const uint8_t byte);
|
||||
void re_insert_char (re_compiler_ctx_t *re_ctx_p, const uint32_t offset, const lit_code_point_t cp);
|
||||
void re_insert_value (re_compiler_ctx_t *re_ctx_p, const uint32_t offset, const uint32_t value);
|
||||
|
||||
/* Readers: each takes a pointer to a bytecode cursor (const uint8_t **). */
re_opcode_t re_get_opcode (const uint8_t **bc_p);
|
||||
/* NOTE(review): conflicts with the two-argument re_get_char declared below. */
ecma_char_t re_get_char (const uint8_t **bc_p);
|
||||
uint8_t re_get_byte (const uint8_t **bc_p);
|
||||
lit_code_point_t re_get_char (const uint8_t **bc_p, bool unicode);
|
||||
uint32_t re_get_value (const uint8_t **bc_p);
|
||||
uint32_t JERRY_ATTR_PURE re_get_bytecode_length (re_bytecode_ctx_t *bc_ctx_p);
|
||||
|
||||
void re_initialize_regexp_bytecode (re_bytecode_ctx_t *bc_ctx_p);
|
||||
|
||||
void re_append_opcode (re_bytecode_ctx_t *bc_ctx_p, const re_opcode_t opcode);
|
||||
void re_append_u32 (re_bytecode_ctx_t *bc_ctx_p, const uint32_t value);
|
||||
void re_append_char (re_bytecode_ctx_t *bc_ctx_p, const ecma_char_t input_char);
|
||||
void re_append_jump_offset (re_bytecode_ctx_t *bc_ctx_p, uint32_t value);
|
||||
|
||||
void re_insert_opcode (re_bytecode_ctx_t *bc_ctx_p, const uint32_t offset, const re_opcode_t opcode);
|
||||
void re_insert_u32 (re_bytecode_ctx_t *bc_ctx_p, const uint32_t offset, const uint32_t value);
|
||||
void re_bytecode_list_insert (re_bytecode_ctx_t *bc_ctx_p,
|
||||
const size_t offset,
|
||||
const uint8_t *bytecode_p,
|
||||
const size_t length);
|
||||
|
||||
/* Only declared when bytecode dumping is enabled in the build. */
#if ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE)
|
||||
/* NOTE(review): declared twice with different context types - see the note at the top of this list. */
void re_dump_bytecode (re_bytecode_ctx_t *bc_ctx);
|
||||
void re_dump_bytecode (re_compiler_ctx_t *bc_ctx);
|
||||
#endif /* ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE) */
|
||||
|
||||
/**
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
/* Copyright JS Foundation and other contributors, http://js.foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef RE_COMPILER_CONTEXT_H
|
||||
#define RE_COMPILER_CONTEXT_H
|
||||
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
|
||||
#include "re-token.h"
|
||||
|
||||
/** \addtogroup parser Parser
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser Regular expression
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser_compiler Compiler
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* RegExp compiler context
|
||||
*
* Gathers the state used while compiling one pattern: the bounds of and the
* current position in the input pattern, the bytecode block and its size,
* the group counters, the RegExp flags and the current token.
*/
|
||||
typedef struct
|
||||
{
|
||||
const lit_utf8_byte_t *input_start_p; /**< start of input pattern */
|
||||
const lit_utf8_byte_t *input_curr_p; /**< current position in input pattern */
|
||||
const lit_utf8_byte_t *input_end_p; /**< end of input pattern */
|
||||
|
||||
uint8_t *bytecode_start_p; /**< start of bytecode block */
|
||||
size_t bytecode_size; /**< size of bytecode */
|
||||
|
||||
uint32_t captures_count; /**< number of capture groups */
|
||||
uint32_t non_captures_count; /**< number of non-capture groups */
|
||||
|
||||
/* NOTE(review): signed int, unlike the two uint32_t counters above - confirm whether negative values are meaningful. */
int groups_count; /**< number of groups */
|
||||
uint16_t flags; /**< RegExp flags */
|
||||
re_token_t token; /**< current token */
|
||||
} re_compiler_ctx_t;
|
||||
|
||||
/**
|
||||
* @}
|
||||
* @}
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
|
||||
#endif /* !RE_COMPILER_CONTEXT_H */
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "jmem.h"
|
||||
#include "re-bytecode.h"
|
||||
#include "re-compiler.h"
|
||||
#include "re-compiler-context.h"
|
||||
#include "re-parser.h"
|
||||
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
@@ -38,896 +39,140 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Insert simple atom iterator
|
||||
* Search for the given pattern in the RegExp cache.
|
||||
*
|
||||
* @return empty ecma value - if inserted successfully
|
||||
* error ecma value - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value
|
||||
* @return pointer to bytecode if found
|
||||
* NULL - otherwise
|
||||
*/
|
||||
static ecma_value_t
|
||||
re_insert_simple_iterator (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
|
||||
uint32_t new_atom_start_offset) /**< atom start offset */
|
||||
{
|
||||
uint32_t atom_code_length;
|
||||
uint32_t offset;
|
||||
uint32_t qmin, qmax;
|
||||
|
||||
qmin = re_ctx_p->current_token.qmin;
|
||||
qmax = re_ctx_p->current_token.qmax;
|
||||
|
||||
if (qmin == 1 && qmax == 1)
|
||||
{
|
||||
return ECMA_VALUE_EMPTY;
|
||||
}
|
||||
else if (qmin > qmax)
|
||||
{
|
||||
/* ECMA-262 v5.1 15.10.2.5 */
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("RegExp quantifier error: min > max."));
|
||||
}
|
||||
|
||||
/* TODO: optimize bytecode length. Store 0 rather than INF */
|
||||
|
||||
re_append_opcode (re_ctx_p->bytecode_ctx_p, RE_OP_MATCH); /* complete 'sub atom' */
|
||||
uint32_t bytecode_length = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
|
||||
atom_code_length = (uint32_t) (bytecode_length - new_atom_start_offset);
|
||||
|
||||
offset = new_atom_start_offset;
|
||||
re_insert_u32 (re_ctx_p->bytecode_ctx_p, offset, atom_code_length);
|
||||
re_insert_u32 (re_ctx_p->bytecode_ctx_p, offset, qmax);
|
||||
re_insert_u32 (re_ctx_p->bytecode_ctx_p, offset, qmin);
|
||||
if (re_ctx_p->current_token.greedy)
|
||||
{
|
||||
re_insert_opcode (re_ctx_p->bytecode_ctx_p, offset, RE_OP_GREEDY_ITERATOR);
|
||||
}
|
||||
else
|
||||
{
|
||||
re_insert_opcode (re_ctx_p->bytecode_ctx_p, offset, RE_OP_NON_GREEDY_ITERATOR);
|
||||
}
|
||||
|
||||
return ECMA_VALUE_EMPTY;
|
||||
} /* re_insert_simple_iterator */
|
||||
|
||||
/**
|
||||
* Get the type of a group start
|
||||
*
|
||||
* @return RegExp opcode
|
||||
*/
|
||||
static re_opcode_t
|
||||
re_get_start_opcode_type (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
|
||||
bool is_capturable) /**< is capturable group */
|
||||
{
|
||||
if (is_capturable)
|
||||
{
|
||||
if (re_ctx_p->current_token.qmin == 0)
|
||||
{
|
||||
if (re_ctx_p->current_token.greedy)
|
||||
{
|
||||
return RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START;
|
||||
}
|
||||
|
||||
return RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START;
|
||||
}
|
||||
|
||||
return RE_OP_CAPTURE_GROUP_START;
|
||||
}
|
||||
|
||||
if (re_ctx_p->current_token.qmin == 0)
|
||||
{
|
||||
if (re_ctx_p->current_token.greedy)
|
||||
{
|
||||
return RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START;
|
||||
}
|
||||
|
||||
return RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START;
|
||||
}
|
||||
|
||||
return RE_OP_NON_CAPTURE_GROUP_START;
|
||||
} /* re_get_start_opcode_type */
|
||||
|
||||
/**
|
||||
* Get the type of a group end
|
||||
*
|
||||
* @return RegExp opcode
|
||||
*/
|
||||
static re_opcode_t
|
||||
re_get_end_opcode_type (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
|
||||
bool is_capturable) /**< is capturable group */
|
||||
{
|
||||
if (is_capturable)
|
||||
{
|
||||
if (re_ctx_p->current_token.greedy)
|
||||
{
|
||||
return RE_OP_CAPTURE_GREEDY_GROUP_END;
|
||||
}
|
||||
|
||||
return RE_OP_CAPTURE_NON_GREEDY_GROUP_END;
|
||||
}
|
||||
|
||||
if (re_ctx_p->current_token.greedy)
|
||||
{
|
||||
return RE_OP_NON_CAPTURE_GREEDY_GROUP_END;
|
||||
}
|
||||
|
||||
return RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END;
|
||||
} /* re_get_end_opcode_type */
|
||||
|
||||
/**
|
||||
* Enclose the given bytecode to a group
|
||||
*
|
||||
* @return empty ecma value - if inserted successfully
|
||||
* error ecma value - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value
|
||||
*/
|
||||
static ecma_value_t
|
||||
re_insert_into_group (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
|
||||
uint32_t group_start_offset, /**< offset of group start */
|
||||
uint32_t idx, /**< index of group */
|
||||
bool is_capturable) /**< is capturable group */
|
||||
{
|
||||
uint32_t qmin = re_ctx_p->current_token.qmin;
|
||||
uint32_t qmax = re_ctx_p->current_token.qmax;
|
||||
|
||||
if (qmin > qmax)
|
||||
{
|
||||
/* ECMA-262 v5.1 15.10.2.5 */
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("RegExp quantifier error: min > max."));
|
||||
}
|
||||
|
||||
re_opcode_t start_opcode = re_get_start_opcode_type (re_ctx_p, is_capturable);
|
||||
re_opcode_t end_opcode = re_get_end_opcode_type (re_ctx_p, is_capturable);
|
||||
|
||||
uint32_t start_head_offset_len = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
|
||||
re_insert_u32 (re_ctx_p->bytecode_ctx_p, group_start_offset, idx);
|
||||
re_insert_opcode (re_ctx_p->bytecode_ctx_p, group_start_offset, start_opcode);
|
||||
start_head_offset_len = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p) - start_head_offset_len;
|
||||
re_append_opcode (re_ctx_p->bytecode_ctx_p, end_opcode);
|
||||
re_append_u32 (re_ctx_p->bytecode_ctx_p, idx);
|
||||
re_append_u32 (re_ctx_p->bytecode_ctx_p, qmin);
|
||||
re_append_u32 (re_ctx_p->bytecode_ctx_p, qmax);
|
||||
|
||||
group_start_offset += start_head_offset_len;
|
||||
re_append_jump_offset (re_ctx_p->bytecode_ctx_p,
|
||||
re_get_bytecode_length (re_ctx_p->bytecode_ctx_p) - group_start_offset);
|
||||
|
||||
if (start_opcode != RE_OP_CAPTURE_GROUP_START && start_opcode != RE_OP_NON_CAPTURE_GROUP_START)
|
||||
{
|
||||
re_insert_u32 (re_ctx_p->bytecode_ctx_p,
|
||||
group_start_offset,
|
||||
re_get_bytecode_length (re_ctx_p->bytecode_ctx_p) - group_start_offset);
|
||||
}
|
||||
|
||||
return ECMA_VALUE_EMPTY;
|
||||
} /* re_insert_into_group */
|
||||
|
||||
/**
|
||||
* Enclose the given bytecode to a group and inster jump value
|
||||
*
|
||||
* @return empty ecma value - if inserted successfully
|
||||
* error ecma value - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value
|
||||
*/
|
||||
static ecma_value_t
|
||||
re_insert_into_group_with_jump (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
|
||||
uint32_t group_start_offset, /**< offset of group start */
|
||||
uint32_t idx, /**< index of group */
|
||||
bool is_capturable) /**< is capturable group */
|
||||
{
|
||||
re_insert_u32 (re_ctx_p->bytecode_ctx_p,
|
||||
group_start_offset,
|
||||
re_get_bytecode_length (re_ctx_p->bytecode_ctx_p) - group_start_offset);
|
||||
return re_insert_into_group (re_ctx_p, group_start_offset, idx, is_capturable);
|
||||
} /* re_insert_into_group_with_jump */
|
||||
|
||||
/**
|
||||
* Append a character class range to the bytecode
|
||||
*/
|
||||
static void
|
||||
re_append_char_class (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
|
||||
lit_code_point_t start, /**< character class range from */
|
||||
lit_code_point_t end) /**< character class range to */
|
||||
{
|
||||
re_ctx_p->parser_ctx_p->classes_count++;
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
if (re_ctx_p->flags & RE_FLAG_UNICODE)
|
||||
{
|
||||
re_append_u32 (re_ctx_p->bytecode_ctx_p, ecma_regexp_canonicalize (start, re_ctx_p->flags & RE_FLAG_IGNORE_CASE));
|
||||
re_append_u32 (re_ctx_p->bytecode_ctx_p, ecma_regexp_canonicalize (end, re_ctx_p->flags & RE_FLAG_IGNORE_CASE));
|
||||
return;
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
JERRY_ASSERT (start <= LIT_UTF16_CODE_UNIT_MAX);
|
||||
JERRY_ASSERT (end <= LIT_UTF16_CODE_UNIT_MAX);
|
||||
|
||||
re_append_char (re_ctx_p->bytecode_ctx_p,
|
||||
(ecma_char_t) ecma_regexp_canonicalize (start,
|
||||
re_ctx_p->flags & RE_FLAG_IGNORE_CASE));
|
||||
re_append_char (re_ctx_p->bytecode_ctx_p,
|
||||
(ecma_char_t) ecma_regexp_canonicalize (end,
|
||||
re_ctx_p->flags & RE_FLAG_IGNORE_CASE));
|
||||
} /* re_append_char_class */
|
||||
|
||||
/**
|
||||
* Read the input pattern and parse the range of character class
|
||||
*
|
||||
* @return empty ecma value - if parsed successfully
|
||||
* error ecma value - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value
|
||||
*/
|
||||
static ecma_value_t
|
||||
re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
|
||||
re_token_t *out_token_p) /**< [out] output token */
|
||||
{
|
||||
re_parser_ctx_t *const parser_ctx_p = re_ctx_p->parser_ctx_p;
|
||||
out_token_p->qmax = out_token_p->qmin = 1;
|
||||
parser_ctx_p->classes_count = 0;
|
||||
|
||||
lit_code_point_t start = LIT_CHAR_UNDEF;
|
||||
bool is_range = false;
|
||||
const bool is_char_class = (re_ctx_p->current_token.type == RE_TOK_START_CHAR_CLASS
|
||||
|| re_ctx_p->current_token.type == RE_TOK_START_INV_CHAR_CLASS);
|
||||
|
||||
const ecma_char_t prev_char = lit_cesu8_peek_prev (parser_ctx_p->input_curr_p);
|
||||
if (prev_char != LIT_CHAR_LEFT_SQUARE && prev_char != LIT_CHAR_CIRCUMFLEX)
|
||||
{
|
||||
lit_utf8_decr (&parser_ctx_p->input_curr_p);
|
||||
lit_utf8_decr (&parser_ctx_p->input_curr_p);
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string"));
|
||||
}
|
||||
|
||||
lit_code_point_t ch = lit_cesu8_read_next (&parser_ctx_p->input_curr_p);
|
||||
|
||||
if (ch == LIT_CHAR_RIGHT_SQUARE)
|
||||
{
|
||||
if (start != LIT_CHAR_UNDEF)
|
||||
{
|
||||
re_append_char_class (re_ctx_p, start, start);
|
||||
}
|
||||
break;
|
||||
}
|
||||
else if (ch == LIT_CHAR_MINUS)
|
||||
{
|
||||
if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '-'"));
|
||||
}
|
||||
|
||||
if (start != LIT_CHAR_UNDEF
|
||||
&& !is_range
|
||||
&& *parser_ctx_p->input_curr_p != LIT_CHAR_RIGHT_SQUARE)
|
||||
{
|
||||
is_range = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (ch == LIT_CHAR_BACKSLASH)
|
||||
{
|
||||
if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\'"));
|
||||
}
|
||||
|
||||
ch = lit_cesu8_read_next (&parser_ctx_p->input_curr_p);
|
||||
|
||||
if (ch == LIT_CHAR_LOWERCASE_B)
|
||||
{
|
||||
ch = LIT_CHAR_BS;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_F)
|
||||
{
|
||||
ch = LIT_CHAR_FF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_N)
|
||||
{
|
||||
ch = LIT_CHAR_LF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_T)
|
||||
{
|
||||
ch = LIT_CHAR_TAB;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_R)
|
||||
{
|
||||
ch = LIT_CHAR_CR;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_V)
|
||||
{
|
||||
ch = LIT_CHAR_VTAB;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_C)
|
||||
{
|
||||
if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p)
|
||||
{
|
||||
ch = *parser_ctx_p->input_curr_p;
|
||||
|
||||
if ((ch >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
|
||||
|| (ch >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)
|
||||
|| (ch >= LIT_CHAR_0 && ch <= LIT_CHAR_9))
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.10 (Point 3) */
|
||||
ch = (ch % 32);
|
||||
parser_ctx_p->input_curr_p++;
|
||||
}
|
||||
else
|
||||
{
|
||||
ch = LIT_CHAR_LOWERCASE_C;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_X && re_hex_lookup (parser_ctx_p, 2))
|
||||
{
|
||||
ecma_char_t code_unit;
|
||||
|
||||
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 2, &code_unit))
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\x'"));
|
||||
}
|
||||
|
||||
parser_ctx_p->input_curr_p += 2;
|
||||
if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p
|
||||
&& is_range == false
|
||||
&& lit_cesu8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
|
||||
{
|
||||
start = code_unit;
|
||||
continue;
|
||||
}
|
||||
|
||||
ch = code_unit;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_U && re_hex_lookup (parser_ctx_p, 4))
|
||||
{
|
||||
ecma_char_t code_unit;
|
||||
|
||||
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 4, &code_unit))
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\u'"));
|
||||
}
|
||||
|
||||
parser_ctx_p->input_curr_p += 4;
|
||||
if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p
|
||||
&& is_range == false
|
||||
&& lit_cesu8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
|
||||
{
|
||||
start = code_unit;
|
||||
continue;
|
||||
}
|
||||
|
||||
ch = code_unit;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_D)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_ASCII_DIGITS_BEGIN, LIT_CHAR_ASCII_DIGITS_END);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_UPPERCASE_D)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_NULL, LIT_CHAR_ASCII_DIGITS_BEGIN - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_ASCII_DIGITS_END + 1, LIT_UTF16_CODE_UNIT_MAX);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_S)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_TAB, LIT_CHAR_CR);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_SP, LIT_CHAR_SP);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_NBSP, LIT_CHAR_NBSP);
|
||||
re_append_char_class (re_ctx_p, 0x1680UL, 0x1680UL); /* Ogham Space Mark */
|
||||
re_append_char_class (re_ctx_p, 0x180EUL, 0x180EUL); /* Mongolian Vowel Separator */
|
||||
re_append_char_class (re_ctx_p, 0x2000UL, 0x200AUL); /* En Quad - Hair Space */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_LS, LIT_CHAR_PS);
|
||||
re_append_char_class (re_ctx_p, 0x202FUL, 0x202FUL); /* Narrow No-Break Space */
|
||||
re_append_char_class (re_ctx_p, 0x205FUL, 0x205FUL); /* Medium Mathematical Space */
|
||||
re_append_char_class (re_ctx_p, 0x3000UL, 0x3000UL); /* Ideographic Space */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_BOM, LIT_CHAR_BOM);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_UPPERCASE_S)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_NULL, LIT_CHAR_TAB - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_CR + 1, LIT_CHAR_SP - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_SP + 1, LIT_CHAR_NBSP - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_NBSP + 1, 0x167FUL);
|
||||
re_append_char_class (re_ctx_p, 0x1681UL, 0x180DUL);
|
||||
re_append_char_class (re_ctx_p, 0x180FUL, 0x1FFFUL);
|
||||
re_append_char_class (re_ctx_p, 0x200BUL, LIT_CHAR_LS - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_PS + 1, 0x202EUL);
|
||||
re_append_char_class (re_ctx_p, 0x2030UL, 0x205EUL);
|
||||
re_append_char_class (re_ctx_p, 0x2060UL, 0x2FFFUL);
|
||||
re_append_char_class (re_ctx_p, 0x3001UL, LIT_CHAR_BOM - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_BOM + 1, LIT_UTF16_CODE_UNIT_MAX);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_W)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_0, LIT_CHAR_9);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_UPPERCASE_A, LIT_CHAR_UPPERCASE_Z);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_UNDERSCORE, LIT_CHAR_UNDERSCORE);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_LOWERCASE_A, LIT_CHAR_LOWERCASE_Z);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_UPPERCASE_W)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_NULL, LIT_CHAR_0 - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_9 + 1, LIT_CHAR_UPPERCASE_A - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_UPPERCASE_Z + 1, LIT_CHAR_UNDERSCORE - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_UNDERSCORE + 1, LIT_CHAR_LOWERCASE_A - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_LOWERCASE_Z + 1, LIT_UTF16_CODE_UNIT_MAX);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (lit_char_is_octal_digit ((ecma_char_t) ch))
|
||||
{
|
||||
lit_utf8_decr (&parser_ctx_p->input_curr_p);
|
||||
ch = (ecma_char_t) re_parse_octal (parser_ctx_p);
|
||||
}
|
||||
} /* ch == LIT_CHAR_BACKSLASH */
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
if (re_ctx_p->flags & RE_FLAG_UNICODE
|
||||
&& lit_is_code_point_utf16_high_surrogate (ch)
|
||||
&& parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p)
|
||||
{
|
||||
const ecma_char_t next_ch = lit_cesu8_peek_next (parser_ctx_p->input_curr_p);
|
||||
if (lit_is_code_point_utf16_low_surrogate (next_ch))
|
||||
{
|
||||
ch = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) ch, next_ch);
|
||||
lit_utf8_incr (&parser_ctx_p->input_curr_p);
|
||||
}
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
if (start != LIT_CHAR_UNDEF)
|
||||
{
|
||||
if (is_range)
|
||||
{
|
||||
if (start > ch)
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, wrong order"));
|
||||
}
|
||||
else
|
||||
{
|
||||
re_append_char_class (re_ctx_p, start, ch);
|
||||
start = LIT_CHAR_UNDEF;
|
||||
is_range = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
re_append_char_class (re_ctx_p, start, start);
|
||||
start = ch;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
start = ch;
|
||||
}
|
||||
}
|
||||
while (is_char_class);
|
||||
|
||||
return re_parse_iterator (parser_ctx_p, out_token_p);
|
||||
} /* re_parse_char_class */
|
||||
|
||||
/**
|
||||
* Parse alternatives
|
||||
*
|
||||
* @return empty ecma value - if alternative was successfully parsed
|
||||
* error ecma value - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value
|
||||
*/
|
||||
static ecma_value_t
|
||||
re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
|
||||
bool expect_eof) /**< expect end of file */
|
||||
{
|
||||
ECMA_CHECK_STACK_USAGE ();
|
||||
uint32_t idx;
|
||||
re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p;
|
||||
ecma_value_t ret_value = ECMA_VALUE_EMPTY;
|
||||
|
||||
uint32_t alternative_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
|
||||
|
||||
while (ecma_is_value_empty (ret_value))
|
||||
{
|
||||
ecma_value_t next_token_result = re_parse_next_token (re_ctx_p->parser_ctx_p,
|
||||
&(re_ctx_p->current_token));
|
||||
if (ECMA_IS_VALUE_ERROR (next_token_result))
|
||||
{
|
||||
return next_token_result;
|
||||
}
|
||||
|
||||
JERRY_ASSERT (ecma_is_value_empty (next_token_result));
|
||||
|
||||
uint32_t new_atom_start_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
|
||||
|
||||
switch (re_ctx_p->current_token.type)
|
||||
{
|
||||
case RE_TOK_START_CAPTURE_GROUP:
|
||||
{
|
||||
idx = re_ctx_p->captures_count++;
|
||||
JERRY_TRACE_MSG ("Compile a capture group start (idx: %u)\n", (unsigned int) idx);
|
||||
|
||||
ret_value = re_parse_alternative (re_ctx_p, false);
|
||||
|
||||
if (ecma_is_value_empty (ret_value))
|
||||
{
|
||||
ret_value = re_insert_into_group (re_ctx_p, new_atom_start_offset, idx, true);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case RE_TOK_START_NON_CAPTURE_GROUP:
|
||||
{
|
||||
idx = re_ctx_p->non_captures_count++;
|
||||
JERRY_TRACE_MSG ("Compile a non-capture group start (idx: %u)\n", (unsigned int) idx);
|
||||
|
||||
ret_value = re_parse_alternative (re_ctx_p, false);
|
||||
|
||||
if (ecma_is_value_empty (ret_value))
|
||||
{
|
||||
ret_value = re_insert_into_group (re_ctx_p, new_atom_start_offset, idx, false);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case RE_TOK_CHAR:
|
||||
{
|
||||
JERRY_TRACE_MSG ("Compile character token: %c, qmin: %u, qmax: %u\n",
|
||||
(char) re_ctx_p->current_token.value, (unsigned int) re_ctx_p->current_token.qmin,
|
||||
(unsigned int) re_ctx_p->current_token.qmax);
|
||||
|
||||
re_append_opcode (bc_ctx_p, RE_OP_CHAR);
|
||||
re_append_char (bc_ctx_p, (ecma_char_t) ecma_regexp_canonicalize ((ecma_char_t) re_ctx_p->current_token.value,
|
||||
re_ctx_p->flags & RE_FLAG_IGNORE_CASE));
|
||||
|
||||
ret_value = re_insert_simple_iterator (re_ctx_p, new_atom_start_offset);
|
||||
break;
|
||||
}
|
||||
case RE_TOK_PERIOD:
|
||||
{
|
||||
JERRY_TRACE_MSG ("Compile a period\n");
|
||||
re_append_opcode (bc_ctx_p, RE_OP_PERIOD);
|
||||
|
||||
ret_value = re_insert_simple_iterator (re_ctx_p, new_atom_start_offset);
|
||||
break;
|
||||
}
|
||||
case RE_TOK_ALTERNATIVE:
|
||||
{
|
||||
JERRY_TRACE_MSG ("Compile an alternative\n");
|
||||
re_insert_u32 (bc_ctx_p, alternative_offset, re_get_bytecode_length (bc_ctx_p) - alternative_offset);
|
||||
re_append_opcode (bc_ctx_p, RE_OP_ALTERNATIVE);
|
||||
alternative_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
|
||||
break;
|
||||
}
|
||||
case RE_TOK_ASSERT_START:
|
||||
{
|
||||
JERRY_TRACE_MSG ("Compile a start assertion\n");
|
||||
re_append_opcode (bc_ctx_p, RE_OP_ASSERT_START);
|
||||
break;
|
||||
}
|
||||
case RE_TOK_ASSERT_END:
|
||||
{
|
||||
JERRY_TRACE_MSG ("Compile an end assertion\n");
|
||||
re_append_opcode (bc_ctx_p, RE_OP_ASSERT_END);
|
||||
break;
|
||||
}
|
||||
case RE_TOK_ASSERT_WORD_BOUNDARY:
|
||||
{
|
||||
JERRY_TRACE_MSG ("Compile a word boundary assertion\n");
|
||||
re_append_opcode (bc_ctx_p, RE_OP_ASSERT_WORD_BOUNDARY);
|
||||
break;
|
||||
}
|
||||
case RE_TOK_ASSERT_NOT_WORD_BOUNDARY:
|
||||
{
|
||||
JERRY_TRACE_MSG ("Compile a not word boundary assertion\n");
|
||||
re_append_opcode (bc_ctx_p, RE_OP_ASSERT_NOT_WORD_BOUNDARY);
|
||||
break;
|
||||
}
|
||||
case RE_TOK_ASSERT_START_POS_LOOKAHEAD:
|
||||
{
|
||||
JERRY_TRACE_MSG ("Compile a positive lookahead assertion\n");
|
||||
idx = re_ctx_p->non_captures_count++;
|
||||
re_append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_POS);
|
||||
|
||||
ret_value = re_parse_alternative (re_ctx_p, false);
|
||||
|
||||
if (ecma_is_value_empty (ret_value))
|
||||
{
|
||||
re_append_opcode (bc_ctx_p, RE_OP_MATCH);
|
||||
|
||||
ret_value = re_insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case RE_TOK_ASSERT_START_NEG_LOOKAHEAD:
|
||||
{
|
||||
JERRY_TRACE_MSG ("Compile a negative lookahead assertion\n");
|
||||
idx = re_ctx_p->non_captures_count++;
|
||||
re_append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_NEG);
|
||||
|
||||
ret_value = re_parse_alternative (re_ctx_p, false);
|
||||
|
||||
if (ecma_is_value_empty (ret_value))
|
||||
{
|
||||
re_append_opcode (bc_ctx_p, RE_OP_MATCH);
|
||||
|
||||
ret_value = re_insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case RE_TOK_BACKREFERENCE:
|
||||
{
|
||||
uint32_t backref = (uint32_t) re_ctx_p->current_token.value;
|
||||
idx = re_ctx_p->non_captures_count++;
|
||||
|
||||
if (backref > re_ctx_p->highest_backref)
|
||||
{
|
||||
re_ctx_p->highest_backref = backref;
|
||||
}
|
||||
|
||||
JERRY_TRACE_MSG ("Compile a backreference: %u\n", (unsigned int) backref);
|
||||
re_append_opcode (bc_ctx_p, RE_OP_BACKREFERENCE);
|
||||
re_append_u32 (bc_ctx_p, backref);
|
||||
|
||||
ret_value = re_insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false);
|
||||
break;
|
||||
}
|
||||
case RE_TOK_DIGIT:
|
||||
case RE_TOK_NOT_DIGIT:
|
||||
case RE_TOK_WHITE:
|
||||
case RE_TOK_NOT_WHITE:
|
||||
case RE_TOK_WORD_CHAR:
|
||||
case RE_TOK_NOT_WORD_CHAR:
|
||||
case RE_TOK_START_CHAR_CLASS:
|
||||
case RE_TOK_START_INV_CHAR_CLASS:
|
||||
{
|
||||
JERRY_TRACE_MSG ("Compile a character class\n");
|
||||
re_append_opcode (bc_ctx_p,
|
||||
re_ctx_p->current_token.type == RE_TOK_START_INV_CHAR_CLASS
|
||||
? RE_OP_INV_CHAR_CLASS
|
||||
: RE_OP_CHAR_CLASS);
|
||||
uint32_t offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
|
||||
|
||||
ret_value = re_parse_char_class (re_ctx_p,
|
||||
&(re_ctx_p->current_token));
|
||||
|
||||
if (!ECMA_IS_VALUE_ERROR (ret_value))
|
||||
{
|
||||
re_insert_u32 (bc_ctx_p, offset, re_ctx_p->parser_ctx_p->classes_count);
|
||||
ret_value = re_insert_simple_iterator (re_ctx_p, new_atom_start_offset);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case RE_TOK_END_GROUP:
|
||||
{
|
||||
JERRY_TRACE_MSG ("Compile a group end\n");
|
||||
|
||||
if (expect_eof)
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unexpected end of paren."));
|
||||
}
|
||||
|
||||
re_insert_u32 (bc_ctx_p, alternative_offset, re_get_bytecode_length (bc_ctx_p) - alternative_offset);
|
||||
return ECMA_VALUE_EMPTY;
|
||||
}
|
||||
case RE_TOK_EOF:
|
||||
{
|
||||
if (!expect_eof)
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unexpected end of pattern."));
|
||||
}
|
||||
|
||||
re_insert_u32 (bc_ctx_p, alternative_offset, re_get_bytecode_length (bc_ctx_p) - alternative_offset);
|
||||
return ECMA_VALUE_EMPTY;
|
||||
}
|
||||
default:
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unexpected RegExp token."));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret_value;
|
||||
} /* re_parse_alternative */
|
||||
|
||||
/**
|
||||
* Search for the given pattern in the RegExp cache
|
||||
*
|
||||
* @return index of bytecode in cache - if found
|
||||
* RE_CACHE_SIZE - otherwise
|
||||
*/
|
||||
/*
* NOTE(review): the text below interleaves two revisions of the cache lookup
* and is not one valid definition as written. It contains two function
* headers (re_find_bytecode_in_cache returning uint8_t and re_cache_lookup
* returning a re_compiled_code_t pointer), two declarations of
* cached_bytecode_p in the same scope, and two closing braces. The @return
* text above matches the index-returning variant; the pointer-returning
* variant ends with 'return NULL'. Confirm which revision is current.
*/
static uint8_t
|
||||
re_find_bytecode_in_cache (ecma_string_t *pattern_str_p, /**< pattern string */
|
||||
static re_compiled_code_t *
|
||||
re_cache_lookup (ecma_string_t *pattern_str_p, /**< pattern string */
|
||||
uint16_t flags) /**< flags */
|
||||
{
|
||||
uint8_t free_idx = RE_CACHE_SIZE;
|
||||
re_compiled_code_t **cache_p = JERRY_CONTEXT (re_cache);
|
||||
|
||||
for (uint8_t idx = 0u; idx < RE_CACHE_SIZE; idx++)
|
||||
{
|
||||
const re_compiled_code_t *cached_bytecode_p = JERRY_CONTEXT (re_cache)[idx];
|
||||
re_compiled_code_t *cached_bytecode_p = cache_p[idx];
|
||||
|
||||
if (cached_bytecode_p != NULL)
|
||||
if (cached_bytecode_p == NULL)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
ecma_string_t *cached_pattern_str_p = ecma_get_string_from_value (cached_bytecode_p->source);
|
||||
|
||||
/* An entry matches when both its masked flags and its source pattern are equal to the request. */
if ((cached_bytecode_p->header.status_flags & RE_FLAGS_MASK) == flags
|
||||
&& ecma_compare_ecma_strings (cached_pattern_str_p, pattern_str_p))
|
||||
{
|
||||
JERRY_TRACE_MSG ("RegExp is found in cache\n");
|
||||
return idx;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* mark as free, so it can be overridden if the cache is full */
|
||||
free_idx = idx;
|
||||
return cached_bytecode_p;
|
||||
}
|
||||
}
|
||||
|
||||
JERRY_TRACE_MSG ("RegExp is NOT found in cache\n");
|
||||
return free_idx;
|
||||
} /* re_find_bytecode_in_cache */
|
||||
return NULL;
|
||||
} /* re_cache_lookup */
|
||||
|
||||
/**
|
||||
* Run garbage collection in RegExp cache
|
||||
* Run garbage collection in RegExp cache.
|
||||
*/
|
||||
/*
* NOTE(review): the text below interleaves two revisions of this routine
* (re_cache_gc_run and re_cache_gc) and is not one valid definition as
* written: two function names, two declarations of cached_bytecode_p, two
* different 'if' conditions and two closing braces. One variant releases only
* entries whose header.refs equals 1; the other stops at the first NULL slot
* and finally resets re_cache_idx to 0. Confirm which revision is current.
*/
void
|
||||
re_cache_gc_run (void)
|
||||
re_cache_gc (void)
|
||||
{
|
||||
re_compiled_code_t **cache_p = JERRY_CONTEXT (re_cache);
|
||||
|
||||
for (uint32_t i = 0u; i < RE_CACHE_SIZE; i++)
|
||||
{
|
||||
const re_compiled_code_t *cached_bytecode_p = JERRY_CONTEXT (re_cache)[i];
|
||||
const re_compiled_code_t *cached_bytecode_p = cache_p[i];
|
||||
|
||||
if (cached_bytecode_p != NULL
|
||||
&& cached_bytecode_p->header.refs == 1)
|
||||
if (cached_bytecode_p == NULL)
|
||||
{
|
||||
/* Only the cache has reference for the bytecode */
|
||||
break;
|
||||
}
|
||||
|
||||
/* Drop the cache's reference and clear the slot. */
ecma_bytecode_deref ((ecma_compiled_code_t *) cached_bytecode_p);
|
||||
JERRY_CONTEXT (re_cache)[i] = NULL;
|
||||
cache_p[i] = NULL;
|
||||
}
|
||||
}
|
||||
} /* re_cache_gc_run */
|
||||
|
||||
JERRY_CONTEXT (re_cache_idx) = 0;
|
||||
} /* re_cache_gc */
|
||||
|
||||
/**
|
||||
* Compilation of RegExp bytecode
|
||||
*
|
||||
* @return empty ecma value - if bytecode was compiled successfully
|
||||
* error ecma value - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value
|
||||
* @return pointer to bytecode if compilation was successful
|
||||
* NULL - otherwise
|
||||
*/
|
||||
ecma_value_t
|
||||
re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] pointer to bytecode */
|
||||
ecma_string_t *pattern_str_p, /**< pattern */
|
||||
re_compiled_code_t *
|
||||
re_compile_bytecode (ecma_string_t *pattern_str_p, /**< pattern */
|
||||
uint16_t flags) /**< flags */
|
||||
{
|
||||
ecma_value_t ret_value = ECMA_VALUE_EMPTY;
|
||||
uint8_t cache_idx = re_find_bytecode_in_cache (pattern_str_p, flags);
|
||||
re_compiled_code_t *cached_bytecode_p = re_cache_lookup (pattern_str_p, flags);
|
||||
|
||||
if (cache_idx < RE_CACHE_SIZE)
|
||||
if (cached_bytecode_p != NULL)
|
||||
{
|
||||
*out_bytecode_p = JERRY_CONTEXT (re_cache)[cache_idx];
|
||||
|
||||
if (*out_bytecode_p != NULL)
|
||||
{
|
||||
ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p);
|
||||
return ret_value;
|
||||
}
|
||||
ecma_bytecode_ref ((ecma_compiled_code_t *) cached_bytecode_p);
|
||||
return cached_bytecode_p;
|
||||
}
|
||||
|
||||
/* not in the RegExp cache, so compile it */
|
||||
re_compiler_ctx_t re_ctx;
|
||||
re_ctx.flags = flags;
|
||||
re_ctx.highest_backref = 0;
|
||||
re_ctx.captures_count = 1;
|
||||
re_ctx.non_captures_count = 0;
|
||||
|
||||
re_bytecode_ctx_t bc_ctx;
|
||||
re_ctx.bytecode_ctx_p = &bc_ctx;
|
||||
re_initialize_regexp_bytecode (&bc_ctx);
|
||||
re_initialize_regexp_bytecode (&re_ctx);
|
||||
|
||||
ECMA_STRING_TO_UTF8_STRING (pattern_str_p, pattern_start_p, pattern_start_size);
|
||||
|
||||
re_parser_ctx_t parser_ctx;
|
||||
parser_ctx.input_start_p = pattern_start_p;
|
||||
parser_ctx.input_curr_p = (lit_utf8_byte_t *) pattern_start_p;
|
||||
parser_ctx.input_end_p = pattern_start_p + pattern_start_size;
|
||||
parser_ctx.groups_count = -1;
|
||||
re_ctx.parser_ctx_p = &parser_ctx;
|
||||
re_ctx.input_start_p = pattern_start_p;
|
||||
re_ctx.input_curr_p = (lit_utf8_byte_t *) pattern_start_p;
|
||||
re_ctx.input_end_p = pattern_start_p + pattern_start_size;
|
||||
re_ctx.groups_count = -1;
|
||||
|
||||
/* Parse RegExp pattern */
|
||||
re_ctx.captures_count = 1;
|
||||
re_append_opcode (&bc_ctx, RE_OP_SAVE_AT_START);
|
||||
|
||||
ecma_value_t result = re_parse_alternative (&re_ctx, true);
|
||||
|
||||
ECMA_FINALIZE_UTF8_STRING (pattern_start_p, pattern_start_size);
|
||||
|
||||
if (ECMA_IS_VALUE_ERROR (result))
|
||||
{
|
||||
ret_value = result;
|
||||
/* Compilation failed, free bytecode. */
|
||||
jmem_heap_free_block (re_ctx.bytecode_start_p, re_ctx.bytecode_size);
|
||||
return NULL;
|
||||
}
|
||||
/* Check for invalid backreference */
|
||||
else if (re_ctx.highest_backref >= re_ctx.captures_count)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ("Invalid backreference.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
re_append_opcode (&bc_ctx, RE_OP_SAVE_AND_MATCH);
|
||||
re_append_opcode (&bc_ctx, RE_OP_EOF);
|
||||
|
||||
/* Initialize bytecode header */
|
||||
re_compiled_code_t *re_compiled_code_p = (re_compiled_code_t *) bc_ctx.block_start_p;
|
||||
re_compiled_code_p->header.refs = 1;
|
||||
/* Align bytecode size to JMEM_ALIGNMENT so that it can be stored in the bytecode header. */
|
||||
const uint32_t final_size = JERRY_ALIGNUP (re_ctx.bytecode_size, JMEM_ALIGNMENT);
|
||||
re_compiled_code_t *re_compiled_code_p = (re_compiled_code_t *) jmem_heap_realloc_block (re_ctx.bytecode_start_p,
|
||||
re_ctx.bytecode_size,
|
||||
final_size);
|
||||
|
||||
/* Bytecode will be inserted into the cache and returned to the caller, so the refcount is initialized to 2. */
|
||||
re_compiled_code_p->header.refs = 2;
|
||||
re_compiled_code_p->header.size = (uint16_t) (final_size >> JMEM_ALIGNMENT_LOG);
|
||||
re_compiled_code_p->header.status_flags = re_ctx.flags;
|
||||
|
||||
ecma_ref_ecma_string (pattern_str_p);
|
||||
re_compiled_code_p->source = ecma_make_string_value (pattern_str_p);
|
||||
re_compiled_code_p->captures_count = re_ctx.captures_count;
|
||||
re_compiled_code_p->non_captures_count = re_ctx.non_captures_count;
|
||||
}
|
||||
|
||||
size_t byte_code_size = (size_t) (bc_ctx.block_end_p - bc_ctx.block_start_p);
|
||||
|
||||
if (!ecma_is_value_empty (ret_value))
|
||||
{
|
||||
/* Compilation failed, free bytecode. */
|
||||
JERRY_TRACE_MSG ("RegExp compilation failed!\n");
|
||||
jmem_heap_free_block (bc_ctx.block_start_p, byte_code_size);
|
||||
*out_bytecode_p = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
#if ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE)
|
||||
if (JERRY_CONTEXT (jerry_init_flags) & ECMA_INIT_SHOW_REGEXP_OPCODES)
|
||||
{
|
||||
re_dump_bytecode (&bc_ctx);
|
||||
re_dump_bytecode (&re_ctx);
|
||||
}
|
||||
#endif /* ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE) */
|
||||
|
||||
*out_bytecode_p = (re_compiled_code_t *) bc_ctx.block_start_p;
|
||||
((re_compiled_code_t *) bc_ctx.block_start_p)->header.size = (uint16_t) (byte_code_size >> JMEM_ALIGNMENT_LOG);
|
||||
uint8_t cache_idx = JERRY_CONTEXT (re_cache_idx);
|
||||
|
||||
if (cache_idx == RE_CACHE_SIZE)
|
||||
{
|
||||
if (JERRY_CONTEXT (re_cache_idx) == RE_CACHE_SIZE)
|
||||
{
|
||||
JERRY_CONTEXT (re_cache_idx) = 0;
|
||||
}
|
||||
|
||||
JERRY_TRACE_MSG ("RegExp cache is full! Remove the element on idx: %d\n", JERRY_CONTEXT (re_cache_idx));
|
||||
|
||||
cache_idx = JERRY_CONTEXT (re_cache_idx)++;
|
||||
|
||||
/* The garbage collector might run during the byte code
|
||||
* allocations above and it may free this entry. */
|
||||
if (JERRY_CONTEXT (re_cache)[cache_idx] != NULL)
|
||||
{
|
||||
ecma_bytecode_deref ((ecma_compiled_code_t *) JERRY_CONTEXT (re_cache)[cache_idx]);
|
||||
}
|
||||
}
|
||||
|
||||
JERRY_TRACE_MSG ("Insert bytecode into RegExp cache (idx: %d).\n", cache_idx);
|
||||
ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p);
|
||||
JERRY_CONTEXT (re_cache)[cache_idx] = *out_bytecode_p;
|
||||
}
|
||||
JERRY_CONTEXT (re_cache)[cache_idx] = re_compiled_code_p;
|
||||
JERRY_CONTEXT (re_cache_idx) = (uint8_t) (cache_idx + 1) % RE_CACHE_SIZE;
|
||||
|
||||
return ret_value;
|
||||
return re_compiled_code_p;
|
||||
} /* re_compile_bytecode */
|
||||
|
||||
/**
|
||||
|
||||
@@ -20,7 +20,6 @@
|
||||
|
||||
#include "ecma-globals.h"
|
||||
#include "re-bytecode.h"
|
||||
#include "re-parser.h"
|
||||
|
||||
/** \addtogroup parser Parser
|
||||
* @{
|
||||
@@ -32,24 +31,10 @@
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Context of RegExp compiler
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint16_t flags; /**< RegExp flags */
|
||||
uint32_t captures_count; /**< number of capture groups */
|
||||
uint32_t non_captures_count; /**< number of non-capture groups */
|
||||
uint32_t highest_backref; /**< highest backreference */
|
||||
re_bytecode_ctx_t *bytecode_ctx_p; /**< pointer of RegExp bytecode context */
|
||||
re_token_t current_token; /**< current token */
|
||||
re_parser_ctx_t *parser_ctx_p; /**< pointer of RegExp parser context */
|
||||
} re_compiler_ctx_t;
|
||||
re_compiled_code_t *
|
||||
re_compile_bytecode (ecma_string_t *pattern_str_p, uint16_t flags);
|
||||
|
||||
ecma_value_t
|
||||
re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, ecma_string_t *pattern_str_p, uint16_t flags);
|
||||
|
||||
void re_cache_gc_run (void);
|
||||
void re_cache_gc (void);
|
||||
|
||||
/**
|
||||
* @}
|
||||
|
||||
+1114
-433
File diff suppressed because it is too large
Load Diff
@@ -18,45 +18,18 @@
|
||||
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
|
||||
#include "re-compiler-context.h"
|
||||
|
||||
/** \addtogroup parser Parser
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser Regular expression
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser_bytecode Bytecode
|
||||
* \addtogroup regexparser_parser Parser
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* RegExp token type definitions
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
RE_TOK_EOF, /**< EOF */
|
||||
RE_TOK_BACKREFERENCE, /**< "\[0..9]" */
|
||||
RE_TOK_CHAR, /**< any character */
|
||||
RE_TOK_ALTERNATIVE, /**< "|" */
|
||||
RE_TOK_ASSERT_START, /**< "^" */
|
||||
RE_TOK_ASSERT_END, /**< "$" */
|
||||
RE_TOK_PERIOD, /**< "." */
|
||||
RE_TOK_START_CAPTURE_GROUP, /**< "(" */
|
||||
RE_TOK_START_NON_CAPTURE_GROUP, /**< "(?:" */
|
||||
RE_TOK_END_GROUP, /**< ")" */
|
||||
RE_TOK_ASSERT_START_POS_LOOKAHEAD, /**< "(?=" */
|
||||
RE_TOK_ASSERT_START_NEG_LOOKAHEAD, /**< "(?!" */
|
||||
RE_TOK_ASSERT_WORD_BOUNDARY, /**< "\b" */
|
||||
RE_TOK_ASSERT_NOT_WORD_BOUNDARY, /**< "\B" */
|
||||
RE_TOK_DIGIT, /**< "\d" */
|
||||
RE_TOK_NOT_DIGIT, /**< "\D" */
|
||||
RE_TOK_WHITE, /**< "\s" */
|
||||
RE_TOK_NOT_WHITE, /**< "\S" */
|
||||
RE_TOK_WORD_CHAR, /**< "\w" */
|
||||
RE_TOK_NOT_WORD_CHAR, /**< "\W" */
|
||||
RE_TOK_START_CHAR_CLASS, /**< "[ ]" */
|
||||
RE_TOK_START_INV_CHAR_CLASS, /**< "[^ ]" */
|
||||
} re_token_type_t;
|
||||
|
||||
/**
|
||||
* @}
|
||||
*
|
||||
@@ -65,43 +38,16 @@ typedef enum
|
||||
*/
|
||||
|
||||
/**
|
||||
* RegExp constant of infinite
|
||||
* Value used for infinite quantifier.
|
||||
*/
|
||||
#define RE_ITERATOR_INFINITE ((uint32_t) - 1)
|
||||
#define RE_INFINITY UINT32_MAX
|
||||
|
||||
/**
|
||||
* Maximum number of decimal escape digits
|
||||
* Maximum decimal value of an octal escape
|
||||
*/
|
||||
#define RE_MAX_RE_DECESC_DIGITS 9
|
||||
#define RE_MAX_OCTAL_VALUE 0xff
|
||||
|
||||
/**
|
||||
* RegExp token type
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
re_token_type_t type; /**< type of the token */
|
||||
uint32_t value; /**< value of the token */
|
||||
uint32_t qmin; /**< minimum number of token iterations */
|
||||
uint32_t qmax; /**< maximum number of token iterations */
|
||||
bool greedy; /**< type of iteration */
|
||||
} re_token_t;
|
||||
|
||||
/**
|
||||
* RegExp parser context
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
const lit_utf8_byte_t *input_start_p; /**< start of input pattern */
|
||||
const lit_utf8_byte_t *input_curr_p; /**< current position in input pattern */
|
||||
const lit_utf8_byte_t *input_end_p; /**< end of input pattern */
|
||||
int groups_count; /**< number of groups */
|
||||
uint32_t classes_count; /**< number of character classes */
|
||||
} re_parser_ctx_t;
|
||||
|
||||
bool re_hex_lookup (re_parser_ctx_t *parser_ctx_p, uint32_t lookup);
|
||||
uint32_t re_parse_octal (re_parser_ctx_t *parser_ctx_p);
|
||||
ecma_value_t re_parse_iterator (re_parser_ctx_t *parser_ctx_p, re_token_t *re_token_p);
|
||||
ecma_value_t re_parse_next_token (re_parser_ctx_t *parser_ctx_p, re_token_t *out_token_p);
|
||||
ecma_value_t re_parse_alternative (re_compiler_ctx_t *re_ctx_p, bool expect_eof);
|
||||
|
||||
/**
|
||||
* @}
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
/* Copyright JS Foundation and other contributors, http://js.foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef RE_TOKEN_H
|
||||
#define RE_TOKEN_H
|
||||
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
|
||||
/** \addtogroup parser Parser
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser Regular expression
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser_parser Parser
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* RegExp token type definitions
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
RE_TOK_EOF, /**< EOF */
|
||||
RE_TOK_BACKREFERENCE, /**< "\[0..9]" */
|
||||
RE_TOK_ALTERNATIVE, /**< "|" */
|
||||
RE_TOK_ASSERT_START, /**< "^" */
|
||||
RE_TOK_ASSERT_END, /**< "$" */
|
||||
RE_TOK_PERIOD, /**< "." */
|
||||
RE_TOK_START_CAPTURE_GROUP, /**< "(" */
|
||||
RE_TOK_START_NON_CAPTURE_GROUP, /**< "(?:" */
|
||||
RE_TOK_END_GROUP, /**< ")" */
|
||||
RE_TOK_ASSERT_LOOKAHEAD, /**< "(?=" */
|
||||
RE_TOK_ASSERT_WORD_BOUNDARY, /**< "\b" */
|
||||
RE_TOK_ASSERT_NOT_WORD_BOUNDARY, /**< "\B" */
|
||||
RE_TOK_CLASS_ESCAPE, /**< "\d \D \w \W \s \S" */
|
||||
RE_TOK_CHAR_CLASS, /**< "[ ]" */
|
||||
RE_TOK_CHAR, /**< any character */
|
||||
} re_token_type_t;
|
||||
|
||||
/**
|
||||
* RegExp token
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint32_t value; /**< value of the token */
|
||||
uint32_t qmin; /**< minimum number of token iterations */
|
||||
uint32_t qmax; /**< maximum number of token iterations */
|
||||
re_token_type_t type; /**< type of the token */
|
||||
bool greedy; /**< type of iteration */
|
||||
} re_token_t;
|
||||
|
||||
/**
|
||||
* @}
|
||||
* @}
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
|
||||
#endif /* !RE_TOKEN_H */
|
||||
@@ -0,0 +1,361 @@
|
||||
// Copyright JS Foundation and other contributors, http://js.foundation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
var result = /\0/.exec("\u0000");
|
||||
assert (result !== null);
|
||||
assert (result[0] === "\u0000");
|
||||
|
||||
result = /\0/u.exec("\u0000");
|
||||
assert (result !== null);
|
||||
assert (result[0] === "\u0000");
|
||||
|
||||
result = /\000/.exec("\u0000");
|
||||
assert (result !== null);
|
||||
assert (result[0] === "\u0000");
|
||||
|
||||
try {
|
||||
new RegExp("\\000", 'u').exec("\u0000");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
result = /\0000/.exec("\u0000\u0030");
|
||||
assert (result !== null);
|
||||
assert (result[0] === "\u0000\u0030");
|
||||
|
||||
result = /\377/.exec("\u00ff");
|
||||
assert (result !== null);
|
||||
assert (result[0] === "\u00ff");
|
||||
|
||||
try {
|
||||
new RegExp("\\377", 'u').exec("\u00ff");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
result = /\3777/.exec("\u00ff\u0037");
|
||||
assert (result !== null);
|
||||
assert (result[0] === "\u00ff\u0037");
|
||||
|
||||
try {
|
||||
new RegExp("\\3777", 'u').exec("\u00ff\u0037");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
result = /\400/.exec("\u0020\u0030");
|
||||
assert (result !== null);
|
||||
assert (result[0] === "\u0020\u0030");
|
||||
|
||||
try {
|
||||
new RegExp("\\400", 'u').exec("\u0020\u0030");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
result = /(\1)/.exec("\u0001");
|
||||
assert (result !== null);
|
||||
assert (result[0].length === 0);
|
||||
|
||||
result = /(\1)/u.exec("\u0001");
|
||||
assert (result !== null);
|
||||
assert (result[0].length === 0);
|
||||
|
||||
result = /(\2)/.exec("\u0002");
|
||||
assert (result !== null);
|
||||
assert (result[0] === '\u0002');
|
||||
|
||||
try {
|
||||
new RegExp("(\\2)", 'u').exec("\u0002");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
result = /\8/.exec("\u0038");
|
||||
assert (result !== null);
|
||||
assert (result[0] === '8');
|
||||
|
||||
result = /\99/.exec("\u0039\u0039");
|
||||
assert (result !== null);
|
||||
assert (result[0] === "99");
|
||||
|
||||
// CharClassEscape
|
||||
assert (/\d+/.exec("123")[0] === "123");
|
||||
assert (/\D+/.exec("abc")[0] === "abc");
|
||||
assert (/\s+/.exec(" ")[0] === " ");
|
||||
assert (/\S+/.exec("abc")[0] === "abc");
|
||||
assert (/\w+/.exec("abc")[0] === "abc");
|
||||
assert (/\W+/.exec("|||")[0] === "|||");
|
||||
assert (/\d+/u.exec("123")[0] === "123");
|
||||
assert (/\D+/u.exec("abc")[0] === "abc");
|
||||
assert (/\s+/u.exec(" ")[0] === " ");
|
||||
assert (/\S+/u.exec("abc")[0] === "abc");
|
||||
assert (/\w+/u.exec("abc")[0] === "abc");
|
||||
assert (/\W+/u.exec("|||")[0] === "|||");
|
||||
|
||||
assert (/\d+/u.exec("\u{10CAF}") === null);
|
||||
assert (/\D+/u.exec("\u{10CAF}")[0] === "\u{10CAF}");
|
||||
assert (/\s+/u.exec("\u{10CAF}") === null);
|
||||
assert (/\S+/u.exec("\u{10CAF}")[0] === "\u{10CAF}");
|
||||
assert (/\w+/u.exec("\u{10CAF}") === null);
|
||||
assert (/\W+/u.exec("\u{10CAF}")[0] === "\u{10CAF}");
|
||||
|
||||
result = /\xz/.exec("xz");
|
||||
assert (result !== null);
|
||||
assert (result[0] === "xz");
|
||||
|
||||
try {
|
||||
new RegExp("\\xz", "u").exec("xz");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
result = /\c/.exec("\\c");
|
||||
assert (result !== null);
|
||||
assert (result[0] === "\\c");
|
||||
|
||||
try {
|
||||
new RegExp("\\c", 'u').exec("\\c")
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
result = /\c1/.exec("\\c1");
|
||||
assert (result !== null);
|
||||
assert (result[0] === "\\c1");
|
||||
|
||||
try {
|
||||
new RegExp("\\c1", 'u').exec("\\c1");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
try {
|
||||
new RegExp("^+");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
try {
|
||||
new RegExp("$+");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
try {
|
||||
new RegExp("\\b+");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
try {
|
||||
new RegExp("\\B+");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
assert (/[\b]/.exec("\u0008")[0] === "\u0008");
|
||||
assert (/[\b]/u.exec("\u0008")[0] === "\u0008");
|
||||
assert (/[\B]/.exec("\u0042")[0] === "\u0042");
|
||||
|
||||
try {
|
||||
new RegExp ("[\\B]", 'u').exec("\u0042");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
assert (/[\c1]/.exec("\u0011")[0] === "\u0011");
|
||||
assert (/[\c_]/.exec("\u001f")[0] === "\u001f");
|
||||
assert (/[\c]/.exec("\\")[0] === "\\");
|
||||
assert (/[\c]/.exec("c")[0] === "c");
|
||||
|
||||
try {
|
||||
new RegExp("[\\c1]", 'u');
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
try {
|
||||
new RegExp("[\\c]", 'u');
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
try {
|
||||
new RegExp("[\\c_]", 'u');
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
assert (/{{1,2}/.exec("{{")[0] === "{{");
|
||||
|
||||
try {
|
||||
new RegExp("{{1,2}", 'u').exec("{{");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
assert (/a{1,2/.exec("a{1,2")[0] === "a{1,2");
|
||||
|
||||
try {
|
||||
new RegExp("a{1,2", 'u').exec("a{1,2");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
assert (/\u017f/i.exec("s") === null);
|
||||
assert (/\u017f/ui.exec("s")[0] === "s");
|
||||
|
||||
assert (/𐲯/.exec("𐲯")[0] === "𐲯");
|
||||
assert (/𐲯/u.exec("𐲯")[0] === "𐲯");
|
||||
assert (/𐲯*?/.exec("𐲯")[0] === "\ud803");
|
||||
assert (/𐲯*?/u.exec("𐲯")[0] === "");
|
||||
assert (/𐲯+/.exec("𐲯𐲯𐲯")[0] === "𐲯");
|
||||
assert (/𐲯+/u.exec("𐲯𐲯𐲯")[0] === "𐲯𐲯𐲯");
|
||||
|
||||
assert (/\ud803\udc96*?/.exec("𐲖")[0] === '\ud803');
|
||||
assert (/\ud803\udc96*?/u.exec("𐲖")[0] === '');
|
||||
assert (/\ud803\udc96+/.exec("𐲖𐲖𐲖")[0] === '𐲖');
|
||||
assert (/\ud803\udc96+/u.exec("𐲖𐲖𐲖")[0] === '𐲖𐲖𐲖');
|
||||
|
||||
assert (/.*𐲗𐲘/u.exec("𐲓𐲔𐲕𐲖𐲗𐲘")[0] === '𐲓𐲔𐲕𐲖𐲗𐲘');
|
||||
|
||||
assert (/[\u{10000}]/.exec("\u{10000}") === null);
|
||||
assert (/[\u{10000}]/.exec("{")[0] === "{");
|
||||
assert (/[^\u{10000}]/.exec("\u{10000}")[0] === "\ud800");
|
||||
assert (/[^\u{10000}]/.exec("{") === null);
|
||||
|
||||
assert (/[\uffff]/.exec("\uffff")[0] === "\uffff");
|
||||
assert (/[^\uffff]/.exec("\uffff") === null);
|
||||
|
||||
assert (/[\u{10000}]/u.exec("\u{10000}")[0] === "\u{10000}");
|
||||
assert (/[\u{10000}]/u.exec("{") === null);
|
||||
assert (/[^\u{10000}]/u.exec("\u{10000}") === null);
|
||||
assert (/[^\u{10000}]/u.exec("{")[0] === "{");
|
||||
|
||||
assert (/[\uffff]/u.exec("\uffff")[0] === "\uffff");
|
||||
assert (/[^\uffff]/u.exec("\uffff") === null);
|
||||
|
||||
assert (/a{4294967296,4294967297}/.exec("aaaa") === null);
|
||||
assert (/a{4294967294,4294967295}/.exec("aaaa") === null);
|
||||
assert (/a{0000000000000000001,0000000000000000002}/u.exec("aaaa")[0] === 'aa');
|
||||
assert (/(\4294967297)/.exec("\4294967297")[0] === "\4294967297");
|
||||
assert (/(\1)/u.exec("aaaa")[0] === "");
|
||||
|
||||
try {
|
||||
new RegExp("a{4294967295,4294967294}", '');
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
assert (/[\d-\s]/.exec("-")[0] === "-");
|
||||
assert (/[0-\s]/.exec("-")[0] === "-");
|
||||
assert (/[\d-0]/.exec("-")[0] === "-");
|
||||
|
||||
try {
|
||||
new RegExp("[\\d-\\s]", 'u').exec("-");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
try {
|
||||
new RegExp("[0-\\s]", 'u').exec("-");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
try {
|
||||
new RegExp("[\\d-0]", 'u').exec("-");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
assert (/[-]/.exec("-")[0] === "-");
|
||||
assert (/[-]/u.exec("-")[0] === "-");
|
||||
assert (/[--]/.exec("-")[0] === "-");
|
||||
assert (/[--]/u.exec("-")[0] === "-");
|
||||
|
||||
assert (/}/.exec("}")[0] === "}");
|
||||
assert (/\}/u.exec("}")[0] === "}");
|
||||
|
||||
try {
|
||||
new RegExp("}", 'u').exec("}");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
assert (/]/.exec("]")[0] === "]");
|
||||
assert (/\]/u.exec("]")[0] === "]");
|
||||
|
||||
try {
|
||||
new RegExp("]", 'u').exec("]");
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
assert (/(?=)*/.exec("")[0] === "");
|
||||
assert (/(?=)+/.exec("")[0] === "");
|
||||
assert (/(?=){1,2}/.exec("")[0] === "");
|
||||
|
||||
try {
|
||||
new RegExp("(?=)*", 'u');
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
try {
|
||||
new RegExp("(?=)+", 'u');
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
try {
|
||||
new RegExp("(?=){1,2}", 'u');
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
|
||||
try {
|
||||
new RegExp("(?=){2,1}", '');
|
||||
assert (false);
|
||||
} catch (e) {
|
||||
assert (e instanceof SyntaxError);
|
||||
}
|
||||
@@ -58,3 +58,6 @@ assert (r.exec("a") == "a");
|
||||
|
||||
r = new RegExp ("a|bb|c|d");
|
||||
assert (r.exec("b") == undefined);
|
||||
|
||||
r = new RegExp("(?:a|b)\\b|\\.\\w+", "g");
|
||||
assert (r.exec("name.lower()")[0] === ".lower")
|
||||
|
||||
@@ -24,3 +24,6 @@ assert (r == undefined);
|
||||
r = new RegExp ("(a)*b\\1").exec("b");
|
||||
assert (r[0] == "b");
|
||||
assert (r[1] == undefined);
|
||||
|
||||
assert (JSON.stringify (/[[]?(a)\1/.exec("aa")) === '["aa","a"]');
|
||||
assert (JSON.stringify (/\1{2,5}()\B/.exec("asd")) === '["",""]');
|
||||
|
||||
@@ -0,0 +1,115 @@
|
||||
// Copyright JS Foundation and other contributors, http://js.foundation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
assert (JSON.stringify (/(?:(a)*){3,}/.exec("aaaab")) === '["aaaa",null]');
|
||||
assert (JSON.stringify (/((a)*){3,}/.exec("aaaab")) === '["aaaa","",null]');
|
||||
assert (JSON.stringify (/((a)+){3,}/.exec("aaaab")) === '["aaaa","a","a"]');
|
||||
assert (JSON.stringify (/((.)*){3,}/.exec("abcd")) === '["abcd","",null]');
|
||||
assert (JSON.stringify (/((.)+){3,}/.exec("abcd")) === '["abcd","d","d"]');
|
||||
|
||||
assert (JSON.stringify (/((.){1,2}){1,2}/.exec("abc")) === '["abc","c","c"]');
|
||||
assert (JSON.stringify (/(?:(a)*?)asd/.exec("aaasd")) === '["aaasd","a"]');
|
||||
assert (JSON.stringify (/(?:(a)*)asd/.exec("aaasd")) === '["aaasd","a"]');
|
||||
|
||||
assert (JSON.stringify (/(.)*((a)*|(b)*)/.exec("ab")) === '["ab","b","",null,null]');
|
||||
assert (JSON.stringify (/(.)*((x)|(y))+/.exec("xy")) === '["xy","x","y",null,"y"]');
|
||||
assert (JSON.stringify (/(.)*((y)|(x))+/.exec("xy")) === '["xy","x","y","y",null]');
|
||||
|
||||
assert (JSON.stringify (/((?:a)*)/.exec("aaaad")) === '["aaaa","aaaa"]');
|
||||
assert (JSON.stringify (/((y)+|x)+/.exec("x")) === '["x","x",null]');
|
||||
assert (JSON.stringify (/((?:y)*|x)+/.exec("x")) === '["x","x"]');
|
||||
assert (JSON.stringify (/((y)*|x)+/.exec("x")) === '["x","x",null]');
|
||||
assert (JSON.stringify (/((y)*|x)*/.exec("x")) === '["x","x",null]');
|
||||
assert (JSON.stringify (/(?:(y)*|x)*/.exec("x")) === '["x",null]');
|
||||
assert (JSON.stringify (/(?:(y)*|(x))*/.exec("x")) === '["x",null,"x"]');
|
||||
|
||||
assert (JSON.stringify (/((?:a)*)asd/.exec("aaasd")) === '["aaasd","aa"]');
|
||||
assert (JSON.stringify (/((?:a)+)asd/.exec("aaasd")) === '["aaasd","aa"]');
|
||||
assert (JSON.stringify (/((?:a)*?)asd/.exec("aaasd")) === '["aaasd","aa"]');
|
||||
assert (JSON.stringify (/((?:a)+?)asd/.exec("aaasd")) === '["aaasd","aa"]');
|
||||
|
||||
assert (JSON.stringify (/((y)|(z)|(a))*/.exec("yazx")) === '["yaz","z",null,"z",null]');
|
||||
assert (JSON.stringify (/((y)|(z)|(.))*/.exec("yaz")) === '["yaz","z",null,"z",null]');
|
||||
assert (JSON.stringify (/((y)*|(z)*|(a)*)*/.exec("yazx")) === '["yaz","z",null,"z",null]')
|
||||
assert (JSON.stringify (/((y)|(z)|(a))*/.exec("yazx")) === '["yaz","z",null,"z",null]')
|
||||
assert (JSON.stringify (/(?:(y)|(z)|(a))*/.exec("yazx")) === '["yaz",null,"z",null]')
|
||||
assert (JSON.stringify (/((y)|(z)|(a))+?/.exec("yazx")) === '["y","y","y",null,null]')
|
||||
assert (JSON.stringify (/(?:(y)|(z)|(a))+?/.exec("yazx")) === '["y","y",null,null]')
|
||||
|
||||
assert (JSON.stringify (/(?:(x|y)*|z)*/.exec("yz")) === '["yz",null]');
|
||||
assert (JSON.stringify (/((x|y)*|z)*/.exec("yz")) == '["yz","z",null]');
|
||||
assert (JSON.stringify (/(((x|y)*|(v|w)*|z)*)asd/.exec("xyzwvxzasd")) === '["xyzwvxzasd","xyzwvxz","z",null,null]');
|
||||
|
||||
assert (JSON.stringify (/((a)*){1,3}b/.exec("ab")) === '["ab","a","a"]')
|
||||
assert (JSON.stringify (/((a)*){2,3}b/.exec("ab")) === '["ab","",null]')
|
||||
assert (JSON.stringify (/((a)*){3,3}b/.exec("ab")) === '["ab","",null]')
|
||||
|
||||
assert (JSON.stringify (/((a)*){3,}b/.exec("aaaab")) === '["aaaab","",null]');
|
||||
assert (JSON.stringify (/((a)*)*b/.exec("aaaab")) === '["aaaab","aaaa","a"]');
|
||||
|
||||
assert (JSON.stringify (/((bb?)*)*a/.exec("bbba")) === '["bbba","bbb","b"]');
|
||||
assert (JSON.stringify (/((b)*)*a/.exec("bbba")) === '["bbba","bbb","b"]');
|
||||
|
||||
assert (JSON.stringify (/(aa|a)a/.exec("aa")) === '["aa","a"]');
|
||||
assert (JSON.stringify (/(aa|a)?a/.exec("aa")) === '["aa","a"]');
|
||||
assert (JSON.stringify (/(aa|a)+?a/.exec("aa")) === '["aa","a"]');
|
||||
assert (JSON.stringify (/(?:aa|a)a/.exec("aa")) === '["aa"]');
|
||||
assert (JSON.stringify (/(?:aa|a)?a/.exec("aa")) === '["aa"]');
|
||||
assert (JSON.stringify (/(?:aa|a)+?a/.exec("aa")) === '["aa"]');
|
||||
|
||||
assert (JSON.stringify (/(aa|a)a/.exec("a")) === 'null');
|
||||
assert (JSON.stringify (/(aa|a)?a/.exec("a")) === '["a",null]');
|
||||
assert (JSON.stringify (/(aa|a)+?a/.exec("a")) === 'null');
|
||||
assert (JSON.stringify (/(?:aa|a)a/.exec("a")) === 'null');
|
||||
assert (JSON.stringify (/(?:aa|a)?a/.exec("a")) === '["a"]');
|
||||
assert (JSON.stringify (/(?:aa|a)+?a/.exec("a")) === 'null');
|
||||
|
||||
assert (JSON.stringify (/a+/.exec("aaasd")) === '["aaa"]');
|
||||
assert (JSON.stringify (/a+?/.exec("aaasd")) === '["a"]');
|
||||
|
||||
assert (JSON.stringify (/a+sd/.exec("aaasd")) === '["aaasd"]');
|
||||
assert (JSON.stringify (/a+?sd/.exec("aaasd")) === '["aaasd"]');
|
||||
|
||||
assert (JSON.stringify (/a{2}sd/.exec("aaasd")) === '["aasd"]');
|
||||
assert (JSON.stringify (/a{3}sd/.exec("aaasd")) === '["aaasd"]');
|
||||
|
||||
assert (JSON.stringify (/(?=a)/.exec("a")) === '[""]');
|
||||
assert (JSON.stringify (/(?=a)+/.exec("a")) === '[""]');
|
||||
assert (JSON.stringify (/(?=a)*/.exec("a")) === '[""]');
|
||||
assert (JSON.stringify (/(?=(a))?/.exec("a")) === '["",null]');
|
||||
assert (JSON.stringify (/(?=(a))+?/.exec("a")) === '["","a"]');
|
||||
assert (JSON.stringify (/(?=(a))*?/.exec("a")) === '["",null]');
|
||||
|
||||
assert (JSON.stringify (/(?!a)/.exec("a")) === '[""]');
|
||||
assert (JSON.stringify (/(?!a)+/.exec("a")) === '[""]');
|
||||
assert (JSON.stringify (/(?!a)*/.exec("a")) === '[""]');
|
||||
assert (JSON.stringify (/(?!(a))?/.exec("a")) === '["",null]');
|
||||
assert (JSON.stringify (/(?!(a))+?/.exec("a")) === '["",null]');
|
||||
assert (JSON.stringify (/(?!(a))*?/.exec("a")) === '["",null]');
|
||||
|
||||
assert (JSON.stringify (/al(?=(ma))*ma/.exec("alma")) === '["alma",null]');
|
||||
assert (JSON.stringify (/al(?!(ma))*ma/.exec("alma")) === '["alma",null]');
|
||||
assert (JSON.stringify (/al(?=(ma))+ma/.exec("alma")) === '["alma","ma"]');
|
||||
assert (JSON.stringify (/al(?!(ma))+ma/.exec("alma")) === 'null');
|
||||
|
||||
assert (JSON.stringify (/(?=())x|/.exec("asd")) === '["",null]');
|
||||
assert (JSON.stringify (/(?!())x|/.exec("asd")) === '["",null]');
|
||||
|
||||
assert (JSON.stringify (/(().*)+.$/.exec("abcdefg")) === '["abcdefg","abcdef",""]');
|
||||
assert (JSON.stringify (/(().*)+?.$/.exec("abcdefg")) === '["abcdefg","abcdef",""]');
|
||||
assert (JSON.stringify (/(?:().*)+.$/.exec("abcdefg")) === '["abcdefg",""]');
|
||||
assert (JSON.stringify (/(?:().*)+?.$/.exec("abcdefg")) === '["abcdefg",""]');
|
||||
|
||||
assert (JSON.stringify(/((?=())|.)+^/.exec("a")) === '["","",""]');
|
||||
assert (JSON.stringify(/(?:(|\b\w+?){2})+$/.exec("aaaa")) === '["aaaa","aaaa"]');
|
||||
@@ -196,3 +196,12 @@ assert (r.exec("aa") == "aa,a");
|
||||
|
||||
r = new RegExp ("(a{0,1}?){0,1}a");
|
||||
assert (r.exec("aa") == "aa,a");
|
||||
|
||||
r = new RegExp ("(|.)+");
|
||||
assert (JSON.stringify (r.exec("asdfgh")) === '["asdfgh","h"]');
|
||||
|
||||
assert (JSON.stringify (/([^\W](){8,}?){5}/.exec("asdfghijk")) === '["asdfg","g",""]');
|
||||
assert (JSON.stringify (/(()+?(.+)|){3,}./u.exec("asdfghi")) === '["asdfghi","",null,null]')
|
||||
assert (JSON.stringify (/(()+?(.+)|){3,}?./u.exec("asdfghi")) === '["asdfghi","",null,null]')
|
||||
assert (JSON.stringify (/(?:()+?(.+)|){3,}./u.exec("asdfghi")) === '["asdfghi",null,null]')
|
||||
assert (JSON.stringify (/(?:()+?(.+)|){3,}?./u.exec("asdfghi")) === '["asdfghi",null,null]')
|
||||
|
||||
@@ -88,3 +88,6 @@ assert (r.exec ("\\c3") == "\\c3");
|
||||
|
||||
r = /\cIasd/;
|
||||
assert (r.exec ("\tasd") == "\tasd");
|
||||
|
||||
r = /.??$/;
|
||||
assert (JSON.stringify (r.exec("asd")) === '["d"]');
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
try {
|
||||
/(?:(?=x)){1000}xyz/.exec('xyz');
|
||||
/(?:(?=x)){10000}xyz/.exec('xyz');
|
||||
assert(false);
|
||||
} catch (e) {
|
||||
assert(e instanceof RangeError);
|
||||
|
||||
@@ -85,3 +85,5 @@ assert("\u000A\u000D\u2028\u202911".trim() === "11");
|
||||
|
||||
assert("\u0009\u000B\u000C\u0020\u00A01\u0009\u000B\u000C\u0020\u00A0".trim() === "1");
|
||||
assert("\u000A\u000D\u2028\u202911\u000A\u000D\u2028\u2029".trim() === "11");
|
||||
|
||||
assert ("\u200B".trim() === '\u200B')
|
||||
|
||||
Reference in New Issue
Block a user