RegExp refactoring and improvements
Move RegExp bytecode functions to a separate file. Optimize bytecode length for character matching. Implement a basic RegExp cache to optimize memory usage for duplicated RegExps in JS files. Also fix minor style issues and add missing comments. Improve existing comments. JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com
This commit is contained in:
@@ -32,6 +32,7 @@
|
||||
#include "jrt.h"
|
||||
#include "jrt-libc-includes.h"
|
||||
#include "jrt-bit-fields.h"
|
||||
#include "re-compiler.h"
|
||||
#include "vm-defines.h"
|
||||
#include "vm-stack.h"
|
||||
|
||||
@@ -549,6 +550,11 @@ ecma_gc_run (void)
|
||||
ecma_gc_objects_lists[ECMA_GC_COLOR_BLACK] = NULL;
|
||||
|
||||
ecma_gc_visited_flip_flag = !ecma_gc_visited_flip_flag;
|
||||
|
||||
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
|
||||
/* Free RegExp bytecodes stored in cache */
|
||||
re_cache_gc_run ();
|
||||
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
} /* ecma_gc_run */
|
||||
|
||||
/**
|
||||
|
||||
@@ -53,7 +53,9 @@
|
||||
* See also:
|
||||
* ECMA-262 v5, B.2.5.1
|
||||
*
|
||||
* @return ecma value
|
||||
* @return undefined - if compiled successfully
|
||||
* error ecma value - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value.
|
||||
*/
|
||||
static ecma_value_t
|
||||
@@ -240,8 +242,10 @@ ecma_builtin_regexp_prototype_compile (ecma_value_t this_arg, /**< this argument
|
||||
* See also:
|
||||
* ECMA-262 v5, 15.10.6.2
|
||||
*
|
||||
* @return ecma value
|
||||
* Returned value must be freed with ecma_free_value.
|
||||
* @return array object containing the results - if the matched
|
||||
* null - otherwise
|
||||
*
|
||||
* May raise error, so returned value must be freed with ecma_free_value.
|
||||
*/
|
||||
static ecma_value_t
|
||||
ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
|
||||
@@ -314,8 +318,10 @@ ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
|
||||
* See also:
|
||||
* ECMA-262 v5, 15.10.6.3
|
||||
*
|
||||
* @return ecma value
|
||||
* Returned value must be freed with ecma_free_value.
|
||||
* @return true - if match is not null
|
||||
* false - otherwise
|
||||
*
|
||||
* May raise error, so returned value must be freed with ecma_free_value.
|
||||
*/
|
||||
static ecma_value_t
|
||||
ecma_builtin_regexp_prototype_test (ecma_value_t this_arg, /**< this argument */
|
||||
@@ -439,4 +445,4 @@ ecma_builtin_regexp_prototype_to_string (ecma_value_t this_arg) /**< this argume
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
* Handle calling [[Call]] of built-in RegExp object
|
||||
*
|
||||
* @return ecma value
|
||||
* Returned value must be freed with ecma_free_value.
|
||||
*/
|
||||
ecma_value_t
|
||||
ecma_builtin_regexp_dispatch_call (const ecma_value_t *arguments_list_p, /**< arguments list */
|
||||
@@ -58,6 +59,7 @@ ecma_builtin_regexp_dispatch_call (const ecma_value_t *arguments_list_p, /**< ar
|
||||
* Handle calling [[Construct]] of built-in RegExp object
|
||||
*
|
||||
* @return ecma value
|
||||
* Returned value must be freed with ecma_free_value.
|
||||
*/
|
||||
ecma_value_t
|
||||
ecma_builtin_regexp_dispatch_construct (const ecma_value_t *arguments_list_p, /**< arguments list */
|
||||
@@ -152,4 +154,4 @@ ecma_builtin_regexp_dispatch_construct (const ecma_value_t *arguments_list_p, /*
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
|
||||
@@ -73,7 +73,7 @@ ROUTINE (LIT_MAGIC_STRING_MATCH, ecma_builtin_string_prototype_object_match, 1,
|
||||
ROUTINE (LIT_MAGIC_STRING_REPLACE, ecma_builtin_string_prototype_object_replace, 2, 2)
|
||||
ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1)
|
||||
ROUTINE (LIT_MAGIC_STRING_SPLIT, ecma_builtin_string_prototype_object_split, 2, 2)
|
||||
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
|
||||
ROUTINE (LIT_MAGIC_STRING_SUBSTRING, ecma_builtin_string_prototype_object_substring, 2, 2)
|
||||
ROUTINE (LIT_MAGIC_STRING_TO_LOWER_CASE_UL, ecma_builtin_string_prototype_object_to_lower_case, 0, 0)
|
||||
@@ -84,7 +84,7 @@ ROUTINE (LIT_MAGIC_STRING_TRIM, ecma_builtin_string_prototype_object_trim, 0, 0)
|
||||
|
||||
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN
|
||||
ROUTINE (LIT_MAGIC_STRING_SUBSTR, ecma_builtin_string_prototype_object_substr, 2, 2)
|
||||
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN */
|
||||
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN */
|
||||
|
||||
#undef OBJECT_ID
|
||||
#undef SIMPLE_VALUE
|
||||
|
||||
@@ -958,13 +958,13 @@ ecma_object_get_class_name (ecma_object_t *obj_p) /**< object */
|
||||
{
|
||||
return LIT_MAGIC_STRING_DATE_UL;
|
||||
}
|
||||
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_DATE_BUILTIN */
|
||||
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_DATE_BUILTIN */
|
||||
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
|
||||
case ECMA_BUILTIN_ID_REGEXP_PROTOTYPE:
|
||||
{
|
||||
return LIT_MAGIC_STRING_REGEXP_UL;
|
||||
}
|
||||
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
default:
|
||||
{
|
||||
JERRY_ASSERT (ecma_builtin_is (obj_p, ECMA_BUILTIN_ID_GLOBAL));
|
||||
|
||||
@@ -55,12 +55,19 @@
|
||||
#define RE_GLOBAL_START_IDX 0
|
||||
#define RE_GLOBAL_END_IDX 1
|
||||
|
||||
/**
|
||||
* Check if a RegExp opcode is a capture group or not
|
||||
*/
|
||||
#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 1 : 0)
|
||||
|
||||
/**
|
||||
* Parse RegExp flags (global, ignoreCase, multiline)
|
||||
*
|
||||
* See also: ECMA-262 v5, 15.10.4.1
|
||||
*
|
||||
* @return ecma value
|
||||
* @return empty ecma value - if parsed successfully
|
||||
* error ecma value - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value
|
||||
*/
|
||||
ecma_value_t
|
||||
@@ -123,7 +130,7 @@ re_parse_regexp_flags (ecma_string_t *flags_str_p, /**< Input string with flags
|
||||
return ret_value;
|
||||
} /* re_parse_regexp_flags */
|
||||
|
||||
/*
|
||||
/**
|
||||
* Initializes the source, global, ignoreCase, multiline, and lastIndex properties of RegExp instance.
|
||||
*/
|
||||
void
|
||||
@@ -223,11 +230,11 @@ re_initialize_props (ecma_object_t *re_obj_p, /**< RegExp obejct */
|
||||
*
|
||||
* See also: ECMA-262 v5, 15.10.4.1
|
||||
*
|
||||
* @return ecma value
|
||||
* @return constructed RegExp object
|
||||
* Returned value must be freed with ecma_free_value
|
||||
*/
|
||||
ecma_value_t
|
||||
ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p) /**< input pattern */
|
||||
ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p) /**< RegExp bytecode */
|
||||
{
|
||||
JERRY_ASSERT (bytecode_p != NULL);
|
||||
|
||||
@@ -259,7 +266,9 @@ ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p) /**<
|
||||
*
|
||||
* See also: ECMA-262 v5, 15.10.4.1
|
||||
*
|
||||
* @return ecma value
|
||||
* @return constructed RegExp object - if pattern and flags were parsed successfully
|
||||
* error ecma value - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value
|
||||
*/
|
||||
ecma_value_t
|
||||
@@ -367,8 +376,10 @@ re_canonicalize (ecma_char_t ch, /**< character */
|
||||
* See also:
|
||||
* ECMA-262 v5, 15.10.2.1
|
||||
*
|
||||
* @return ecma value
|
||||
* Returned value must be freed with ecma_free_value
|
||||
* @return true - if matched
|
||||
* false - otherwise
|
||||
*
|
||||
* May raise error, so returned value must be freed with ecma_free_value
|
||||
*/
|
||||
static ecma_value_t
|
||||
re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
@@ -400,7 +411,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
}
|
||||
|
||||
bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
|
||||
ecma_char_t ch1 = (ecma_char_t) re_get_value (&bc_p); /* Already canonicalized. */
|
||||
ecma_char_t ch1 = (ecma_char_t) re_get_char (&bc_p); /* Already canonicalized. */
|
||||
ecma_char_t ch2 = re_canonicalize (lit_utf8_read_next (&str_curr_p), is_ignorecase);
|
||||
JERRY_DDLOG ("Character matching %d to %d: ", ch1, ch2);
|
||||
|
||||
@@ -613,8 +624,8 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
|
||||
while (num_of_ranges)
|
||||
{
|
||||
ecma_char_t ch1 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
|
||||
ecma_char_t ch2 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
|
||||
ecma_char_t ch1 = re_canonicalize (re_get_char (&bc_p), is_ignorecase);
|
||||
ecma_char_t ch2 = re_canonicalize (re_get_char (&bc_p), is_ignorecase);
|
||||
JERRY_DDLOG ("num_of_ranges=%d, ch1=%d, ch2=%d, curr_ch=%d; ",
|
||||
num_of_ranges, ch1, ch2, curr_ch);
|
||||
|
||||
@@ -698,6 +709,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
uint32_t offset = re_get_value (&bc_p);
|
||||
lit_utf8_byte_t *sub_str_p = NULL;
|
||||
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
|
||||
|
||||
if (ecma_is_value_true (match_value))
|
||||
{
|
||||
*out_str_p = sub_str_p;
|
||||
@@ -707,6 +719,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
{
|
||||
return match_value;
|
||||
}
|
||||
|
||||
bc_p += offset;
|
||||
old_bc_p = bc_p;
|
||||
}
|
||||
@@ -839,6 +852,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
{
|
||||
offset = re_get_value (&bc_p);
|
||||
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
|
||||
|
||||
if (ecma_is_value_true (match_value))
|
||||
{
|
||||
*out_str_p = sub_str_p;
|
||||
@@ -848,6 +862,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
{
|
||||
return match_value;
|
||||
}
|
||||
|
||||
bc_p += offset;
|
||||
old_bc_p = bc_p;
|
||||
}
|
||||
@@ -915,6 +930,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
|
||||
lit_utf8_byte_t *sub_str_p = NULL;
|
||||
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
|
||||
|
||||
if (ecma_is_value_true (match_value))
|
||||
{
|
||||
*out_str_p = sub_str_p;
|
||||
@@ -1225,7 +1241,13 @@ re_set_result_array_properties (ecma_object_t *array_obj_p, /**< result array */
|
||||
* RegExp helper function to start the recursive matching algorithm
|
||||
* and create the result Array object
|
||||
*
|
||||
* @return ecma value
|
||||
* See also:
|
||||
* ECMA-262 v5, 15.10.6.2
|
||||
*
|
||||
* @return array object - if matched
|
||||
* null - otherwise
|
||||
*
|
||||
* May raise error.
|
||||
* Returned value must be freed with ecma_free_value
|
||||
*/
|
||||
ecma_value_t
|
||||
@@ -1475,4 +1497,4 @@ ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
|
||||
@@ -32,48 +32,39 @@
|
||||
/**
|
||||
* RegExp flags
|
||||
*/
|
||||
#define RE_FLAG_GLOBAL (1u << 1) /* ECMA-262 v5, 15.10.7.2 */
|
||||
#define RE_FLAG_IGNORE_CASE (1u << 2) /* ECMA-262 v5, 15.10.7.3 */
|
||||
#define RE_FLAG_MULTILINE (1u << 3) /* ECMA-262 v5, 15.10.7.4 */
|
||||
typedef enum
|
||||
{
|
||||
RE_FLAG_GLOBAL = (1u << 1), /**< ECMA-262 v5, 15.10.7.2 */
|
||||
RE_FLAG_IGNORE_CASE = (1u << 2), /**< ECMA-262 v5, 15.10.7.3 */
|
||||
RE_FLAG_MULTILINE = (1u << 3) /**< ECMA-262 v5, 15.10.7.4 */
|
||||
} re_flags_t;
|
||||
|
||||
/**
|
||||
* RegExp executor context
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
|
||||
lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
|
||||
const lit_utf8_byte_t *input_start_p; /**< start of input pattern string */
|
||||
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
|
||||
uint32_t num_of_captures; /**< number of capture groups */
|
||||
uint32_t num_of_non_captures; /**< number of non-capture groups */
|
||||
uint32_t *num_of_iterations_p; /**< number of iterations */
|
||||
uint16_t flags; /**< RegExp flags */
|
||||
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
|
||||
uint32_t num_of_captures; /**< number of capture groups */
|
||||
uint32_t num_of_non_captures; /**< number of non-capture groups */
|
||||
uint32_t *num_of_iterations_p; /**< number of iterations */
|
||||
uint16_t flags; /**< RegExp flags */
|
||||
} re_matcher_ctx_t;
|
||||
|
||||
extern ecma_value_t
|
||||
ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *);
|
||||
|
||||
extern ecma_value_t
|
||||
ecma_op_create_regexp_object (ecma_string_t *, ecma_string_t *);
|
||||
|
||||
extern ecma_value_t
|
||||
ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool);
|
||||
|
||||
extern ecma_char_t
|
||||
re_canonicalize (ecma_char_t, bool);
|
||||
extern void
|
||||
re_set_result_array_properties (ecma_object_t *, ecma_string_t *, uint32_t, int32_t);
|
||||
|
||||
extern ecma_value_t
|
||||
re_parse_regexp_flags (ecma_string_t *, uint16_t *);
|
||||
|
||||
extern void
|
||||
re_initialize_props (ecma_object_t *, ecma_string_t *, uint16_t);
|
||||
ecma_value_t ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *);
|
||||
ecma_value_t ecma_op_create_regexp_object (ecma_string_t *, ecma_string_t *);
|
||||
ecma_value_t ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool);
|
||||
ecma_char_t re_canonicalize (ecma_char_t, bool);
|
||||
void re_set_result_array_properties (ecma_object_t *, ecma_string_t *, uint32_t, int32_t);
|
||||
ecma_value_t re_parse_regexp_flags (ecma_string_t *, uint16_t *);
|
||||
void re_initialize_props (ecma_object_t *, ecma_string_t *, uint16_t);
|
||||
|
||||
/**
|
||||
* @}
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
#endif /* !ECMA_REGEXP_OBJECT_H */
|
||||
|
||||
Reference in New Issue
Block a user