RegExp refactoring and improvements
Move RegExp bytecode functions to a separate file. Optimize bytecode lenght on character matching. Implement a basic RegExp cache to optimize memory usage on duplicated RegExp in JS files. Also fix minor style issues and add missing comments. Improve existing comments. JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com
This commit is contained in:
@@ -20,6 +20,7 @@
|
||||
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
|
||||
|
||||
#include "ecma-globals.h"
|
||||
#include "re-bytecode.h"
|
||||
#include "re-parser.h"
|
||||
|
||||
/** \addtogroup parser Parser
|
||||
@@ -32,71 +33,6 @@
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* RegExp opcodes
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
RE_OP_EOF,
|
||||
/* Group opcode order is important, because RE_IS_CAPTURE_GROUP is based on it.
|
||||
* Change it carefully. Capture opcodes should be at first.
|
||||
*/
|
||||
RE_OP_CAPTURE_GROUP_START, /**< group start */
|
||||
RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START, /**< greedy zero group start */
|
||||
RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START, /**< non-greedy zero group start */
|
||||
RE_OP_CAPTURE_GREEDY_GROUP_END, /**< greedy group end */
|
||||
RE_OP_CAPTURE_NON_GREEDY_GROUP_END, /**< non-greedy group end */
|
||||
RE_OP_NON_CAPTURE_GROUP_START, /**< non-capture group start */
|
||||
RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START, /**< non-capture greedy zero group start */
|
||||
RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START, /**< non-capture non-greedy zero group start */
|
||||
RE_OP_NON_CAPTURE_GREEDY_GROUP_END, /**< non-capture greedy group end */
|
||||
RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END, /**< non-capture non-greedy group end */
|
||||
|
||||
RE_OP_MATCH, /**< match */
|
||||
RE_OP_CHAR, /**< any character */
|
||||
RE_OP_SAVE_AT_START, /**< save at start */
|
||||
RE_OP_SAVE_AND_MATCH, /**< save and match */
|
||||
RE_OP_PERIOD, /**< "." */
|
||||
RE_OP_ALTERNATIVE, /**< "|" */
|
||||
RE_OP_GREEDY_ITERATOR, /**< greedy iterator */
|
||||
RE_OP_NON_GREEDY_ITERATOR, /**< non-greedy iterator */
|
||||
RE_OP_ASSERT_START, /**< "^" */
|
||||
RE_OP_ASSERT_END, /**< "$" */
|
||||
RE_OP_ASSERT_WORD_BOUNDARY, /**< "\b" */
|
||||
RE_OP_ASSERT_NOT_WORD_BOUNDARY, /**< "\B" */
|
||||
RE_OP_LOOKAHEAD_POS, /**< lookahead pos */
|
||||
RE_OP_LOOKAHEAD_NEG, /**< lookahead neg */
|
||||
RE_OP_BACKREFERENCE, /**< \[0..9] */
|
||||
RE_OP_CHAR_CLASS, /**< [ ] */
|
||||
RE_OP_INV_CHAR_CLASS /**< [^ ] */
|
||||
} re_opcode_t;
|
||||
|
||||
/**
|
||||
* Compiled byte code data.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint16_t flags; /**< RegExp flags */
|
||||
mem_cpointer_t pattern_cp; /**< original RegExp pattern */
|
||||
uint32_t num_of_captures; /**< number of capturing brackets */
|
||||
uint32_t num_of_non_captures; /**< number of non capturing brackets */
|
||||
} re_compiled_code_t;
|
||||
|
||||
/**
|
||||
* Check if a RegExp opcode is a capture group or not
|
||||
*/
|
||||
#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 1 : 0)
|
||||
|
||||
/**
|
||||
* Context of RegExp bytecode container
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint8_t *block_start_p; /**< start of bytecode block */
|
||||
uint8_t *block_end_p; /**< end of bytecode block */
|
||||
uint8_t *current_p; /**< current position in bytecode */
|
||||
} re_bytecode_ctx_t;
|
||||
|
||||
/**
|
||||
* Context of RegExp compiler
|
||||
*/
|
||||
@@ -114,11 +50,10 @@ typedef struct
|
||||
ecma_value_t
|
||||
re_compile_bytecode (re_compiled_code_t **, ecma_string_t *, uint16_t);
|
||||
|
||||
re_opcode_t
|
||||
re_get_opcode (uint8_t **);
|
||||
re_compiled_code_t *
|
||||
re_find_bytecode_in_cache (ecma_string_t *pattern_str_p, uint16_t flags, uint32_t *idx);
|
||||
|
||||
uint32_t
|
||||
re_get_value (uint8_t **);
|
||||
void re_cache_gc_run ();
|
||||
|
||||
/**
|
||||
* @}
|
||||
|
||||
Reference in New Issue
Block a user