RegExp refactoring and improvements

Move RegExp bytecode functions to a separate file.
Optimize bytecode lenght on character matching.
Implement a basic RegExp cache to optimize memory
usage on duplicated RegExp in JS files. Also fix
minor style issues and add missing comments. Improve
existing comments.

JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com
This commit is contained in:
László Langó
2016-02-18 10:23:45 +00:00
parent 3f377692d9
commit 2c72bb1139
16 changed files with 844 additions and 601 deletions
+4 -69
View File
@@ -20,6 +20,7 @@
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
#include "ecma-globals.h"
#include "re-bytecode.h"
#include "re-parser.h"
/** \addtogroup parser Parser
@@ -32,71 +33,6 @@
* @{
*/
/**
* RegExp opcodes
*/
typedef enum
{
RE_OP_EOF,
/* Group opcode order is important, because RE_IS_CAPTURE_GROUP is based on it.
* Change it carefully. Capture opcodes should be at first.
*/
RE_OP_CAPTURE_GROUP_START, /**< group start */
RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START, /**< greedy zero group start */
RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START, /**< non-greedy zero group start */
RE_OP_CAPTURE_GREEDY_GROUP_END, /**< greedy group end */
RE_OP_CAPTURE_NON_GREEDY_GROUP_END, /**< non-greedy group end */
RE_OP_NON_CAPTURE_GROUP_START, /**< non-capture group start */
RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START, /**< non-capture greedy zero group start */
RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START, /**< non-capture non-greedy zero group start */
RE_OP_NON_CAPTURE_GREEDY_GROUP_END, /**< non-capture greedy group end */
RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END, /**< non-capture non-greedy group end */
RE_OP_MATCH, /**< match */
RE_OP_CHAR, /**< any character */
RE_OP_SAVE_AT_START, /**< save at start */
RE_OP_SAVE_AND_MATCH, /**< save and match */
RE_OP_PERIOD, /**< "." */
RE_OP_ALTERNATIVE, /**< "|" */
RE_OP_GREEDY_ITERATOR, /**< greedy iterator */
RE_OP_NON_GREEDY_ITERATOR, /**< non-greedy iterator */
RE_OP_ASSERT_START, /**< "^" */
RE_OP_ASSERT_END, /**< "$" */
RE_OP_ASSERT_WORD_BOUNDARY, /**< "\b" */
RE_OP_ASSERT_NOT_WORD_BOUNDARY, /**< "\B" */
RE_OP_LOOKAHEAD_POS, /**< lookahead pos */
RE_OP_LOOKAHEAD_NEG, /**< lookahead neg */
RE_OP_BACKREFERENCE, /**< \[0..9] */
RE_OP_CHAR_CLASS, /**< [ ] */
RE_OP_INV_CHAR_CLASS /**< [^ ] */
} re_opcode_t;
/**
* Compiled byte code data.
*/
typedef struct
{
uint16_t flags; /**< RegExp flags */
mem_cpointer_t pattern_cp; /**< original RegExp pattern */
uint32_t num_of_captures; /**< number of capturing brackets */
uint32_t num_of_non_captures; /**< number of non capturing brackets */
} re_compiled_code_t;
/**
* Check if a RegExp opcode is a capture group or not
*/
#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 1 : 0)
/**
* Context of RegExp bytecode container
*/
typedef struct
{
uint8_t *block_start_p; /**< start of bytecode block */
uint8_t *block_end_p; /**< end of bytecode block */
uint8_t *current_p; /**< current position in bytecode */
} re_bytecode_ctx_t;
/**
* Context of RegExp compiler
*/
@@ -114,11 +50,10 @@ typedef struct
ecma_value_t
re_compile_bytecode (re_compiled_code_t **, ecma_string_t *, uint16_t);
re_opcode_t
re_get_opcode (uint8_t **);
re_compiled_code_t *
re_find_bytecode_in_cache (ecma_string_t *pattern_str_p, uint16_t flags, uint32_t *idx);
uint32_t
re_get_value (uint8_t **);
void re_cache_gc_run ();
/**
* @}