Compact Byte Code parser and executor for Jerry.
JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com
JerryScript-DCO-1.0-Signed-off-by: Tamas Gergely tgergely.u-szeged@partner.samsung.com
JerryScript-DCO-1.0-Signed-off-by: Zsolt Borbély zsborbely.u-szeged@partner.samsung.com
JerryScript-DCO-1.0-Signed-off-by: Roland Takacs rtakacs.u-szeged@partner.samsung.com
JerryScript-DCO-1.0-Signed-off-by: István Kádár ikadar@inf.u-szeged.hu
JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/* Copyright 2015 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2015 University of Szeged.
|
||||
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2015-2016 University of Szeged.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@@ -22,6 +22,16 @@
|
||||
#include "ecma-globals.h"
|
||||
#include "re-parser.h"
|
||||
|
||||
/** \addtogroup parser Parser
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser Regular expression
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser_compiler Compiler
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* RegExp opcodes
|
||||
*/
|
||||
@@ -31,54 +41,60 @@ typedef enum
|
||||
/* Group opcode order is important, because RE_IS_CAPTURE_GROUP is based on it.
|
||||
* Change it carefully. Capture opcodes should be at first.
|
||||
*/
|
||||
RE_OP_CAPTURE_GROUP_START,
|
||||
RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START,
|
||||
RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START,
|
||||
RE_OP_CAPTURE_GREEDY_GROUP_END,
|
||||
RE_OP_CAPTURE_NON_GREEDY_GROUP_END,
|
||||
RE_OP_NON_CAPTURE_GROUP_START,
|
||||
RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START,
|
||||
RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START,
|
||||
RE_OP_NON_CAPTURE_GREEDY_GROUP_END,
|
||||
RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END,
|
||||
RE_OP_CAPTURE_GROUP_START, /**< group start */
|
||||
RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START, /**< greedy zero group start */
|
||||
RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START, /**< non-greedy zero group start */
|
||||
RE_OP_CAPTURE_GREEDY_GROUP_END, /**< greedy group end */
|
||||
RE_OP_CAPTURE_NON_GREEDY_GROUP_END, /**< non-greedy group end */
|
||||
RE_OP_NON_CAPTURE_GROUP_START, /**< non-capture group start */
|
||||
RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START, /**< non-capture greedy zero group start */
|
||||
RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START, /**< non-capture non-greedy zero group start */
|
||||
RE_OP_NON_CAPTURE_GREEDY_GROUP_END, /**< non-capture greedy group end */
|
||||
RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END, /**< non-capture non-greedy group end */
|
||||
|
||||
RE_OP_MATCH,
|
||||
RE_OP_CHAR,
|
||||
RE_OP_SAVE_AT_START,
|
||||
RE_OP_SAVE_AND_MATCH,
|
||||
RE_OP_PERIOD,
|
||||
RE_OP_ALTERNATIVE,
|
||||
RE_OP_GREEDY_ITERATOR,
|
||||
RE_OP_NON_GREEDY_ITERATOR,
|
||||
RE_OP_ASSERT_START,
|
||||
RE_OP_ASSERT_END,
|
||||
RE_OP_ASSERT_WORD_BOUNDARY,
|
||||
RE_OP_ASSERT_NOT_WORD_BOUNDARY,
|
||||
RE_OP_LOOKAHEAD_POS,
|
||||
RE_OP_LOOKAHEAD_NEG,
|
||||
RE_OP_BACKREFERENCE,
|
||||
RE_OP_CHAR_CLASS,
|
||||
RE_OP_INV_CHAR_CLASS
|
||||
RE_OP_MATCH, /**< match */
|
||||
RE_OP_CHAR, /**< any character */
|
||||
RE_OP_SAVE_AT_START, /**< save at start */
|
||||
RE_OP_SAVE_AND_MATCH, /**< save and match */
|
||||
RE_OP_PERIOD, /**< . */
|
||||
RE_OP_ALTERNATIVE, /**< | */
|
||||
RE_OP_GREEDY_ITERATOR, /**< greedy iterator */
|
||||
RE_OP_NON_GREEDY_ITERATOR, /**< non-greedy iterator */
|
||||
RE_OP_ASSERT_START, /**< ^ */
|
||||
RE_OP_ASSERT_END, /**< $ */
|
||||
RE_OP_ASSERT_WORD_BOUNDARY, /**< \b */
|
||||
RE_OP_ASSERT_NOT_WORD_BOUNDARY, /**< \B */
|
||||
RE_OP_LOOKAHEAD_POS, /**< lookahead pos */
|
||||
RE_OP_LOOKAHEAD_NEG, /**< lookahead neg */
|
||||
RE_OP_BACKREFERENCE, /**< \[0..9] */
|
||||
RE_OP_CHAR_CLASS, /**< [ ] */
|
||||
RE_OP_INV_CHAR_CLASS /**< [^ ] */
|
||||
} re_opcode_t;
|
||||
|
||||
/**
|
||||
* Compiled byte code data: the flags, original-pattern reference and bracket counts of a RegExp.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint16_t flags; /**< RegExp flags (16 bits wide) */
|
||||
mem_cpointer_t pattern_cp; /**< original RegExp pattern (held as a mem_cpointer_t; presumably a compressed pointer - TODO confirm) */
|
||||
uint32_t num_of_captures; /**< number of capturing brackets */
|
||||
uint32_t num_of_non_captures; /**< number of non-capturing brackets */
|
||||
} re_compiled_code_t;
|
||||
|
||||
/**
|
||||
* Check if a RegExp opcode is a capture group or not
|
||||
*/
|
||||
/* Capture-group opcodes are declared ahead of RE_OP_NON_CAPTURE_GROUP_START, so an ordering test yields 1 for them and 0 otherwise. */
#define RE_IS_CAPTURE_GROUP(x) ((x) < RE_OP_NON_CAPTURE_GROUP_START)
|
||||
|
||||
/**
|
||||
* Type of bytecode elements
|
||||
*/
|
||||
typedef uint8_t re_bytecode_t;
|
||||
|
||||
/**
|
||||
* Context of RegExp bytecode container
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
re_bytecode_t *block_start_p; /**< start of bytecode block */
|
||||
re_bytecode_t *block_end_p; /**< end of bytecode block */
|
||||
re_bytecode_t *current_p; /**< current position in bytecode */
|
||||
uint8_t *block_start_p; /**< start of bytecode block */
|
||||
uint8_t *block_end_p; /**< end of bytecode block */
|
||||
uint8_t *current_p; /**< current position in bytecode */
|
||||
} re_bytecode_ctx_t;
|
||||
|
||||
/**
|
||||
@@ -86,23 +102,29 @@ typedef struct
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint8_t flags; /**< RegExp flags */
|
||||
uint32_t num_of_captures; /**< number of capture groups */
|
||||
uint32_t num_of_non_captures; /**< number of non-capture groups */
|
||||
uint32_t highest_backref; /**< highest backreference */
|
||||
uint16_t flags; /**< RegExp flags */
|
||||
uint32_t num_of_captures; /**< number of capture groups */
|
||||
uint32_t num_of_non_captures; /**< number of non-capture groups */
|
||||
uint32_t highest_backref; /**< highest backreference */
|
||||
re_bytecode_ctx_t *bytecode_ctx_p; /**< pointer of RegExp bytecode context */
|
||||
re_token_t current_token; /**< current token */
|
||||
re_parser_ctx_t *parser_ctx_p; /**< pointer of RegExp parser context */
|
||||
re_token_t current_token; /**< current token */
|
||||
re_parser_ctx_t *parser_ctx_p; /**< pointer of RegExp parser context */
|
||||
} re_compiler_ctx_t;
|
||||
|
||||
ecma_completion_value_t
|
||||
re_compile_bytecode (re_bytecode_t **, ecma_string_t *, uint8_t);
|
||||
re_compile_bytecode (re_compiled_code_t **, ecma_string_t *, uint16_t);
|
||||
|
||||
re_opcode_t
|
||||
re_get_opcode (re_bytecode_t **);
|
||||
re_get_opcode (uint8_t **);
|
||||
|
||||
uint32_t
|
||||
re_get_value (re_bytecode_t **);
|
||||
re_get_value (uint8_t **);
|
||||
|
||||
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
#endif /* RE_COMPILER_H */
|
||||
/**
|
||||
* @}
|
||||
* @}
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
#endif /* !RE_COMPILER_H */
|
||||
|
||||
Reference in New Issue
Block a user