Compact Byte Code parser and executor for Jerry.

JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com
JerryScript-DCO-1.0-Signed-off-by: Tamas Gergely tgergely.u-szeged@partner.samsung.com
JerryScript-DCO-1.0-Signed-off-by: Zsolt Borbély zsborbely.u-szeged@partner.samsung.com
JerryScript-DCO-1.0-Signed-off-by: Roland Takacs rtakacs.u-szeged@partner.samsung.com
JerryScript-DCO-1.0-Signed-off-by: István Kádár ikadar@inf.u-szeged.hu
JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
Zoltan Herczeg
2016-02-05 00:10:10 -08:00
parent db6caf3c48
commit 4d2dd22ced
92 changed files with 17184 additions and 20276 deletions
+70 -48
View File
@@ -1,5 +1,5 @@
/* Copyright 2015 Samsung Electronics Co., Ltd.
* Copyright 2015 University of Szeged.
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
* Copyright 2015-2016 University of Szeged.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -22,6 +22,16 @@
#include "ecma-globals.h"
#include "re-parser.h"
/** \addtogroup parser Parser
* @{
*
* \addtogroup regexparser Regular expression
* @{
*
* \addtogroup regexparser_compiler Compiler
* @{
*/
/**
* RegExp opcodes
*/
@@ -31,54 +41,60 @@ typedef enum
/* Group opcode order is important, because RE_IS_CAPTURE_GROUP is based on it.
* Change it carefully. Capture opcodes should be at first.
*/
RE_OP_CAPTURE_GROUP_START,
RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START,
RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START,
RE_OP_CAPTURE_GREEDY_GROUP_END,
RE_OP_CAPTURE_NON_GREEDY_GROUP_END,
RE_OP_NON_CAPTURE_GROUP_START,
RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START,
RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START,
RE_OP_NON_CAPTURE_GREEDY_GROUP_END,
RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END,
RE_OP_CAPTURE_GROUP_START, /**< group start */
RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START, /**< greedy zero group start */
RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START, /**< non-greedy zero group start */
RE_OP_CAPTURE_GREEDY_GROUP_END, /**< greedy group end */
RE_OP_CAPTURE_NON_GREEDY_GROUP_END, /**< non-greedy group end */
RE_OP_NON_CAPTURE_GROUP_START, /**< non-capture group start */
RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START, /**< non-capture greedy zero group start */
RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START, /**< non-capture non-greedy zero group start */
RE_OP_NON_CAPTURE_GREEDY_GROUP_END, /**< non-capture greedy group end */
RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END, /**< non-capture non-greedy group end */
RE_OP_MATCH,
RE_OP_CHAR,
RE_OP_SAVE_AT_START,
RE_OP_SAVE_AND_MATCH,
RE_OP_PERIOD,
RE_OP_ALTERNATIVE,
RE_OP_GREEDY_ITERATOR,
RE_OP_NON_GREEDY_ITERATOR,
RE_OP_ASSERT_START,
RE_OP_ASSERT_END,
RE_OP_ASSERT_WORD_BOUNDARY,
RE_OP_ASSERT_NOT_WORD_BOUNDARY,
RE_OP_LOOKAHEAD_POS,
RE_OP_LOOKAHEAD_NEG,
RE_OP_BACKREFERENCE,
RE_OP_CHAR_CLASS,
RE_OP_INV_CHAR_CLASS
RE_OP_MATCH, /**< match */
RE_OP_CHAR, /**< any character */
RE_OP_SAVE_AT_START, /**< save at start */
RE_OP_SAVE_AND_MATCH, /**< save and match */
RE_OP_PERIOD, /**< . */
RE_OP_ALTERNATIVE, /**< | */
RE_OP_GREEDY_ITERATOR, /**< greedy iterator */
RE_OP_NON_GREEDY_ITERATOR, /**< non-greedy iterator */
RE_OP_ASSERT_START, /**< ^ */
RE_OP_ASSERT_END, /**< $ */
RE_OP_ASSERT_WORD_BOUNDARY, /**< \b */
RE_OP_ASSERT_NOT_WORD_BOUNDARY, /**< \B */
RE_OP_LOOKAHEAD_POS, /**< lookahead pos */
RE_OP_LOOKAHEAD_NEG, /**< lookahead neg */
RE_OP_BACKREFERENCE, /**< \[0..9] */
RE_OP_CHAR_CLASS, /**< [ ] */
RE_OP_INV_CHAR_CLASS /**< [^ ] */
} re_opcode_t;
/**
 * Compiled byte code data.
 *
 * Groups the RegExp flags, a reference to the original pattern and the
 * counts of capturing and non-capturing brackets (see the members below).
 */
typedef struct
{
uint16_t flags; /**< RegExp flags */
mem_cpointer_t pattern_cp; /**< original RegExp pattern
                            *   (NOTE(review): mem_cpointer_t is presumably a compressed pointer - confirm) */
uint32_t num_of_captures; /**< number of capturing brackets */
uint32_t num_of_non_captures; /**< number of non capturing brackets */
} re_compiled_code_t;
/**
 * Evaluates to 1 when the given RegExp opcode is a capture group opcode
 * and to 0 otherwise.
 *
 * Relies on the opcode ordering: every capture group opcode is placed
 * before RE_OP_NON_CAPTURE_GROUP_START in the opcode enumeration.
 */
#define RE_IS_CAPTURE_GROUP(x) ((x) < RE_OP_NON_CAPTURE_GROUP_START)
/**
 * Type of bytecode elements: each element is one unsigned 8-bit value.
 */
typedef uint8_t re_bytecode_t;
/**
 * Context of RegExp bytecode container.
 *
 * Describes a bytecode block through three byte pointers: the start of the
 * block, the end of the block and the current position inside it.
 *
 * NOTE(review): every member below is listed twice, once as re_bytecode_t *
 * and once as uint8_t *. This looks like the before/after lines of a diff
 * shown together - confirm which set the real header keeps.
 */
typedef struct
{
re_bytecode_t *block_start_p; /**< start of bytecode block */
re_bytecode_t *block_end_p; /**< end of bytecode block */
re_bytecode_t *current_p; /**< current position in bytecode */
uint8_t *block_start_p; /**< start of bytecode block */
uint8_t *block_end_p; /**< end of bytecode block */
uint8_t *current_p; /**< current position in bytecode */
} re_bytecode_ctx_t;
/**
@@ -86,23 +102,29 @@ typedef struct
*/
typedef struct
{
/* NOTE(review): several members appear twice (e.g. flags as uint8_t and as
 * uint16_t); this looks like old and new diff lines shown together - confirm. */
uint8_t flags; /**< RegExp flags */
uint32_t num_of_captures; /**< number of capture groups */
uint32_t num_of_non_captures; /**< number of non-capture groups */
uint32_t highest_backref; /**< highest backreference */
uint16_t flags; /**< RegExp flags */
uint32_t num_of_captures; /**< number of capture groups */
uint32_t num_of_non_captures; /**< number of non-capture groups */
uint32_t highest_backref; /**< highest backreference */
re_bytecode_ctx_t *bytecode_ctx_p; /**< pointer to the RegExp bytecode context */
re_token_t current_token; /**< current token */
re_parser_ctx_t *parser_ctx_p; /**< pointer to the RegExp parser context */
re_token_t current_token; /**< current token */
re_parser_ctx_t *parser_ctx_p; /**< pointer to the RegExp parser context */
} re_compiler_ctx_t;
/*
 * RegExp compiler entry point; returns an ecma_completion_value_t.
 * NOTE(review): parameters are unnamed here - presumably (output bytecode,
 * pattern string, RegExp flags); confirm against the definition.
 * Two prototypes are listed (re_bytecode_t ** / uint8_t flags and
 * re_compiled_code_t ** / uint16_t flags); this looks like the old and new
 * diff lines shown together.
 */
ecma_completion_value_t
re_compile_bytecode (re_bytecode_t **, ecma_string_t *, uint8_t);
re_compile_bytecode (re_compiled_code_t **, ecma_string_t *, uint16_t);
/*
 * Returns a re_opcode_t; takes a pointer to a bytecode pointer.
 * NOTE(review): presumably reads the opcode at the pointed-to position and
 * advances the pointer - confirm against the definition.
 */
re_opcode_t
re_get_opcode (re_bytecode_t **);
re_get_opcode (uint8_t **);
/*
 * Returns a uint32_t; takes a pointer to a bytecode pointer.
 * NOTE(review): presumably decodes a value operand at the pointed-to position
 * and advances the pointer - confirm against the definition.
 */
uint32_t
re_get_value (re_bytecode_t **);
re_get_value (uint8_t **);
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
#endif /* RE_COMPILER_H */
/**
* @}
* @}
* @}
*/
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
#endif /* !RE_COMPILER_H */