Compact Byte Code parser and executor for Jerry.
JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com JerryScript-DCO-1.0-Signed-off-by: Tamas Gergely tgergely.u-szeged@partner.samsung.com JerryScript-DCO-1.0-Signed-off-by: Zsolt Borbély zsborbely.u-szeged@partner.samsung.com JerryScript-DCO-1.0-Signed-off-by: Roland Takacs rtakacs.u-szeged@partner.samsung.com JerryScript-DCO-1.0-Signed-off-by: István Kádár ikadar@inf.u-szeged.hu JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/* Copyright 2015 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2015 University of Szeged.
|
||||
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2015-2016 University of Szeged.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@@ -25,6 +25,16 @@
|
||||
|
||||
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
|
||||
|
||||
/** \addtogroup parser Parser
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser Regular expression
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser_bytecode Bytecode
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Size of block of RegExp bytecode. Used for allocation
|
||||
*/
|
||||
@@ -34,7 +44,7 @@
|
||||
* Get length of bytecode
|
||||
*/
|
||||
static uint32_t
|
||||
re_get_bytecode_length (re_bytecode_ctx_t *bc_ctx_p)
|
||||
re_get_bytecode_length (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
|
||||
{
|
||||
return ((uint32_t) (bc_ctx_p->current_p - bc_ctx_p->block_start_p));
|
||||
} /* re_get_bytecode_length */
|
||||
@@ -47,7 +57,7 @@ re_dump_bytecode (re_bytecode_ctx_t *bc_ctx);
|
||||
*
|
||||
* @return current position in RegExp bytecode
|
||||
*/
|
||||
static re_bytecode_t*
|
||||
static uint8_t *
|
||||
re_realloc_regexp_bytecode_block (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
|
||||
{
|
||||
JERRY_ASSERT (bc_ctx_p->block_end_p - bc_ctx_p->block_start_p >= 0);
|
||||
@@ -62,8 +72,8 @@ re_realloc_regexp_bytecode_block (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytec
|
||||
JERRY_ASSERT (bc_ctx_p->current_p - bc_ctx_p->block_start_p >= 0);
|
||||
size_t current_ptr_offset = static_cast<size_t> (bc_ctx_p->current_p - bc_ctx_p->block_start_p);
|
||||
|
||||
re_bytecode_t *new_block_start_p = (re_bytecode_t *) mem_heap_alloc_block (new_block_size,
|
||||
MEM_HEAP_ALLOC_SHORT_TERM);
|
||||
uint8_t *new_block_start_p = (uint8_t *) mem_heap_alloc_block (new_block_size,
|
||||
MEM_HEAP_ALLOC_SHORT_TERM);
|
||||
if (bc_ctx_p->current_p)
|
||||
{
|
||||
memcpy (new_block_start_p, bc_ctx_p->block_start_p, static_cast<size_t> (current_ptr_offset));
|
||||
@@ -81,12 +91,12 @@ re_realloc_regexp_bytecode_block (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytec
|
||||
*/
|
||||
static void
|
||||
re_bytecode_list_append (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
re_bytecode_t *bytecode_p, /**< input bytecode */
|
||||
uint8_t *bytecode_p, /**< input bytecode */
|
||||
size_t length) /**< length of input */
|
||||
{
|
||||
JERRY_ASSERT (length <= REGEXP_BYTECODE_BLOCK_SIZE);
|
||||
|
||||
re_bytecode_t *current_p = bc_ctx_p->current_p;
|
||||
uint8_t *current_p = bc_ctx_p->current_p;
|
||||
if (current_p + length > bc_ctx_p->block_end_p)
|
||||
{
|
||||
current_p = re_realloc_regexp_bytecode_block (bc_ctx_p);
|
||||
@@ -102,24 +112,24 @@ re_bytecode_list_append (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode conte
|
||||
static void
|
||||
re_bytecode_list_insert (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
size_t offset, /**< distance from the start of the container */
|
||||
re_bytecode_t *bytecode_p, /**< input bytecode */
|
||||
uint8_t *bytecode_p, /**< input bytecode */
|
||||
size_t length) /**< length of input */
|
||||
{
|
||||
JERRY_ASSERT (length <= REGEXP_BYTECODE_BLOCK_SIZE);
|
||||
|
||||
re_bytecode_t *current_p = bc_ctx_p->current_p;
|
||||
uint8_t *current_p = bc_ctx_p->current_p;
|
||||
if (current_p + length > bc_ctx_p->block_end_p)
|
||||
{
|
||||
re_realloc_regexp_bytecode_block (bc_ctx_p);
|
||||
}
|
||||
|
||||
re_bytecode_t *src_p = bc_ctx_p->block_start_p + offset;
|
||||
uint8_t *src_p = bc_ctx_p->block_start_p + offset;
|
||||
if ((re_get_bytecode_length (bc_ctx_p) - offset) > 0)
|
||||
{
|
||||
re_bytecode_t *dest_p = src_p + length;
|
||||
re_bytecode_t *tmp_block_start_p;
|
||||
tmp_block_start_p = (re_bytecode_t *) mem_heap_alloc_block ((re_get_bytecode_length (bc_ctx_p) - offset),
|
||||
MEM_HEAP_ALLOC_SHORT_TERM);
|
||||
uint8_t *dest_p = src_p + length;
|
||||
uint8_t *tmp_block_start_p;
|
||||
tmp_block_start_p = (uint8_t *) mem_heap_alloc_block ((re_get_bytecode_length (bc_ctx_p) - offset),
|
||||
MEM_HEAP_ALLOC_SHORT_TERM);
|
||||
memcpy (tmp_block_start_p, src_p, (size_t) (re_get_bytecode_length (bc_ctx_p) - offset));
|
||||
memcpy (dest_p, tmp_block_start_p, (size_t) (re_get_bytecode_length (bc_ctx_p) - offset));
|
||||
mem_heap_free_block (tmp_block_start_p);
|
||||
@@ -136,7 +146,7 @@ static void
|
||||
re_append_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
re_opcode_t opcode) /**< input opcode */
|
||||
{
|
||||
re_bytecode_list_append (bc_ctx_p, (re_bytecode_t*) &opcode, sizeof (re_bytecode_t));
|
||||
re_bytecode_list_append (bc_ctx_p, (uint8_t*) &opcode, sizeof (uint8_t));
|
||||
} /* re_append_opcode */
|
||||
|
||||
/**
|
||||
@@ -146,7 +156,7 @@ static void
|
||||
re_append_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
uint32_t value) /**< input value */
|
||||
{
|
||||
re_bytecode_list_append (bc_ctx_p, (re_bytecode_t*) &value, sizeof (uint32_t));
|
||||
re_bytecode_list_append (bc_ctx_p, (uint8_t*) &value, sizeof (uint32_t));
|
||||
} /* re_append_u32 */
|
||||
|
||||
/**
|
||||
@@ -168,7 +178,7 @@ re_insert_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
uint32_t offset, /**< distance from the start of the container */
|
||||
re_opcode_t opcode) /**< input opcode */
|
||||
{
|
||||
re_bytecode_list_insert (bc_ctx_p, offset, (re_bytecode_t*) &opcode, sizeof (re_bytecode_t));
|
||||
re_bytecode_list_insert (bc_ctx_p, offset, (uint8_t*) &opcode, sizeof (uint8_t));
|
||||
} /* re_insert_opcode */
|
||||
|
||||
/**
|
||||
@@ -179,17 +189,17 @@ re_insert_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
uint32_t offset, /**< distance from the start of the container */
|
||||
uint32_t value) /**< input value */
|
||||
{
|
||||
re_bytecode_list_insert (bc_ctx_p, offset, (re_bytecode_t*) &value, sizeof (uint32_t));
|
||||
re_bytecode_list_insert (bc_ctx_p, offset, (uint8_t*) &value, sizeof (uint32_t));
|
||||
} /* re_insert_u32 */
|
||||
|
||||
/**
|
||||
* Get a RegExp opcode
|
||||
*/
|
||||
re_opcode_t
|
||||
re_get_opcode (re_bytecode_t **bc_p) /**< pointer to bytecode start */
|
||||
re_get_opcode (uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
{
|
||||
re_bytecode_t bytecode = **bc_p;
|
||||
(*bc_p) += sizeof (re_bytecode_t);
|
||||
uint8_t bytecode = **bc_p;
|
||||
(*bc_p) += sizeof (uint8_t);
|
||||
return (re_opcode_t) bytecode;
|
||||
} /* re_get_opcode */
|
||||
|
||||
@@ -197,7 +207,7 @@ re_get_opcode (re_bytecode_t **bc_p) /**< pointer to bytecode start */
|
||||
* Get a parameter of a RegExp opcode
|
||||
*/
|
||||
uint32_t
|
||||
re_get_value (re_bytecode_t **bc_p) /**< pointer to bytecode start */
|
||||
re_get_value (uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
{
|
||||
uint32_t value = *((uint32_t*) *bc_p);
|
||||
(*bc_p) += sizeof (uint32_t);
|
||||
@@ -368,6 +378,13 @@ re_insert_into_group_with_jump (re_compiler_ctx_t *re_ctx_p, /**< RegExp compile
|
||||
re_insert_into_group (re_ctx_p, group_start_offset, idx, is_capturable);
|
||||
} /* re_insert_into_group_with_jump */
|
||||
|
||||
/**
|
||||
* @}
|
||||
*
|
||||
* \addtogroup regexparser_compiler Compiler
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Parse alternatives
|
||||
*
|
||||
@@ -614,9 +631,9 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
|
||||
* Returned value must be freed with ecma_free_completion_value
|
||||
*/
|
||||
ecma_completion_value_t
|
||||
re_compile_bytecode (re_bytecode_t **out_bytecode_p, /**< out:pointer to bytecode */
|
||||
re_compile_bytecode (re_compiled_code_t **out_bytecode_p, /**< out:pointer to bytecode */
|
||||
ecma_string_t *pattern_str_p, /**< pattern */
|
||||
uint8_t flags) /**< flags */
|
||||
uint16_t flags) /**< flags */
|
||||
{
|
||||
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
|
||||
re_compiler_ctx_t re_ctx;
|
||||
@@ -661,9 +678,18 @@ re_compile_bytecode (re_bytecode_t **out_bytecode_p, /**< out:pointer to bytecod
|
||||
re_append_opcode (&bc_ctx, RE_OP_EOF);
|
||||
|
||||
/* 3. Insert extra information for bytecode header */
|
||||
re_insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.num_of_non_captures);
|
||||
re_insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.num_of_captures * 2);
|
||||
re_insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.flags);
|
||||
re_compiled_code_t re_compiled_code;
|
||||
|
||||
re_compiled_code.flags = re_ctx.flags | (1 << ECMA_BYTECODE_REF_SHIFT);
|
||||
ECMA_SET_NON_NULL_POINTER (re_compiled_code.pattern_cp,
|
||||
ecma_copy_or_ref_ecma_string (pattern_str_p));
|
||||
re_compiled_code.num_of_captures = re_ctx.num_of_captures * 2;
|
||||
re_compiled_code.num_of_non_captures = re_ctx.num_of_non_captures;
|
||||
|
||||
re_bytecode_list_insert (&bc_ctx,
|
||||
0,
|
||||
(uint8_t *) &re_compiled_code,
|
||||
sizeof (re_compiled_code_t));
|
||||
}
|
||||
ECMA_FINALIZE (empty);
|
||||
|
||||
@@ -679,7 +705,7 @@ re_compile_bytecode (re_bytecode_t **out_bytecode_p, /**< out:pointer to bytecod
|
||||
{
|
||||
/* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opcode, so it cannot be NULL. */
|
||||
JERRY_ASSERT (bc_ctx.block_start_p != NULL);
|
||||
*out_bytecode_p = bc_ctx.block_start_p;
|
||||
*out_bytecode_p = (re_compiled_code_t *) bc_ctx.block_start_p;
|
||||
}
|
||||
|
||||
#ifdef JERRY_ENABLE_LOG
|
||||
@@ -694,12 +720,14 @@ re_compile_bytecode (re_bytecode_t **out_bytecode_p, /**< out:pointer to bytecod
|
||||
* RegExp bytecode dumper
|
||||
*/
|
||||
void
|
||||
re_dump_bytecode (re_bytecode_ctx_t *bc_ctx_p)
|
||||
re_dump_bytecode (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
|
||||
{
|
||||
re_bytecode_t *bytecode_p = bc_ctx_p->block_start_p;
|
||||
JERRY_DLOG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DLOG ("%d ", re_get_value (&bytecode_p));
|
||||
JERRY_DLOG ("%d | ", re_get_value (&bytecode_p));
|
||||
re_compiled_code_t *compiled_code_p = bc_ctx_p->block_start_p;
|
||||
JERRY_DLOG ("%d ", compiled_code_p->flags);
|
||||
JERRY_DLOG ("%d ", compiled_code_p->num_of_captures);
|
||||
JERRY_DLOG ("%d | ", compiled_code_p->num_of_non_captures);
|
||||
|
||||
uint8_t *bytecode_p = (uint8_t *) (compiled_code_p + 1);
|
||||
|
||||
re_opcode_t op;
|
||||
while ((op = re_get_opcode (&bytecode_p)))
|
||||
@@ -891,4 +919,9 @@ re_dump_bytecode (re_bytecode_ctx_t *bc_ctx_p)
|
||||
} /* re_dump_bytecode */
|
||||
#endif /* JERRY_ENABLE_LOG */
|
||||
|
||||
/**
|
||||
* @}
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/* Copyright 2015 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2015 University of Szeged.
|
||||
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2015-2016 University of Szeged.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@@ -22,6 +22,16 @@
|
||||
#include "ecma-globals.h"
|
||||
#include "re-parser.h"
|
||||
|
||||
/** \addtogroup parser Parser
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser Regular expression
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser_compiler Compiler
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* RegExp opcodes
|
||||
*/
|
||||
@@ -31,54 +41,60 @@ typedef enum
|
||||
/* Group opcode order is important, because RE_IS_CAPTURE_GROUP is based on it.
|
||||
* Change it carefully. Capture opcodes should be at first.
|
||||
*/
|
||||
RE_OP_CAPTURE_GROUP_START,
|
||||
RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START,
|
||||
RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START,
|
||||
RE_OP_CAPTURE_GREEDY_GROUP_END,
|
||||
RE_OP_CAPTURE_NON_GREEDY_GROUP_END,
|
||||
RE_OP_NON_CAPTURE_GROUP_START,
|
||||
RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START,
|
||||
RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START,
|
||||
RE_OP_NON_CAPTURE_GREEDY_GROUP_END,
|
||||
RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END,
|
||||
RE_OP_CAPTURE_GROUP_START, /**< group start */
|
||||
RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START, /**< greedy zero group start */
|
||||
RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START, /**< non-greedy zero group start */
|
||||
RE_OP_CAPTURE_GREEDY_GROUP_END, /**< greedy group end */
|
||||
RE_OP_CAPTURE_NON_GREEDY_GROUP_END, /**< non-greedy group end */
|
||||
RE_OP_NON_CAPTURE_GROUP_START, /**< non-capture group start */
|
||||
RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START, /**< non-capture greedy zero group start */
|
||||
RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START, /**< non-capture non-greedy zero group start */
|
||||
RE_OP_NON_CAPTURE_GREEDY_GROUP_END, /**< non-capture greedy group end */
|
||||
RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END, /**< non-capture non-greedy group end */
|
||||
|
||||
RE_OP_MATCH,
|
||||
RE_OP_CHAR,
|
||||
RE_OP_SAVE_AT_START,
|
||||
RE_OP_SAVE_AND_MATCH,
|
||||
RE_OP_PERIOD,
|
||||
RE_OP_ALTERNATIVE,
|
||||
RE_OP_GREEDY_ITERATOR,
|
||||
RE_OP_NON_GREEDY_ITERATOR,
|
||||
RE_OP_ASSERT_START,
|
||||
RE_OP_ASSERT_END,
|
||||
RE_OP_ASSERT_WORD_BOUNDARY,
|
||||
RE_OP_ASSERT_NOT_WORD_BOUNDARY,
|
||||
RE_OP_LOOKAHEAD_POS,
|
||||
RE_OP_LOOKAHEAD_NEG,
|
||||
RE_OP_BACKREFERENCE,
|
||||
RE_OP_CHAR_CLASS,
|
||||
RE_OP_INV_CHAR_CLASS
|
||||
RE_OP_MATCH, /**< match */
|
||||
RE_OP_CHAR, /**< any character */
|
||||
RE_OP_SAVE_AT_START, /**< save at start */
|
||||
RE_OP_SAVE_AND_MATCH, /**< save and match */
|
||||
RE_OP_PERIOD, /**< . */
|
||||
RE_OP_ALTERNATIVE, /**< | */
|
||||
RE_OP_GREEDY_ITERATOR, /**< greedy iterator */
|
||||
RE_OP_NON_GREEDY_ITERATOR, /**< non-greedy iterator */
|
||||
RE_OP_ASSERT_START, /**< ^ */
|
||||
RE_OP_ASSERT_END, /**< $ */
|
||||
RE_OP_ASSERT_WORD_BOUNDARY, /**< \b */
|
||||
RE_OP_ASSERT_NOT_WORD_BOUNDARY, /**< \B */
|
||||
RE_OP_LOOKAHEAD_POS, /**< lookahead pos */
|
||||
RE_OP_LOOKAHEAD_NEG, /**< lookahead neg */
|
||||
RE_OP_BACKREFERENCE, /**< \[0..9] */
|
||||
RE_OP_CHAR_CLASS, /**< [ ] */
|
||||
RE_OP_INV_CHAR_CLASS /**< [^ ] */
|
||||
} re_opcode_t;
|
||||
|
||||
/**
|
||||
* Compiled byte code data.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint16_t flags; /**< RegExp flags */
|
||||
mem_cpointer_t pattern_cp; /**< original RegExp pattern */
|
||||
uint32_t num_of_captures; /**< number of capturing brackets */
|
||||
uint32_t num_of_non_captures; /**< number of non capturing brackets */
|
||||
} re_compiled_code_t;
|
||||
|
||||
/**
|
||||
* Check if a RegExp opcode is a capture group or not
|
||||
*/
|
||||
#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 1 : 0)
|
||||
|
||||
/**
|
||||
* Type of bytecode elements
|
||||
*/
|
||||
typedef uint8_t re_bytecode_t;
|
||||
|
||||
/**
|
||||
* Context of RegExp bytecode container
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
re_bytecode_t *block_start_p; /**< start of bytecode block */
|
||||
re_bytecode_t *block_end_p; /**< end of bytecode block */
|
||||
re_bytecode_t *current_p; /**< current position in bytecode */
|
||||
uint8_t *block_start_p; /**< start of bytecode block */
|
||||
uint8_t *block_end_p; /**< end of bytecode block */
|
||||
uint8_t *current_p; /**< current position in bytecode */
|
||||
} re_bytecode_ctx_t;
|
||||
|
||||
/**
|
||||
@@ -86,23 +102,29 @@ typedef struct
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint8_t flags; /**< RegExp flags */
|
||||
uint32_t num_of_captures; /**< number of capture groups */
|
||||
uint32_t num_of_non_captures; /**< number of non-capture groups */
|
||||
uint32_t highest_backref; /**< highest backreference */
|
||||
uint16_t flags; /**< RegExp flags */
|
||||
uint32_t num_of_captures; /**< number of capture groups */
|
||||
uint32_t num_of_non_captures; /**< number of non-capture groups */
|
||||
uint32_t highest_backref; /**< highest backreference */
|
||||
re_bytecode_ctx_t *bytecode_ctx_p; /**< pointer of RegExp bytecode context */
|
||||
re_token_t current_token; /**< current token */
|
||||
re_parser_ctx_t *parser_ctx_p; /**< pointer of RegExp parser context */
|
||||
re_token_t current_token; /**< current token */
|
||||
re_parser_ctx_t *parser_ctx_p; /**< pointer of RegExp parser context */
|
||||
} re_compiler_ctx_t;
|
||||
|
||||
ecma_completion_value_t
|
||||
re_compile_bytecode (re_bytecode_t **, ecma_string_t *, uint8_t);
|
||||
re_compile_bytecode (re_compiled_code_t **, ecma_string_t *, uint16_t);
|
||||
|
||||
re_opcode_t
|
||||
re_get_opcode (re_bytecode_t **);
|
||||
re_get_opcode (uint8_t **);
|
||||
|
||||
uint32_t
|
||||
re_get_value (re_bytecode_t **);
|
||||
re_get_value (uint8_t **);
|
||||
|
||||
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
#endif /* RE_COMPILER_H */
|
||||
/**
|
||||
* @}
|
||||
* @}
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
#endif /* !RE_COMPILER_H */
|
||||
|
||||
@@ -24,6 +24,16 @@
|
||||
|
||||
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
|
||||
|
||||
/** \addtogroup parser Parser
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser Regular expression
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser_parser Parser
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Lookup a character in the input string.
|
||||
*
|
||||
@@ -894,4 +904,10 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
|
||||
return ret_value;
|
||||
} /* re_parse_next_token */
|
||||
|
||||
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
/**
|
||||
* @}
|
||||
* @}
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/* Copyright 2015 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2015 University of Szeged.
|
||||
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2015-2016 University of Szeged.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@@ -19,49 +19,65 @@
|
||||
|
||||
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
|
||||
|
||||
#include "opcodes-dumper.h"
|
||||
/** \addtogroup parser Parser
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser Regular expression
|
||||
* @{
|
||||
*
|
||||
* \addtogroup regexparser_bytecode Bytecode
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* RegExp token type definitions
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
RE_TOK_EOF, /* EOF */
|
||||
RE_TOK_BACKREFERENCE, /* \[0..9] */
|
||||
RE_TOK_CHAR, /* any character */
|
||||
RE_TOK_ALTERNATIVE, /* | */
|
||||
RE_TOK_ASSERT_START, /* ^ */
|
||||
RE_TOK_ASSERT_END, /* $ */
|
||||
RE_TOK_PERIOD, /* . */
|
||||
RE_TOK_START_CAPTURE_GROUP, /* ( */
|
||||
RE_TOK_START_NON_CAPTURE_GROUP, /* (?: */
|
||||
RE_TOK_END_GROUP, /* ')' */
|
||||
RE_TOK_EOF, /* EOF */
|
||||
RE_TOK_BACKREFERENCE, /* \[0..9] */
|
||||
RE_TOK_CHAR, /* any character */
|
||||
RE_TOK_ALTERNATIVE, /* | */
|
||||
RE_TOK_ASSERT_START, /* ^ */
|
||||
RE_TOK_ASSERT_END, /* $ */
|
||||
RE_TOK_PERIOD, /* . */
|
||||
RE_TOK_START_CAPTURE_GROUP, /* ( */
|
||||
RE_TOK_START_NON_CAPTURE_GROUP, /* (?: */
|
||||
RE_TOK_END_GROUP, /* ')' */
|
||||
RE_TOK_ASSERT_START_POS_LOOKAHEAD, /* (?= */
|
||||
RE_TOK_ASSERT_START_NEG_LOOKAHEAD, /* (?! */
|
||||
RE_TOK_ASSERT_WORD_BOUNDARY, /* \b */
|
||||
RE_TOK_ASSERT_NOT_WORD_BOUNDARY, /* \B */
|
||||
RE_TOK_DIGIT, /* \d */
|
||||
RE_TOK_NOT_DIGIT, /* \D */
|
||||
RE_TOK_WHITE, /* \s */
|
||||
RE_TOK_NOT_WHITE, /* \S */
|
||||
RE_TOK_WORD_CHAR, /* \w */
|
||||
RE_TOK_NOT_WORD_CHAR, /* \W */
|
||||
RE_TOK_START_CHAR_CLASS, /* [ ] */
|
||||
RE_TOK_START_INV_CHAR_CLASS, /* [^ ] */
|
||||
RE_TOK_ASSERT_WORD_BOUNDARY, /* \b */
|
||||
RE_TOK_ASSERT_NOT_WORD_BOUNDARY, /* \B */
|
||||
RE_TOK_DIGIT, /* \d */
|
||||
RE_TOK_NOT_DIGIT, /* \D */
|
||||
RE_TOK_WHITE, /* \s */
|
||||
RE_TOK_NOT_WHITE, /* \S */
|
||||
RE_TOK_WORD_CHAR, /* \w */
|
||||
RE_TOK_NOT_WORD_CHAR, /* \W */
|
||||
RE_TOK_START_CHAR_CLASS, /* [ ] */
|
||||
RE_TOK_START_INV_CHAR_CLASS, /* [^ ] */
|
||||
} re_token_type_t;
|
||||
|
||||
/**
|
||||
* RegExp constant of infinite
|
||||
*/
|
||||
* @}
|
||||
*
|
||||
* \addtogroup regexparser_parser Parser
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* RegExp constant of infinite
|
||||
*/
|
||||
#define RE_ITERATOR_INFINITE ((uint32_t)-1)
|
||||
|
||||
/**
|
||||
* Maximum number of decimal escape digits
|
||||
*/
|
||||
* Maximum number of decimal escape digits
|
||||
*/
|
||||
#define RE_MAX_RE_DECESC_DIGITS 9
|
||||
|
||||
/**
|
||||
* Undefined character (out of the range of the codeunit)
|
||||
*/
|
||||
* Undefined character (out of the range of the codeunit)
|
||||
*/
|
||||
#define RE_CHAR_UNDEF 0xFFFFFFFF
|
||||
|
||||
/**
|
||||
@@ -69,11 +85,11 @@ typedef enum
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
re_token_type_t type; /**< type of the token */
|
||||
uint32_t value; /**< value of the token */
|
||||
uint32_t qmin; /**< minimum number of token iterations */
|
||||
uint32_t qmax; /**< maximum number of token iterations */
|
||||
bool greedy; /**< type of iteration */
|
||||
re_token_type_t type; /**< type of the token */
|
||||
uint32_t value; /**< value of the token */
|
||||
uint32_t qmin; /**< minimum number of token iterations */
|
||||
uint32_t qmax; /**< maximum number of token iterations */
|
||||
bool greedy; /**< type of iteration */
|
||||
} re_token_t;
|
||||
|
||||
/**
|
||||
@@ -82,10 +98,10 @@ typedef struct
|
||||
typedef struct
|
||||
{
|
||||
lit_utf8_byte_t *input_start_p; /**< start of input pattern */
|
||||
lit_utf8_byte_t *input_curr_p; /**< current position in input pattern */
|
||||
lit_utf8_byte_t *input_end_p; /**< end of input pattern */
|
||||
int num_of_groups; /**< number of groups */
|
||||
uint32_t num_of_classes; /**< number of character classes */
|
||||
lit_utf8_byte_t *input_curr_p; /**< current position in input pattern */
|
||||
lit_utf8_byte_t *input_end_p; /**< end of input pattern */
|
||||
int num_of_groups; /**< number of groups */
|
||||
uint32_t num_of_classes; /**< number of character classes */
|
||||
} re_parser_ctx_t;
|
||||
|
||||
typedef void (*re_char_class_callback) (void *re_ctx_p, uint32_t start, uint32_t end);
|
||||
@@ -96,5 +112,10 @@ re_parse_char_class (re_parser_ctx_t *, re_char_class_callback, void *, re_token
|
||||
ecma_completion_value_t
|
||||
re_parse_next_token (re_parser_ctx_t *, re_token_t *);
|
||||
|
||||
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
#endif /* RE_PARSER_H */
|
||||
/**
|
||||
* @}
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
|
||||
#endif /* !RE_PARSER_H */
|
||||
|
||||
Reference in New Issue
Block a user