A few improvements for RegExp

Added an eviction mechanism to the RegExp cache and did some small
refactoring. Fixed a bug that occurred when logging is enabled.

Related issue: #927

JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com
This commit is contained in:
László Langó
2016-03-09 09:55:26 +01:00
parent 7a07e55411
commit 6f536c7942
2 changed files with 101 additions and 88 deletions
+49 -33
View File
@@ -445,23 +445,23 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
} /* re_parse_alternative */ } /* re_parse_alternative */
static const re_compiled_code_t *re_cache[RE_CACHE_SIZE]; static const re_compiled_code_t *re_cache[RE_CACHE_SIZE];
static uint8_t re_cache_idx = RE_CACHE_SIZE;
/** /**
* Search for the given pattern in the RegExp cache * Search for the given pattern in the RegExp cache
* *
* @return compiled bytecode - if found * @return index of bytecode in cache - if found
* NULL - otherwise * RE_CACHE_SIZE - otherwise
*/ */
const re_compiled_code_t * static uint8_t
re_find_bytecode_in_cache (ecma_string_t *pattern_str_p, /**< pattern string */ re_find_bytecode_in_cache (ecma_string_t *pattern_str_p, /**< pattern string */
uint16_t flags, /**< flags */ uint16_t flags) /**< flags */
uint32_t *idx) /**< [out] index */
{ {
uint32_t free_idx = RE_CACHE_SIZE; uint8_t free_idx = RE_CACHE_SIZE;
for (*idx = 0u; *idx < RE_CACHE_SIZE; (*idx)++) for (uint8_t idx = 0u; idx < RE_CACHE_SIZE; idx++)
{ {
const re_compiled_code_t *cached_bytecode_p = re_cache[*idx]; const re_compiled_code_t *cached_bytecode_p = re_cache[idx];
if (cached_bytecode_p != NULL) if (cached_bytecode_p != NULL)
{ {
@@ -472,19 +472,18 @@ re_find_bytecode_in_cache (ecma_string_t *pattern_str_p, /**< pattern string */
&& ecma_compare_ecma_strings (cached_pattern_str_p, pattern_str_p)) && ecma_compare_ecma_strings (cached_pattern_str_p, pattern_str_p))
{ {
JERRY_DDLOG ("RegExp is found in cache\n"); JERRY_DDLOG ("RegExp is found in cache\n");
return re_cache[*idx]; return idx;
} }
} }
else else
{ {
/* mark as free, so it can be overridden if the cache is full */ /* mark as free, so it can be overridden if the cache is full */
free_idx = *idx; free_idx = idx;
} }
} }
JERRY_DDLOG ("RegExp is NOT found in cache\n"); JERRY_DDLOG ("RegExp is NOT found in cache\n");
*idx = free_idx; return free_idx;
return NULL;
} /* re_find_bytecode_in_cache */ } /* re_find_bytecode_in_cache */
/** /**
@@ -521,6 +520,20 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
uint16_t flags) /**< flags */ uint16_t flags) /**< flags */
{ {
ecma_value_t ret_value = ecma_make_simple_value (ECMA_SIMPLE_VALUE_EMPTY); ecma_value_t ret_value = ecma_make_simple_value (ECMA_SIMPLE_VALUE_EMPTY);
uint8_t cache_idx = re_find_bytecode_in_cache (pattern_str_p, flags);
if (cache_idx < RE_CACHE_SIZE)
{
*out_bytecode_p = re_cache[cache_idx];
if (*out_bytecode_p != NULL)
{
ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p);
return ret_value;
}
}
/* not in the RegExp cache, so compile it */
re_compiler_ctx_t re_ctx; re_compiler_ctx_t re_ctx;
re_ctx.flags = flags; re_ctx.flags = flags;
re_ctx.highest_backref = 0; re_ctx.highest_backref = 0;
@@ -533,15 +546,6 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
re_ctx.bytecode_ctx_p = &bc_ctx; re_ctx.bytecode_ctx_p = &bc_ctx;
uint32_t cache_idx;
*out_bytecode_p = re_find_bytecode_in_cache (pattern_str_p, flags, &cache_idx);
if (*out_bytecode_p != NULL)
{
ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p);
}
else
{ /* not in the RegExp cache, so compile it */
lit_utf8_size_t pattern_str_size = ecma_string_get_size (pattern_str_p); lit_utf8_size_t pattern_str_size = ecma_string_get_size (pattern_str_p);
MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_size, lit_utf8_byte_t); MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_size, lit_utf8_byte_t);
@@ -564,7 +568,7 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
/* 2. Check for invalid backreference */ /* 2. Check for invalid backreference */
if (re_ctx.highest_backref >= re_ctx.num_of_captures) if (re_ctx.highest_backref >= re_ctx.num_of_captures)
{ {
ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid backreference.\n")); ret_value = ecma_raise_syntax_error ("Invalid backreference.\n");
} }
else else
{ {
@@ -593,30 +597,42 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
if (!ecma_is_value_empty (ret_value)) if (!ecma_is_value_empty (ret_value))
{ {
/* Compilation failed, free bytecode. */ /* Compilation failed, free bytecode. */
JERRY_DDLOG ("RegExp compilation failed!\n");
mem_heap_free_block_size_stored (bc_ctx.block_start_p); mem_heap_free_block_size_stored (bc_ctx.block_start_p);
*out_bytecode_p = NULL; *out_bytecode_p = NULL;
} }
else else
{ {
#ifdef JERRY_ENABLE_LOG
re_dump_bytecode (&bc_ctx);
#endif
/* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opdoce, so it cannot be NULL. */ /* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opdoce, so it cannot be NULL. */
JERRY_ASSERT (bc_ctx.block_start_p != NULL); JERRY_ASSERT (bc_ctx.block_start_p != NULL);
*out_bytecode_p = (re_compiled_code_t *) bc_ctx.block_start_p; *out_bytecode_p = (re_compiled_code_t *) bc_ctx.block_start_p;
if (cache_idx < RE_CACHE_SIZE) if (cache_idx == RE_CACHE_SIZE)
{ {
if (re_cache_idx == 0u)
{
re_cache_idx = RE_CACHE_SIZE;
}
const re_compiled_code_t *cached_bytecode_p = re_cache[--re_cache_idx];
JERRY_DDLOG ("RegExp cache is full! Remove the element on idx: %d\n", re_cache_idx);
if (cached_bytecode_p != NULL)
{
ecma_bytecode_deref ((ecma_compiled_code_t *) cached_bytecode_p);
}
cache_idx = re_cache_idx;
}
JERRY_DDLOG ("Insert bytecode into RegExp cache (idx: %d).\n", cache_idx);
ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p); ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p);
re_cache[cache_idx] = *out_bytecode_p; re_cache[cache_idx] = *out_bytecode_p;
} }
else
{
JERRY_DDLOG ("RegExp cache is full! Cannot add new bytecode to it.");
}
}
}
#ifdef JERRY_ENABLE_LOG
re_dump_bytecode (&bc_ctx);
#endif
return ret_value; return ret_value;
} /* re_compile_bytecode */ } /* re_compile_bytecode */
-3
View File
@@ -50,9 +50,6 @@ typedef struct
ecma_value_t ecma_value_t
re_compile_bytecode (const re_compiled_code_t **, ecma_string_t *, uint16_t); re_compile_bytecode (const re_compiled_code_t **, ecma_string_t *, uint16_t);
const re_compiled_code_t *
re_find_bytecode_in_cache (ecma_string_t *pattern_str_p, uint16_t flags, uint32_t *idx);
void re_cache_gc_run (); void re_cache_gc_run ();
/** /**