Optimize string concatenation. (#2141)

This patch adds two new string concatenation functions:
ecma_append_chars_to_string and ecma_append_magic_string_to_string

The former appends a cesu8 byte array and the latter appends a magic string
to the end of an ecma-string. These two free (dereference) their ecma-string
argument, and this change is also applied to the original ecma_concat_ecma_strings
function which simplifies string handling in most cases.

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
Zoltan Herczeg
2017-12-19 13:45:48 +01:00
committed by GitHub
parent bd574956e3
commit a2d3ea61eb
16 changed files with 369 additions and 555 deletions
+171 -108
View File
@@ -480,34 +480,33 @@ ecma_new_ecma_length_string (void)
} /* ecma_new_ecma_length_string */
/**
* Concatenate ecma-strings
* Append a cesu8 string after an ecma-string
*
* @return concatenation of two ecma-strings
* Note:
* The string1_p argument is freed. If it needs to be preserved,
* call ecma_ref_ecma_string with string1_p before the call.
*
* @return concatenation of an ecma-string and a cesu8 string
*/
ecma_string_t *
ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
ecma_string_t *string2_p) /**< second ecma-string */
ecma_append_chars_to_string (ecma_string_t *string1_p, /**< base ecma-string */
const lit_utf8_byte_t *cesu8_string2_p, /**< characters to be appended */
lit_utf8_size_t cesu8_string2_size, /**< byte size of cesu8_string2_p */
lit_utf8_size_t cesu8_string2_length) /**< character length of cesu8_string2_p */
{
JERRY_ASSERT (string1_p != NULL
&& string2_p != NULL);
JERRY_ASSERT (string1_p != NULL && cesu8_string2_size > 0 && cesu8_string2_length > 0);
if (ecma_string_is_empty (string1_p))
if (unlikely (ecma_string_is_empty (string1_p)))
{
ecma_ref_ecma_string (string2_p);
return string2_p;
}
else if (ecma_string_is_empty (string2_p))
{
ecma_ref_ecma_string (string1_p);
return string1_p;
ecma_deref_ecma_string (string1_p);
return ecma_new_ecma_string_from_utf8 (cesu8_string2_p, cesu8_string2_size);
}
const lit_utf8_byte_t *utf8_string1_p, *utf8_string2_p;
lit_utf8_size_t utf8_string1_size, utf8_string2_size;
lit_utf8_size_t utf8_string1_length, utf8_string2_length;
const lit_utf8_byte_t *cesu8_string1_p;
lit_utf8_size_t cesu8_string1_size;
lit_utf8_size_t cesu8_string1_length;
lit_utf8_byte_t uint32_to_string_buffer1[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32];
lit_utf8_byte_t uint32_to_string_buffer2[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32];
lit_utf8_byte_t uint32_to_string_buffer[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32];
bool string1_is_uint32 = false;
bool string1_rehash_needed = false;
@@ -516,36 +515,36 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
{
case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
{
utf8_string1_p = (lit_utf8_byte_t *) (string1_p + 1);
utf8_string1_size = string1_p->u.utf8_string.size;
utf8_string1_length = string1_p->u.utf8_string.length;
cesu8_string1_p = (lit_utf8_byte_t *) (string1_p + 1);
cesu8_string1_size = string1_p->u.utf8_string.size;
cesu8_string1_length = string1_p->u.utf8_string.length;
break;
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string1_p;
utf8_string1_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
utf8_string1_size = string1_p->u.long_utf8_string_size;
utf8_string1_length = long_string_desc_p->long_utf8_string_length;
cesu8_string1_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
cesu8_string1_size = string1_p->u.long_utf8_string_size;
cesu8_string1_length = long_string_desc_p->long_utf8_string_length;
break;
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
{
utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number,
uint32_to_string_buffer1,
ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
utf8_string1_p = uint32_to_string_buffer1;
utf8_string1_length = utf8_string1_size;
cesu8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number,
uint32_to_string_buffer,
ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
cesu8_string1_p = uint32_to_string_buffer;
cesu8_string1_length = cesu8_string1_size;
string1_is_uint32 = true;
string1_rehash_needed = true;
break;
}
case ECMA_STRING_CONTAINER_MAGIC_STRING:
{
utf8_string1_p = lit_get_magic_string_utf8 (string1_p->u.magic_string_id);
utf8_string1_size = lit_get_magic_string_size (string1_p->u.magic_string_id);
utf8_string1_length = utf8_string1_size;
cesu8_string1_p = lit_get_magic_string_utf8 (string1_p->u.magic_string_id);
cesu8_string1_size = lit_get_magic_string_size (string1_p->u.magic_string_id);
cesu8_string1_length = cesu8_string1_size;
string1_rehash_needed = true;
break;
}
@@ -553,93 +552,48 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
{
JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
utf8_string1_p = lit_get_magic_string_ex_utf8 (string1_p->u.magic_string_id);
utf8_string1_size = lit_get_magic_string_ex_size (string1_p->u.magic_string_id);
utf8_string1_length = lit_utf8_string_length (utf8_string1_p, utf8_string1_size);
cesu8_string1_p = lit_get_magic_string_ex_utf8 (string1_p->u.magic_string_id);
cesu8_string1_size = lit_get_magic_string_ex_size (string1_p->u.magic_string_id);
cesu8_string1_length = lit_utf8_string_length (cesu8_string1_p, cesu8_string1_size);
string1_rehash_needed = true;
break;
}
}
switch (ECMA_STRING_GET_CONTAINER (string2_p))
{
case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
{
utf8_string2_p = (lit_utf8_byte_t *) (string2_p + 1);
utf8_string2_size = string2_p->u.utf8_string.size;
utf8_string2_length = string2_p->u.utf8_string.length;
break;
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string2_p;
JERRY_ASSERT (cesu8_string1_length > 0);
JERRY_ASSERT (cesu8_string1_length <= cesu8_string1_size);
utf8_string2_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
utf8_string2_size = string2_p->u.long_utf8_string_size;
utf8_string2_length = long_string_desc_p->long_utf8_string_length;
break;
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
{
utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number,
uint32_to_string_buffer2,
ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
utf8_string2_p = uint32_to_string_buffer2;
utf8_string2_length = utf8_string2_size;
break;
}
case ECMA_STRING_CONTAINER_MAGIC_STRING:
{
utf8_string2_p = lit_get_magic_string_utf8 (string2_p->u.magic_string_id);
utf8_string2_size = lit_get_magic_string_size (string2_p->u.magic_string_id);
utf8_string2_length = utf8_string2_size;
break;
}
default:
{
JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string2_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
lit_utf8_size_t new_size = cesu8_string1_size + cesu8_string2_size;
utf8_string2_p = lit_get_magic_string_ex_utf8 (string2_p->u.magic_string_id);
utf8_string2_size = lit_get_magic_string_ex_size (string2_p->u.magic_string_id);
utf8_string2_length = lit_utf8_string_length (utf8_string2_p, utf8_string2_size);
break;
}
}
JERRY_ASSERT (utf8_string1_size > 0);
JERRY_ASSERT (utf8_string2_size > 0);
JERRY_ASSERT (utf8_string1_length <= utf8_string1_size);
JERRY_ASSERT (utf8_string2_length <= utf8_string2_size);
lit_utf8_size_t new_size = utf8_string1_size + utf8_string2_size;
/* It is impossible to allocate this large string. */
if (new_size < (utf8_string1_size | utf8_string2_size))
/* Poor man's carry flag check: it is impossible to allocate this large string. */
if (new_size < (cesu8_string1_size | cesu8_string2_size))
{
jerry_fatal (ERR_OUT_OF_MEMORY);
}
lit_magic_string_id_t magic_string_id;
magic_string_id = lit_is_utf8_string_pair_magic (utf8_string1_p,
utf8_string1_size,
utf8_string2_p,
utf8_string2_size);
magic_string_id = lit_is_utf8_string_pair_magic (cesu8_string1_p,
cesu8_string1_size,
cesu8_string2_p,
cesu8_string2_size);
if (magic_string_id != LIT_MAGIC_STRING__COUNT)
{
ecma_deref_ecma_string (string1_p);
return ecma_get_magic_string (magic_string_id);
}
if (string1_is_uint32 && new_size <= ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32)
{
memcpy (uint32_to_string_buffer1 + utf8_string1_size,
utf8_string2_p,
utf8_string2_size);
memcpy (uint32_to_string_buffer + cesu8_string1_size,
cesu8_string2_p,
cesu8_string2_size);
uint32_t array_index;
if (ecma_string_to_array_index (uint32_to_string_buffer1, new_size, &array_index))
if (ecma_string_to_array_index (uint32_to_string_buffer, new_size, &array_index))
{
ecma_deref_ecma_string (string1_p);
return ecma_new_ecma_string_from_uint32 (array_index);
}
}
@@ -647,13 +601,14 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
if (lit_get_magic_string_ex_count () > 0)
{
lit_magic_string_ex_id_t magic_string_ex_id;
magic_string_ex_id = lit_is_ex_utf8_string_pair_magic (utf8_string1_p,
utf8_string1_size,
utf8_string2_p,
utf8_string2_size);
magic_string_ex_id = lit_is_ex_utf8_string_pair_magic (cesu8_string1_p,
cesu8_string1_size,
cesu8_string2_p,
cesu8_string2_size);
if (magic_string_ex_id < lit_get_magic_string_ex_count ())
{
ecma_deref_ecma_string (string1_p);
return ecma_get_magic_string_ex (magic_string_ex_id);
}
}
@@ -668,7 +623,7 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
string_desc_p->u.common_uint32_field = 0;
string_desc_p->u.utf8_string.size = (uint16_t) new_size;
string_desc_p->u.utf8_string.length = (uint16_t) (utf8_string1_length + utf8_string2_length);
string_desc_p->u.utf8_string.length = (uint16_t) (cesu8_string1_length + cesu8_string2_length);
data_p = (lit_utf8_byte_t *) (string_desc_p + 1);
}
@@ -681,7 +636,7 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
string_desc_p->u.long_utf8_string_size = new_size;
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string_desc_p;
long_string_desc_p->long_utf8_string_length = utf8_string1_length + utf8_string2_length;
long_string_desc_p->long_utf8_string_length = cesu8_string1_length + cesu8_string2_length;
data_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
}
@@ -690,16 +645,124 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
if (string1_rehash_needed)
{
hash_start = lit_utf8_string_calc_hash (utf8_string1_p, utf8_string1_size);
hash_start = lit_utf8_string_calc_hash (cesu8_string1_p, cesu8_string1_size);
}
string_desc_p->hash = lit_utf8_string_hash_combine (hash_start, utf8_string2_p, utf8_string2_size);
string_desc_p->hash = lit_utf8_string_hash_combine (hash_start, cesu8_string2_p, cesu8_string2_size);
memcpy (data_p, utf8_string1_p, utf8_string1_size);
memcpy (data_p + utf8_string1_size, utf8_string2_p, utf8_string2_size);
memcpy (data_p, cesu8_string1_p, cesu8_string1_size);
memcpy (data_p + cesu8_string1_size, cesu8_string2_p, cesu8_string2_size);
ecma_deref_ecma_string (string1_p);
return string_desc_p;
} /* ecma_append_chars_to_string */
/**
* Concatenate ecma-strings
*
* Note:
* The string1_p argument is freed. If it needs to be preserved,
* call ecma_ref_ecma_string with string1_p before the call.
*
* @return concatenation of two ecma-strings
*/
ecma_string_t *
ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
ecma_string_t *string2_p) /**< second ecma-string */
{
JERRY_ASSERT (string1_p != NULL
&& string2_p != NULL);
if (unlikely (ecma_string_is_empty (string1_p)))
{
ecma_deref_ecma_string (string1_p);
ecma_ref_ecma_string (string2_p);
return string2_p;
}
else if (unlikely (ecma_string_is_empty (string2_p)))
{
return string1_p;
}
const lit_utf8_byte_t *cesu8_string2_p;
lit_utf8_size_t cesu8_string2_size;
lit_utf8_size_t cesu8_string2_length;
lit_utf8_byte_t uint32_to_string_buffer[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32];
switch (ECMA_STRING_GET_CONTAINER (string2_p))
{
case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
{
cesu8_string2_p = (lit_utf8_byte_t *) (string2_p + 1);
cesu8_string2_size = string2_p->u.utf8_string.size;
cesu8_string2_length = string2_p->u.utf8_string.length;
break;
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string2_p;
cesu8_string2_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
cesu8_string2_size = string2_p->u.long_utf8_string_size;
cesu8_string2_length = long_string_desc_p->long_utf8_string_length;
break;
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
{
cesu8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number,
uint32_to_string_buffer,
ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
cesu8_string2_p = uint32_to_string_buffer;
cesu8_string2_length = cesu8_string2_size;
break;
}
case ECMA_STRING_CONTAINER_MAGIC_STRING:
{
cesu8_string2_p = lit_get_magic_string_utf8 (string2_p->u.magic_string_id);
cesu8_string2_size = lit_get_magic_string_size (string2_p->u.magic_string_id);
cesu8_string2_length = cesu8_string2_size;
break;
}
default:
{
JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string2_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
cesu8_string2_p = lit_get_magic_string_ex_utf8 (string2_p->u.magic_string_id);
cesu8_string2_size = lit_get_magic_string_ex_size (string2_p->u.magic_string_id);
cesu8_string2_length = lit_utf8_string_length (cesu8_string2_p, cesu8_string2_size);
break;
}
}
return ecma_append_chars_to_string (string1_p, cesu8_string2_p, cesu8_string2_size, cesu8_string2_length);
} /* ecma_concat_ecma_strings */
/**
* Append a magic string after an ecma-string
*
* Note:
* The string1_p argument is freed. If it needs to be preserved,
* call ecma_ref_ecma_string with string1_p before the call.
*
* @return concatenation of an ecma-string and a magic string
*/
ecma_string_t *
ecma_append_magic_string_to_string (ecma_string_t *string1_p,
lit_magic_string_id_t string2_id)
{
if (unlikely (ecma_string_is_empty (string1_p)))
{
ecma_deref_ecma_string (string1_p);
return ecma_new_ecma_string_from_magic_string_id (string2_id);
}
const lit_utf8_byte_t *cesu8_string2_p = lit_get_magic_string_utf8 (string2_id);
lit_utf8_size_t cesu8_string2_size = lit_get_magic_string_size (string2_id);
return ecma_append_chars_to_string (string1_p, cesu8_string2_p, cesu8_string2_size, cesu8_string2_size);
} /* ecma_append_magic_string_to_string */
/**
* Increase reference counter of ecma-string.
*/
@@ -1337,8 +1400,8 @@ inline bool __attr_always_inline___
ecma_compare_ecma_string_to_magic_id (const ecma_string_t *string_p, /**< property name */
lit_magic_string_id_t id) /**< magic string id */
{
return (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING
&& string_p->u.magic_string_id == id);
return (string_p->u.magic_string_id == id
&& ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING);
} /* ecma_compare_ecma_string_to_magic_id */
/**
+5
View File
@@ -173,7 +173,12 @@ ecma_string_t *ecma_new_ecma_string_from_number (ecma_number_t num);
ecma_string_t *ecma_new_ecma_string_from_magic_string_id (lit_magic_string_id_t id);
ecma_string_t *ecma_new_ecma_string_from_magic_string_ex_id (lit_magic_string_ex_id_t id);
ecma_string_t *ecma_new_ecma_length_string (void);
ecma_string_t *ecma_append_chars_to_string (ecma_string_t *string1_p,
const lit_utf8_byte_t *cesu8_string2_p,
lit_utf8_size_t cesu8_string2_size,
lit_utf8_size_t cesu8_string2_length);
ecma_string_t *ecma_concat_ecma_strings (ecma_string_t *string1_p, ecma_string_t *string2_p);
ecma_string_t *ecma_append_magic_string_to_string (ecma_string_t *string1_p, lit_magic_string_id_t string2_id);
void ecma_ref_ecma_string (ecma_string_t *string_p);
void ecma_deref_ecma_string (ecma_string_t *string_p);
ecma_number_t ecma_string_to_number (const ecma_string_t *str_p);