Add an API function to calculate the UTF-8 encoded string length of a Jerry string. (#1460)

JerryScript-DCO-1.0-Signed-off-by: Robert Sipka rsipka.uszeged@partner.samsung.com
This commit is contained in:
Robert Sipka
2016-12-01 10:51:23 +01:00
committed by GitHub
parent 958344ee16
commit 23cf7fd177
8 changed files with 160 additions and 11 deletions
@@ -1479,6 +1479,57 @@ ecma_string_get_length (const ecma_string_t *string_p) /**< ecma-string */
}
} /* ecma_string_get_length */
/**
* Get length of UTF-8 encoded string length from ecma-string
*
* @return number of characters in the UTF-8 encoded string
*/
ecma_length_t
ecma_string_get_utf8_length (const ecma_string_t *string_p) /**< ecma-string */
{
switch (ECMA_STRING_GET_CONTAINER (string_p))
{
case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
{
if (string_p->u.utf8_string.size == (lit_utf8_size_t) string_p->u.utf8_string.length)
{
return (ecma_length_t) (string_p->u.utf8_string.length);
}
return lit_get_utf8_length_of_cesu8_string ((const lit_utf8_byte_t *) (string_p + 1),
(lit_utf8_size_t) string_p->u.utf8_string.size);
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
ecma_long_string_t *long_string_p = (ecma_long_string_t *) string_p;
if (string_p->u.long_utf8_string_size == (lit_utf8_size_t) long_string_p->long_utf8_string_length)
{
return (ecma_length_t) (long_string_p->long_utf8_string_length);
}
return lit_get_utf8_length_of_cesu8_string ((const lit_utf8_byte_t *) (string_p + 1),
(lit_utf8_size_t) string_p->u.long_utf8_string_size);
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
{
return ecma_string_get_number_in_desc_size (string_p->u.uint32_number);
}
case ECMA_STRING_CONTAINER_MAGIC_STRING:
{
JERRY_ASSERT (ECMA_STRING_IS_ASCII (lit_get_magic_string_utf8 (string_p->u.magic_string_id),
lit_get_magic_string_size (string_p->u.magic_string_id)));
return lit_get_magic_string_size (string_p->u.magic_string_id);
}
default:
{
JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
return lit_get_utf8_length_of_cesu8_string (lit_get_magic_string_ex_utf8 (string_p->u.magic_string_ex_id),
lit_get_magic_string_ex_size (string_p->u.magic_string_ex_id));
}
}
} /* ecma_string_get_utf8_length */
/**
* Get size of ecma-string
*
+1
View File
@@ -195,6 +195,7 @@ extern bool ecma_string_compare_to_property_name (ecma_property_t, jmem_cpointer
extern bool ecma_compare_ecma_strings (const ecma_string_t *, const ecma_string_t *);
extern bool ecma_compare_ecma_strings_relational (const ecma_string_t *, const ecma_string_t *);
extern ecma_length_t ecma_string_get_length (const ecma_string_t *);
extern ecma_length_t ecma_string_get_utf8_length (const ecma_string_t *);
extern lit_utf8_size_t ecma_string_get_size (const ecma_string_t *);
extern lit_utf8_size_t ecma_string_get_utf8_size (const ecma_string_t *);
extern ecma_char_t ecma_string_get_char_at_pos (const ecma_string_t *, ecma_length_t);
+1
View File
@@ -210,6 +210,7 @@ double jerry_get_number_value (const jerry_value_t);
jerry_size_t jerry_get_string_size (const jerry_value_t);
jerry_size_t jerry_get_utf8_string_size (const jerry_value_t);
jerry_length_t jerry_get_string_length (const jerry_value_t);
jerry_length_t jerry_get_utf8_string_length (const jerry_value_t);
jerry_size_t jerry_string_to_char_buffer (const jerry_value_t, jerry_char_t *, jerry_size_t);
/**
+21
View File
@@ -1086,6 +1086,27 @@ jerry_get_string_length (const jerry_value_t value) /**< input string */
return ecma_string_get_length (ecma_get_string_from_value (value));
} /* jerry_get_string_length */
/**
 * Get the UTF-8 length of a Jerry string value
 *
 * Note:
 *      A value that is not a string yields 0.
 *
 * @return number of characters in the string
 */
jerry_length_t
jerry_get_utf8_string_length (const jerry_value_t value) /**< input string */
{
  jerry_assert_api_available ();

  if (ecma_is_value_string (value))
  {
    /* Delegate the actual counting to the ecma-string helper. */
    return ecma_string_get_utf8_length (ecma_get_string_from_value (value));
  }

  return 0;
} /* jerry_get_utf8_string_length */
/**
* Copy the characters of a string into a specified buffer.
*
+36 -8
View File
@@ -292,22 +292,19 @@ lit_get_utf8_size_of_cesu8_string (const lit_utf8_byte_t *cesu8_buf_p, /**< cesu
{
lit_utf8_size_t offset = 0;
lit_utf8_size_t utf8_buf_size = cesu8_buf_size;
ecma_char_t prev_ch = 0;
while (offset < cesu8_buf_size)
{
ecma_char_t ch;
offset += lit_read_code_unit_from_utf8 (cesu8_buf_p + offset, &ch);
if (lit_is_code_point_utf16_high_surrogate (ch) && (offset < cesu8_buf_size))
if (lit_is_code_point_utf16_low_surrogate (ch) && lit_is_code_point_utf16_high_surrogate (prev_ch))
{
ecma_char_t next_ch;
offset += lit_read_code_unit_from_utf8 (cesu8_buf_p + offset, &next_ch);
if (lit_is_code_point_utf16_low_surrogate (next_ch))
{
utf8_buf_size -= 2;
}
utf8_buf_size -= 2;
}
prev_ch = ch;
}
JERRY_ASSERT (offset == cesu8_buf_size);
@@ -315,6 +312,37 @@ lit_get_utf8_size_of_cesu8_string (const lit_utf8_byte_t *cesu8_buf_p, /**< cesu
return utf8_buf_size;
} /* lit_get_utf8_size_of_cesu8_string */
/**
 * Count the characters a cesu-8 encoded string would have in utf-8 encoding
 *
 * Every code unit read from the buffer counts as one character, except a low
 * surrogate that directly follows a high surrogate: the two together are
 * counted once.
 *
 * @return length of an utf-8 encoded string
 */
ecma_length_t
lit_get_utf8_length_of_cesu8_string (const lit_utf8_byte_t *cesu8_buf_p, /**< cesu-8 string */
                                     lit_utf8_size_t cesu8_buf_size) /**< string size */
{
  ecma_length_t char_count = 0;
  ecma_char_t last_unit = 0;
  lit_utf8_size_t pos = 0;

  while (pos < cesu8_buf_size)
  {
    ecma_char_t unit;
    pos += lit_read_code_unit_from_utf8 (cesu8_buf_p + pos, &unit);

    /* The high surrogate was already counted on the previous iteration. */
    const bool is_pair_tail = (lit_is_code_point_utf16_low_surrogate (unit)
                               && lit_is_code_point_utf16_high_surrogate (last_unit));

    if (!is_pair_tail)
    {
      char_count++;
    }

    last_unit = unit;
  }

  /* The last code unit must end exactly at the end of the buffer. */
  JERRY_ASSERT (pos == cesu8_buf_size);

  return char_count;
} /* lit_get_utf8_length_of_cesu8_string */
/**
* Decodes a unicode code point from non-empty utf-8-encoded buffer
*
+1
View File
@@ -99,6 +99,7 @@ lit_utf8_size_t lit_get_utf8_size_of_cesu8_string (const lit_utf8_byte_t *, lit_
/* length */
ecma_length_t lit_utf8_string_length (const lit_utf8_byte_t *, lit_utf8_size_t);
ecma_length_t lit_get_utf8_length_of_cesu8_string (const lit_utf8_byte_t *, lit_utf8_size_t);
/* hash */
lit_string_hash_t lit_utf8_string_calc_hash (const lit_utf8_byte_t *, lit_utf8_size_t);