Add jerry_string_to_utf8_char_buffer API function. (#1501)

Copy the characters of a string into a specified utf-8 string buffer.

JerryScript-DCO-1.0-Signed-off-by: Robert Sipka rsipka.uszeged@partner.samsung.com
This commit is contained in:
Robert Sipka
2017-01-05 09:35:31 +01:00
committed by GitHub
parent 8ebbfda996
commit 445ca1d6de
9 changed files with 247 additions and 10 deletions
+55
View File
@@ -778,6 +778,61 @@ lit_code_point_to_utf8 (lit_code_point_t code_point, /**< code point */
}
} /* lit_code_point_to_utf8 */
/**
 * Transcode a cesu-8 string into a utf-8 string stored in a caller-supplied buffer.
 *
 * Every code unit is copied to the output unchanged, except when a low surrogate
 * directly follows a high surrogate: in that case the already copied high surrogate
 * is overwritten with the utf-8 encoding of the code point the pair represents.
 *
 * Note:
 *      Nothing is checked before writing; the caller must guarantee that the
 *      converted string fits into the destination buffer. The buffer limit is
 *      only verified by an assertion after the conversion has finished.
 *
 * @return number of bytes written to the utf-8 buffer.
 */
lit_utf8_size_t
lit_convert_cesu8_string_to_utf8_string (const lit_utf8_byte_t *cesu8_string, /**< cesu-8 string */
                                         lit_utf8_size_t cesu8_size, /**< size of cesu-8 string */
                                         lit_utf8_byte_t *utf8_string, /**< destination utf-8 buffer pointer
                                                                        * (can be NULL if utf8_size == 0) */
                                         lit_utf8_size_t utf8_size) /**< size of utf-8 buffer */
{
  const lit_utf8_byte_t *src_p = cesu8_string;
  const lit_utf8_byte_t *src_end_p = cesu8_string + cesu8_size;

  lit_utf8_byte_t *dst_p = utf8_string;
  lit_utf8_byte_t *dst_end_p = utf8_string + utf8_size;

  lit_utf8_size_t written = 0;

  /* Code unit processed by the previous iteration, and its encoded size. */
  ecma_char_t last_unit = 0;
  lit_utf8_size_t last_unit_size = 0;

  while (src_p < src_end_p)
  {
    ecma_char_t unit;
    lit_utf8_size_t unit_size = lit_read_code_unit_from_utf8 (src_p, &unit);

    if (!lit_is_code_point_utf16_low_surrogate (unit) || !lit_is_code_point_utf16_high_surrogate (last_unit))
    {
      /* Not the second half of a surrogate pair: the bytes are copied as they are. */
      memcpy (dst_p, src_p, unit_size);
      written += unit_size;
    }
    else
    {
      JERRY_ASSERT (unit_size == last_unit_size);

      /* The high surrogate was copied by the previous iteration: step back over it
       * and store the encoding of the combined code point in its place. */
      lit_code_point_to_utf8 (lit_convert_surrogate_pair_to_code_point (last_unit, unit),
                              dst_p - last_unit_size);

      /* The combined encoding is accounted as one byte longer than the high surrogate it replaces. */
      written++;
    }

    dst_p = utf8_string + written;
    src_p += unit_size;

    last_unit = unit;
    last_unit_size = unit_size;
  }

  JERRY_ASSERT (src_p == src_end_p);
  JERRY_ASSERT (dst_p <= dst_end_p);

  return written;
} /* lit_convert_cesu8_string_to_utf8_string */
/**
* Convert surrogate pair to code point
*
+4
View File
@@ -114,6 +114,10 @@ lit_utf8_size_t lit_get_unicode_char_size_by_utf8_first_byte (lit_utf8_byte_t fi
lit_utf8_size_t lit_code_unit_to_utf8 (ecma_char_t code_unit, lit_utf8_byte_t *buf_p);
lit_utf8_size_t lit_code_point_to_utf8 (lit_code_point_t code_point, lit_utf8_byte_t *buf);
lit_utf8_size_t lit_code_point_to_cesu8 (lit_code_point_t code_point, lit_utf8_byte_t *buf);
/*
 * Converts the cesu-8 string (cesu8_string, cesu8_size bytes) to utf-8 and stores the result
 * in utf8_string; returns the number of bytes written. The caller must make sure the result
 * fits into utf8_size bytes: the limit is only checked by an assertion after the conversion.
 */
lit_utf8_size_t lit_convert_cesu8_string_to_utf8_string (const lit_utf8_byte_t *cesu8_string,
lit_utf8_size_t cesu8_size,
lit_utf8_byte_t *utf8_string,
lit_utf8_size_t utf8_size);
lit_code_point_t lit_convert_surrogate_pair_to_code_point (ecma_char_t high_surrogate, ecma_char_t low_surrogate);
bool lit_compare_utf8_strings_relational (const lit_utf8_byte_t *string1_p, lit_utf8_size_t string1_size,