Copy the characters of an UTF-8 encoded substring into a specified buffer (#1524)

JerryScript-DCO-1.0-Signed-off-by: Robert Sipka rsipka.uszeged@partner.samsung.com
This commit is contained in:
Robert Sipka
2017-01-24 15:04:48 +01:00
committed by László Langó
parent 976c8aee80
commit 124582793f
6 changed files with 264 additions and 2 deletions
+132 -1
View File
@@ -980,7 +980,7 @@ ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-s
} /* ecma_string_copy_to_utf8_buffer */
/**
* Convert ecma-string's contents to a cesu-8 string, extract the parts of the converted string beetween the specified
* Convert ecma-string's contents to a cesu-8 string, extract the parts of the converted string between the specified
* start position and the end position (or the end of the string, whichever comes first), and copy these characters
* into the buffer.
*
@@ -1059,6 +1059,137 @@ ecma_substring_copy_to_cesu8_buffer (const ecma_string_t *string_desc_p, /**< ec
return size;
} /* ecma_substring_copy_to_cesu8_buffer */
/**
* Convert ecma-string's contents to an utf-8 string, extract the parts of the converted string between the specified
* start position and the end position (or the end of the string, whichever comes first), and copy these characters
* into the buffer.
*
* @return number of bytes, actually copied to the buffer.
*/
lit_utf8_size_t
ecma_substring_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */
ecma_length_t start_pos, /**< position of the first character */
ecma_length_t end_pos, /**< position of the last character */
lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
* (can be NULL if buffer_size == 0) */
lit_utf8_size_t buffer_size) /**< size of buffer */
{
JERRY_ASSERT (string_desc_p != NULL);
JERRY_ASSERT (string_desc_p->refs_and_container >= ECMA_STRING_REF_ONE);
JERRY_ASSERT (buffer_p != NULL || buffer_size == 0);
lit_utf8_size_t size = 0;
ecma_length_t utf8_str_length = ecma_string_get_utf8_length (string_desc_p);
if (start_pos >= utf8_str_length || start_pos >= end_pos)
{
return 0;
}
if (end_pos > utf8_str_length)
{
end_pos = utf8_str_length;
}
ECMA_STRING_TO_UTF8_STRING (string_desc_p, cesu8_str_p, cesu8_str_size);
ecma_length_t cesu8_str_length = ecma_string_get_length (string_desc_p);
if (cesu8_str_length == cesu8_str_size)
{
cesu8_str_p += start_pos;
size = end_pos - start_pos;
if (size > buffer_size)
{
size = buffer_size;
}
memcpy (buffer_p, cesu8_str_p, size);
}
else
{
const lit_utf8_byte_t *cesu8_end_pos = cesu8_str_p + cesu8_str_size;
end_pos -= start_pos;
while (start_pos--)
{
ecma_char_t ch;
lit_utf8_size_t code_unit_size = lit_read_code_unit_from_utf8 (cesu8_str_p, &ch);
cesu8_str_p += code_unit_size;
if ((cesu8_str_p != cesu8_end_pos) && lit_is_code_point_utf16_high_surrogate (ch))
{
ecma_char_t next_ch;
lit_utf8_size_t next_ch_size = lit_read_code_unit_from_utf8 (cesu8_str_p, &next_ch);
if (lit_is_code_point_utf16_low_surrogate (next_ch))
{
JERRY_ASSERT (code_unit_size == next_ch_size);
cesu8_str_p += code_unit_size;
}
}
}
const lit_utf8_byte_t *cesu8_pos = cesu8_str_p;
lit_utf8_byte_t *utf8_pos = buffer_p;
lit_utf8_byte_t *utf8_end_pos = buffer_p + buffer_size;
while (end_pos--)
{
ecma_char_t ch;
lit_utf8_size_t code_unit_size = lit_read_code_unit_from_utf8 (cesu8_pos, &ch);
if ((size + code_unit_size) > buffer_size)
{
break;
}
if (((cesu8_pos + code_unit_size) != cesu8_end_pos) && lit_is_code_point_utf16_high_surrogate (ch))
{
ecma_char_t next_ch;
lit_utf8_size_t next_ch_size = lit_read_code_unit_from_utf8 (cesu8_pos + code_unit_size, &next_ch);
if (lit_is_code_point_utf16_low_surrogate (next_ch))
{
JERRY_ASSERT (code_unit_size == next_ch_size);
if ((size + code_unit_size + 1) > buffer_size)
{
break;
}
cesu8_pos += next_ch_size;
lit_code_point_t code_point = lit_convert_surrogate_pair_to_code_point (ch, next_ch);
lit_code_point_to_utf8 (code_point, utf8_pos);
size += (code_unit_size + 1);
}
else
{
memcpy (utf8_pos, cesu8_pos, code_unit_size);
size += code_unit_size;
}
}
else
{
memcpy (utf8_pos, cesu8_pos, code_unit_size);
size += code_unit_size;
}
utf8_pos = buffer_p + size;
cesu8_pos += code_unit_size;
}
JERRY_ASSERT (utf8_pos <= utf8_end_pos);
}
ECMA_FINALIZE_UTF8_STRING (cesu8_str_p, cesu8_str_size);
JERRY_ASSERT (size <= buffer_size);
return size;
} /* ecma_substring_copy_to_utf8_buffer */
/**
* Convert ecma-string's contents to a cesu-8 string and put it to the buffer.
* It is the caller's responsibility to make sure that the string fits in the buffer.
+6
View File
@@ -191,6 +191,12 @@ ecma_substring_copy_to_cesu8_buffer (const ecma_string_t *string_desc_p,
ecma_length_t end_pos,
lit_utf8_byte_t *buffer_p,
lit_utf8_size_t buffer_size);
lit_utf8_size_t
ecma_substring_copy_to_utf8_buffer (const ecma_string_t *string_desc_p,
ecma_length_t start_pos,
ecma_length_t end_pos,
lit_utf8_byte_t *buffer_p,
lit_utf8_size_t buffer_size);
void ecma_string_to_utf8_bytes (const ecma_string_t *string_desc_p, lit_utf8_byte_t *buffer_p,
lit_utf8_size_t buffer_size);
const lit_utf8_byte_t *ecma_string_raw_chars (const ecma_string_t *string_p, lit_utf8_size_t *size_p, bool *is_ascii_p);
+5
View File
@@ -237,6 +237,11 @@ jerry_size_t jerry_substring_to_char_buffer (const jerry_value_t value,
jerry_length_t end_pos,
jerry_char_t *buffer_p,
jerry_size_t buffer_size);
jerry_size_t jerry_substring_to_utf8_char_buffer (const jerry_value_t value,
jerry_length_t start_pos,
jerry_length_t end_pos,
jerry_char_t *buffer_p,
jerry_size_t buffer_size);
/**
* Functions for array object values
+34
View File
@@ -1233,6 +1233,40 @@ jerry_substring_to_char_buffer (const jerry_value_t value, /**< input string val
buffer_size);
} /* jerry_substring_to_char_buffer */
/**
* Copy the characters of an utf-8 encoded substring into a specified buffer.
*
* Note:
* The '\0' character could occur anywhere in the returned string
* Returns 0, if the value parameter is not a string.
* It will extract the substring beetween the specified start position
* and the end position (or the end of the string, whichever comes first).
*
* @return number of bytes copied to the buffer.
*/
jerry_size_t
jerry_substring_to_utf8_char_buffer (const jerry_value_t value, /**< input string value */
jerry_length_t start_pos, /**< position of the first character */
jerry_length_t end_pos, /**< position of the last character */
jerry_char_t *buffer_p, /**< [out] output characters buffer */
jerry_size_t buffer_size) /**< size of output buffer */
{
jerry_assert_api_available ();
if (!ecma_is_value_string (value) || buffer_p == NULL)
{
return 0;
}
ecma_string_t *str_p = ecma_get_string_from_value (value);
return ecma_substring_copy_to_utf8_buffer (str_p,
start_pos,
end_pos,
(lit_utf8_byte_t *) buffer_p,
buffer_size);
} /* jerry_substring_to_utf8_char_buffer */
/**
* Checks whether the object or it's prototype objects have the given property.
*