Copy the characters of an UTF-8 encoded substring into a specified buffer (#1524)

JerryScript-DCO-1.0-Signed-off-by: Robert Sipka rsipka.uszeged@partner.samsung.com
This commit is contained in:
Robert Sipka
2017-01-24 15:04:48 +01:00
committed by László Langó
parent 976c8aee80
commit 124582793f
6 changed files with 264 additions and 2 deletions
+52 -1
View File
@@ -1406,7 +1406,7 @@ jerry_string_to_utf8_char_buffer (const jerry_value_t value,
Copy the characters of a cesu-8 encoded substring into a specified buffer. Copy the characters of a cesu-8 encoded substring into a specified buffer.
The '\0' character could occur in character buffer. Returns 0, if the value The '\0' character could occur in character buffer. Returns 0, if the value
parameter is not a string. It will extract the substring beetween the parameter is not a string. It will extract the substring between the
specified start position and the end position (or the end of the string, specified start position and the end position (or the end of the string,
whichever comes first). whichever comes first).
@@ -1452,6 +1452,57 @@ jerry_substring_to_char_buffer (const jerry_value_t value,
- [jerry_get_string_size](#jerry_get_string_size) - [jerry_get_string_size](#jerry_get_string_size)
- [jerry_get_string_length](#jerry_get_string_length) - [jerry_get_string_length](#jerry_get_string_length)
## jerry_substring_to_utf8_char_buffer
**Summary**
Copy the characters of a UTF-8 encoded substring into a specified buffer.
The '\0' character may occur in the character buffer. Returns 0 if the value
parameter is not a string. It extracts the substring between the specified
start position and the end position (or the end of the string, whichever
comes first).
**Prototype**
```c
jerry_size_t
jerry_substring_to_utf8_char_buffer (const jerry_value_t value,
jerry_length_t start_pos,
jerry_length_t end_pos,
jerry_char_t *buffer_p,
jerry_size_t buffer_size);
```
- `value` - input string value
- `start_pos` - position of the first character
- `end_pos` - position just after the last character to copy (the character at `end_pos` itself is not copied)
- `buffer_p` - pointer to output buffer
- `buffer_size` - size of the buffer
- return value - number of bytes actually copied to the buffer
**Example**
```c
{
jerry_value_t value;
... // create or acquire value
jerry_size_t req_sz = jerry_get_utf8_string_size (value);
jerry_char_t str_buf_p[req_sz];
jerry_length_t start_pos = 0;
jerry_length_t end_pos = jerry_get_utf8_string_length (value);
jerry_substring_to_utf8_char_buffer (value, start_pos, end_pos, str_buf_p, req_sz);
jerry_release_value (value);
}
```
**See also**
- [jerry_create_string_from_utf8](#jerry_create_string_from_utf8)
- [jerry_get_utf8_string_size](#jerry_get_utf8_string_size)
- [jerry_get_utf8_string_length](#jerry_get_utf8_string_length)
# Functions for array object values # Functions for array object values
## jerry_get_array_length ## jerry_get_array_length
+132 -1
View File
@@ -980,7 +980,7 @@ ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-s
} /* ecma_string_copy_to_utf8_buffer */ } /* ecma_string_copy_to_utf8_buffer */
/** /**
* Convert ecma-string's contents to a cesu-8 string, extract the parts of the converted string beetween the specified * Convert ecma-string's contents to a cesu-8 string, extract the parts of the converted string between the specified
* start position and the end position (or the end of the string, whichever comes first), and copy these characters * start position and the end position (or the end of the string, whichever comes first), and copy these characters
* into the buffer. * into the buffer.
* *
@@ -1059,6 +1059,137 @@ ecma_substring_copy_to_cesu8_buffer (const ecma_string_t *string_desc_p, /**< ec
return size; return size;
} /* ecma_substring_copy_to_cesu8_buffer */ } /* ecma_substring_copy_to_cesu8_buffer */
/**
 * Convert ecma-string's contents to a utf-8 string, extract the part of the converted string between the specified
 * start position and the end position (or the end of the string, whichever comes first), and copy these characters
 * into the buffer.
 *
 * Note:
 *      The copied range is [start_pos, end_pos): the character at end_pos itself is not copied.
 *      A high surrogate immediately followed by a low surrogate in the source counts as one character
 *      and is written as a single utf-8 sequence.
 *      A character that does not fit entirely into the remaining buffer space is not copied, and no
 *      terminating zero is appended.
 *
 * @return number of bytes, actually copied to the buffer.
 */
lit_utf8_size_t
ecma_substring_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */
                                    ecma_length_t start_pos, /**< position of the first character */
                                    ecma_length_t end_pos, /**< position after the last character */
                                    lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
                                                                *   (can be NULL if buffer_size == 0) */
                                    lit_utf8_size_t buffer_size) /**< size of buffer */
{
  JERRY_ASSERT (string_desc_p != NULL);
  JERRY_ASSERT (string_desc_p->refs_and_container >= ECMA_STRING_REF_ONE);
  JERRY_ASSERT (buffer_p != NULL || buffer_size == 0);

  lit_utf8_size_t size = 0;

  ecma_length_t utf8_str_length = ecma_string_get_utf8_length (string_desc_p);

  /* Empty or inverted range, or a start position beyond the string: nothing to copy. */
  if (start_pos >= utf8_str_length || start_pos >= end_pos)
  {
    return 0;
  }

  /* Clamp the end position to the end of the string. */
  if (end_pos > utf8_str_length)
  {
    end_pos = utf8_str_length;
  }

  ECMA_STRING_TO_UTF8_STRING (string_desc_p, cesu8_str_p, cesu8_str_size);
  ecma_length_t cesu8_str_length = ecma_string_get_length (string_desc_p);

  /*
   * cesu8_str_p is never modified below, so ECMA_FINALIZE_UTF8_STRING receives exactly the pointer that
   * ECMA_STRING_TO_UTF8_STRING produced.
   * NOTE(review): presumably the finalize macro may release a temporary buffer through this pointer,
   * in which case advancing it (as was done before) would hand back a wrong address - confirm against
   * the macro definitions.
   */
  if (cesu8_str_length == cesu8_str_size)
  {
    /* Length equals byte size: every character occupies one byte, so positions are byte offsets. */
    size = end_pos - start_pos;

    if (size > buffer_size)
    {
      size = buffer_size;
    }

    memcpy (buffer_p, cesu8_str_p + start_pos, size);
  }
  else
  {
    const lit_utf8_byte_t *cesu8_end_pos = cesu8_str_p + cesu8_str_size;
    const lit_utf8_byte_t *cesu8_pos = cesu8_str_p;

    /* From here on end_pos holds the number of characters to copy. */
    end_pos -= start_pos;

    /* Skip the first start_pos characters; a surrogate pair is skipped as one character. */
    while (start_pos--)
    {
      ecma_char_t ch;
      lit_utf8_size_t code_unit_size = lit_read_code_unit_from_utf8 (cesu8_pos, &ch);

      cesu8_pos += code_unit_size;

      if ((cesu8_pos != cesu8_end_pos) && lit_is_code_point_utf16_high_surrogate (ch))
      {
        ecma_char_t next_ch;
        lit_utf8_size_t next_ch_size = lit_read_code_unit_from_utf8 (cesu8_pos, &next_ch);

        if (lit_is_code_point_utf16_low_surrogate (next_ch))
        {
          JERRY_ASSERT (code_unit_size == next_ch_size);
          cesu8_pos += code_unit_size;
        }
      }
    }

    lit_utf8_byte_t *utf8_pos = buffer_p;
    lit_utf8_byte_t *utf8_end_pos = buffer_p + buffer_size;

    /* Copy the requested characters one by one, stopping early if the next one would not fit. */
    while (end_pos--)
    {
      ecma_char_t ch;
      lit_utf8_size_t code_unit_size = lit_read_code_unit_from_utf8 (cesu8_pos, &ch);

      if ((size + code_unit_size) > buffer_size)
      {
        break;
      }

      if (((cesu8_pos + code_unit_size) != cesu8_end_pos) && lit_is_code_point_utf16_high_surrogate (ch))
      {
        ecma_char_t next_ch;
        lit_utf8_size_t next_ch_size = lit_read_code_unit_from_utf8 (cesu8_pos + code_unit_size, &next_ch);

        if (lit_is_code_point_utf16_low_surrogate (next_ch))
        {
          JERRY_ASSERT (code_unit_size == next_ch_size);

          /* The combined code point is written as code_unit_size + 1 bytes. */
          if ((size + code_unit_size + 1) > buffer_size)
          {
            break;
          }

          /* Consume the low surrogate here; the high surrogate is consumed at the end of the iteration. */
          cesu8_pos += next_ch_size;

          lit_code_point_t code_point = lit_convert_surrogate_pair_to_code_point (ch, next_ch);
          lit_code_point_to_utf8 (code_point, utf8_pos);
          size += (code_unit_size + 1);
        }
        else
        {
          /* High surrogate without a matching low surrogate: copied unchanged. */
          memcpy (utf8_pos, cesu8_pos, code_unit_size);
          size += code_unit_size;
        }
      }
      else
      {
        memcpy (utf8_pos, cesu8_pos, code_unit_size);
        size += code_unit_size;
      }

      utf8_pos = buffer_p + size;
      cesu8_pos += code_unit_size;
    }

    JERRY_ASSERT (utf8_pos <= utf8_end_pos);
  }

  ECMA_FINALIZE_UTF8_STRING (cesu8_str_p, cesu8_str_size);
  JERRY_ASSERT (size <= buffer_size);

  return size;
} /* ecma_substring_copy_to_utf8_buffer */
/** /**
* Convert ecma-string's contents to a cesu-8 string and put it to the buffer. * Convert ecma-string's contents to a cesu-8 string and put it to the buffer.
* It is the caller's responsibility to make sure that the string fits in the buffer. * It is the caller's responsibility to make sure that the string fits in the buffer.
+6
View File
@@ -191,6 +191,12 @@ ecma_substring_copy_to_cesu8_buffer (const ecma_string_t *string_desc_p,
ecma_length_t end_pos, ecma_length_t end_pos,
lit_utf8_byte_t *buffer_p, lit_utf8_byte_t *buffer_p,
lit_utf8_size_t buffer_size); lit_utf8_size_t buffer_size);
/* Copy the [start_pos, end_pos) part of the ecma-string, converted to utf-8, into buffer_p;
 * returns the number of bytes copied. */
lit_utf8_size_t
ecma_substring_copy_to_utf8_buffer (const ecma_string_t *string_desc_p,
ecma_length_t start_pos,
ecma_length_t end_pos,
lit_utf8_byte_t *buffer_p,
lit_utf8_size_t buffer_size);
void ecma_string_to_utf8_bytes (const ecma_string_t *string_desc_p, lit_utf8_byte_t *buffer_p, void ecma_string_to_utf8_bytes (const ecma_string_t *string_desc_p, lit_utf8_byte_t *buffer_p,
lit_utf8_size_t buffer_size); lit_utf8_size_t buffer_size);
const lit_utf8_byte_t *ecma_string_raw_chars (const ecma_string_t *string_p, lit_utf8_size_t *size_p, bool *is_ascii_p); const lit_utf8_byte_t *ecma_string_raw_chars (const ecma_string_t *string_p, lit_utf8_size_t *size_p, bool *is_ascii_p);
+5
View File
@@ -237,6 +237,11 @@ jerry_size_t jerry_substring_to_char_buffer (const jerry_value_t value,
jerry_length_t end_pos, jerry_length_t end_pos,
jerry_char_t *buffer_p, jerry_char_t *buffer_p,
jerry_size_t buffer_size); jerry_size_t buffer_size);
/* Copy a utf-8 encoded substring of a string value into buffer_p; returns the number of bytes copied
 * (0 if the value is not a string). */
jerry_size_t jerry_substring_to_utf8_char_buffer (const jerry_value_t value,
jerry_length_t start_pos,
jerry_length_t end_pos,
jerry_char_t *buffer_p,
jerry_size_t buffer_size);
/** /**
* Functions for array object values * Functions for array object values
+34
View File
@@ -1233,6 +1233,40 @@ jerry_substring_to_char_buffer (const jerry_value_t value, /**< input string val
buffer_size); buffer_size);
} /* jerry_substring_to_char_buffer */ } /* jerry_substring_to_char_buffer */
/**
 * Copy the characters of a utf-8 encoded substring into the given buffer.
 *
 * Note:
 *      The '\0' character could occur anywhere in the returned string.
 *      Returns 0, if the value parameter is not a string or the buffer pointer is NULL.
 *      The substring between the specified start position and the end position
 *      (or the end of the string, whichever comes first) is extracted.
 *
 * @return number of bytes copied to the buffer.
 */
jerry_size_t
jerry_substring_to_utf8_char_buffer (const jerry_value_t value, /**< input string value */
                                     jerry_length_t start_pos, /**< position of the first character */
                                     jerry_length_t end_pos, /**< end position of the substring */
                                     jerry_char_t *buffer_p, /**< [out] output characters buffer */
                                     jerry_size_t buffer_size) /**< size of output buffer */
{
  jerry_assert_api_available ();

  /* Only a string value together with a usable output buffer is forwarded to the ecma layer. */
  if (ecma_is_value_string (value) && buffer_p != NULL)
  {
    lit_utf8_byte_t *out_p = (lit_utf8_byte_t *) buffer_p;

    return ecma_substring_copy_to_utf8_buffer (ecma_get_string_from_value (value),
                                               start_pos,
                                               end_pos,
                                               out_p,
                                               buffer_size);
  }

  return 0;
} /* jerry_substring_to_utf8_char_buffer */
/** /**
* Checks whether the object or it's prototype objects have the given property. * Checks whether the object or it's prototype objects have the given property.
* *
+35
View File
@@ -398,6 +398,41 @@ main (void)
TEST_ASSERT (jerry_string_to_utf8_char_buffer (args[0], (jerry_char_t *) test_string, utf8_sz) == 14); TEST_ASSERT (jerry_string_to_utf8_char_buffer (args[0], (jerry_char_t *) test_string, utf8_sz) == 14);
TEST_ASSERT (!strncmp (test_string, "\x73\x74\x72\x3a \xf0\x9d\x94\xa3 \xf0\x9d\x94\xa4", utf8_sz)); TEST_ASSERT (!strncmp (test_string, "\x73\x74\x72\x3a \xf0\x9d\x94\xa3 \xf0\x9d\x94\xa4", utf8_sz));
sz = jerry_substring_to_utf8_char_buffer (args[0], 0, utf8_length, (jerry_char_t *) test_string, utf8_sz);
TEST_ASSERT (sz == 14);
TEST_ASSERT (!strncmp (test_string, "\x73\x74\x72\x3a \xf0\x9d\x94\xa3 \xf0\x9d\x94\xa4", sz));
sz = jerry_substring_to_utf8_char_buffer (args[0], 0, utf8_length + 1, (jerry_char_t *) test_string, utf8_sz);
TEST_ASSERT (sz == 14);
TEST_ASSERT (!strncmp (test_string, "\x73\x74\x72\x3a \xf0\x9d\x94\xa3 \xf0\x9d\x94\xa4", sz));
sz = jerry_substring_to_utf8_char_buffer (args[0], utf8_length, 0, (jerry_char_t *) test_string, utf8_sz);
TEST_ASSERT (sz == 0);
sz = jerry_substring_to_utf8_char_buffer (args[0], 0, utf8_length, (jerry_char_t *) test_string, utf8_sz - 1);
TEST_ASSERT (sz == 10);
TEST_ASSERT (!strncmp (test_string, "\x73\x74\x72\x3a \xf0\x9d\x94\xa3 ", sz));
sz = jerry_substring_to_utf8_char_buffer (args[0], 0, utf8_length - 1, (jerry_char_t *) test_string, utf8_sz);
TEST_ASSERT (sz == 10);
TEST_ASSERT (!strncmp (test_string, "\x73\x74\x72\x3a \xf0\x9d\x94\xa3 ", sz));
sz = jerry_substring_to_utf8_char_buffer (args[0],
utf8_length - 2,
utf8_length - 1,
(jerry_char_t *) test_string,
utf8_sz);
TEST_ASSERT (sz == 1);
TEST_ASSERT (!strncmp (test_string, " ", sz));
sz = jerry_substring_to_utf8_char_buffer (args[0],
utf8_length - 3,
utf8_length - 2,
(jerry_char_t *) test_string,
utf8_sz);
TEST_ASSERT (sz == 4);
TEST_ASSERT (!strncmp (test_string, "\xf0\x9d\x94\xa3", sz));
jerry_release_value (args[0]); jerry_release_value (args[0]);
/* Test string: 'str: {DESERET CAPITAL LETTER LONG I}' */ /* Test string: 'str: {DESERET CAPITAL LETTER LONG I}' */