From 7139f0172afbada6fb6bbbcfcd6c1bdd59b13209 Mon Sep 17 00:00:00 2001 From: Robert Sipka Date: Fri, 13 Jan 2017 13:27:07 +0100 Subject: [PATCH] Copy the characters of a cesu-8 encoded substring into a specified buffer (#1516) JerryScript-DCO-1.0-Signed-off-by: Robert Sipka rsipka.uszeged@partner.samsung.com --- docs/02.API-REFERENCE.md | 52 +++++++++++++ jerry-core/ecma/base/ecma-helpers-string.c | 80 +++++++++++++++++++ jerry-core/ecma/base/ecma-helpers.h | 6 ++ jerry-core/jerry-api.h | 5 ++ jerry-core/jerry.c | 34 ++++++++ tests/unit/test-api.c | 91 +++++++++++++++++++++- 6 files changed, 267 insertions(+), 1 deletion(-) diff --git a/docs/02.API-REFERENCE.md b/docs/02.API-REFERENCE.md index e1d506b8b..2cf45077c 100644 --- a/docs/02.API-REFERENCE.md +++ b/docs/02.API-REFERENCE.md @@ -1400,6 +1400,58 @@ jerry_string_to_utf8_char_buffer (const jerry_value_t value, - [jerry_create_string_from_utf8](#jerry_create_string_from_utf8) - [jerry_get_utf8_string_size](#jerry_get_utf8_string_size) +## jerry_substring_to_char_buffer + +**Summary** + +Copy the characters of a cesu-8 encoded substring into a specified buffer. +The '\0' character could occur in the character buffer. Returns 0, if the value +parameter is not a string. It will extract the substring between the +specified start position and the end position (or the end of the string, +whichever comes first). + +**Prototype** + +```c +jerry_size_t +jerry_substring_to_char_buffer (const jerry_value_t value, + jerry_length_t start_pos, + jerry_length_t end_pos, + jerry_char_t *buffer_p, + jerry_size_t buffer_size); +``` + +- `value` - input string value +- `start_pos` - position of the first character +- `end_pos` - position after the last character (exclusive) +- `buffer_p` - pointer to output buffer +- `buffer_size` - size of the buffer +- return value - number of bytes, actually copied to the buffer + +**Example** + +```c +{ + jerry_value_t value; + ... 
// create or acquire value + + jerry_size_t req_sz = jerry_get_string_size (value); + jerry_char_t str_buf_p[req_sz]; + jerry_length_t start_pos = 0; + jerry_length_t end_pos = jerry_get_string_length (value); + + jerry_substring_to_char_buffer (value, start_pos, end_pos, str_buf_p, req_sz); + + jerry_release_value (value); +} +``` + +**See also** + +- [jerry_create_string](#jerry_create_string) +- [jerry_get_string_size](#jerry_get_string_size) +- [jerry_get_string_length](#jerry_get_string_length) + # Functions for array object values ## jerry_get_array_length diff --git a/jerry-core/ecma/base/ecma-helpers-string.c b/jerry-core/ecma/base/ecma-helpers-string.c index e7c1881b2..6bae6d6b7 100644 --- a/jerry-core/ecma/base/ecma-helpers-string.c +++ b/jerry-core/ecma/base/ecma-helpers-string.c @@ -979,6 +979,86 @@ ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-s return size; } /* ecma_string_copy_to_utf8_buffer */ +/** + * Convert ecma-string's contents to a cesu-8 string, extract the parts of the converted string between the specified + * start position and the end position (or the end of the string, whichever comes first), and copy these characters + * into the buffer. + * + * @return number of bytes, actually copied to the buffer. 
+ */ +lit_utf8_size_t +ecma_substring_copy_to_cesu8_buffer (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */ + ecma_length_t start_pos, /**< position of the first character */ + ecma_length_t end_pos, /**< position after the last character */ + lit_utf8_byte_t *buffer_p, /**< destination buffer pointer + * (can be NULL if buffer_size == 0) */ + lit_utf8_size_t buffer_size) /**< size of buffer */ +{ + JERRY_ASSERT (string_desc_p != NULL); + JERRY_ASSERT (string_desc_p->refs_and_container >= ECMA_STRING_REF_ONE); + JERRY_ASSERT (buffer_p != NULL || buffer_size == 0); + + ecma_length_t string_length = ecma_string_get_length (string_desc_p); + lit_utf8_size_t size = 0; + + if (start_pos >= string_length || start_pos >= end_pos) /* nothing to extract */ + { + return 0; + } + + if (end_pos > string_length) /* clamp to the end of the string */ + { + end_pos = string_length; + } + + ECMA_STRING_TO_UTF8_STRING (string_desc_p, utf8_str_p, utf8_str_size); + + const lit_utf8_byte_t *start_p = utf8_str_p; + + if (string_length == utf8_str_size) /* length equals byte size: positions are byte offsets */ + { + start_p += start_pos; + size = end_pos - start_pos; + + if (size > buffer_size) /* truncate to the buffer capacity */ + { + size = buffer_size; + } + + memcpy (buffer_p, start_p, size); + } + else + { + end_pos -= start_pos; /* end_pos now counts the characters to copy */ + while (start_pos--) /* step over the characters before the substring */ + { + start_p += lit_get_unicode_char_size_by_utf8_first_byte (*start_p); + } + + const lit_utf8_byte_t *end_p = start_p; + + while (end_pos--) + { + lit_utf8_size_t code_unit_size = lit_get_unicode_char_size_by_utf8_first_byte (*end_p); + + if ((size + code_unit_size) > buffer_size) /* stop before a character that would not fully fit */ + { + break; + } + + end_p += code_unit_size; + size += code_unit_size; + } + + memcpy (buffer_p, start_p, size); + } + + ECMA_FINALIZE_UTF8_STRING (utf8_str_p, utf8_str_size); + + JERRY_ASSERT (size <= buffer_size); + return size; +} /* ecma_substring_copy_to_cesu8_buffer */ + /** * Convert ecma-string's contents to a cesu-8 string and put it to the buffer. * It is the caller's responsibility to make sure that the string fits in the buffer. 
diff --git a/jerry-core/ecma/base/ecma-helpers.h b/jerry-core/ecma/base/ecma-helpers.h index 1d70e1c21..dc134dc03 100644 --- a/jerry-core/ecma/base/ecma-helpers.h +++ b/jerry-core/ecma/base/ecma-helpers.h @@ -185,6 +185,12 @@ lit_utf8_size_t __attr_return_value_should_be_checked___ ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, lit_utf8_byte_t *buffer_p, lit_utf8_size_t buffer_size); +lit_utf8_size_t +ecma_substring_copy_to_cesu8_buffer (const ecma_string_t *string_desc_p, + ecma_length_t start_pos, + ecma_length_t end_pos, + lit_utf8_byte_t *buffer_p, + lit_utf8_size_t buffer_size); void ecma_string_to_utf8_bytes (const ecma_string_t *string_desc_p, lit_utf8_byte_t *buffer_p, lit_utf8_size_t buffer_size); const lit_utf8_byte_t *ecma_string_raw_chars (const ecma_string_t *string_p, lit_utf8_size_t *size_p, bool *is_ascii_p); diff --git a/jerry-core/jerry-api.h b/jerry-core/jerry-api.h index dcdceea44..aa3b274f8 100644 --- a/jerry-core/jerry-api.h +++ b/jerry-core/jerry-api.h @@ -233,6 +233,11 @@ jerry_size_t jerry_string_to_char_buffer (const jerry_value_t value, jerry_char_ jerry_size_t jerry_string_to_utf8_char_buffer (const jerry_value_t value, jerry_char_t *buffer_p, jerry_size_t buffer_size); +jerry_size_t jerry_substring_to_char_buffer (const jerry_value_t value, + jerry_length_t start_pos, + jerry_length_t end_pos, + jerry_char_t *buffer_p, + jerry_size_t buffer_size); /** * Functions for array object values diff --git a/jerry-core/jerry.c b/jerry-core/jerry.c index 078435e57..d2e14693b 100644 --- a/jerry-core/jerry.c +++ b/jerry-core/jerry.c @@ -1199,6 +1199,40 @@ jerry_string_to_utf8_char_buffer (const jerry_value_t value, /**< input string v buffer_size); } /* jerry_string_to_utf8_char_buffer */ +/** + * Copy the characters of a cesu-8 encoded substring into a specified buffer. + * + * Note: + * The '\0' character could occur anywhere in the returned string + * Returns 0, if the value parameter is not a string. 
+ * It will extract the substring between the specified start position + * and the exclusive end position (or the end of the string, whichever comes first). + * + * @return number of bytes copied to the buffer. + */ +jerry_size_t +jerry_substring_to_char_buffer (const jerry_value_t value, /**< input string value */ + jerry_length_t start_pos, /**< position of the first character */ + jerry_length_t end_pos, /**< position after the last character */ + jerry_char_t *buffer_p, /**< [out] output characters buffer */ + jerry_size_t buffer_size) /**< size of output buffer */ +{ + jerry_assert_api_available (); + + if (!ecma_is_value_string (value) || buffer_p == NULL) /* non-string input or no output buffer */ + { + return 0; + } + + ecma_string_t *str_p = ecma_get_string_from_value (value); + + return ecma_substring_copy_to_cesu8_buffer (str_p, + start_pos, + end_pos, + (lit_utf8_byte_t *) buffer_p, + buffer_size); +} /* jerry_substring_to_char_buffer */ + /** * Checks whether the object or it's prototype objects have the given property. * diff --git a/tests/unit/test-api.c b/tests/unit/test-api.c index e2600078f..4192b2771 100644 --- a/tests/unit/test-api.c +++ b/tests/unit/test-api.c @@ -376,7 +376,7 @@ main (void) jerry_string_to_utf8_char_buffer (args[0], (jerry_char_t *) string_from_utf8_string, utf8_sz); jerry_string_to_utf8_char_buffer (args[1], (jerry_char_t *) string_from_cesu8_string, cesu8_sz); - TEST_ASSERT (!strncmp (string_from_utf8, string_from_cesu8, utf8_sz)); + TEST_ASSERT (!strncmp (string_from_utf8_string, string_from_cesu8_string, utf8_sz)); jerry_release_value (args[0]); jerry_release_value (args[1]); @@ -430,6 +430,95 @@ main (void) TEST_ASSERT (utf8_sz == 12); jerry_release_value (args[0]); + /* Test jerry_substring_to_char_buffer */ + args[0] = jerry_create_string ((jerry_char_t *) "an ascii string"); + + /* Buffer size */ + cesu8_sz = 5; + + char substring[cesu8_sz]; + sz = jerry_substring_to_char_buffer (args[0], 3, 8, (jerry_char_t *) substring, cesu8_sz); + TEST_ASSERT (sz == 5); + TEST_ASSERT 
(!strncmp (substring, "ascii", sz)); + + /* Buffer size is 5, substring length is 11 => copied only the first 5 char */ + sz = jerry_substring_to_char_buffer (args[0], 0, 11, (jerry_char_t *) substring, cesu8_sz); + + TEST_ASSERT (sz == 5); + TEST_ASSERT (!strncmp (substring, "an as", sz)); + + /* Position of the first character is greater than the string length */ + sz = jerry_substring_to_char_buffer (args[0], 16, 21, (jerry_char_t *) substring, cesu8_sz); + TEST_ASSERT (sz == 0); + + sz = jerry_substring_to_char_buffer (args[0], 14, 15, (jerry_char_t *) substring, cesu8_sz); + TEST_ASSERT (sz == 1); + TEST_ASSERT (!strncmp (substring, "g", sz)); + + sz = jerry_substring_to_char_buffer (args[0], 0, 1, (jerry_char_t *) substring, cesu8_sz); + TEST_ASSERT (sz == 1); + TEST_ASSERT (!strncmp (substring, "a", sz)); + + cesu8_length = jerry_get_string_length (args[0]); + cesu8_sz = jerry_get_string_size (args[0]); + TEST_ASSERT (cesu8_length == 15); + TEST_ASSERT (cesu8_length == cesu8_sz); + + sz = jerry_substring_to_char_buffer (args[0], 0, cesu8_length, (jerry_char_t *) substring, cesu8_sz); + TEST_ASSERT (sz = 15); + TEST_ASSERT (!strncmp (substring, "an ascii string", sz)); + + jerry_release_value (args[0]); + + /* Test jerry_substring_to_char_buffer: '0101' */ + args[0] = jerry_create_string ((jerry_char_t *) "0101"); + cesu8_sz = jerry_get_string_size (args[0]); + + char number_substring[cesu8_sz]; + + sz = jerry_substring_to_char_buffer (args[0], 1, 3, (jerry_char_t *) number_substring, cesu8_sz); + TEST_ASSERT (sz == 2); + TEST_ASSERT (!strncmp (number_substring, "10", sz)); + + jerry_release_value (args[0]); + + /* Test jerry_substring_to_char_buffer: 'str: {greek zero sign}' */ + args[0] = jerry_create_string ((jerry_char_t *) "\x73\x74\x72\x3a \xed\xa0\x80\xed\xb6\x8a"); + cesu8_sz = jerry_get_string_size (args[0]); + cesu8_length = jerry_get_string_length (args[0]); + TEST_ASSERT (cesu8_sz == 11); + TEST_ASSERT (cesu8_length = 7); + + char 
supl_substring[cesu8_sz]; + + sz = jerry_substring_to_char_buffer (args[0], 0, cesu8_length, (jerry_char_t *) supl_substring, cesu8_sz); + TEST_ASSERT (sz == 11); + TEST_ASSERT (!strncmp (supl_substring, "\x73\x74\x72\x3a \xed\xa0\x80\xed\xb6\x8a", sz)); + + /* Decrease the buffer size => the low surrogate char will not fit into the buffer */ + cesu8_sz -= 1; + sz = jerry_substring_to_char_buffer (args[0], 0, cesu8_length, (jerry_char_t *) supl_substring, cesu8_sz); + TEST_ASSERT (sz == 8); + TEST_ASSERT (!strncmp (supl_substring, "\x73\x74\x72\x3a \xed\xa0\x80", sz)); + + sz = jerry_substring_to_char_buffer (args[0], + cesu8_length - 1, + cesu8_length, + (jerry_char_t *) supl_substring, + cesu8_sz); + TEST_ASSERT (sz == 3); + TEST_ASSERT (!strncmp (supl_substring, "\xed\xb6\x8a", sz)); + + sz = jerry_substring_to_char_buffer (args[0], + cesu8_length - 2, + cesu8_length - 1, + (jerry_char_t *) supl_substring, + cesu8_sz); + TEST_ASSERT (sz == 3); + TEST_ASSERT (!strncmp (supl_substring, "\xed\xa0\x80", sz)); + + jerry_release_value (args[0]); + /* Get global.boo (non-existing field) */ val_t = get_property (global_obj_val, "boo"); TEST_ASSERT (!jerry_value_has_error_flag (val_t));