Add core unicode functionality.

Add utf-8 processing routines.
Change ecma_char_t from char/uint16_t to uint16_t.
Apply all utf-8 processing routines.
Change char to jerry_api_char_t in API functions' declarations.

JerryScript-DCO-1.0-Signed-off-by: Andrey Shitov a.shitov@samsung.com
This commit is contained in:
Andrey Shitov
2015-06-29 19:17:17 +03:00
parent c4b0cd2196
commit fd9ff8e3bd
56 changed files with 2468 additions and 1480 deletions
@@ -24,6 +24,7 @@
#include "ecma-string-object.h"
#include "ecma-try-catch-macro.h"
#include "jrt.h"
#include "lit-magic-strings.h"
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ERROR_BUILTINS
@@ -131,48 +132,43 @@ ecma_builtin_error_prototype_object_to_string (ecma_value_t this_arg) /**< this
}
else
{
const ecma_char_t *colon_zt_magic_string_p = lit_get_magic_string_zt (LIT_MAGIC_STRING_COLON_CHAR);
const ecma_char_t *space_zt_magic_string_p = lit_get_magic_string_zt (LIT_MAGIC_STRING_SPACE_CHAR);
const lit_utf8_size_t size = (ecma_string_get_size (name_string_p) +
ecma_string_get_size (msg_string_p) +
lit_get_magic_string_size (LIT_MAGIC_STRING_COLON_CHAR) +
lit_get_magic_string_size (LIT_MAGIC_STRING_SPACE_CHAR));
const ecma_length_t len = (ecma_string_get_length (name_string_p) +
ecma_string_get_length (msg_string_p) +
ecma_zt_string_length (colon_zt_magic_string_p) +
ecma_zt_string_length (space_zt_magic_string_p));
const ssize_t buffer_size = (ssize_t) ((len + 1) * sizeof (ecma_char_t));
const ssize_t buffer_size = (ssize_t) size;
ssize_t buffer_size_left = buffer_size;
MEM_DEFINE_LOCAL_ARRAY (ret_str_buffer, buffer_size, ecma_char_t);
ecma_char_t *ret_str_buffer_p = ret_str_buffer;
MEM_DEFINE_LOCAL_ARRAY (ret_str_buffer, buffer_size, lit_utf8_byte_t);
lit_utf8_byte_t *ret_str_buffer_p = ret_str_buffer;
ssize_t bytes = ecma_string_to_zt_string (name_string_p, ret_str_buffer_p, buffer_size_left);
JERRY_ASSERT (bytes >= 1 && buffer_size_left - bytes >= 0);
ssize_t bytes = ecma_string_to_utf8_string (name_string_p, ret_str_buffer_p, buffer_size_left);
JERRY_ASSERT (bytes >= 0 && buffer_size_left - bytes >= 0);
buffer_size_left -= bytes - 1 /* null character */;
ret_str_buffer_p = (ecma_char_t*) ((uint8_t*) ret_str_buffer + (buffer_size - buffer_size_left));
buffer_size_left -= bytes;
ret_str_buffer_p = ret_str_buffer + buffer_size - buffer_size_left;
ret_str_buffer_p = ecma_copy_zt_string_to_buffer (colon_zt_magic_string_p,
ret_str_buffer_p,
buffer_size_left);
buffer_size_left = buffer_size - (ret_str_buffer_p - ret_str_buffer) * (ssize_t) sizeof (ecma_char_t);
ret_str_buffer_p = lit_copy_magic_string_to_buffer (LIT_MAGIC_STRING_COLON_CHAR,
ret_str_buffer_p,
buffer_size_left);
buffer_size_left = buffer_size - (ret_str_buffer_p - ret_str_buffer);
JERRY_ASSERT (buffer_size_left >= 0);
ret_str_buffer_p = ecma_copy_zt_string_to_buffer (space_zt_magic_string_p,
ret_str_buffer_p,
buffer_size_left);
buffer_size_left = buffer_size - (ret_str_buffer_p - ret_str_buffer) * (ssize_t) sizeof (ecma_char_t);
ret_str_buffer_p = lit_copy_magic_string_to_buffer (LIT_MAGIC_STRING_SPACE_CHAR,
ret_str_buffer_p,
buffer_size_left);
buffer_size_left = buffer_size - (ret_str_buffer_p - ret_str_buffer);
JERRY_ASSERT (buffer_size_left >= 0);
bytes = ecma_string_to_zt_string (msg_string_p, ret_str_buffer_p, buffer_size_left);
JERRY_ASSERT (bytes >= 1 && buffer_size_left - bytes >= 0);
bytes = ecma_string_to_utf8_string (msg_string_p, ret_str_buffer_p, buffer_size_left);
JERRY_ASSERT (bytes >= 0 && buffer_size_left - bytes >= 0);
buffer_size_left -= bytes - 1 /* null character */;
ret_str_buffer_p = (ecma_char_t*) ((uint8_t*) ret_str_buffer + (buffer_size - buffer_size_left));
buffer_size_left -= bytes;
JERRY_ASSERT (buffer_size_left >= 0);
JERRY_ASSERT (buffer_size_left >= (ssize_t) sizeof (ecma_char_t));
*ret_str_buffer_p = ECMA_CHAR_NULL;
ret_str_p = ecma_new_ecma_string (ret_str_buffer);
ret_str_p = ecma_new_ecma_string_from_utf8 (ret_str_buffer,
(jerry_api_size_t) (buffer_size - buffer_size_left));
MEM_FINALIZE_LOCAL_ARRAY (ret_str_buffer);
}
@@ -21,6 +21,7 @@
#include "ecma-lex-env.h"
#include "ecma-try-catch-macro.h"
#include "serializer.h"
#include "lit-magic-strings.h"
#include "parser.h"
#define ECMA_BUILTINS_INTERNAL
@@ -73,22 +74,22 @@ ecma_builtin_function_dispatch_construct (const ecma_value_t *arguments_list_p,
/* Last string, if any, is the function's body, and the rest, if any - are the function's parameter names */
MEM_DEFINE_LOCAL_ARRAY (string_params_p,
arguments_list_len == 0 ? 1 : arguments_list_len,
ecma_string_t*);
ecma_string_t *);
uint32_t params_count;
size_t zt_strings_buffer_size;
size_t strings_buffer_size;
if (arguments_list_len == 0)
{
/* 3. */
string_params_p[0] = ecma_new_ecma_string_from_magic_string_id (LIT_MAGIC_STRING__EMPTY);
zt_strings_buffer_size = sizeof (ecma_char_t);
strings_buffer_size = lit_get_magic_string_size (LIT_MAGIC_STRING__EMPTY);
params_count = 1;
}
else
{
/* 4., 5., 6. */
zt_strings_buffer_size = 0;
strings_buffer_size = 0;
params_count = 0;
while (params_count < arguments_list_len
@@ -99,8 +100,7 @@ ecma_builtin_function_dispatch_construct (const ecma_value_t *arguments_list_p,
ret_value);
string_params_p[params_count] = ecma_copy_or_ref_ecma_string (ecma_get_string_from_value (str_arg_value));
zt_strings_buffer_size += ((size_t) ecma_string_get_length (string_params_p[params_count]) +
sizeof (ecma_char_t));
strings_buffer_size += ecma_string_get_size (string_params_p[params_count]);
params_count++;
ECMA_FINALIZE (str_arg_value);
@@ -111,30 +111,35 @@ ecma_builtin_function_dispatch_construct (const ecma_value_t *arguments_list_p,
{
JERRY_ASSERT (params_count >= 1);
MEM_DEFINE_LOCAL_ARRAY (zt_string_params_p,
MEM_DEFINE_LOCAL_ARRAY (utf8_string_params_p,
params_count,
ecma_char_t*);
MEM_DEFINE_LOCAL_ARRAY (zt_string_buffer_p,
zt_strings_buffer_size,
ecma_char_t);
lit_utf8_byte_t *);
MEM_DEFINE_LOCAL_ARRAY (utf8_string_params_size,
params_count,
size_t);
MEM_DEFINE_LOCAL_ARRAY (utf8_string_buffer_p,
strings_buffer_size,
lit_utf8_byte_t);
ssize_t zt_string_buffer_pos = 0;
ssize_t utf8_string_buffer_pos = 0;
for (uint32_t i = 0; i < params_count; i++)
{
ssize_t sz = ecma_string_to_zt_string (string_params_p[i],
&zt_string_buffer_p[zt_string_buffer_pos],
(ssize_t) zt_strings_buffer_size - zt_string_buffer_pos);
JERRY_ASSERT (sz > 0);
ssize_t sz = ecma_string_to_utf8_string (string_params_p[i],
&utf8_string_buffer_p[utf8_string_buffer_pos],
(ssize_t) strings_buffer_size - utf8_string_buffer_pos);
JERRY_ASSERT (sz >= 0);
zt_string_params_p[i] = zt_string_buffer_p + zt_string_buffer_pos;
utf8_string_params_p[i] = utf8_string_buffer_p + utf8_string_buffer_pos;
utf8_string_params_size[i] = (size_t) sz;
zt_string_buffer_pos += sz;
utf8_string_buffer_pos += sz;
}
const opcode_t* opcodes_p;
bool is_syntax_correct;
is_syntax_correct = parser_parse_new_function ((const char **) zt_string_params_p,
is_syntax_correct = parser_parse_new_function ((const jerry_api_char_t **) utf8_string_params_p,
utf8_string_params_size,
params_count,
&opcodes_p);
@@ -180,8 +185,9 @@ ecma_builtin_function_dispatch_construct (const ecma_value_t *arguments_list_p,
ret_value = ecma_make_normal_completion_value (ecma_make_object_value (func_obj_p));
}
MEM_FINALIZE_LOCAL_ARRAY (zt_string_buffer_p);
MEM_FINALIZE_LOCAL_ARRAY (zt_string_params_p);
MEM_FINALIZE_LOCAL_ARRAY (utf8_string_buffer_p);
MEM_FINALIZE_LOCAL_ARRAY (utf8_string_params_size);
MEM_FINALIZE_LOCAL_ARRAY (utf8_string_params_p);
}
for (uint32_t i = 0; i < params_count; i++)
@@ -24,6 +24,7 @@
#include "ecma-helpers.h"
#include "ecma-try-catch-macro.h"
#include "jrt.h"
#include "lit-magic-strings.h"
#include "vm.h"
#include "jrt-libc-includes.h"
@@ -108,22 +109,22 @@ ecma_builtin_global_object_parse_int (ecma_value_t this_arg __attr_unused___, /*
ECMA_TRY_CATCH (string_var, ecma_op_to_string (string), ret_value);
ecma_string_t *number_str_p = ecma_get_string_from_value (string_var);
ecma_length_t string_len = ecma_string_get_length (number_str_p);
ecma_length_t str_size = ecma_string_get_length (number_str_p);
MEM_DEFINE_LOCAL_ARRAY (zt_string_buff, string_len + 1, ecma_char_t);
MEM_DEFINE_LOCAL_ARRAY (utf8_string_buff, str_size + 1, lit_utf8_byte_t);
size_t string_buf_size = (size_t) (string_len + 1) * sizeof (ecma_char_t);
ssize_t bytes_copied = ecma_string_to_zt_string (number_str_p,
zt_string_buff,
(ssize_t) string_buf_size);
JERRY_ASSERT (bytes_copied > 0);
ssize_t bytes_copied = ecma_string_to_utf8_string (number_str_p,
utf8_string_buff,
(ssize_t) str_size);
JERRY_ASSERT (bytes_copied >= 0);
utf8_string_buff[str_size] = LIT_BYTE_NULL;
/* 2. Remove leading whitespace. */
ecma_length_t start = string_len;
ecma_length_t end = string_len;
ecma_length_t start = str_size;
ecma_length_t end = str_size;
for (ecma_length_t i = 0; i < end; i++)
{
if (!(isspace (zt_string_buff[i])))
if (!(isspace (utf8_string_buff[i])))
{
start = i;
break;
@@ -134,13 +135,13 @@ ecma_builtin_global_object_parse_int (ecma_value_t this_arg __attr_unused___, /*
int sign = 1;
/* 4. */
if (zt_string_buff[start] == '-')
if (utf8_string_buff[start] == '-')
{
sign = -1;
}
/* 5. */
if (zt_string_buff[start] == '-' || zt_string_buff[start] == '+')
if (utf8_string_buff[start] == '-' || utf8_string_buff[start] == '+')
{
start++;
}
@@ -180,8 +181,8 @@ ecma_builtin_global_object_parse_int (ecma_value_t this_arg __attr_unused___, /*
if (strip_prefix)
{
if (end - start >= 2
&& zt_string_buff[start] == '0'
&& (zt_string_buff[start + 1] == 'x' || zt_string_buff[start + 1] == 'X'))
&& utf8_string_buff[start] == '0'
&& (utf8_string_buff[start + 1] == 'x' || utf8_string_buff[start + 1] == 'X'))
{
start += 2;
@@ -190,27 +191,27 @@ ecma_builtin_global_object_parse_int (ecma_value_t this_arg __attr_unused___, /*
}
/* 11. Check if characters are in [0, Radix - 1]. We also convert them to number values in the process. */
for (ecma_length_t i = start; i < end; i++)
for (lit_utf8_size_t i = start; i < end; i++)
{
if ((zt_string_buff[i]) >= 'a' && zt_string_buff[i] <= 'z')
if ((utf8_string_buff[i]) >= 'a' && utf8_string_buff[i] <= 'z')
{
zt_string_buff[i] = (ecma_char_t) (zt_string_buff[i] - 'a' + 10);
utf8_string_buff[i] = (lit_utf8_byte_t) (utf8_string_buff[i] - 'a' + 10);
}
else if (zt_string_buff[i] >= 'A' && zt_string_buff[i] <= 'Z')
else if (utf8_string_buff[i] >= 'A' && utf8_string_buff[i] <= 'Z')
{
zt_string_buff[i] = (ecma_char_t) (zt_string_buff[i] - 'A' + 10);
utf8_string_buff[i] = (lit_utf8_byte_t) (utf8_string_buff[i] - 'A' + 10);
}
else if (isdigit (zt_string_buff[i]))
else if (isdigit (utf8_string_buff[i]))
{
zt_string_buff[i] = (ecma_char_t) (zt_string_buff[i] - '0');
utf8_string_buff[i] = (lit_utf8_byte_t) (utf8_string_buff[i] - '0');
}
else
{
/* Not a valid number char, set value to radix so it fails to pass as a valid character. */
zt_string_buff[i] = (ecma_char_t) rad;
utf8_string_buff[i] = (lit_utf8_byte_t) rad;
}
if (!(zt_string_buff[i] < rad))
if (!(utf8_string_buff[i] < rad))
{
end = i;
break;
@@ -235,7 +236,7 @@ ecma_builtin_global_object_parse_int (ecma_value_t this_arg __attr_unused___, /*
/* 13. and 14. */
for (int32_t i = (int32_t) end - 1; i >= (int32_t) start; i--)
{
*value_p += (ecma_number_t) zt_string_buff[i] * multiplier;
*value_p += (ecma_number_t) utf8_string_buff[i] * multiplier;
multiplier *= (ecma_number_t) rad;
}
@@ -249,7 +250,7 @@ ecma_builtin_global_object_parse_int (ecma_value_t this_arg __attr_unused___, /*
}
ECMA_OP_TO_NUMBER_FINALIZE (radix_num);
MEM_FINALIZE_LOCAL_ARRAY (zt_string_buff);
MEM_FINALIZE_LOCAL_ARRAY (utf8_string_buff);
ECMA_FINALIZE (string_var);
return ret_value;
} /* ecma_builtin_global_object_parse_int */
@@ -273,21 +274,21 @@ ecma_builtin_global_object_parse_float (ecma_value_t this_arg __attr_unused___,
ECMA_TRY_CATCH (string_var, ecma_op_to_string (string), ret_value);
ecma_string_t *number_str_p = ecma_get_string_from_value (string_var);
ecma_length_t string_len = ecma_string_get_length (number_str_p);
lit_utf8_size_t str_size = ecma_string_get_size (number_str_p);
MEM_DEFINE_LOCAL_ARRAY (zt_string_buff, string_len + 1, ecma_char_t);
MEM_DEFINE_LOCAL_ARRAY (utf8_string_buff, str_size + 1, lit_utf8_byte_t);
size_t string_buf_size = (size_t) (string_len + 1) * sizeof (ecma_char_t);
ssize_t bytes_copied = ecma_string_to_zt_string (number_str_p,
zt_string_buff,
(ssize_t) string_buf_size);
JERRY_ASSERT (bytes_copied > 0);
ssize_t bytes_copied = ecma_string_to_utf8_string (number_str_p,
utf8_string_buff,
(ssize_t) str_size);
JERRY_ASSERT (bytes_copied >= 0);
utf8_string_buff[str_size] = LIT_BYTE_NULL;
/* 2. Find first non whitespace char. */
ecma_length_t start = 0;
for (ecma_length_t i = 0; i < string_len; i++)
lit_utf8_size_t start = 0;
for (lit_utf8_size_t i = 0; i < str_size; i++)
{
if (!isspace (zt_string_buff[i]))
if (!isspace (utf8_string_buff[i]))
{
start = i;
break;
@@ -297,12 +298,12 @@ ecma_builtin_global_object_parse_float (ecma_value_t this_arg __attr_unused___,
bool sign = false;
/* Check if sign is present. */
if (zt_string_buff[start] == '-')
if (utf8_string_buff[start] == '-')
{
sign = true;
start++;
}
else if (zt_string_buff[start] == '+')
else if (utf8_string_buff[start] == '+')
{
start++;
}
@@ -310,11 +311,11 @@ ecma_builtin_global_object_parse_float (ecma_value_t this_arg __attr_unused___,
ecma_number_t *ret_num_p = ecma_alloc_number ();
/* Check if string is equal to "Infinity". */
const ecma_char_t *infinity_zt_str_p = lit_get_magic_string_zt (LIT_MAGIC_STRING_INFINITY_UL);
const lit_utf8_byte_t *infinity_utf8_str_p = lit_get_magic_string_utf8 (LIT_MAGIC_STRING_INFINITY_UL);
for (ecma_length_t i = 0; infinity_zt_str_p[i] == zt_string_buff[start + i]; i++)
for (lit_utf8_size_t i = 0; infinity_utf8_str_p[i] == utf8_string_buff[start + i]; i++)
{
if (infinity_zt_str_p[i + 1] == 0)
if (infinity_utf8_str_p[i + 1] == 0)
{
*ret_num_p = ecma_number_make_infinity (sign);
ret_value = ecma_make_normal_completion_value (ecma_make_number_value (ret_num_p));
@@ -324,19 +325,19 @@ ecma_builtin_global_object_parse_float (ecma_value_t this_arg __attr_unused___,
if (ecma_is_completion_value_empty (ret_value))
{
ecma_length_t current = start;
ecma_length_t end = string_len;
lit_utf8_size_t current = start;
lit_utf8_size_t end = str_size;
bool has_whole_part = false;
bool has_fraction_part = false;
if (isdigit (zt_string_buff[current]))
if (isdigit (utf8_string_buff[current]))
{
has_whole_part = true;
/* Check digits of whole part. */
for (ecma_length_t i = current; i < string_len; i++, current++)
for (lit_utf8_size_t i = current; i < str_size; i++, current++)
{
if (!isdigit (zt_string_buff[current]))
if (!isdigit (utf8_string_buff[current]))
{
break;
}
@@ -346,18 +347,18 @@ ecma_builtin_global_object_parse_float (ecma_value_t this_arg __attr_unused___,
end = current;
/* Check decimal point. */
if (zt_string_buff[current] == '.')
if (utf8_string_buff[current] == '.')
{
current++;
if (isdigit (zt_string_buff[current]))
if (isdigit (utf8_string_buff[current]))
{
has_fraction_part = true;
/* Check digits of fractional part. */
for (ecma_length_t i = current; i < string_len; i++, current++)
for (lit_utf8_size_t i = current; i < str_size; i++, current++)
{
if (!isdigit (zt_string_buff[current]))
if (!isdigit (utf8_string_buff[current]))
{
break;
}
@@ -368,24 +369,24 @@ ecma_builtin_global_object_parse_float (ecma_value_t this_arg __attr_unused___,
}
/* Check exponent. */
if ((zt_string_buff[current] == 'e' || zt_string_buff[current] == 'E')
if ((utf8_string_buff[current] == 'e' || utf8_string_buff[current] == 'E')
&& (has_whole_part || has_fraction_part))
{
current++;
/* Check sign of exponent. */
if (zt_string_buff[current] == '-' || zt_string_buff[current] == '+')
if (utf8_string_buff[current] == '-' || utf8_string_buff[current] == '+')
{
current++;
}
if (isdigit (zt_string_buff[current]))
if (isdigit (utf8_string_buff[current]))
{
/* Check digits of exponent part. */
for (ecma_length_t i = current; i < string_len; i++, current++)
for (lit_utf8_size_t i = current; i < str_size; i++, current++)
{
if (!isdigit (zt_string_buff[current]))
if (!isdigit (utf8_string_buff[current]))
{
break;
}
@@ -402,14 +403,8 @@ ecma_builtin_global_object_parse_float (ecma_value_t this_arg __attr_unused___,
}
else
{
if (end < string_len)
{
/* 4. End of valid number, terminate the string. */
zt_string_buff[end] = '\0';
}
/* 5. */
*ret_num_p = ecma_zt_string_to_number (zt_string_buff + start);
*ret_num_p = ecma_utf8_string_to_number (utf8_string_buff + start, end - start);
if (sign)
{
@@ -420,8 +415,9 @@ ecma_builtin_global_object_parse_float (ecma_value_t this_arg __attr_unused___,
}
}
MEM_FINALIZE_LOCAL_ARRAY (zt_string_buff);
MEM_FINALIZE_LOCAL_ARRAY (utf8_string_buff);
ECMA_FINALIZE (string_var);
return ret_value;
} /* ecma_builtin_global_object_parse_float */
@@ -524,7 +520,7 @@ static uint8_t unescaped_uri_component_set[16] =
* It returns with ECMA_BUILTIN_HEX_TO_BYTE_ERROR if a parse error occurs.
*/
static uint32_t
ecma_builtin_global_object_hex_to_byte (ecma_char_t *source_p) /**< source string */
ecma_builtin_global_object_hex_to_byte (lit_utf8_byte_t *source_p) /**< source string */
{
uint32_t decoded_byte = 0;
@@ -536,7 +532,7 @@ ecma_builtin_global_object_hex_to_byte (ecma_char_t *source_p) /**< source strin
return ECMA_BUILTIN_HEX_TO_BYTE_ERROR;
}
for (int i = 0; i < 2; i++)
for (lit_utf8_size_t i = 0; i < 2; i++)
{
source_p++;
decoded_byte <<= 4;
@@ -581,19 +577,19 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
JERRY_ASSERT (ecma_is_value_string (string));
ecma_string_t *input_string_p = ecma_get_string_from_value (string);
uint32_t input_length = (uint32_t) ecma_string_get_length (input_string_p);
lit_utf8_size_t input_size = ecma_string_get_size (input_string_p);
MEM_DEFINE_LOCAL_ARRAY (input_start_p,
input_length + 1,
ecma_char_t);
input_size,
lit_utf8_byte_t);
ecma_string_to_zt_string (input_string_p,
input_start_p,
(ssize_t) (input_length + 1) * (ssize_t) sizeof (ecma_char_t));
ecma_string_to_utf8_string (input_string_p,
input_start_p,
(ssize_t) (input_size));
ecma_char_t *input_char_p = input_start_p;
ecma_char_t *input_end_p = input_start_p + input_length;
uint32_t output_length = 1;
lit_utf8_byte_t *input_char_p = input_start_p;
lit_utf8_byte_t *input_end_p = input_start_p + input_size;
lit_utf8_size_t output_size = 0;
/*
* The URI decoding has two major phases: first we validate the input,
@@ -605,7 +601,7 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
/* Input validation. */
if (*input_char_p != '%')
{
output_length++;
output_size++;
input_char_p++;
continue;
}
@@ -628,11 +624,11 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
if (ecma_builtin_global_object_character_is_in (decoded_byte, reserved_uri_bitset)
&& !ecma_builtin_global_object_character_is_in (decoded_byte, unescaped_uri_component_set))
{
output_length += 3;
output_size += 3;
}
else
{
output_length++;
output_size++;
}
}
else if (decoded_byte < 0xc0 || decoded_byte >= 0xf8)
@@ -670,6 +666,8 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
character = decoded_byte & 0x07;
}
output_size += (count + 1);
do
{
decoded_byte = ecma_builtin_global_object_hex_to_byte (input_char_p);
@@ -701,19 +699,17 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
ret_value = ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_URI));
break;
}
output_length += (character <= 0xffff) ? 1 : 2;
}
}
if (ecma_is_completion_value_empty (ret_value))
{
MEM_DEFINE_LOCAL_ARRAY (output_start_p,
output_length,
ecma_char_t);
output_size,
lit_utf8_byte_t);
input_char_p = input_start_p;
ecma_char_t *output_char_p = output_start_p;
lit_utf8_byte_t *output_char_p = output_start_p;
while (input_char_p < input_end_p)
{
@@ -740,7 +736,7 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
}
else
{
*output_char_p = (ecma_char_t) decoded_byte;
*output_char_p = (lit_utf8_byte_t) decoded_byte;
output_char_p++;
}
}
@@ -778,26 +774,13 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
}
while (--count > 0);
if (character < 0x10000)
{
*output_char_p = (ecma_char_t) character;
output_char_p++;
}
else
{
character -= 0x10000;
*output_char_p = (ecma_char_t) (0xd800 | (character & 0x3ff));
output_char_p++;
*output_char_p = (ecma_char_t) (0xdc00 | (character >> 10));
output_char_p++;
}
output_char_p += lit_code_point_to_utf8 (character, output_char_p);
}
}
*output_char_p = '\0';
JERRY_ASSERT (output_start_p + output_length == output_char_p + 1);
JERRY_ASSERT (output_start_p + output_size == output_char_p);
ecma_string_t *output_string_p = ecma_new_ecma_string (output_start_p);
ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_size);
ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_string_p));
@@ -847,16 +830,16 @@ ecma_builtin_global_object_decode_uri_component (ecma_value_t this_arg __attr_un
* Helper function to encode byte as hexadecimal values.
*/
static void
ecma_builtin_global_object_byte_to_hex (ecma_char_t *dest_p, /**< destination pointer */
ecma_builtin_global_object_byte_to_hex (lit_utf8_byte_t *dest_p, /**< destination pointer */
uint32_t byte) /**< value */
{
JERRY_ASSERT (byte < 256);
dest_p[0] = '%';
ecma_char_t hex_digit = (ecma_char_t) (byte >> 4);
dest_p[1] = (ecma_char_t) ((hex_digit > 9) ? (hex_digit + ('A' - 10)) : (hex_digit + '0'));
hex_digit = (ecma_char_t) (byte & 0xf);
dest_p[2] = (ecma_char_t) ((hex_digit > 9) ? (hex_digit + ('A' - 10)) : (hex_digit + '0'));
dest_p[1] = (lit_utf8_byte_t) ((hex_digit > 9) ? (hex_digit + ('A' - 10)) : (hex_digit + '0'));
hex_digit = (lit_utf8_byte_t) (byte & 0xf);
dest_p[2] = (lit_utf8_byte_t) ((hex_digit > 9) ? (hex_digit + ('A' - 10)) : (hex_digit + '0'));
} /* ecma_builtin_global_object_byte_to_hex */
/**
@@ -878,27 +861,29 @@ ecma_builtin_global_object_encode_uri_helper (ecma_value_t uri, /**< uri argumen
JERRY_ASSERT (ecma_is_value_string (string));
ecma_string_t *input_string_p = ecma_get_string_from_value (string);
uint32_t input_length = (uint32_t) ecma_string_get_length (input_string_p);
lit_utf8_size_t input_size = ecma_string_get_size (input_string_p);
MEM_DEFINE_LOCAL_ARRAY (input_start_p,
input_length + 1,
ecma_char_t);
input_size + 1,
lit_utf8_byte_t);
ecma_string_to_zt_string (input_string_p,
input_start_p,
(ssize_t) (input_length + 1) * (ssize_t) sizeof (ecma_char_t));
input_start_p[input_size] = LIT_BYTE_NULL;
ecma_string_to_utf8_string (input_string_p,
input_start_p,
(ssize_t) (input_size));
/*
* The URI encoding has two major phases: first we validate the input,
* and compute the length of the output, then we encode the input.
*/
ecma_char_t *input_char_p = input_start_p;
uint32_t output_length = 1;
for (uint32_t i = 0; i < input_length; i++)
lit_utf8_iterator_t iter = lit_utf8_iterator_create (input_start_p, input_size);
lit_utf8_size_t output_length = 1;
while (!lit_utf8_iterator_reached_buffer_end (&iter))
{
/* Input validation. */
uint32_t character = *input_char_p++;
lit_code_point_t character = lit_utf8_iterator_read_code_unit_and_increment (&iter);
if (character <= 0x7f)
{
@@ -942,20 +927,20 @@ ecma_builtin_global_object_encode_uri_helper (ecma_value_t uri, /**< uri argumen
{
MEM_DEFINE_LOCAL_ARRAY (output_start_p,
output_length,
ecma_char_t);
lit_utf8_byte_t);
input_char_p = input_start_p;
ecma_char_t *output_char_p = output_start_p;
for (uint32_t i = 0; i < input_length; i++)
lit_utf8_iterator_t iter = lit_utf8_iterator_create (input_start_p, input_size);
lit_utf8_byte_t *output_char_p = output_start_p;
while (!lit_utf8_iterator_reached_buffer_end (&iter))
{
/* Input decode. */
uint32_t character = *input_char_p++;
lit_code_point_t character = lit_utf8_iterator_read_code_unit_and_increment (&iter);
if (character <= 0x7f)
{
if (ecma_builtin_global_object_character_is_in (character, unescaped_uri_bitset))
{
*output_char_p++ = (ecma_char_t) character;
*output_char_p++ = (lit_utf8_byte_t) character;
}
else
{
@@ -995,7 +980,7 @@ ecma_builtin_global_object_encode_uri_helper (ecma_value_t uri, /**< uri argumen
*output_char_p = '\0';
JERRY_ASSERT (output_start_p + output_length == output_char_p + 1);
ecma_string_t *output_string_p = ecma_new_ecma_string (output_start_p);
ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_length - 1);
ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_string_p));
@@ -24,6 +24,7 @@
#include "ecma-helpers.h"
#include "ecma-objects.h"
#include "ecma-try-catch-macro.h"
#include "lit-magic-strings.h"
/** \addtogroup ecma ECMA
* @{
@@ -83,40 +84,29 @@ ecma_builtin_helper_object_to_string (const ecma_value_t this_arg) /**< this arg
'Null' or one of possible object's classes.
The string with null character is maximum 19 characters long. */
const ssize_t buffer_size = 19;
MEM_DEFINE_LOCAL_ARRAY (str_buffer, buffer_size, ecma_char_t);
MEM_DEFINE_LOCAL_ARRAY (str_buffer, buffer_size, lit_utf8_byte_t);
const ecma_char_t *left_square_zt_str_p = lit_get_magic_string_zt (LIT_MAGIC_STRING_LEFT_SQUARE_CHAR);
const ecma_char_t *object_zt_str_p = lit_get_magic_string_zt (LIT_MAGIC_STRING_OBJECT);
const ecma_char_t *space_zt_str_p = lit_get_magic_string_zt (LIT_MAGIC_STRING_SPACE_CHAR);
const ecma_char_t *type_name_zt_str_p = lit_get_magic_string_zt (type_string);
const ecma_char_t *right_square_zt_str_p = lit_get_magic_string_zt (LIT_MAGIC_STRING_RIGHT_SQUARE_CHAR);
ecma_char_t *buffer_ptr = str_buffer;
lit_utf8_byte_t *buffer_ptr = str_buffer;
ssize_t buffer_size_left = buffer_size;
buffer_ptr = ecma_copy_zt_string_to_buffer (left_square_zt_str_p,
buffer_ptr,
buffer_size_left);
buffer_size_left = buffer_size - (buffer_ptr - str_buffer) * (ssize_t) sizeof (ecma_char_t);
buffer_ptr = ecma_copy_zt_string_to_buffer (object_zt_str_p,
buffer_ptr,
buffer_size_left);
buffer_size_left = buffer_size - (buffer_ptr - str_buffer) * (ssize_t) sizeof (ecma_char_t);
buffer_ptr = ecma_copy_zt_string_to_buffer (space_zt_str_p,
buffer_ptr,
buffer_size_left);
buffer_size_left = buffer_size - (buffer_ptr - str_buffer) * (ssize_t) sizeof (ecma_char_t);
buffer_ptr = ecma_copy_zt_string_to_buffer (type_name_zt_str_p,
buffer_ptr,
buffer_size_left);
buffer_size_left = buffer_size - (buffer_ptr - str_buffer) * (ssize_t) sizeof (ecma_char_t);
buffer_ptr = ecma_copy_zt_string_to_buffer (right_square_zt_str_p,
buffer_ptr,
buffer_size_left);
buffer_size_left = buffer_size - (buffer_ptr - str_buffer) * (ssize_t) sizeof (ecma_char_t);
const lit_magic_string_id_t magic_string_ids[] =
{
LIT_MAGIC_STRING_LEFT_SQUARE_CHAR,
LIT_MAGIC_STRING_OBJECT,
LIT_MAGIC_STRING_SPACE_CHAR,
type_string,
LIT_MAGIC_STRING_RIGHT_SQUARE_CHAR
};
for (uint32_t i = 0; i < sizeof (magic_string_ids) / sizeof (lit_magic_string_id_t); ++i)
{
buffer_ptr = lit_copy_magic_string_to_buffer (magic_string_ids[i], buffer_ptr, buffer_size_left);
buffer_size_left = buffer_size - (buffer_ptr - str_buffer);
}
JERRY_ASSERT (buffer_size_left >= 0);
ret_string_p = ecma_new_ecma_string (str_buffer);
ret_string_p = ecma_new_ecma_string_from_utf8 (str_buffer, (lit_utf8_size_t) (buffer_size - buffer_size_left));
MEM_FINALIZE_LOCAL_ARRAY (str_buffer);
@@ -234,7 +234,7 @@ ecma_builtin_number_prototype_object_to_fixed (ecma_value_t this_arg, /**< this
if (is_negative)
{
ecma_string_t *neg_str_p = ecma_new_ecma_string ((const ecma_char_t *) "-");
ecma_string_t *neg_str_p = ecma_new_ecma_string_from_utf8 ((const lit_utf8_byte_t *) "-", 1);
ecma_string_t *neg_inf_str_p = ecma_concat_ecma_strings (neg_str_p, infinity_str_p);
ecma_deref_ecma_string (infinity_str_p);
ecma_deref_ecma_string (neg_str_p);
@@ -276,9 +276,9 @@ ecma_builtin_number_prototype_object_to_fixed (ecma_value_t this_arg, /**< this
}
JERRY_ASSERT (buffer_size > 0);
MEM_DEFINE_LOCAL_ARRAY (buff, buffer_size, ecma_char_t);
MEM_DEFINE_LOCAL_ARRAY (buff, buffer_size, lit_utf8_byte_t);
ecma_char_t* p = buff;
lit_utf8_byte_t *p = buff;
if (is_negative)
{
@@ -321,7 +321,7 @@ ecma_builtin_number_prototype_object_to_fixed (ecma_value_t this_arg, /**< this
digit++;
}
*p = (ecma_char_t) ((ecma_char_t) digit + '0');
*p = (lit_utf8_byte_t) ((lit_utf8_byte_t) digit + '0');
p++;
}
}
@@ -339,7 +339,7 @@ ecma_builtin_number_prototype_object_to_fixed (ecma_value_t this_arg, /**< this
digit++;
}
*p = (ecma_char_t) ((ecma_char_t) digit + '0');
*p = (lit_utf8_byte_t) ((lit_utf8_byte_t) digit + '0');
p++;
}
@@ -361,7 +361,7 @@ ecma_builtin_number_prototype_object_to_fixed (ecma_value_t this_arg, /**< this
digit++;
}
*p = (ecma_char_t) ((ecma_char_t) digit + '0');
*p = (lit_utf8_byte_t) ((lit_utf8_byte_t) digit + '0');
p++;
}
}
@@ -369,7 +369,7 @@ ecma_builtin_number_prototype_object_to_fixed (ecma_value_t this_arg, /**< this
JERRY_ASSERT (p - buff < buffer_size);
/* String terminator. */
*p = 0;
ecma_string_t* str = ecma_new_ecma_string ((ecma_char_t *) buff);
ecma_string_t* str = ecma_new_ecma_string_from_utf8 (buff, (lit_utf8_size_t) (p - buff));
ret_value = ecma_make_normal_completion_value (ecma_make_string_value (str));
MEM_FINALIZE_LOCAL_ARRAY (buff);
@@ -60,7 +60,7 @@ ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
if (ecma_object_get_class_name (ecma_get_object_from_value (this_arg)) != LIT_MAGIC_STRING_REGEXP_UL)
{
ret_value = ecma_raise_type_error ((const ecma_char_t *) "Incomplete RegExp type");
ret_value = ecma_raise_type_error ("Incomplete RegExp type");
}
else
{
@@ -77,16 +77,18 @@ ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
ecma_string_t *input_str_p = ecma_get_string_from_value (input_str_value);
/* Convert ecma_String_t *to regexp_bytecode_t* */
ecma_length_t input_str_len = ecma_string_get_length (input_str_p);
lit_utf8_size_t input_str_size = ecma_string_get_size (input_str_p);
MEM_DEFINE_LOCAL_ARRAY (input_zt_str_p, input_str_len + 1, ecma_char_t);
MEM_DEFINE_LOCAL_ARRAY (input_utf8_buffer_p, input_str_size + 1, lit_utf8_byte_t);
ssize_t zt_str_size = (ssize_t) (sizeof (ecma_char_t) * (input_str_len + 1));
ecma_string_to_zt_string (input_str_p, input_zt_str_p, zt_str_size);
ecma_string_to_utf8_string (input_str_p, input_utf8_buffer_p, (ssize_t) input_str_size);
ret_value = ecma_regexp_exec_helper (obj_p, bytecode_p, input_zt_str_p);
FIXME ("Update ecma_regexp_exec_helper so that zero symbol is not needed.");
input_utf8_buffer_p[input_str_size] = LIT_BYTE_NULL;
MEM_FINALIZE_LOCAL_ARRAY (input_zt_str_p);
ret_value = ecma_regexp_exec_helper (obj_p, bytecode_p, input_utf8_buffer_p, input_str_size);
MEM_FINALIZE_LOCAL_ARRAY (input_utf8_buffer_p);
ECMA_FINALIZE (input_str_value);
@@ -145,7 +147,7 @@ ecma_builtin_regexp_prototype_to_string (ecma_value_t this_arg) /**< this argume
if (ecma_object_get_class_name (ecma_get_object_from_value (this_arg)) != LIT_MAGIC_STRING_REGEXP_UL)
{
ret_value = ecma_raise_type_error ((const ecma_char_t *) "Incomplete RegExp type");
ret_value = ecma_raise_type_error ("Incomplete RegExp type");
}
else
{
@@ -94,7 +94,7 @@ ecma_builtin_regexp_dispatch_construct (const ecma_value_t *arguments_list_p, /*
}
else
{
ret_value = ecma_raise_type_error ((const ecma_char_t *) "Invalid argument of RegExp call.");
ret_value = ecma_raise_type_error ("Invalid argument of RegExp call.");
}
}
else
@@ -327,10 +327,10 @@ ecma_builtin_string_prototype_object_slice (ecma_value_t this_arg, /**< this arg
/* 3. */
ecma_string_t *get_string_val = ecma_get_string_from_value (to_string_val);
const uint32_t len = (uint32_t) ecma_string_get_length (get_string_val);
const ecma_length_t len = ecma_string_get_length (get_string_val);
/* 4. 6. */
uint32_t start = 0, end = len;
/* 4. */
ecma_length_t start = 0, end = len;
ECMA_OP_TO_NUMBER_TRY_CATCH (start_num,
arg1,
@@ -360,24 +360,9 @@ ecma_builtin_string_prototype_object_slice (ecma_value_t this_arg, /**< this arg
/* Build the result only if nothing earlier stored a non-empty completion value in ret_value. */
if (ecma_is_completion_value_empty (ret_value))
{
/* 8. */
/* Manual path: span is forced to 0 when start > end, then span characters are copied one by one from
 * get_string_val into a local buffer, the buffer is terminated with '\0' and a new string is created
 * from it. */
const uint32_t span = (start > end) ? 0 : end - start;
const uint32_t new_str_size = (uint32_t) sizeof (ecma_char_t) * (span + 1);
MEM_DEFINE_LOCAL_ARRAY (new_str_buffer, new_str_size, ecma_char_t);
/* 9. */
for (uint32_t idx = 0; idx < span; idx++)
{
new_str_buffer[idx] = ecma_string_get_char_at_pos (get_string_val, start + idx);
}
new_str_buffer[span] = '\0';
ecma_string_t* new_str = ecma_new_ecma_string ((ecma_char_t *) new_str_buffer);
ret_value = ecma_make_normal_completion_value (ecma_make_string_value (new_str));
MEM_FINALIZE_LOCAL_ARRAY (new_str_buffer);
/* 8-9. */
/* Substring path: start and end are handed to ecma_string_substr unchanged.
 * NOTE(review): the manual path above turns start > end into an empty result, but no such clamp is
 * applied before this call. Whether ecma_string_substr tolerates start > end is not visible here --
 * confirm, or clamp first.
 * NOTE(review): ret_value is assigned by both paths; presumably the manual path is the removed side
 * of a change whose +/- markers were lost and only this path is live -- confirm. */
ecma_string_t *new_str_p = ecma_string_substr (get_string_val, start, end);
ret_value = ecma_make_normal_completion_value (ecma_make_string_value (new_str_p));
}
ECMA_FINALIZE (to_string_val);
@@ -507,45 +492,37 @@ ecma_builtin_string_prototype_object_trim (ecma_value_t this_arg) /**< this argu
/* NOTE(review): this fragment mixes two variants of the same logic (an ecma_char_t / zt-string one and
 * a lit_utf8_byte_t one), which reads like a change set whose +/- markers were lost. The notes below
 * concern the utf8 variant, presumably the live one -- confirm. */
ecma_string_t *original_string_p = ecma_get_string_from_value (to_string_val);
/* 3 */
const uint32_t len = (uint32_t) ecma_string_get_length (original_string_p);
const lit_utf8_size_t size = ecma_string_get_size (original_string_p);
/* NOTE(review): length is initialised from ecma_string_get_size, the very call that feeds size on the
 * line above, whereas len two lines up comes from ecma_string_get_length. Presumably
 * ecma_string_get_length was intended; as written length and size always hold the same value, so the
 * loop bounds below are only right if size and length coincide for this string -- confirm. */
const ecma_length_t length = ecma_string_get_size (original_string_p);
/* Workaround: avoid repeated call of ecma_string_get_char_at_pos() because its overhead */
uint32_t zt_str_size = (uint32_t) sizeof (ecma_char_t) * (len + 1);
ecma_char_t *original_zt_str_p = (ecma_char_t*) mem_heap_alloc_block (zt_str_size,
MEM_HEAP_ALLOC_SHORT_TERM);
ecma_string_to_zt_string (original_string_p, original_zt_str_p, (ssize_t) zt_str_size);
/* The utf8 buffer is allocated with size + 1 bytes, but only size bytes are requested from
 * ecma_string_to_utf8_string and nothing in this fragment writes the extra byte. */
lit_utf8_byte_t *original_utf8_str_p = (lit_utf8_byte_t *) mem_heap_alloc_block (size + 1,
MEM_HEAP_ALLOC_SHORT_TERM);
ecma_string_to_utf8_string (original_string_p, original_utf8_str_p, (ssize_t) size);
/* prefix / postfix count the leading / trailing positions for which isspace() is true. */
uint32_t prefix = 0, postfix = 0;
uint32_t new_len = 0;
/* NOTE(review): isspace() is only defined for arguments representable as unsigned char (or EOF). If
 * lit_utf8_string_code_unit_at returns a 16-bit code unit, larger values need a different whitespace
 * test -- its return type is not visible here, confirm. */
while (prefix < len && isspace (original_zt_str_p[prefix]))
while (prefix < length && isspace (lit_utf8_string_code_unit_at (original_utf8_str_p, size, prefix)))
{
prefix++;
}
while (postfix < len - prefix && isspace (original_zt_str_p[len - postfix - 1]))
while (postfix < length - prefix && isspace (lit_utf8_string_code_unit_at (original_utf8_str_p,
size,
length - postfix - 1)))
{
postfix++;
}
new_len = prefix < len ? len - prefix - postfix : 0;
/* NOTE(review): this variant derives new_len from size, while prefix and postfix were bounded by
 * length in the loops above and the result is used as an end position for ecma_string_substr below.
 * Presumably length was intended here so that all three values share one unit -- confirm against the
 * units ecma_string_substr expects. */
new_len = prefix < size ? size - prefix - postfix : 0;
MEM_DEFINE_LOCAL_ARRAY (new_str_buffer, new_len + 1, ecma_char_t);
for (uint32_t idx = 0; idx < new_len; ++idx)
{
new_str_buffer[idx] = original_zt_str_p[idx + prefix];
}
new_str_buffer[new_len] = '\0';
ecma_string_t *new_str_p = ecma_new_ecma_string ((ecma_char_t *) new_str_buffer);
/* Substring variant: take the range from prefix up to prefix + new_len of the original string. */
ecma_string_t *new_str_p = ecma_string_substr (original_string_p, prefix, prefix + new_len);
/* 4 */
ret_value = ecma_make_normal_completion_value (ecma_make_string_value (new_str_p));
MEM_FINALIZE_LOCAL_ARRAY (new_str_buffer);
/* Release the scratch copy of the string obtained from mem_heap_alloc_block above. */
mem_heap_free_block (original_zt_str_p);
mem_heap_free_block (original_utf8_str_p);
ECMA_FINALIZE (to_string_val);
ECMA_FINALIZE (check_coercible_val);
@@ -60,12 +60,19 @@ ecma_builtin_string_object_from_char_code (ecma_value_t this_arg __attr_unused__
{
/* Builds a string from the arguments: each one is converted to a number, reduced to uint32 and then
 * truncated to 16 bits, and the resulting code units are collected in a scratch buffer from which
 * the returned string value is created. A throw from the number conversion is returned as is.
 * NOTE(review): two variants of the buffer handling are interleaved below (a zero-terminated
 * ecma_char_t buffer and a utf8 byte buffer), which reads like a change set whose +/- markers were
 * lost; presumably the utf8 variant is the live one -- confirm. */
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
size_t zt_str_buffer_size = sizeof (ecma_char_t) * (args_number + 1u);
/* No arguments: return a string created from a NULL buffer of size 0 without allocating anything. */
if (args_number == 0)
{
ecma_string_t *ret_str_p = ecma_new_ecma_string_from_utf8 (NULL, 0);
return ecma_make_normal_completion_value (ecma_make_string_value (ret_str_p));
}
ecma_char_t *ret_zt_str_p = (ecma_char_t*) mem_heap_alloc_block (zt_str_buffer_size,
MEM_HEAP_ALLOC_SHORT_TERM);
ret_zt_str_p[args_number] = ECMA_CHAR_NULL;
/* Reserve LIT_UTF8_MAX_BYTES_IN_CODE_UNIT bytes per argument, so every code unit fits regardless of
 * how many bytes its encoding takes. */
lit_utf8_size_t utf8_buf_size = args_number * LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;
ecma_string_t *ret_str_p;
MEM_DEFINE_LOCAL_ARRAY (utf8_buf_p, utf8_buf_size, lit_utf8_byte_t);
/* Number of bytes of utf8_buf_p filled so far. */
lit_utf8_size_t utf8_buf_used = 0;
FIXME ("Support surrogate pairs");
for (ecma_length_t arg_index = 0;
arg_index < args_number;
arg_index++)
@@ -73,26 +80,17 @@ ecma_builtin_string_object_from_char_code (ecma_value_t this_arg __attr_unused__
ECMA_OP_TO_NUMBER_TRY_CATCH (arg_num, args[arg_index], ret_value);
uint32_t uint32_char_code = ecma_number_to_uint32 (arg_num);
/* Only the low 16 bits of the converted number are kept. */
uint16_t uint16_char_code = (uint16_t) uint32_char_code;
ecma_char_t code_unit = (uint16_t) uint32_char_code;
#if CONFIG_ECMA_CHAR_ENCODING == CONFIG_ECMA_CHAR_ASCII
if ((uint16_char_code >> JERRY_BITSINBYTE) != 0)
{
ret_value = ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_TYPE));
}
else
{
ret_zt_str_p[arg_index] = (ecma_char_t) uint16_char_code;
}
#elif CONFIG_ECMA_CHAR_ENCODING == CONFIG_ECMA_CHAR_UTF16
ret_zt_str_p[arg_index] = (ecma_char_t) uint16_char_code;
#endif /* CONFIG_ECMA_CHAR_ENCODING == CONFIG_ECMA_CHAR_UTF16 */
/* The asserts bracket the write: room for one maximum-size code unit must remain before encoding,
 * and the running total must still be within the buffer afterwards. */
JERRY_ASSERT (utf8_buf_used <= utf8_buf_size - LIT_UTF8_MAX_BYTES_IN_CODE_UNIT);
utf8_buf_used += lit_code_unit_to_utf8 (code_unit, utf8_buf_p + utf8_buf_used);
JERRY_ASSERT (utf8_buf_used <= utf8_buf_size);
ECMA_OP_TO_NUMBER_FINALIZE (arg_num);
if (ecma_is_completion_value_throw (ret_value))
{
/* NOTE(review): on this path utf8_buf_p, which was introduced with MEM_DEFINE_LOCAL_ARRAY, is
 * released with mem_heap_free_block and the function returns without reaching
 * MEM_FINALIZE_LOCAL_ARRAY, while the normal path below uses MEM_FINALIZE_LOCAL_ARRAY. Whether the
 * two release forms are interchangeable depends on the macro definitions, which are not visible
 * here -- confirm. */
mem_heap_free_block (ret_zt_str_p);
mem_heap_free_block (utf8_buf_p);
return ret_value;
}
@@ -100,9 +98,9 @@ ecma_builtin_string_object_from_char_code (ecma_value_t this_arg __attr_unused__
JERRY_ASSERT (ecma_is_completion_value_empty (ret_value));
}
ecma_string_t *ret_str_p = ecma_new_ecma_string (ret_zt_str_p);
/* Create the result from exactly the bytes written, not from the full reserved buffer size. */
ret_str_p = ecma_new_ecma_string_from_utf8 (utf8_buf_p, utf8_buf_used);
mem_heap_free_block (ret_zt_str_p);
MEM_FINALIZE_LOCAL_ARRAY (utf8_buf_p);
return ecma_make_normal_completion_value (ecma_make_string_value (ret_str_p));
} /* ecma_builtin_string_object_from_char_code */
@@ -609,7 +609,7 @@ ecma_builtin_bin_search_for_magic_string_id_in_array (const lit_magic_string_id_
if (ids[mid] == key)
{
return mid;
return (int32_t) mid;
}
else if (ids[mid] > key)
{