New trim helper function (#3014)
Created a new trim helper function that is used everywhere a string needs to be trimmed. Co-authored-by: Tibor Dusnoki tdusnoki@inf.u-szeged.hu JerryScript-DCO-1.0-Signed-off-by: Adam Szilagyi aszilagy@inf.u-szeged.hu
This commit is contained in:
committed by
Dániel Bátyai
parent
fd075322fb
commit
97e348528a
@@ -299,57 +299,25 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
|
||||
return ECMA_NUMBER_ZERO;
|
||||
}
|
||||
|
||||
const lit_utf8_byte_t *str_curr_p = str_p;
|
||||
const lit_utf8_byte_t *str_end_p = str_p + str_size;
|
||||
ecma_char_t code_unit;
|
||||
ecma_string_trim_helper (&str_p, &str_size);
|
||||
const lit_utf8_byte_t *end_p = str_p + (str_size - 1);
|
||||
|
||||
while (str_curr_p < str_end_p)
|
||||
{
|
||||
code_unit = lit_utf8_peek_next (str_curr_p);
|
||||
if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
|
||||
{
|
||||
lit_utf8_incr (&str_curr_p);
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const lit_utf8_byte_t *begin_p = str_curr_p;
|
||||
str_curr_p = (lit_utf8_byte_t *) str_end_p;
|
||||
|
||||
while (str_curr_p > str_p)
|
||||
{
|
||||
code_unit = lit_utf8_peek_prev (str_curr_p);
|
||||
if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
|
||||
{
|
||||
lit_utf8_decr (&str_curr_p);
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const lit_utf8_byte_t *end_p = str_curr_p - 1;
|
||||
|
||||
if (begin_p > end_p)
|
||||
if (str_size < 1)
|
||||
{
|
||||
return ECMA_NUMBER_ZERO;
|
||||
}
|
||||
|
||||
if ((end_p >= begin_p + 2)
|
||||
&& begin_p[0] == LIT_CHAR_0
|
||||
&& (begin_p[1] == LIT_CHAR_LOWERCASE_X
|
||||
|| begin_p[1] == LIT_CHAR_UPPERCASE_X))
|
||||
if ((end_p >= str_p + 2)
|
||||
&& str_p[0] == LIT_CHAR_0
|
||||
&& (str_p[1] == LIT_CHAR_LOWERCASE_X
|
||||
|| str_p[1] == LIT_CHAR_UPPERCASE_X))
|
||||
{
|
||||
/* Hex literal handling */
|
||||
begin_p += 2;
|
||||
str_p += 2;
|
||||
|
||||
ecma_number_t num = ECMA_NUMBER_ZERO;
|
||||
|
||||
for (const lit_utf8_byte_t * iter_p = begin_p;
|
||||
for (const lit_utf8_byte_t * iter_p = str_p;
|
||||
iter_p <= end_p;
|
||||
iter_p++)
|
||||
{
|
||||
@@ -383,18 +351,18 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
|
||||
|
||||
bool sign = false; /* positive */
|
||||
|
||||
if (*begin_p == LIT_CHAR_PLUS)
|
||||
if (*str_p == LIT_CHAR_PLUS)
|
||||
{
|
||||
begin_p++;
|
||||
str_p++;
|
||||
}
|
||||
else if (*begin_p == LIT_CHAR_MINUS)
|
||||
else if (*str_p == LIT_CHAR_MINUS)
|
||||
{
|
||||
sign = true; /* negative */
|
||||
|
||||
begin_p++;
|
||||
str_p++;
|
||||
}
|
||||
|
||||
if (begin_p > end_p)
|
||||
if (str_p > end_p)
|
||||
{
|
||||
return ecma_number_make_nan ();
|
||||
}
|
||||
@@ -404,7 +372,7 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
|
||||
|
||||
JERRY_ASSERT (strlen ((const char *) infinity_zt_str_p) == 8);
|
||||
|
||||
if ((end_p - begin_p) == (8 - 1) && memcmp (infinity_zt_str_p, begin_p, 8) == 0)
|
||||
if ((end_p - str_p) == (8 - 1) && memcmp (infinity_zt_str_p, str_p, 8) == 0)
|
||||
{
|
||||
return ecma_number_make_infinity (sign);
|
||||
}
|
||||
@@ -415,15 +383,15 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
|
||||
bool digit_seen = false;
|
||||
|
||||
/* Parsing digits before dot (or before end of digits part if there is no dot in number) */
|
||||
while (begin_p <= end_p)
|
||||
while (str_p <= end_p)
|
||||
{
|
||||
int32_t digit_value;
|
||||
|
||||
if (*begin_p >= LIT_CHAR_0
|
||||
&& *begin_p <= LIT_CHAR_9)
|
||||
if (*str_p >= LIT_CHAR_0
|
||||
&& *str_p <= LIT_CHAR_9)
|
||||
{
|
||||
digit_seen = true;
|
||||
digit_value = (*begin_p - LIT_CHAR_0);
|
||||
digit_value = (*str_p - LIT_CHAR_0);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -443,29 +411,29 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
|
||||
}
|
||||
}
|
||||
|
||||
begin_p++;
|
||||
str_p++;
|
||||
}
|
||||
|
||||
if (begin_p <= end_p
|
||||
&& *begin_p == LIT_CHAR_DOT)
|
||||
if (str_p <= end_p
|
||||
&& *str_p == LIT_CHAR_DOT)
|
||||
{
|
||||
begin_p++;
|
||||
str_p++;
|
||||
|
||||
if (!digit_seen && begin_p > end_p)
|
||||
if (!digit_seen && str_p > end_p)
|
||||
{
|
||||
return ecma_number_make_nan ();
|
||||
}
|
||||
|
||||
/* Parsing number's part that is placed after dot */
|
||||
while (begin_p <= end_p)
|
||||
while (str_p <= end_p)
|
||||
{
|
||||
int32_t digit_value;
|
||||
|
||||
if (*begin_p >= LIT_CHAR_0
|
||||
&& *begin_p <= LIT_CHAR_9)
|
||||
if (*str_p >= LIT_CHAR_0
|
||||
&& *str_p <= LIT_CHAR_9)
|
||||
{
|
||||
digit_seen = true;
|
||||
digit_value = (*begin_p - LIT_CHAR_0);
|
||||
digit_value = (*str_p - LIT_CHAR_0);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -483,7 +451,7 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
|
||||
e--;
|
||||
}
|
||||
|
||||
begin_p++;
|
||||
str_p++;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -491,40 +459,40 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
|
||||
int32_t e_in_lit = 0;
|
||||
bool e_in_lit_sign = false;
|
||||
|
||||
if (begin_p <= end_p
|
||||
&& (*begin_p == LIT_CHAR_LOWERCASE_E
|
||||
|| *begin_p == LIT_CHAR_UPPERCASE_E))
|
||||
if (str_p <= end_p
|
||||
&& (*str_p == LIT_CHAR_LOWERCASE_E
|
||||
|| *str_p == LIT_CHAR_UPPERCASE_E))
|
||||
{
|
||||
begin_p++;
|
||||
str_p++;
|
||||
|
||||
if (!digit_seen || begin_p > end_p)
|
||||
if (!digit_seen || str_p > end_p)
|
||||
{
|
||||
return ecma_number_make_nan ();
|
||||
}
|
||||
|
||||
if (*begin_p == LIT_CHAR_PLUS)
|
||||
if (*str_p == LIT_CHAR_PLUS)
|
||||
{
|
||||
begin_p++;
|
||||
str_p++;
|
||||
}
|
||||
else if (*begin_p == LIT_CHAR_MINUS)
|
||||
else if (*str_p == LIT_CHAR_MINUS)
|
||||
{
|
||||
e_in_lit_sign = true;
|
||||
begin_p++;
|
||||
str_p++;
|
||||
}
|
||||
|
||||
if (begin_p > end_p)
|
||||
if (str_p > end_p)
|
||||
{
|
||||
return ecma_number_make_nan ();
|
||||
}
|
||||
|
||||
while (begin_p <= end_p)
|
||||
while (str_p <= end_p)
|
||||
{
|
||||
int32_t digit_value;
|
||||
|
||||
if (*begin_p >= LIT_CHAR_0
|
||||
&& *begin_p <= LIT_CHAR_9)
|
||||
if (*str_p >= LIT_CHAR_0
|
||||
&& *str_p <= LIT_CHAR_9)
|
||||
{
|
||||
digit_value = (*begin_p - LIT_CHAR_0);
|
||||
digit_value = (*str_p - LIT_CHAR_0);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -543,7 +511,7 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
|
||||
return sign ? -ECMA_NUMBER_ZERO : ECMA_NUMBER_ZERO;
|
||||
}
|
||||
|
||||
begin_p++;
|
||||
str_p++;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -569,12 +537,12 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
|
||||
e_sign = false;
|
||||
}
|
||||
|
||||
if (begin_p <= end_p)
|
||||
if (str_p <= end_p)
|
||||
{
|
||||
return ecma_number_make_nan ();
|
||||
}
|
||||
|
||||
JERRY_ASSERT (begin_p == end_p + 1);
|
||||
JERRY_ASSERT (str_p == end_p + 1);
|
||||
|
||||
if (fraction_uint64 == 0)
|
||||
{
|
||||
|
||||
@@ -2449,6 +2449,57 @@ ecma_string_substr (const ecma_string_t *string_p, /**< pointer to an ecma strin
|
||||
return ecma_string_p;
|
||||
} /* ecma_string_substr */
|
||||
|
||||
/**
|
||||
* Helper function for trimming.
|
||||
*
|
||||
* Used by:
|
||||
* - ecma_string_trim
|
||||
* - ecma_utf8_string_to_number
|
||||
* - ecma_builtin_global_object_parse_int
|
||||
* - ecma_builtin_global_object_parse_float
|
||||
*/
|
||||
void
|
||||
ecma_string_trim_helper (const lit_utf8_byte_t **utf8_str_p, /**< [in, out] current string position */
|
||||
lit_utf8_size_t *utf8_str_size) /**< [in, out] size of the given string */
|
||||
{
|
||||
ecma_char_t ch;
|
||||
lit_utf8_size_t read_size;
|
||||
const lit_utf8_byte_t *nonws_start_p = *utf8_str_p + *utf8_str_size;
|
||||
const lit_utf8_byte_t *current_p = *utf8_str_p;
|
||||
|
||||
while (current_p < nonws_start_p)
|
||||
{
|
||||
read_size = lit_read_code_unit_from_utf8 (current_p, &ch);
|
||||
|
||||
if (!lit_char_is_white_space (ch)
|
||||
&& !lit_char_is_line_terminator (ch))
|
||||
{
|
||||
nonws_start_p = current_p;
|
||||
break;
|
||||
}
|
||||
|
||||
current_p += read_size;
|
||||
}
|
||||
|
||||
current_p = *utf8_str_p + *utf8_str_size;
|
||||
|
||||
while (current_p > nonws_start_p)
|
||||
{
|
||||
read_size = lit_read_prev_code_unit_from_utf8 (current_p, &ch);
|
||||
|
||||
if (!lit_char_is_white_space (ch)
|
||||
&& !lit_char_is_line_terminator (ch))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
current_p -= read_size;
|
||||
}
|
||||
|
||||
*utf8_str_p = nonws_start_p;
|
||||
*utf8_str_size = (lit_utf8_size_t) (current_p - nonws_start_p);
|
||||
} /* ecma_string_trim_helper */
|
||||
|
||||
/**
|
||||
* Trim leading and trailing whitespace characters from string.
|
||||
*
|
||||
@@ -2459,63 +2510,24 @@ ecma_string_trim (const ecma_string_t *string_p) /**< pointer to an ecma string
|
||||
{
|
||||
ecma_string_t *ret_string_p;
|
||||
|
||||
ECMA_STRING_TO_UTF8_STRING (string_p, utf8_str_p, utf8_str_size);
|
||||
lit_utf8_size_t utf8_str_size;
|
||||
uint8_t flags = ECMA_STRING_FLAG_IS_ASCII;
|
||||
const lit_utf8_byte_t *utf8_str_p = ecma_string_get_chars (string_p, &utf8_str_size, &flags);
|
||||
|
||||
if (utf8_str_size > 0)
|
||||
{
|
||||
ecma_char_t ch;
|
||||
lit_utf8_size_t read_size;
|
||||
const lit_utf8_byte_t *nonws_start_p = utf8_str_p + utf8_str_size;
|
||||
const lit_utf8_byte_t *current_p = utf8_str_p;
|
||||
|
||||
/* Trim front. */
|
||||
while (current_p < nonws_start_p)
|
||||
{
|
||||
read_size = lit_read_code_unit_from_utf8 (current_p, &ch);
|
||||
|
||||
if (!lit_char_is_white_space (ch)
|
||||
&& !lit_char_is_line_terminator (ch))
|
||||
{
|
||||
nonws_start_p = current_p;
|
||||
break;
|
||||
}
|
||||
|
||||
current_p += read_size;
|
||||
}
|
||||
|
||||
current_p = utf8_str_p + utf8_str_size;
|
||||
|
||||
/* Trim back. */
|
||||
while (current_p > utf8_str_p)
|
||||
{
|
||||
read_size = lit_read_prev_code_unit_from_utf8 (current_p, &ch);
|
||||
|
||||
if (!lit_char_is_white_space (ch)
|
||||
&& !lit_char_is_line_terminator (ch))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
current_p -= read_size;
|
||||
}
|
||||
|
||||
/* Construct new string. */
|
||||
if (current_p > nonws_start_p)
|
||||
{
|
||||
ret_string_p = ecma_new_ecma_string_from_utf8 (nonws_start_p,
|
||||
(lit_utf8_size_t) (current_p - nonws_start_p));
|
||||
}
|
||||
else
|
||||
{
|
||||
ret_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
|
||||
}
|
||||
ecma_string_trim_helper (&utf8_str_p, &utf8_str_size);
|
||||
ret_string_p = ecma_new_ecma_string_from_utf8 (utf8_str_p, utf8_str_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
ret_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
|
||||
}
|
||||
|
||||
ECMA_FINALIZE_UTF8_STRING (utf8_str_p, utf8_str_size);
|
||||
if (flags & ECMA_STRING_FLAG_MUST_BE_FREED)
|
||||
{
|
||||
jmem_heap_free_block ((void *) utf8_str_p, utf8_str_size);
|
||||
}
|
||||
|
||||
return ret_string_p;
|
||||
} /* ecma_string_trim */
|
||||
|
||||
@@ -296,6 +296,8 @@ lit_magic_string_id_t ecma_get_string_magic (const ecma_string_t *string_p);
|
||||
|
||||
lit_string_hash_t ecma_string_hash (const ecma_string_t *string_p);
|
||||
ecma_string_t *ecma_string_substr (const ecma_string_t *string_p, ecma_length_t start_pos, ecma_length_t end_pos);
|
||||
void ecma_string_trim_helper (const lit_utf8_byte_t **utf8_str_p,
|
||||
lit_utf8_size_t *utf8_str_size);
|
||||
ecma_string_t *ecma_string_trim (const ecma_string_t *string_p);
|
||||
|
||||
ecma_stringbuilder_t ecma_stringbuilder_create (void);
|
||||
|
||||
@@ -111,31 +111,6 @@ ecma_builtin_global_object_eval (ecma_value_t x) /**< routine's first argument *
|
||||
return ecma_op_eval (ecma_get_string_from_value (x), parse_opts);
|
||||
} /* ecma_builtin_global_object_eval */
|
||||
|
||||
/**
|
||||
* Helper function for trimming leading whitespaces
|
||||
* for the Global object's 'parseInt' and 'parseFloat' routines
|
||||
*/
|
||||
static void
|
||||
ecma_builtin_global_remove_leading_white_spaces (const lit_utf8_byte_t **string_curr_p, /**< [in, out] current string
|
||||
* position */
|
||||
const lit_utf8_byte_t *string_end_p, /**< end of the string buffer */
|
||||
const lit_utf8_byte_t **start_p) /**< [in, out] start position of the
|
||||
* trimmed string */
|
||||
{
|
||||
while (*string_curr_p < string_end_p)
|
||||
{
|
||||
ecma_char_t current_char = lit_utf8_read_next (string_curr_p);
|
||||
|
||||
if (!lit_char_is_white_space (current_char)
|
||||
&& !lit_char_is_line_terminator (current_char))
|
||||
{
|
||||
lit_utf8_decr (string_curr_p);
|
||||
*start_p = *string_curr_p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} /* ecma_builtin_global_remove_leading_white_spaces */
|
||||
|
||||
/**
|
||||
* The Global object's 'parseInt' routine
|
||||
*
|
||||
@@ -158,13 +133,13 @@ ecma_builtin_global_object_parse_int (const lit_utf8_byte_t *string_buff, /**< r
|
||||
}
|
||||
|
||||
const lit_utf8_byte_t *string_curr_p = string_buff;
|
||||
const lit_utf8_byte_t *string_end_p = string_buff + string_buff_size;
|
||||
|
||||
/* 2. Remove leading whitespace. */
|
||||
const lit_utf8_byte_t *start_p = string_end_p;
|
||||
const lit_utf8_byte_t *end_p = start_p;
|
||||
ecma_string_trim_helper (&string_curr_p, &string_buff_size);
|
||||
|
||||
ecma_builtin_global_remove_leading_white_spaces (&string_curr_p, string_end_p, &start_p);
|
||||
const lit_utf8_byte_t *string_end_p = string_curr_p + string_buff_size;
|
||||
const lit_utf8_byte_t *start_p = string_curr_p;
|
||||
const lit_utf8_byte_t *end_p = string_end_p;
|
||||
|
||||
if (string_curr_p >= string_end_p)
|
||||
{
|
||||
@@ -336,14 +311,14 @@ ecma_builtin_global_object_parse_float (const lit_utf8_byte_t *string_buff, /**<
|
||||
}
|
||||
|
||||
const lit_utf8_byte_t *str_curr_p = string_buff;
|
||||
const lit_utf8_byte_t *str_end_p = string_buff + string_buff_size;
|
||||
|
||||
const lit_utf8_byte_t *start_p = str_end_p;
|
||||
/* 2. Remove leading whitespace. */
|
||||
ecma_string_trim_helper (&str_curr_p, &string_buff_size);
|
||||
|
||||
const lit_utf8_byte_t *str_end_p = str_curr_p + string_buff_size;
|
||||
const lit_utf8_byte_t *start_p = str_curr_p;
|
||||
const lit_utf8_byte_t *end_p = str_end_p;
|
||||
|
||||
/* 2. Find first non whitespace char and set starting position. */
|
||||
ecma_builtin_global_remove_leading_white_spaces (&str_curr_p, str_end_p, &start_p);
|
||||
|
||||
bool sign = false;
|
||||
ecma_char_t current;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user