New trim helper function (#3014)

Created a new trim helper function, which is now used everywhere a string needs to be trimmed.

Co-authored-by: Tibor Dusnoki tdusnoki@inf.u-szeged.hu
JerryScript-DCO-1.0-Signed-off-by: Adam Szilagyi aszilagy@inf.u-szeged.hu
This commit is contained in:
Szilagyi Adam
2019-08-26 17:30:22 +02:00
committed by Dániel Bátyai
parent fd075322fb
commit 97e348528a
4 changed files with 117 additions and 160 deletions
+46 -78
View File
@@ -299,57 +299,25 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
return ECMA_NUMBER_ZERO;
}
const lit_utf8_byte_t *str_curr_p = str_p;
const lit_utf8_byte_t *str_end_p = str_p + str_size;
ecma_char_t code_unit;
ecma_string_trim_helper (&str_p, &str_size);
const lit_utf8_byte_t *end_p = str_p + (str_size - 1);
while (str_curr_p < str_end_p)
{
code_unit = lit_utf8_peek_next (str_curr_p);
if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
{
lit_utf8_incr (&str_curr_p);
}
else
{
break;
}
}
const lit_utf8_byte_t *begin_p = str_curr_p;
str_curr_p = (lit_utf8_byte_t *) str_end_p;
while (str_curr_p > str_p)
{
code_unit = lit_utf8_peek_prev (str_curr_p);
if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
{
lit_utf8_decr (&str_curr_p);
}
else
{
break;
}
}
const lit_utf8_byte_t *end_p = str_curr_p - 1;
if (begin_p > end_p)
if (str_size < 1)
{
return ECMA_NUMBER_ZERO;
}
if ((end_p >= begin_p + 2)
&& begin_p[0] == LIT_CHAR_0
&& (begin_p[1] == LIT_CHAR_LOWERCASE_X
|| begin_p[1] == LIT_CHAR_UPPERCASE_X))
if ((end_p >= str_p + 2)
&& str_p[0] == LIT_CHAR_0
&& (str_p[1] == LIT_CHAR_LOWERCASE_X
|| str_p[1] == LIT_CHAR_UPPERCASE_X))
{
/* Hex literal handling */
begin_p += 2;
str_p += 2;
ecma_number_t num = ECMA_NUMBER_ZERO;
for (const lit_utf8_byte_t * iter_p = begin_p;
for (const lit_utf8_byte_t * iter_p = str_p;
iter_p <= end_p;
iter_p++)
{
@@ -383,18 +351,18 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
bool sign = false; /* positive */
if (*begin_p == LIT_CHAR_PLUS)
if (*str_p == LIT_CHAR_PLUS)
{
begin_p++;
str_p++;
}
else if (*begin_p == LIT_CHAR_MINUS)
else if (*str_p == LIT_CHAR_MINUS)
{
sign = true; /* negative */
begin_p++;
str_p++;
}
if (begin_p > end_p)
if (str_p > end_p)
{
return ecma_number_make_nan ();
}
@@ -404,7 +372,7 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
JERRY_ASSERT (strlen ((const char *) infinity_zt_str_p) == 8);
if ((end_p - begin_p) == (8 - 1) && memcmp (infinity_zt_str_p, begin_p, 8) == 0)
if ((end_p - str_p) == (8 - 1) && memcmp (infinity_zt_str_p, str_p, 8) == 0)
{
return ecma_number_make_infinity (sign);
}
@@ -415,15 +383,15 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
bool digit_seen = false;
/* Parsing digits before dot (or before end of digits part if there is no dot in number) */
while (begin_p <= end_p)
while (str_p <= end_p)
{
int32_t digit_value;
if (*begin_p >= LIT_CHAR_0
&& *begin_p <= LIT_CHAR_9)
if (*str_p >= LIT_CHAR_0
&& *str_p <= LIT_CHAR_9)
{
digit_seen = true;
digit_value = (*begin_p - LIT_CHAR_0);
digit_value = (*str_p - LIT_CHAR_0);
}
else
{
@@ -443,29 +411,29 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
}
}
begin_p++;
str_p++;
}
if (begin_p <= end_p
&& *begin_p == LIT_CHAR_DOT)
if (str_p <= end_p
&& *str_p == LIT_CHAR_DOT)
{
begin_p++;
str_p++;
if (!digit_seen && begin_p > end_p)
if (!digit_seen && str_p > end_p)
{
return ecma_number_make_nan ();
}
/* Parsing number's part that is placed after dot */
while (begin_p <= end_p)
while (str_p <= end_p)
{
int32_t digit_value;
if (*begin_p >= LIT_CHAR_0
&& *begin_p <= LIT_CHAR_9)
if (*str_p >= LIT_CHAR_0
&& *str_p <= LIT_CHAR_9)
{
digit_seen = true;
digit_value = (*begin_p - LIT_CHAR_0);
digit_value = (*str_p - LIT_CHAR_0);
}
else
{
@@ -483,7 +451,7 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
e--;
}
begin_p++;
str_p++;
}
}
@@ -491,40 +459,40 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
int32_t e_in_lit = 0;
bool e_in_lit_sign = false;
if (begin_p <= end_p
&& (*begin_p == LIT_CHAR_LOWERCASE_E
|| *begin_p == LIT_CHAR_UPPERCASE_E))
if (str_p <= end_p
&& (*str_p == LIT_CHAR_LOWERCASE_E
|| *str_p == LIT_CHAR_UPPERCASE_E))
{
begin_p++;
str_p++;
if (!digit_seen || begin_p > end_p)
if (!digit_seen || str_p > end_p)
{
return ecma_number_make_nan ();
}
if (*begin_p == LIT_CHAR_PLUS)
if (*str_p == LIT_CHAR_PLUS)
{
begin_p++;
str_p++;
}
else if (*begin_p == LIT_CHAR_MINUS)
else if (*str_p == LIT_CHAR_MINUS)
{
e_in_lit_sign = true;
begin_p++;
str_p++;
}
if (begin_p > end_p)
if (str_p > end_p)
{
return ecma_number_make_nan ();
}
while (begin_p <= end_p)
while (str_p <= end_p)
{
int32_t digit_value;
if (*begin_p >= LIT_CHAR_0
&& *begin_p <= LIT_CHAR_9)
if (*str_p >= LIT_CHAR_0
&& *str_p <= LIT_CHAR_9)
{
digit_value = (*begin_p - LIT_CHAR_0);
digit_value = (*str_p - LIT_CHAR_0);
}
else
{
@@ -543,7 +511,7 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
return sign ? -ECMA_NUMBER_ZERO : ECMA_NUMBER_ZERO;
}
begin_p++;
str_p++;
}
}
@@ -569,12 +537,12 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
e_sign = false;
}
if (begin_p <= end_p)
if (str_p <= end_p)
{
return ecma_number_make_nan ();
}
JERRY_ASSERT (begin_p == end_p + 1);
JERRY_ASSERT (str_p == end_p + 1);
if (fraction_uint64 == 0)
{
+60 -48
View File
@@ -2449,6 +2449,57 @@ ecma_string_substr (const ecma_string_t *string_p, /**< pointer to an ecma strin
return ecma_string_p;
} /* ecma_string_substr */
/**
 * Narrow a (pointer, size) view of a utf-8 string so that it no longer covers
 * leading or trailing white space and line terminator characters.
 *
 * Both arguments are overwritten: on return *utf8_str_p points at the first
 * character that is kept and *utf8_str_size is the byte length of the kept
 * range (0 if nothing is kept, in which case *utf8_str_p is the end of the
 * input). A caller that still needs the original buffer start or size
 * (e.g. to release the buffer) has to save them before calling.
 *
 * Used by:
 *        - ecma_string_trim
 *        - ecma_utf8_string_to_number
 *        - ecma_builtin_global_object_parse_int
 *        - ecma_builtin_global_object_parse_float
 */
void
ecma_string_trim_helper (const lit_utf8_byte_t **utf8_str_p, /**< [in, out] current string position */
                         lit_utf8_size_t *utf8_str_size) /**< [in, out] size of the given string */
{
  const lit_utf8_byte_t *const buffer_end_p = *utf8_str_p + *utf8_str_size;
  const lit_utf8_byte_t *trimmed_begin_p = buffer_end_p;
  const lit_utf8_byte_t *trimmed_end_p = buffer_end_p;
  ecma_char_t code_unit;
  lit_utf8_size_t step;

  /* Scan forward; the begin marker stays at the buffer end unless a kept character is found. */
  for (const lit_utf8_byte_t *scan_p = *utf8_str_p; scan_p < buffer_end_p;)
  {
    step = lit_read_code_unit_from_utf8 (scan_p, &code_unit);

    if (lit_char_is_white_space (code_unit)
        || lit_char_is_line_terminator (code_unit))
    {
      scan_p += step;
      continue;
    }

    trimmed_begin_p = scan_p;
    break;
  }

  /* Scan backward from the buffer end, never crossing the begin marker. */
  while (trimmed_end_p > trimmed_begin_p)
  {
    step = lit_read_prev_code_unit_from_utf8 (trimmed_end_p, &code_unit);

    if (!lit_char_is_white_space (code_unit)
        && !lit_char_is_line_terminator (code_unit))
    {
      break;
    }

    trimmed_end_p -= step;
  }

  *utf8_str_size = (lit_utf8_size_t) (trimmed_end_p - trimmed_begin_p);
  *utf8_str_p = trimmed_begin_p;
} /* ecma_string_trim_helper */
/**
* Trim leading and trailing whitespace characters from string.
*
@@ -2459,63 +2510,24 @@ ecma_string_trim (const ecma_string_t *string_p) /**< pointer to an ecma string
{
ecma_string_t *ret_string_p;
ECMA_STRING_TO_UTF8_STRING (string_p, utf8_str_p, utf8_str_size);
lit_utf8_size_t utf8_str_size;
uint8_t flags = ECMA_STRING_FLAG_IS_ASCII;
const lit_utf8_byte_t *utf8_str_p = ecma_string_get_chars (string_p, &utf8_str_size, &flags);
if (utf8_str_size > 0)
{
ecma_char_t ch;
lit_utf8_size_t read_size;
const lit_utf8_byte_t *nonws_start_p = utf8_str_p + utf8_str_size;
const lit_utf8_byte_t *current_p = utf8_str_p;
/* Trim front. */
while (current_p < nonws_start_p)
{
read_size = lit_read_code_unit_from_utf8 (current_p, &ch);
if (!lit_char_is_white_space (ch)
&& !lit_char_is_line_terminator (ch))
{
nonws_start_p = current_p;
break;
}
current_p += read_size;
}
current_p = utf8_str_p + utf8_str_size;
/* Trim back. */
while (current_p > utf8_str_p)
{
read_size = lit_read_prev_code_unit_from_utf8 (current_p, &ch);
if (!lit_char_is_white_space (ch)
&& !lit_char_is_line_terminator (ch))
{
break;
}
current_p -= read_size;
}
/* Construct new string. */
if (current_p > nonws_start_p)
{
ret_string_p = ecma_new_ecma_string_from_utf8 (nonws_start_p,
(lit_utf8_size_t) (current_p - nonws_start_p));
}
else
{
ret_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
}
ecma_string_trim_helper (&utf8_str_p, &utf8_str_size);
ret_string_p = ecma_new_ecma_string_from_utf8 (utf8_str_p, utf8_str_size);
}
else
{
ret_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
}
ECMA_FINALIZE_UTF8_STRING (utf8_str_p, utf8_str_size);
if (flags & ECMA_STRING_FLAG_MUST_BE_FREED)
{
jmem_heap_free_block ((void *) utf8_str_p, utf8_str_size);
}
return ret_string_p;
} /* ecma_string_trim */
+2
View File
@@ -296,6 +296,8 @@ lit_magic_string_id_t ecma_get_string_magic (const ecma_string_t *string_p);
lit_string_hash_t ecma_string_hash (const ecma_string_t *string_p);
ecma_string_t *ecma_string_substr (const ecma_string_t *string_p, ecma_length_t start_pos, ecma_length_t end_pos);
void ecma_string_trim_helper (const lit_utf8_byte_t **utf8_str_p,
lit_utf8_size_t *utf8_str_size);
ecma_string_t *ecma_string_trim (const ecma_string_t *string_p);
ecma_stringbuilder_t ecma_stringbuilder_create (void);
@@ -111,31 +111,6 @@ ecma_builtin_global_object_eval (ecma_value_t x) /**< routine's first argument *
return ecma_op_eval (ecma_get_string_from_value (x), parse_opts);
} /* ecma_builtin_global_object_eval */
/**
 * Skip the leading white space and line terminator characters of a string
 * on behalf of the Global object's 'parseInt' and 'parseFloat' routines.
 *
 * When a character that is neither white space nor a line terminator is read,
 * *string_curr_p is moved back with lit_utf8_decr (presumably onto that
 * character) and the same position is stored in *start_p. When no such
 * character is read before string_end_p is reached, *start_p is left untouched.
 */
static void
ecma_builtin_global_remove_leading_white_spaces (const lit_utf8_byte_t **string_curr_p, /**< [in, out] current string
                                                                                         *   position */
                                                 const lit_utf8_byte_t *string_end_p, /**< end of the string buffer */
                                                 const lit_utf8_byte_t **start_p) /**< [in, out] start position of the
                                                                                   *   trimmed string */
{
  for (;;)
  {
    if (*string_curr_p >= string_end_p)
    {
      /* Ran out of input without reading a character to keep. */
      return;
    }

    const ecma_char_t code_unit = lit_utf8_read_next (string_curr_p);

    if (lit_char_is_white_space (code_unit)
        || lit_char_is_line_terminator (code_unit))
    {
      continue;
    }

    /* The read above advanced the cursor; move it back before reporting the start. */
    lit_utf8_decr (string_curr_p);
    *start_p = *string_curr_p;
    return;
  }
} /* ecma_builtin_global_remove_leading_white_spaces */
/**
* The Global object's 'parseInt' routine
*
@@ -158,13 +133,13 @@ ecma_builtin_global_object_parse_int (const lit_utf8_byte_t *string_buff, /**< r
}
const lit_utf8_byte_t *string_curr_p = string_buff;
const lit_utf8_byte_t *string_end_p = string_buff + string_buff_size;
/* 2. Remove leading whitespace. */
const lit_utf8_byte_t *start_p = string_end_p;
const lit_utf8_byte_t *end_p = start_p;
ecma_string_trim_helper (&string_curr_p, &string_buff_size);
ecma_builtin_global_remove_leading_white_spaces (&string_curr_p, string_end_p, &start_p);
const lit_utf8_byte_t *string_end_p = string_curr_p + string_buff_size;
const lit_utf8_byte_t *start_p = string_curr_p;
const lit_utf8_byte_t *end_p = string_end_p;
if (string_curr_p >= string_end_p)
{
@@ -336,14 +311,14 @@ ecma_builtin_global_object_parse_float (const lit_utf8_byte_t *string_buff, /**<
}
const lit_utf8_byte_t *str_curr_p = string_buff;
const lit_utf8_byte_t *str_end_p = string_buff + string_buff_size;
const lit_utf8_byte_t *start_p = str_end_p;
/* 2. Remove leading whitespace. */
ecma_string_trim_helper (&str_curr_p, &string_buff_size);
const lit_utf8_byte_t *str_end_p = str_curr_p + string_buff_size;
const lit_utf8_byte_t *start_p = str_curr_p;
const lit_utf8_byte_t *end_p = str_end_p;
/* 2. Find first non whitespace char and set starting position. */
ecma_builtin_global_remove_leading_white_spaces (&str_curr_p, str_end_p, &start_p);
bool sign = false;
ecma_char_t current;