New trim helper function (#3014)

Created a new trim helper function, which is used in every situation where we need to trim a string.

Co-authored-by: Tibor Dusnoki tdusnoki@inf.u-szeged.hu
JerryScript-DCO-1.0-Signed-off-by: Adam Szilagyi aszilagy@inf.u-szeged.hu
This commit is contained in:
Szilagyi Adam
2019-08-26 17:30:22 +02:00
committed by Dániel Bátyai
parent fd075322fb
commit 97e348528a
4 changed files with 117 additions and 160 deletions
+46 -78
View File
@@ -299,57 +299,25 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
return ECMA_NUMBER_ZERO; return ECMA_NUMBER_ZERO;
} }
const lit_utf8_byte_t *str_curr_p = str_p; ecma_string_trim_helper (&str_p, &str_size);
const lit_utf8_byte_t *str_end_p = str_p + str_size; const lit_utf8_byte_t *end_p = str_p + (str_size - 1);
ecma_char_t code_unit;
while (str_curr_p < str_end_p) if (str_size < 1)
{
code_unit = lit_utf8_peek_next (str_curr_p);
if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
{
lit_utf8_incr (&str_curr_p);
}
else
{
break;
}
}
const lit_utf8_byte_t *begin_p = str_curr_p;
str_curr_p = (lit_utf8_byte_t *) str_end_p;
while (str_curr_p > str_p)
{
code_unit = lit_utf8_peek_prev (str_curr_p);
if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
{
lit_utf8_decr (&str_curr_p);
}
else
{
break;
}
}
const lit_utf8_byte_t *end_p = str_curr_p - 1;
if (begin_p > end_p)
{ {
return ECMA_NUMBER_ZERO; return ECMA_NUMBER_ZERO;
} }
if ((end_p >= begin_p + 2) if ((end_p >= str_p + 2)
&& begin_p[0] == LIT_CHAR_0 && str_p[0] == LIT_CHAR_0
&& (begin_p[1] == LIT_CHAR_LOWERCASE_X && (str_p[1] == LIT_CHAR_LOWERCASE_X
|| begin_p[1] == LIT_CHAR_UPPERCASE_X)) || str_p[1] == LIT_CHAR_UPPERCASE_X))
{ {
/* Hex literal handling */ /* Hex literal handling */
begin_p += 2; str_p += 2;
ecma_number_t num = ECMA_NUMBER_ZERO; ecma_number_t num = ECMA_NUMBER_ZERO;
for (const lit_utf8_byte_t * iter_p = begin_p; for (const lit_utf8_byte_t * iter_p = str_p;
iter_p <= end_p; iter_p <= end_p;
iter_p++) iter_p++)
{ {
@@ -383,18 +351,18 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
bool sign = false; /* positive */ bool sign = false; /* positive */
if (*begin_p == LIT_CHAR_PLUS) if (*str_p == LIT_CHAR_PLUS)
{ {
begin_p++; str_p++;
} }
else if (*begin_p == LIT_CHAR_MINUS) else if (*str_p == LIT_CHAR_MINUS)
{ {
sign = true; /* negative */ sign = true; /* negative */
begin_p++; str_p++;
} }
if (begin_p > end_p) if (str_p > end_p)
{ {
return ecma_number_make_nan (); return ecma_number_make_nan ();
} }
@@ -404,7 +372,7 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
JERRY_ASSERT (strlen ((const char *) infinity_zt_str_p) == 8); JERRY_ASSERT (strlen ((const char *) infinity_zt_str_p) == 8);
if ((end_p - begin_p) == (8 - 1) && memcmp (infinity_zt_str_p, begin_p, 8) == 0) if ((end_p - str_p) == (8 - 1) && memcmp (infinity_zt_str_p, str_p, 8) == 0)
{ {
return ecma_number_make_infinity (sign); return ecma_number_make_infinity (sign);
} }
@@ -415,15 +383,15 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
bool digit_seen = false; bool digit_seen = false;
/* Parsing digits before dot (or before end of digits part if there is no dot in number) */ /* Parsing digits before dot (or before end of digits part if there is no dot in number) */
while (begin_p <= end_p) while (str_p <= end_p)
{ {
int32_t digit_value; int32_t digit_value;
if (*begin_p >= LIT_CHAR_0 if (*str_p >= LIT_CHAR_0
&& *begin_p <= LIT_CHAR_9) && *str_p <= LIT_CHAR_9)
{ {
digit_seen = true; digit_seen = true;
digit_value = (*begin_p - LIT_CHAR_0); digit_value = (*str_p - LIT_CHAR_0);
} }
else else
{ {
@@ -443,29 +411,29 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
} }
} }
begin_p++; str_p++;
} }
if (begin_p <= end_p if (str_p <= end_p
&& *begin_p == LIT_CHAR_DOT) && *str_p == LIT_CHAR_DOT)
{ {
begin_p++; str_p++;
if (!digit_seen && begin_p > end_p) if (!digit_seen && str_p > end_p)
{ {
return ecma_number_make_nan (); return ecma_number_make_nan ();
} }
/* Parsing number's part that is placed after dot */ /* Parsing number's part that is placed after dot */
while (begin_p <= end_p) while (str_p <= end_p)
{ {
int32_t digit_value; int32_t digit_value;
if (*begin_p >= LIT_CHAR_0 if (*str_p >= LIT_CHAR_0
&& *begin_p <= LIT_CHAR_9) && *str_p <= LIT_CHAR_9)
{ {
digit_seen = true; digit_seen = true;
digit_value = (*begin_p - LIT_CHAR_0); digit_value = (*str_p - LIT_CHAR_0);
} }
else else
{ {
@@ -483,7 +451,7 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
e--; e--;
} }
begin_p++; str_p++;
} }
} }
@@ -491,40 +459,40 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
int32_t e_in_lit = 0; int32_t e_in_lit = 0;
bool e_in_lit_sign = false; bool e_in_lit_sign = false;
if (begin_p <= end_p if (str_p <= end_p
&& (*begin_p == LIT_CHAR_LOWERCASE_E && (*str_p == LIT_CHAR_LOWERCASE_E
|| *begin_p == LIT_CHAR_UPPERCASE_E)) || *str_p == LIT_CHAR_UPPERCASE_E))
{ {
begin_p++; str_p++;
if (!digit_seen || begin_p > end_p) if (!digit_seen || str_p > end_p)
{ {
return ecma_number_make_nan (); return ecma_number_make_nan ();
} }
if (*begin_p == LIT_CHAR_PLUS) if (*str_p == LIT_CHAR_PLUS)
{ {
begin_p++; str_p++;
} }
else if (*begin_p == LIT_CHAR_MINUS) else if (*str_p == LIT_CHAR_MINUS)
{ {
e_in_lit_sign = true; e_in_lit_sign = true;
begin_p++; str_p++;
} }
if (begin_p > end_p) if (str_p > end_p)
{ {
return ecma_number_make_nan (); return ecma_number_make_nan ();
} }
while (begin_p <= end_p) while (str_p <= end_p)
{ {
int32_t digit_value; int32_t digit_value;
if (*begin_p >= LIT_CHAR_0 if (*str_p >= LIT_CHAR_0
&& *begin_p <= LIT_CHAR_9) && *str_p <= LIT_CHAR_9)
{ {
digit_value = (*begin_p - LIT_CHAR_0); digit_value = (*str_p - LIT_CHAR_0);
} }
else else
{ {
@@ -543,7 +511,7 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
return sign ? -ECMA_NUMBER_ZERO : ECMA_NUMBER_ZERO; return sign ? -ECMA_NUMBER_ZERO : ECMA_NUMBER_ZERO;
} }
begin_p++; str_p++;
} }
} }
@@ -569,12 +537,12 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
e_sign = false; e_sign = false;
} }
if (begin_p <= end_p) if (str_p <= end_p)
{ {
return ecma_number_make_nan (); return ecma_number_make_nan ();
} }
JERRY_ASSERT (begin_p == end_p + 1); JERRY_ASSERT (str_p == end_p + 1);
if (fraction_uint64 == 0) if (fraction_uint64 == 0)
{ {
+60 -48
View File
@@ -2449,6 +2449,57 @@ ecma_string_substr (const ecma_string_t *string_p, /**< pointer to an ecma strin
return ecma_string_p; return ecma_string_p;
} /* ecma_string_substr */ } /* ecma_string_substr */
/**
 * Helper function for trimming.
 *
 * Narrows the string range described by the two arguments so that it no
 * longer starts or ends with characters accepted by lit_char_is_white_space
 * or lit_char_is_line_terminator. This function does not copy anything:
 * only the caller's pointer and size are overwritten.
 *
 * Note: callers that still need the original start address or size after
 *       the call must save them beforehand.
 *
 * Used by:
 *        - ecma_string_trim
 *        - ecma_utf8_string_to_number
 *        - ecma_builtin_global_object_parse_int
 *        - ecma_builtin_global_object_parse_float
 */
void
ecma_string_trim_helper (const lit_utf8_byte_t **utf8_str_p, /**< [in, out] current string position */
                         lit_utf8_size_t *utf8_str_size) /**< [in, out] size of the given string */
{
  const lit_utf8_byte_t *const input_end_p = *utf8_str_p + *utf8_str_size;

  /* Stays at the end of the input when every character gets trimmed. */
  const lit_utf8_byte_t *trimmed_begin_p = input_end_p;

  /* Scan forward for the first character that has to be kept. */
  for (const lit_utf8_byte_t *cursor_p = *utf8_str_p; cursor_p < input_end_p;)
  {
    ecma_char_t code_unit;
    lit_utf8_size_t step = lit_read_code_unit_from_utf8 (cursor_p, &code_unit);

    if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
    {
      cursor_p += step;
      continue;
    }

    trimmed_begin_p = cursor_p;
    break;
  }

  /* Scan backward from the end, never moving before the new start. */
  const lit_utf8_byte_t *trimmed_end_p = input_end_p;

  while (trimmed_end_p > trimmed_begin_p)
  {
    ecma_char_t code_unit;
    lit_utf8_size_t step = lit_read_prev_code_unit_from_utf8 (trimmed_end_p, &code_unit);

    if (!lit_char_is_white_space (code_unit) && !lit_char_is_line_terminator (code_unit))
    {
      break;
    }

    trimmed_end_p -= step;
  }

  *utf8_str_p = trimmed_begin_p;
  *utf8_str_size = (lit_utf8_size_t) (trimmed_end_p - trimmed_begin_p);
} /* ecma_string_trim_helper */
/** /**
* Trim leading and trailing whitespace characters from string. * Trim leading and trailing whitespace characters from string.
* *
@@ -2459,63 +2510,24 @@ ecma_string_trim (const ecma_string_t *string_p) /**< pointer to an ecma string
{ {
ecma_string_t *ret_string_p; ecma_string_t *ret_string_p;
ECMA_STRING_TO_UTF8_STRING (string_p, utf8_str_p, utf8_str_size); lit_utf8_size_t utf8_str_size;
uint8_t flags = ECMA_STRING_FLAG_IS_ASCII;
const lit_utf8_byte_t *utf8_str_p = ecma_string_get_chars (string_p, &utf8_str_size, &flags);
if (utf8_str_size > 0) if (utf8_str_size > 0)
{ {
ecma_char_t ch; ecma_string_trim_helper (&utf8_str_p, &utf8_str_size);
lit_utf8_size_t read_size; ret_string_p = ecma_new_ecma_string_from_utf8 (utf8_str_p, utf8_str_size);
const lit_utf8_byte_t *nonws_start_p = utf8_str_p + utf8_str_size;
const lit_utf8_byte_t *current_p = utf8_str_p;
/* Trim front. */
while (current_p < nonws_start_p)
{
read_size = lit_read_code_unit_from_utf8 (current_p, &ch);
if (!lit_char_is_white_space (ch)
&& !lit_char_is_line_terminator (ch))
{
nonws_start_p = current_p;
break;
}
current_p += read_size;
}
current_p = utf8_str_p + utf8_str_size;
/* Trim back. */
while (current_p > utf8_str_p)
{
read_size = lit_read_prev_code_unit_from_utf8 (current_p, &ch);
if (!lit_char_is_white_space (ch)
&& !lit_char_is_line_terminator (ch))
{
break;
}
current_p -= read_size;
}
/* Construct new string. */
if (current_p > nonws_start_p)
{
ret_string_p = ecma_new_ecma_string_from_utf8 (nonws_start_p,
(lit_utf8_size_t) (current_p - nonws_start_p));
}
else
{
ret_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
}
} }
else else
{ {
ret_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY); ret_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
} }
ECMA_FINALIZE_UTF8_STRING (utf8_str_p, utf8_str_size); if (flags & ECMA_STRING_FLAG_MUST_BE_FREED)
{
jmem_heap_free_block ((void *) utf8_str_p, utf8_str_size);
}
return ret_string_p; return ret_string_p;
} /* ecma_string_trim */ } /* ecma_string_trim */
+2
View File
@@ -296,6 +296,8 @@ lit_magic_string_id_t ecma_get_string_magic (const ecma_string_t *string_p);
lit_string_hash_t ecma_string_hash (const ecma_string_t *string_p); lit_string_hash_t ecma_string_hash (const ecma_string_t *string_p);
ecma_string_t *ecma_string_substr (const ecma_string_t *string_p, ecma_length_t start_pos, ecma_length_t end_pos); ecma_string_t *ecma_string_substr (const ecma_string_t *string_p, ecma_length_t start_pos, ecma_length_t end_pos);
void ecma_string_trim_helper (const lit_utf8_byte_t **utf8_str_p,
lit_utf8_size_t *utf8_str_size);
ecma_string_t *ecma_string_trim (const ecma_string_t *string_p); ecma_string_t *ecma_string_trim (const ecma_string_t *string_p);
ecma_stringbuilder_t ecma_stringbuilder_create (void); ecma_stringbuilder_t ecma_stringbuilder_create (void);
@@ -111,31 +111,6 @@ ecma_builtin_global_object_eval (ecma_value_t x) /**< routine's first argument *
return ecma_op_eval (ecma_get_string_from_value (x), parse_opts); return ecma_op_eval (ecma_get_string_from_value (x), parse_opts);
} /* ecma_builtin_global_object_eval */ } /* ecma_builtin_global_object_eval */
/**
 * Skip leading white space and line terminator characters
 * for the Global object's 'parseInt' and 'parseFloat' routines.
 *
 * Characters are consumed from the current position until one is found that
 * is neither white space nor a line terminator. The position is then moved
 * back by one character and stored as the start of the trimmed string.
 * If the end of the buffer is reached first, the start position is left
 * unmodified.
 */
static void
ecma_builtin_global_remove_leading_white_spaces (const lit_utf8_byte_t **string_curr_p, /**< [in, out] current
                                                                                         *   string position */
                                                 const lit_utf8_byte_t *string_end_p, /**< end of the string
                                                                                       *   buffer */
                                                 const lit_utf8_byte_t **start_p) /**< [in, out] start position
                                                                                   *   of the trimmed string */
{
  while (*string_curr_p < string_end_p)
  {
    const ecma_char_t code_unit = lit_utf8_read_next (string_curr_p);

    if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
    {
      continue;
    }

    /* The read above already moved past the character to keep: step back onto it. */
    lit_utf8_decr (string_curr_p);
    *start_p = *string_curr_p;
    return;
  }
} /* ecma_builtin_global_remove_leading_white_spaces */
/** /**
* The Global object's 'parseInt' routine * The Global object's 'parseInt' routine
* *
@@ -158,13 +133,13 @@ ecma_builtin_global_object_parse_int (const lit_utf8_byte_t *string_buff, /**< r
} }
const lit_utf8_byte_t *string_curr_p = string_buff; const lit_utf8_byte_t *string_curr_p = string_buff;
const lit_utf8_byte_t *string_end_p = string_buff + string_buff_size;
/* 2. Remove leading whitespace. */ /* 2. Remove leading whitespace. */
const lit_utf8_byte_t *start_p = string_end_p; ecma_string_trim_helper (&string_curr_p, &string_buff_size);
const lit_utf8_byte_t *end_p = start_p;
ecma_builtin_global_remove_leading_white_spaces (&string_curr_p, string_end_p, &start_p); const lit_utf8_byte_t *string_end_p = string_curr_p + string_buff_size;
const lit_utf8_byte_t *start_p = string_curr_p;
const lit_utf8_byte_t *end_p = string_end_p;
if (string_curr_p >= string_end_p) if (string_curr_p >= string_end_p)
{ {
@@ -336,14 +311,14 @@ ecma_builtin_global_object_parse_float (const lit_utf8_byte_t *string_buff, /**<
} }
const lit_utf8_byte_t *str_curr_p = string_buff; const lit_utf8_byte_t *str_curr_p = string_buff;
const lit_utf8_byte_t *str_end_p = string_buff + string_buff_size;
const lit_utf8_byte_t *start_p = str_end_p; /* 2. Remove leading whitespace. */
ecma_string_trim_helper (&str_curr_p, &string_buff_size);
const lit_utf8_byte_t *str_end_p = str_curr_p + string_buff_size;
const lit_utf8_byte_t *start_p = str_curr_p;
const lit_utf8_byte_t *end_p = str_end_p; const lit_utf8_byte_t *end_p = str_end_p;
/* 2. Find first non whitespace char and set starting position. */
ecma_builtin_global_remove_leading_white_spaces (&str_curr_p, str_end_p, &start_p);
bool sign = false; bool sign = false;
ecma_char_t current; ecma_char_t current;