New trim helper function (#3014)

Created a new trim helper function that is used everywhere a string needs to be trimmed.

Co-authored-by: Tibor Dusnoki tdusnoki@inf.u-szeged.hu
JerryScript-DCO-1.0-Signed-off-by: Adam Szilagyi aszilagy@inf.u-szeged.hu
2019-08-26 17:30:22 +02:00
parent fd075322fb
commit 97e348528a
4 changed files with 117 additions and 160 deletions
@@ -299,57 +299,25 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
    return ECMA_NUMBER_ZERO;
  }

-  const lit_utf8_byte_t *str_curr_p = str_p;
-  const lit_utf8_byte_t *str_end_p = str_p + str_size;
-  ecma_char_t code_unit;
+  ecma_string_trim_helper (&str_p, &str_size);
+  const lit_utf8_byte_t *end_p = str_p + (str_size - 1);

-  while (str_curr_p < str_end_p)
-  {
-    code_unit = lit_utf8_peek_next (str_curr_p);
-    if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
-    {
-      lit_utf8_incr (&str_curr_p);
-    }
-    else
-    {
-      break;
-    }
-  }
-
-  const lit_utf8_byte_t *begin_p = str_curr_p;
-  str_curr_p = (lit_utf8_byte_t *) str_end_p;
-
-  while (str_curr_p > str_p)
-  {
-    code_unit = lit_utf8_peek_prev (str_curr_p);
-    if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
-    {
-      lit_utf8_decr (&str_curr_p);
-    }
-    else
-    {
-      break;
-    }
-  }
-
-  const lit_utf8_byte_t *end_p = str_curr_p - 1;
-
-  if (begin_p > end_p)
+  if (str_size < 1)
  {
    return ECMA_NUMBER_ZERO;
  }

-  if ((end_p >= begin_p + 2)
-      && begin_p[0] == LIT_CHAR_0
-      && (begin_p[1] == LIT_CHAR_LOWERCASE_X
-          || begin_p[1] == LIT_CHAR_UPPERCASE_X))
+  if ((end_p >= str_p + 2)
+      && str_p[0] == LIT_CHAR_0
+      && (str_p[1] == LIT_CHAR_LOWERCASE_X
+          || str_p[1] == LIT_CHAR_UPPERCASE_X))
  {
    /* Hex literal handling */
-    begin_p += 2;
+    str_p += 2;

    ecma_number_t num = ECMA_NUMBER_ZERO;

-    for (const lit_utf8_byte_t * iter_p = begin_p;
+    for (const lit_utf8_byte_t * iter_p = str_p;
         iter_p <= end_p;
         iter_p++)
    {
@@ -383,18 +351,18 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */

  bool sign = false; /* positive */

-  if (*begin_p == LIT_CHAR_PLUS)
+  if (*str_p == LIT_CHAR_PLUS)
  {
-    begin_p++;
+    str_p++;
  }
-  else if (*begin_p == LIT_CHAR_MINUS)
+  else if (*str_p == LIT_CHAR_MINUS)
  {
    sign = true; /* negative */

-    begin_p++;
+    str_p++;
  }

-  if (begin_p > end_p)
+  if (str_p > end_p)
  {
    return ecma_number_make_nan ();
  }
@@ -404,7 +372,7 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */

  JERRY_ASSERT (strlen ((const char *) infinity_zt_str_p) == 8);

-  if ((end_p - begin_p) == (8 - 1) && memcmp (infinity_zt_str_p, begin_p, 8) == 0)
+  if ((end_p - str_p) == (8 - 1) && memcmp (infinity_zt_str_p, str_p, 8) == 0)
  {
    return ecma_number_make_infinity (sign);
  }
@@ -415,15 +383,15 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
  bool digit_seen = false;

  /* Parsing digits before dot (or before end of digits part if there is no dot in number) */
-  while (begin_p <= end_p)
+  while (str_p <= end_p)
  {
    int32_t digit_value;

-    if (*begin_p >= LIT_CHAR_0
-        && *begin_p <= LIT_CHAR_9)
+    if (*str_p >= LIT_CHAR_0
+        && *str_p <= LIT_CHAR_9)
    {
      digit_seen = true;
-      digit_value = (*begin_p - LIT_CHAR_0);
+      digit_value = (*str_p - LIT_CHAR_0);
    }
    else
    {
@@ -443,29 +411,29 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
      }
    }

-    begin_p++;
+    str_p++;
  }

-  if (begin_p <= end_p
-      && *begin_p == LIT_CHAR_DOT)
+  if (str_p <= end_p
+      && *str_p == LIT_CHAR_DOT)
  {
-    begin_p++;
+    str_p++;

-    if (!digit_seen && begin_p > end_p)
+    if (!digit_seen && str_p > end_p)
    {
      return ecma_number_make_nan ();
    }

    /* Parsing number's part that is placed after dot */
-    while (begin_p <= end_p)
+    while (str_p <= end_p)
    {
      int32_t digit_value;

-      if (*begin_p >= LIT_CHAR_0
-          && *begin_p <= LIT_CHAR_9)
+      if (*str_p >= LIT_CHAR_0
+          && *str_p <= LIT_CHAR_9)
      {
        digit_seen = true;
-        digit_value = (*begin_p - LIT_CHAR_0);
+        digit_value = (*str_p - LIT_CHAR_0);
      }
      else
      {
@@ -483,7 +451,7 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
        e--;
      }

-      begin_p++;
+      str_p++;
    }
  }

@@ -491,40 +459,40 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
  int32_t e_in_lit = 0;
  bool e_in_lit_sign = false;

-  if (begin_p <= end_p
-      && (*begin_p == LIT_CHAR_LOWERCASE_E
-          || *begin_p == LIT_CHAR_UPPERCASE_E))
+  if (str_p <= end_p
+      && (*str_p == LIT_CHAR_LOWERCASE_E
+          || *str_p == LIT_CHAR_UPPERCASE_E))
  {
-    begin_p++;
+    str_p++;

-    if (!digit_seen || begin_p > end_p)
+    if (!digit_seen || str_p > end_p)
    {
      return ecma_number_make_nan ();
    }

-    if (*begin_p == LIT_CHAR_PLUS)
+    if (*str_p == LIT_CHAR_PLUS)
    {
-      begin_p++;
+      str_p++;
    }
-    else if (*begin_p == LIT_CHAR_MINUS)
+    else if (*str_p == LIT_CHAR_MINUS)
    {
      e_in_lit_sign = true;
-      begin_p++;
+      str_p++;
    }

-    if (begin_p > end_p)
+    if (str_p > end_p)
    {
      return ecma_number_make_nan ();
    }

-    while (begin_p <= end_p)
+    while (str_p <= end_p)
    {
      int32_t digit_value;

-      if (*begin_p >= LIT_CHAR_0
-          && *begin_p <= LIT_CHAR_9)
+      if (*str_p >= LIT_CHAR_0
+          && *str_p <= LIT_CHAR_9)
      {
-        digit_value = (*begin_p - LIT_CHAR_0);
+        digit_value = (*str_p - LIT_CHAR_0);
      }
      else
      {
@@ -543,7 +511,7 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
        return sign ? -ECMA_NUMBER_ZERO : ECMA_NUMBER_ZERO;
      }

-      begin_p++;
+      str_p++;
    }
  }

@@ -569,12 +537,12 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
    e_sign = false;
  }

-  if (begin_p <= end_p)
+  if (str_p <= end_p)
  {
    return ecma_number_make_nan ();
  }

-  JERRY_ASSERT (begin_p == end_p + 1);
+  JERRY_ASSERT (str_p == end_p + 1);

  if (fraction_uint64 == 0)
  {
@@ -2449,6 +2449,57 @@ ecma_string_substr (const ecma_string_t *string_p, /**< pointer to an ecma strin
  return ecma_string_p;
 } /* ecma_string_substr */

+/**
+ * Helper function for trimming: narrows the (*utf8_str_p, *utf8_str_size) view
+ * past leading and trailing white space / line terminators (no copy is made).
+ * If nothing else remains, the size becomes 0 and the pointer is the old end.
+ * The original pointer and size are overwritten, so save them if needed later.
+ *
+ * Used by: ecma_string_trim, ecma_utf8_string_to_number,
+ *          ecma_builtin_global_object_parse_int, ecma_builtin_global_object_parse_float
+ */
+void
+ecma_string_trim_helper (const lit_utf8_byte_t **utf8_str_p, /**< [in, out] string start, moved to first kept byte */
+                         lit_utf8_size_t *utf8_str_size) /**< [in, out] size in bytes, reduced to the kept part */
+{
+  ecma_char_t ch;
+  lit_utf8_size_t read_size;
+  const lit_utf8_byte_t *nonws_start_p = *utf8_str_p + *utf8_str_size;
+  const lit_utf8_byte_t *current_p = *utf8_str_p;
+  /* Trim front; nonws_start_p starts at the buffer end, so all-white-space input yields an empty result. */
+  while (current_p < nonws_start_p)
+  {
+    read_size = lit_read_code_unit_from_utf8 (current_p, &ch);
+
+    if (!lit_char_is_white_space (ch)
+        && !lit_char_is_line_terminator (ch))
+    {
+      nonws_start_p = current_p;
+      break;
+    }
+
+    current_p += read_size;
+  }
+
+  current_p = *utf8_str_p + *utf8_str_size;
+  /* Trim back; stops at nonws_start_p, so the already accepted front is never crossed. */
+  while (current_p > nonws_start_p)
+  {
+    read_size = lit_read_prev_code_unit_from_utf8 (current_p, &ch);
+
+    if (!lit_char_is_white_space (ch)
+        && !lit_char_is_line_terminator (ch))
+    {
+      break;
+    }
+
+    current_p -= read_size;
+  }
+  /* Publish the trimmed view through the in/out parameters. */
+  *utf8_str_p = nonws_start_p;
+  *utf8_str_size = (lit_utf8_size_t) (current_p - nonws_start_p);
+} /* ecma_string_trim_helper */
+
 /**
 * Trim leading and trailing whitespace characters from string.
 *
@@ -2459,63 +2510,24 @@ ecma_string_trim (const ecma_string_t *string_p) /**< pointer to an ecma string
 {
  ecma_string_t *ret_string_p;

-  ECMA_STRING_TO_UTF8_STRING (string_p, utf8_str_p, utf8_str_size);
+  lit_utf8_size_t utf8_str_size;
+  uint8_t flags = ECMA_STRING_FLAG_IS_ASCII;
+  const lit_utf8_byte_t *utf8_str_p = ecma_string_get_chars (string_p, &utf8_str_size, &flags);

  if (utf8_str_size > 0)
  {
-    ecma_char_t ch;
-    lit_utf8_size_t read_size;
-    const lit_utf8_byte_t *nonws_start_p = utf8_str_p + utf8_str_size;
-    const lit_utf8_byte_t *current_p = utf8_str_p;
-
-    /* Trim front. */
-    while (current_p < nonws_start_p)
-    {
-      read_size = lit_read_code_unit_from_utf8 (current_p, &ch);
-
-      if (!lit_char_is_white_space (ch)
-          && !lit_char_is_line_terminator (ch))
-      {
-        nonws_start_p = current_p;
-        break;
-      }
-
-      current_p += read_size;
-    }
-
-    current_p = utf8_str_p + utf8_str_size;
-
-    /* Trim back. */
-    while (current_p > utf8_str_p)
-    {
-      read_size = lit_read_prev_code_unit_from_utf8 (current_p, &ch);
-
-      if (!lit_char_is_white_space (ch)
-          && !lit_char_is_line_terminator (ch))
-      {
-        break;
-      }
-
-      current_p -= read_size;
-    }
-
-    /* Construct new string. */
-    if (current_p > nonws_start_p)
-    {
-      ret_string_p = ecma_new_ecma_string_from_utf8 (nonws_start_p,
-                                                     (lit_utf8_size_t) (current_p - nonws_start_p));
-    }
-    else
-    {
-      ret_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
-    }
+    ecma_string_trim_helper (&utf8_str_p, &utf8_str_size);
+    ret_string_p = ecma_new_ecma_string_from_utf8 (utf8_str_p, utf8_str_size);
  }
  else
  {
    ret_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
  }

-  ECMA_FINALIZE_UTF8_STRING (utf8_str_p, utf8_str_size);
+  if (flags & ECMA_STRING_FLAG_MUST_BE_FREED)
+  {
+    jmem_heap_free_block ((void *) utf8_str_p, utf8_str_size);
+  }

  return ret_string_p;
 } /* ecma_string_trim */
@@ -296,6 +296,8 @@ lit_magic_string_id_t ecma_get_string_magic (const ecma_string_t *string_p);

 lit_string_hash_t ecma_string_hash (const ecma_string_t *string_p);
 ecma_string_t *ecma_string_substr (const ecma_string_t *string_p, ecma_length_t start_pos, ecma_length_t end_pos);
+void ecma_string_trim_helper (const lit_utf8_byte_t **utf8_str_p,
+                              lit_utf8_size_t *utf8_str_size);
 ecma_string_t *ecma_string_trim (const ecma_string_t *string_p);

 ecma_stringbuilder_t ecma_stringbuilder_create (void);
@@ -111,31 +111,6 @@ ecma_builtin_global_object_eval (ecma_value_t x) /**< routine's first argument *
  return ecma_op_eval (ecma_get_string_from_value (x), parse_opts);
 } /* ecma_builtin_global_object_eval */

-/**
- * Helper function for trimming leading whitespaces
- * for the Global object's 'parseInt' and 'parseFloat' routines
- */
-static void
-ecma_builtin_global_remove_leading_white_spaces (const lit_utf8_byte_t **string_curr_p, /**< [in, out] current string
-                                                                                         *    position */
-                                                 const lit_utf8_byte_t *string_end_p, /**< end of the string buffer */
-                                                 const lit_utf8_byte_t **start_p) /**< [in, out] start position of the
-                                                                                   *    trimmed string */
-{
-  while (*string_curr_p < string_end_p)
-  {
-    ecma_char_t current_char = lit_utf8_read_next (string_curr_p);
-
-    if (!lit_char_is_white_space (current_char)
-        && !lit_char_is_line_terminator (current_char))
-    {
-      lit_utf8_decr (string_curr_p);
-      *start_p = *string_curr_p;
-      break;
-    }
-  }
-} /* ecma_builtin_global_remove_leading_white_spaces */
-
 /**
 * The Global object's 'parseInt' routine
 *
@@ -158,13 +133,13 @@ ecma_builtin_global_object_parse_int (const lit_utf8_byte_t *string_buff, /**< r
  }

  const lit_utf8_byte_t *string_curr_p = string_buff;
-  const lit_utf8_byte_t *string_end_p = string_buff + string_buff_size;

  /* 2. Remove leading whitespace. */
-  const lit_utf8_byte_t *start_p = string_end_p;
-  const lit_utf8_byte_t *end_p = start_p;
+  ecma_string_trim_helper (&string_curr_p, &string_buff_size);

-  ecma_builtin_global_remove_leading_white_spaces (&string_curr_p, string_end_p, &start_p);
+  const lit_utf8_byte_t *string_end_p = string_curr_p + string_buff_size;
+  const lit_utf8_byte_t *start_p = string_curr_p;
+  const lit_utf8_byte_t *end_p = string_end_p;

  if (string_curr_p >= string_end_p)
  {
@@ -336,14 +311,14 @@ ecma_builtin_global_object_parse_float (const lit_utf8_byte_t *string_buff, /**<
  }

  const lit_utf8_byte_t *str_curr_p = string_buff;
-  const lit_utf8_byte_t *str_end_p = string_buff + string_buff_size;

-  const lit_utf8_byte_t *start_p = str_end_p;
+  /* 2. Remove leading whitespace (the shared helper also strips trailing whitespace). */
+  ecma_string_trim_helper (&str_curr_p, &string_buff_size);
+
+  const lit_utf8_byte_t *str_end_p = str_curr_p + string_buff_size;
+  const lit_utf8_byte_t *start_p = str_curr_p;
  const lit_utf8_byte_t *end_p = str_end_p;

-  /* 2. Find first non whitespace char and set starting position. */
-  ecma_builtin_global_remove_leading_white_spaces (&str_curr_p, str_end_p, &start_p);
-
  bool sign = false;
  ecma_char_t current;