Rework the public API (#4829)

Related to #4186. Some notable changes: - The term 'Error' now strictly refers to native Error objects defined in the ECMA standard, which are ordinary objects. All other uses of 'error' or 'error reference' where the term refers to a thrown value are now called 'exception'. - Simplified the naming scheme of many String API functions. These functions will now also take an 'encoding' argument to specify the desired encoding in which to operate. - Removed the substring-copy-to-buffer functions. These functions behaved awkwardly, as they used character indices to specify the start/end positions, and were mostly used incorrectly with byte offsets instead. The functionality can still be replicated with other functions if necessary. - String-to-buffer functions will no longer fail if the buffer is not sufficiently large; the string will instead be cropped. - Fixed the usage of the '_sz' prefix in many API functions. The term 'sz' means zero-terminated string in Hungarian notation; this was used incorrectly in many cases. - Renamed most of the public API functions to have shorter, more on-point names, rather than the often too long descriptive names. Functions are now also grouped by the type of value they operate on, where this makes sense. JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai@inf.u-szeged.hu
2021-12-06 10:20:09 +01:00
parent 81d2319144
commit 9860d66a56
180 changed files with 10738 additions and 11025 deletions
@@ -587,7 +587,7 @@ lit_utf8_decr (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characte
  do
  {
    current_p--;
-  } while ((*(current_p) &LIT_UTF8_EXTRA_BYTE_MASK) == LIT_UTF8_EXTRA_BYTE_MARKER);
+  } while ((*current_p & LIT_UTF8_EXTRA_BYTE_MASK) == LIT_UTF8_EXTRA_BYTE_MARKER);

  *buf_p = current_p;
 } /* lit_utf8_decr */
@@ -824,57 +824,51 @@ lit_code_point_to_utf8 (lit_code_point_t code_point, /**< code point */

 /**
 * Convert cesu-8 string to an utf-8 string and put it into the buffer.
- * It is the caller's responsibility to make sure that the string fits in the buffer.
+ * The output is truncated at a code point boundary if it does not fit into the buffer.
 *
 * @return number of bytes copied to the buffer.
 */
 lit_utf8_size_t
-lit_convert_cesu8_string_to_utf8_string (const lit_utf8_byte_t *cesu8_string, /**< cesu-8 string */
+lit_convert_cesu8_string_to_utf8_string (const lit_utf8_byte_t *cesu8_string_p, /**< cesu-8 string */
                                         lit_utf8_size_t cesu8_size, /**< size of cesu-8 string */
-                                         lit_utf8_byte_t *utf8_string, /**< destination utf-8 buffer pointer
-                                                                        * (can be NULL if buffer_size == 0) */
+                                         lit_utf8_byte_t *utf8_string_p, /**< destination utf-8 buffer pointer
+                                                                          * (can be NULL if buffer_size == 0) */
                                         lit_utf8_size_t utf8_size) /**< size of utf-8 buffer */
 {
-  const lit_utf8_byte_t *cesu8_pos = cesu8_string;
-  const lit_utf8_byte_t *cesu8_end_pos = cesu8_string + cesu8_size;
+  const lit_utf8_byte_t *cesu8_cursor_p = cesu8_string_p;
+  const lit_utf8_byte_t *cesu8_end_p = cesu8_string_p + cesu8_size;

-  lit_utf8_byte_t *utf8_pos = utf8_string;
-  lit_utf8_byte_t *utf8_end_pos = utf8_string + utf8_size;
+  lit_utf8_byte_t *utf8_cursor_p = utf8_string_p;
+  lit_utf8_byte_t *utf8_end_p = utf8_string_p + utf8_size;

-  lit_utf8_size_t size = 0;
-
-  ecma_char_t prev_ch = 0;
-  lit_utf8_size_t prev_ch_size = 0;
-
-  while (cesu8_pos < cesu8_end_pos)
+  while (cesu8_cursor_p < cesu8_end_p)
   {
-    ecma_char_t ch;
-    lit_utf8_size_t code_unit_size = lit_read_code_unit_from_cesu8 (cesu8_pos, &ch);
+    lit_code_point_t cp;
+    lit_utf8_size_t read_size = lit_read_code_point_from_cesu8 (cesu8_cursor_p, cesu8_end_p, &cp);
+    lit_utf8_size_t encoded_size = (cp >= LIT_UTF16_FIRST_SURROGATE_CODE_POINT) ? 4 : read_size;

-    if (lit_is_code_point_utf16_low_surrogate (ch) && lit_is_code_point_utf16_high_surrogate (prev_ch))
+    if ((lit_utf8_size_t) (utf8_end_p - utf8_cursor_p) < encoded_size)
     {
-      JERRY_ASSERT (code_unit_size == prev_ch_size);
-      utf8_pos -= prev_ch_size;
-      lit_code_point_t code_point = lit_convert_surrogate_pair_to_code_point (prev_ch, ch);
-      lit_code_point_to_utf8 (code_point, utf8_pos);
-      size++;
+      break; /* not enough room left: stop before writing a partial code point */
+    }
+
+    if (cp >= LIT_UTF16_FIRST_SURROGATE_CODE_POINT)
+    {
+      lit_code_point_to_utf8 (cp, utf8_cursor_p);
     }
     else
     {
-      memcpy (utf8_pos, cesu8_pos, code_unit_size);
-      size += code_unit_size;
+      memcpy (utf8_cursor_p, cesu8_cursor_p, encoded_size);
     }

-    utf8_pos = utf8_string + size;
-    cesu8_pos += code_unit_size;
-    prev_ch = ch;
-    prev_ch_size = code_unit_size;
+    utf8_cursor_p += encoded_size;
+    cesu8_cursor_p += read_size;
   }

-  JERRY_ASSERT (cesu8_pos == cesu8_end_pos);
-  JERRY_ASSERT (utf8_pos <= utf8_end_pos);
+  JERRY_ASSERT (cesu8_cursor_p <= cesu8_end_p); /* input may be left unconsumed when the output was truncated */
+  JERRY_ASSERT (utf8_cursor_p <= utf8_end_p);

-  return size;
+  return (lit_utf8_size_t) (utf8_cursor_p - utf8_string_p);
 } /* lit_convert_cesu8_string_to_utf8_string */

 /**
@@ -116,9 +116,9 @@ lit_utf8_size_t lit_get_unicode_char_size_by_utf8_first_byte (const lit_utf8_byt
 lit_utf8_size_t lit_code_unit_to_utf8 (ecma_char_t code_unit, lit_utf8_byte_t *buf_p);
 lit_utf8_size_t lit_code_point_to_utf8 (lit_code_point_t code_point, lit_utf8_byte_t *buf);
 lit_utf8_size_t lit_code_point_to_cesu8 (lit_code_point_t code_point, lit_utf8_byte_t *buf);
-lit_utf8_size_t lit_convert_cesu8_string_to_utf8_string (const lit_utf8_byte_t *cesu8_string,
+lit_utf8_size_t lit_convert_cesu8_string_to_utf8_string (const lit_utf8_byte_t *cesu8_string_p,
                                                         lit_utf8_size_t cesu8_size,
-                                                         lit_utf8_byte_t *utf8_string,
+                                                         lit_utf8_byte_t *utf8_string_p,
                                                         lit_utf8_size_t utf8_size);
 lit_code_point_t lit_convert_surrogate_pair_to_code_point (ecma_char_t high_surrogate, ecma_char_t low_surrogate);