Add core Unicode functionality.

Add UTF-8 processing routines. Change ecma_char_t from char/uint16_t to uint16_t. Use the new UTF-8 processing routines throughout. Change char to jerry_api_char_t in API functions' declarations. JerryScript-DCO-1.0-Signed-off-by: Andrey Shitov a.shitov@samsung.com
2015-06-29 19:17:17 +03:00
parent c4b0cd2196
commit fd9ff8e3bd
56 changed files with 2468 additions and 1480 deletions
@@ -309,7 +309,8 @@ parse_property_name (void)
    case TOK_KEYWORD:
    {
      const char *s = lexer_keyword_to_string ((keyword) token_data ());
-      literal_t lit = lit_find_or_create_literal_from_charset ((const ecma_char_t *) s, (ecma_length_t) strlen (s));
+      literal_t lit = lit_find_or_create_literal_from_utf8_string ((const lit_utf8_byte_t *) s,
+                                                                   (lit_utf8_size_t)strlen (s));
      return literal_operand (lit_cpointer_t::compress (lit));
    }
    default:
@@ -345,11 +346,11 @@ parse_property_assignment (void)
  {
    bool is_setter;

-    if (lit_literal_equal_type_zt (lit_get_literal_by_cp (token_data_as_lit_cp ()), (const ecma_char_t *) "get"))
+    if (lit_literal_equal_type_cstr (lit_get_literal_by_cp (token_data_as_lit_cp ()), "get"))
    {
      is_setter = false;
    }
-    else if (lit_literal_equal_type_zt (lit_get_literal_by_cp (token_data_as_lit_cp ()), (const ecma_char_t *) "set"))
+    else if (lit_literal_equal_type_cstr (lit_get_literal_by_cp (token_data_as_lit_cp ()), "set"))
    {
      is_setter = true;
    }
@@ -874,7 +875,8 @@ parse_member_expression (operand *this_arg, operand *prop_gl)
      else if (token_is (TOK_KEYWORD))
      {
        const char *s = lexer_keyword_to_string ((keyword) token_data ());
-        literal_t lit = lit_find_or_create_literal_from_charset ((const ecma_char_t *) s, (ecma_length_t) strlen (s));
+        literal_t lit = lit_find_or_create_literal_from_utf8_string ((lit_utf8_byte_t *) s,
+                                                                     (lit_utf8_size_t) strlen (s));
        if (lit == NULL)
        {
          EMIT_ERROR ("Expected identifier");
@@ -2848,8 +2850,8 @@ preparse_scope (bool is_global)
  bool is_ref_eval_identifier = false;
  bool is_use_strict = false;

-  if (token_is (TOK_STRING) && lit_literal_equal_zt (lit_get_literal_by_cp (token_data_as_lit_cp ()),
-                                                     (const ecma_char_t *) "use strict"))
+  if (token_is (TOK_STRING) && lit_literal_equal_type_cstr (lit_get_literal_by_cp (token_data_as_lit_cp ()),
+                                                            "use strict"))
  {
    scopes_tree_set_strict_mode (STACK_TOP (scopes), true);
    is_use_strict = true;
@@ -2866,13 +2868,11 @@ preparse_scope (bool is_global)
  {
    if (token_is (TOK_NAME))
    {
-      if (lit_literal_equal_type_zt (lit_get_literal_by_cp (token_data_as_lit_cp ()),
-                                     (const ecma_char_t *) "arguments"))
+      if (lit_literal_equal_type_cstr (lit_get_literal_by_cp (token_data_as_lit_cp ()), "arguments"))
      {
        is_ref_arguments_identifier = true;
      }
-      else if (lit_literal_equal_type_zt (lit_get_literal_by_cp (token_data_as_lit_cp ()),
-                                          (const ecma_char_t *) "eval"))
+      else if (lit_literal_equal_type_cstr (lit_get_literal_by_cp (token_data_as_lit_cp ()), "eval"))
      {
        is_ref_eval_identifier = true;
      }
@@ -3032,7 +3032,7 @@ parse_source_element_list (bool is_global) /**< flag indicating if we are parsin
 *         false - otherwise.
 */
 static bool
-parser_parse_program (const char *source_p, /**< source code buffer */
+parser_parse_program (const jerry_api_char_t *source_p, /**< source code buffer */
                      size_t source_size, /**< source code size in bytes */
                      bool in_function, /**< flag indicating if we are parsing body of a function */
                      bool in_eval, /**< flag indicating if we are parsing body of eval code */
@@ -3137,7 +3137,7 @@ parser_parse_program (const char *source_p, /**< source code buffer */
 *         false - otherwise.
 */
 bool
-parser_parse_script (const char *source, /**< source script */
+parser_parse_script (const jerry_api_char_t *source, /**< source script */
                     size_t source_size, /**< source script size in bytes */
                     const opcode_t **opcodes_p) /**< out: generated byte-code array
                                                  *  (in case there were no syntax errors) */
@@ -3152,7 +3152,7 @@ parser_parse_script (const char *source, /**< source script */
 *         false - otherwise.
 */
 bool
-parser_parse_eval (const char *source, /**< string passed to eval() */
+parser_parse_eval (const jerry_api_char_t *source, /**< string passed to eval() */
                   size_t source_size, /**< string size in bytes */
                   bool is_strict, /**< flag, indicating whether eval is called
                                    *   from strict code in direct mode */
@@ -3173,7 +3173,9 @@ parser_parse_eval (const char *source, /**< string passed to eval() */
 *         false - otherwise.
 */
 bool
-parser_parse_new_function (const char **params, /**< array of arguments of new Function (p1, p2, ..., pn, body) call */
+parser_parse_new_function (const jerry_api_char_t **params, /**< array of arguments of new Function (p1, p2, ..., pn,
+                                                             *                                       body) call */
+                           const size_t *params_size, /**< sizes of arguments strings */
                           size_t params_count, /**< total number of arguments passed to new Function (...) */
                           const opcode_t **out_opcodes_p) /**< out: generated byte-code array
                                                            *  (in case there were no syntax errors) */
@@ -3183,10 +3185,10 @@ parser_parse_new_function (const char **params, /**< array of arguments of new F
  for (size_t i = 0; i < params_count - 1; ++i)
  {
    FIXME ("check parameter's name for syntax errors");
-    lit_find_or_create_literal_from_charset ((ecma_char_t *) params[i], (ecma_length_t) strlen (params[i]));
+    lit_find_or_create_literal_from_utf8_string ((lit_utf8_byte_t *) params[i], (lit_utf8_size_t) params_size[i]);
  }
  return parser_parse_program (params[params_count - 1],
-                               strlen (params[params_count - 1]),
+                               params_size[params_count - 1],
                               true,
                               false,
                               false,