Support Unicode supplementary planes (#3928)

JerryScript-DCO-1.0-Signed-off-by: Robert Fancsik frobert@inf.u-szeged.hu
2020-07-06 14:21:13 +02:00
parent 7353b253ab
commit c1e90da0b4
16 changed files with 1105 additions and 861 deletions
@@ -2605,6 +2605,19 @@ ecma_stringbuilder_append_raw (ecma_stringbuilder_t *builder_p, /**< string buil
  memcpy (dest_p, data_p, data_size);
 } /* ecma_stringbuilder_append_raw */

+/**
+ * Append a code point to a string builder, encoded as CESU-8 bytes
+ */
+void
+ecma_stringbuilder_append_codepoint (ecma_stringbuilder_t *builder_p, /**< string builder */
+                                     lit_code_point_t cp) /**< code point */
+{
+  const lit_utf8_size_t size = (lit_utf8_size_t) lit_code_point_get_cesu8_length (cp); /* encoded byte length */
+  lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, size);
+
+  lit_code_point_to_cesu8_bytes (dest_p, cp); /* encode into the pointer returned by ecma_stringbuilder_grow */
+} /* ecma_stringbuilder_append_codepoint */
+
 /**
 * Append an ecma_char_t to a string builder
 */
@@ -2612,10 +2625,7 @@ void
 ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, /**< string builder */
                                const ecma_char_t c) /**< ecma char */
 {
-  const lit_utf8_size_t size = (lit_utf8_size_t) lit_code_point_get_cesu8_length (c);
-  lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, size);
-
-  lit_code_point_to_cesu8_bytes (dest_p, c);
+  ecma_stringbuilder_append_codepoint (builder_p, c);
 } /* ecma_stringbuilder_append_char */

 /**
@@ -393,6 +393,7 @@ void ecma_stringbuilder_append_magic (ecma_stringbuilder_t *builder_p, const lit
 void ecma_stringbuilder_append_raw (ecma_stringbuilder_t *builder_p,
                                    const lit_utf8_byte_t *data_p,
                                    const lit_utf8_size_t data_size);
+void ecma_stringbuilder_append_codepoint (ecma_stringbuilder_t *builder_p, lit_code_point_t cp);
 void ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, const ecma_char_t c);
 void ecma_stringbuilder_append_byte (ecma_stringbuilder_t *builder_p, const lit_utf8_byte_t);
 ecma_string_t *ecma_stringbuilder_finalize (ecma_stringbuilder_t *builder_p);
@@ -988,96 +988,42 @@ ecma_builtin_string_prototype_object_conversion_helper (ecma_string_t *input_str
                                                        bool lower_case) /**< convert to lower (true)
                                                                          *   or upper (false) case */
 {
-  ecma_value_t ret_value = ECMA_VALUE_EMPTY;
+  ecma_stringbuilder_t builder = ecma_stringbuilder_create ();

-  /* 3. */
  ECMA_STRING_TO_UTF8_STRING (input_string_p, input_start_p, input_start_size);

-  /*
-   * The URI encoding has two major phases: first we compute
-   * the length of the lower case string, then we encode it.
-   */
-
-  lit_utf8_size_t output_length = 0;
-  const lit_utf8_byte_t *input_str_curr_p = input_start_p;
+  const lit_utf8_byte_t *input_curr_p = input_start_p;
  const lit_utf8_byte_t *input_str_end_p = input_start_p + input_start_size;

-  while (input_str_curr_p < input_str_end_p)
+  while (input_curr_p < input_str_end_p)
  {
-    ecma_char_t character = lit_cesu8_read_next (&input_str_curr_p);
-    ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
-    ecma_length_t character_length;
-    lit_utf8_byte_t utf8_byte_buffer[LIT_CESU8_MAX_BYTES_IN_CODE_POINT];
+    lit_code_point_t cp = lit_cesu8_read_next (&input_curr_p);
+
+#if ENABLED (JERRY_ESNEXT)
+    if (lit_is_code_point_utf16_high_surrogate (cp) && input_curr_p < input_str_end_p)
+    { /* bounds check above: a trailing lone high surrogate must not trigger a peek past the input */
+      const ecma_char_t next_ch = lit_cesu8_peek_next (input_curr_p);
+      if (lit_is_code_point_utf16_low_surrogate (next_ch))
+      { /* valid pair: merge both halves into a single code point and consume the low half */
+        cp = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) cp, next_ch);
+        input_curr_p += LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;
+      }
+    }
+#endif /* ENABLED (JERRY_ESNEXT) */

    if (lower_case)
    {
-      character_length = lit_char_to_lower_case (character,
-                                                 character_buffer,
-                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
+      lit_char_to_lower_case (cp, &builder);
    }
    else
    {
-      character_length = lit_char_to_upper_case (character,
-                                                 character_buffer,
-                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
-    }
-
-    JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
-
-    for (ecma_length_t i = 0; i < character_length; i++)
-    {
-      output_length += lit_code_unit_to_utf8 (character_buffer[i], utf8_byte_buffer);
+      lit_char_to_upper_case (cp, &builder);
    }
  }

-  /* Second phase. */
-
-  JMEM_DEFINE_LOCAL_ARRAY (output_start_p,
-                           output_length,
-                           lit_utf8_byte_t);
-
-  lit_utf8_byte_t *output_char_p = output_start_p;
-
-  /* Encoding the output. */
-  input_str_curr_p = input_start_p;
-
-  while (input_str_curr_p < input_str_end_p)
-  {
-    ecma_char_t character = lit_cesu8_read_next (&input_str_curr_p);
-    ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
-    ecma_length_t character_length;
-
-    if (lower_case)
-    {
-      character_length = lit_char_to_lower_case (character,
-                                                 character_buffer,
-                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
-    }
-    else
-    {
-      character_length = lit_char_to_upper_case (character,
-                                                 character_buffer,
-                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
-    }
-
-    JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
-
-    for (ecma_length_t i = 0; i < character_length; i++)
-    {
-      output_char_p += lit_code_unit_to_utf8 (character_buffer[i], output_char_p);
-    }
-  }
-
-  JERRY_ASSERT (output_start_p + output_length == output_char_p);
-
-  ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_length);
-
-  ret_value = ecma_make_string_value (output_string_p);
-
-  JMEM_FINALIZE_LOCAL_ARRAY (output_start_p);
  ECMA_FINALIZE_UTF8_STRING (input_start_p, input_start_size);

-  return ret_value;
+  return ecma_make_string_value (ecma_stringbuilder_finalize (&builder));
 } /* ecma_builtin_string_prototype_object_conversion_helper */

 /**
@@ -413,23 +413,13 @@ ecma_regexp_canonicalize_char (lit_code_point_t ch, /**< character */
    return ch;
  }

-#if ENABLED (JERRY_ESNEXT)
-  /* TODO: Implement case folding for code points in the upper planes. */
-  if (JERRY_UNLIKELY (ch > LIT_UTF16_CODE_UNIT_MAX))
-  {
-    return ch;
-  }
-#endif /* ENABLED (JERRY_ESNEXT) */
+  lit_code_point_t cu = lit_char_to_upper_case (ch, NULL);

-  ecma_char_t u[LIT_MAXIMUM_OTHER_CASE_LENGTH];
-  const ecma_length_t size = lit_char_to_upper_case ((ecma_char_t) ch, u, LIT_MAXIMUM_OTHER_CASE_LENGTH);
-
-  if (size != 1)
+  if (cu == LIT_MULTIPLE_CU)
  {
    return ch;
  }

-  const ecma_char_t cu = u[0];
  if (cu <= LIT_UTF8_1_BYTE_CODE_POINT_MAX && !unicode)
  {
    /* 6. */
@@ -14,12 +14,15 @@
 */

 #include "config.h"
+#include "ecma-helpers.h"
 #include "lit-char-helpers.h"
 #include "lit-unicode-ranges.inc.h"
+#include "lit-unicode-ranges-sup.inc.h"
 #include "lit-strings.h"

 #if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
 #include "lit-unicode-conversions.inc.h"
+#include "lit-unicode-conversions-sup.inc.h"
 #endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */

 #define NUM_OF_ELEMENTS(array) (sizeof (array) / sizeof ((array)[0]))
@@ -31,36 +34,43 @@
 * @return true - if the character is in the given array
 *         false - otherwise
 */
-static bool
-search_char_in_char_array (ecma_char_t c,               /**< code unit */
-                           const ecma_char_t *array,    /**< array */
-                           int size_of_array)           /**< length of the array */
-{
-  int bottom = 0;
-  int top = size_of_array - 1;
+#define LIT_SEARCH_CHAR_IN_ARRAY_FN(function_name, char_type, array_type) \
+static bool \
+function_name (char_type c,               /**< character to search for */ \
+               const array_type *array,   /**< array (binary search: expects ascending order) */ \
+               int size_of_array)         /**< length of the array */\
+{ \
+  int bottom = 0; \
+  int top = size_of_array - 1; \
+  \
+  while (bottom <= top) \
+  { \
+    int middle = (bottom + top) / 2; \
+    char_type current = array[middle]; \
+    \
+    if (current == c) \
+    { \
+      return true; \
+    } \
+    \
+    if (c < current) \
+    { \
+      top = middle - 1; \
+    } \
+    else \
+    { \
+      bottom = middle + 1; \
+    } \
+  } \
+  \
+  return false; \
+} /* function_name */

-  while (bottom <= top)
-  {
-    int middle = (bottom + top) / 2;
-    ecma_char_t current = array[middle];
+LIT_SEARCH_CHAR_IN_ARRAY_FN (lit_search_char_in_array, ecma_char_t, uint16_t)

-    if (current == c)
-    {
-      return true;
-    }
-
-    if (c < current)
-    {
-      top = middle - 1;
-    }
-    else
-    {
-      bottom = middle + 1;
-    }
-  }
-
-  return false;
-} /* search_char_in_char_array */
+#if ENABLED (JERRY_ESNEXT)
+LIT_SEARCH_CHAR_IN_ARRAY_FN (lit_search_codepoint_in_array, lit_code_point_t, uint32_t)
+#endif /* ENABLED (JERRY_ESNEXT) */

 /**
 * Binary search algorithm that searches a character in the given intervals.
@@ -70,37 +80,44 @@ search_char_in_char_array (ecma_char_t c,               /**< code unit */
 * @return true - if the the character is included (inclusively) in one of the intervals in the given array
 *         false - otherwise
 */
-static bool
-search_char_in_interval_array (ecma_char_t c,               /**< code unit */
-                               const ecma_char_t *array_sp, /**< array of interval starting points */
-                               const uint8_t *lengths,      /**< array of interval lengths */
-                               int size_of_array)           /**< length of the array */
-{
-  int bottom = 0;
-  int top = size_of_array - 1;
+#define LIT_SEARCH_CHAR_IN_INTERVAL_ARRAY_FN(function_name, char_type, array_type, interval_type) \
+static bool \
+function_name (char_type c,                  /**< character to search for */ \
+               const array_type *array_sp,   /**< array of interval starting points */ \
+               const interval_type *lengths, /**< array of interval lengths */ \
+               int size_of_array)            /**< length of the array */ \
+{ \
+  int bottom = 0; \
+  int top = size_of_array - 1; \
+  \
+  while (bottom <= top) \
+  { \
+    int middle = (bottom + top) / 2; \
+    char_type current_sp = array_sp[middle]; \
+    \
+    if (current_sp <= c && c <= current_sp + lengths[middle]) /* both interval ends are inclusive */ \
+    { \
+      return true; \
+    } \
+    \
+    if (c > current_sp) \
+    { \
+      bottom = middle + 1; \
+    } \
+    else \
+    { \
+      top = middle - 1; \
+    } \
+  } \
+  \
+  return false; \
+} /* function_name */

-  while (bottom <= top)
-  {
-    int middle = (bottom + top) / 2;
-    ecma_char_t current_sp = array_sp[middle];
+LIT_SEARCH_CHAR_IN_INTERVAL_ARRAY_FN (lit_search_char_in_interval_array, ecma_char_t, uint16_t, uint8_t)

-    if (current_sp <= c && c <= current_sp + lengths[middle])
-    {
-      return true;
-    }
-
-    if (c > current_sp)
-    {
-      bottom = middle + 1;
-    }
-    else
-    {
-      top = middle - 1;
-    }
-  }
-
-  return false;
-} /* search_char_in_interval_array */
+#if ENABLED (JERRY_ESNEXT)
+LIT_SEARCH_CHAR_IN_INTERVAL_ARRAY_FN (lit_search_codepoint_in_interval_array, lit_code_point_t, uint32_t, uint16_t)
+#endif /* ENABLED (JERRY_ESNEXT) */

 /**
 * Check if specified character is one of the Whitespace characters including those that fall into
@@ -116,20 +133,18 @@ lit_char_is_white_space (lit_code_point_t c) /**< code point */
  {
    return (c == LIT_CHAR_SP || (c >= LIT_CHAR_TAB && c <= LIT_CHAR_CR));
  }
-  else
-  {
-    if (c == LIT_CHAR_NBSP || c == LIT_CHAR_BOM || c == LIT_CHAR_LS || c == LIT_CHAR_PS)
-    {
-      return true;
-    }

-    return (c <= LIT_UTF16_CODE_UNIT_MAX
-            && ((c >= lit_unicode_separator_char_interval_sps[0]
-                 && c < lit_unicode_separator_char_interval_sps[0] + lit_unicode_separator_char_interval_lengths[0])
-                || search_char_in_char_array ((ecma_char_t) c,
-                                              lit_unicode_separator_chars,
-                                              NUM_OF_ELEMENTS (lit_unicode_separator_chars))));
+  if (c == LIT_CHAR_NBSP || c == LIT_CHAR_BOM || c == LIT_CHAR_LS || c == LIT_CHAR_PS)
+  {
+    return true;
  }
+
+  return (c <= LIT_UTF16_CODE_UNIT_MAX
+          && ((c >= lit_unicode_white_space_interval_starts[0]
+                 && c < lit_unicode_white_space_interval_starts[0] + lit_unicode_white_space_interval_lengths[0])
+              || lit_search_char_in_array ((ecma_char_t) c,
+                                            lit_unicode_white_space_chars,
+                                            NUM_OF_ELEMENTS (lit_unicode_white_space_chars))));
 } /* lit_char_is_white_space */

 /**
@@ -148,58 +163,84 @@ lit_char_is_line_terminator (ecma_char_t c) /**< code unit */
 } /* lit_char_is_line_terminator */

 /**
- * Check if specified character is a unicode letter
- *
- * Note:
- *      Unicode letter is a character, included into one of the following categories:
- *       - Uppercase letter (Lu);
- *       - Lowercase letter (Ll);
- *       - Titlecase letter (Lt);
- *       - Modifier letter (Lm);
- *       - Other letter (Lo);
- *       - Letter number (Nl).
+ * Check if specified character is a Unicode ID_Start
 *
 * See also:
- *          ECMA-262 v5, 7.6
+ *          ECMA-262 v11, 11.6: UnicodeIDStart
 *
- * @return true - if specified character falls into one of the listed categories,
+ * @return true - if the codepoint has Unicode property "ID_Start"
 *         false - otherwise
 */
 static bool
-lit_char_is_unicode_letter (ecma_char_t c) /**< code unit */
+lit_char_is_unicode_id_start (lit_code_point_t code_point) /**< code point */
 {
-  return (search_char_in_interval_array (c,
-                                         lit_unicode_letter_interval_sps,
-                                         lit_unicode_letter_interval_lengths,
-                                         NUM_OF_ELEMENTS (lit_unicode_letter_interval_sps))
-          || search_char_in_char_array (c, lit_unicode_letter_chars, NUM_OF_ELEMENTS (lit_unicode_letter_chars)));
-} /* lit_char_is_unicode_letter */
+#if ENABLED (JERRY_ESNEXT)
+  if (JERRY_UNLIKELY (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)) /* looked up in the "_sup" tables only */
+  {
+    return (lit_search_codepoint_in_interval_array (code_point,
+                                                    lit_unicode_id_start_interval_starts_sup,
+                                                    lit_unicode_id_start_interval_lengths_sup,
+                                                    NUM_OF_ELEMENTS (lit_unicode_id_start_interval_starts_sup))
+            || lit_search_codepoint_in_array (code_point,
+                                              lit_unicode_id_start_chars_sup,
+                                              NUM_OF_ELEMENTS (lit_unicode_id_start_chars_sup)));
+  }
+#else /* !ENABLED (JERRY_ESNEXT) */
+  JERRY_ASSERT (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN);
+#endif /* ENABLED (JERRY_ESNEXT) */
+
+  ecma_char_t c = (ecma_char_t) code_point; /* below LIT_UTF8_4_BYTE_CODE_POINT_MIN at this point */
+
+  return (lit_search_char_in_interval_array (c,
+                                             lit_unicode_id_start_interval_starts,
+                                             lit_unicode_id_start_interval_lengths,
+                                             NUM_OF_ELEMENTS (lit_unicode_id_start_interval_starts))
+          || lit_search_char_in_array (c, lit_unicode_id_start_chars, NUM_OF_ELEMENTS (lit_unicode_id_start_chars)));
+} /* lit_char_is_unicode_id_start */

 /**
- * Check if specified character is a non-letter character and can be used as a
- * non-first character of an identifier.
- * These characters coverd by the following unicode categories:
- *  - digit (Nd)
- *  - punctuation mark (Mn, Mc)
- *  - connector punctuation (Pc)
+ * Check if specified character is a Unicode ID_Continue
 *
 * See also:
- *          ECMA-262 v5, 7.6
+ *          ECMA-262 v11, 11.6: UnicodeIDContinue
 *
- * @return true - if specified character falls into one of the listed categories,
+ * @return true - if the codepoint has Unicode property "ID_Continue"
 *         false - otherwise
 */
 static bool
-lit_char_is_unicode_non_letter_ident_part (ecma_char_t c) /**< code unit */
+lit_char_is_unicode_id_continue (lit_code_point_t code_point) /**< code point */
 {
-  return (search_char_in_interval_array (c,
-                                         lit_unicode_non_letter_ident_part_interval_sps,
-                                         lit_unicode_non_letter_ident_part_interval_lengths,
-                                         NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_interval_sps))
-          || search_char_in_char_array (c,
-                                        lit_unicode_non_letter_ident_part_chars,
-                                        NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_chars)));
-} /* lit_char_is_unicode_non_letter_ident_part */
+  /* Each ID_Start codepoint is ID_Continue as well. */
+  if (lit_char_is_unicode_id_start (code_point))
+  {
+    return true;
+  }
+
+#if ENABLED (JERRY_ESNEXT)
+  if (JERRY_UNLIKELY (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)) /* looked up in the "_sup" tables only */
+  {
+    return (lit_search_codepoint_in_interval_array (code_point,
+                                                    lit_unicode_id_continue_interval_starts_sup,
+                                                    lit_unicode_id_continue_interval_lengths_sup,
+                                                    NUM_OF_ELEMENTS (lit_unicode_id_continue_interval_starts_sup))
+            || lit_search_codepoint_in_array (code_point,
+                                              lit_unicode_id_continue_chars_sup,
+                                              NUM_OF_ELEMENTS (lit_unicode_id_continue_chars_sup)));
+  }
+#else /* !ENABLED (JERRY_ESNEXT) */
+  JERRY_ASSERT (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN);
+#endif /* ENABLED (JERRY_ESNEXT) */
+
+  ecma_char_t c = (ecma_char_t) code_point; /* below LIT_UTF8_4_BYTE_CODE_POINT_MIN at this point */
+
+  return (lit_search_char_in_interval_array (c,
+                                             lit_unicode_id_continue_interval_starts,
+                                             lit_unicode_id_continue_interval_lengths,
+                                             NUM_OF_ELEMENTS (lit_unicode_id_continue_interval_starts))
+          || lit_search_char_in_array (c,
+                                       lit_unicode_id_continue_chars,
+                                       NUM_OF_ELEMENTS (lit_unicode_id_continue_chars)));
+} /* lit_char_is_unicode_id_continue */

 /**
 * Checks whether the character is a valid identifier start.
@@ -218,17 +259,7 @@ lit_code_point_is_identifier_start (lit_code_point_t code_point) /**< code point
            || code_point == LIT_CHAR_UNDERSCORE);
  }

-#if ENABLED (JERRY_ESNEXT)
-  if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)
-  {
-    /* TODO: detect these ranges correctly. */
-    return (code_point >= 0x10C80 && code_point <= 0x10CF2);
-  }
-#else /* !ENABLED (JERRY_ESNEXT) */
-  JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN);
-#endif /* ENABLED (JERRY_ESNEXT) */
-
-  return lit_char_is_unicode_letter ((ecma_char_t) code_point);
+  return lit_char_is_unicode_id_start (code_point);
 } /* lit_code_point_is_identifier_start */

 /**
@@ -249,18 +280,7 @@ lit_code_point_is_identifier_part (lit_code_point_t code_point) /**< code point
            || code_point == LIT_CHAR_UNDERSCORE);
  }

-#if ENABLED (JERRY_ESNEXT)
-  if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)
-  {
-    /* TODO: detect these ranges correctly. */
-    return (code_point >= 0x10C80 && code_point <= 0x10CF2);
-  }
-#else /* !ENABLED (JERRY_ESNEXT) */
-  JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN);
-#endif /* ENABLED (JERRY_ESNEXT) */
-
-  return (lit_char_is_unicode_letter ((ecma_char_t) code_point)
-          || lit_char_is_unicode_non_letter_ident_part ((ecma_char_t) code_point));
+  return lit_char_is_unicode_id_continue (code_point);
 } /* lit_code_point_is_identifier_part */

 /**
@@ -519,16 +539,27 @@ lit_char_is_word_char (lit_code_point_t c) /**< code point */
 /**
 * Check if the specified character is in one of those tables which contain bidirectional conversions.
 *
- * @return the mapped character sequence of an ecma character, if it's in the table.
- *         0 - otherwise.
+ * @return codepoint of the converted character if it is found in the tables
+ *         LIT_INVALID_CP - otherwise.
 */
-static ecma_length_t
-search_in_bidirectional_conversion_tables (ecma_char_t character,        /**< code unit */
-                                           ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
-                                           bool is_lowercase)            /**< is lowercase conversion */
+static lit_code_point_t
+lit_search_in_bidirectional_conversion_tables (lit_code_point_t cp,   /**< code point */
+                                               bool is_lowercase)     /**< is lowercase conversion */
 {
-  /* 1, Check if the specified character is part of the lit_character_case_ranges table. */
-  int number_of_case_ranges = NUM_OF_ELEMENTS (lit_character_case_ranges);
+  /* 1, Check if the specified character is part of the lit_unicode_character_case_ranges_{sup} table. */
+  int number_of_case_ranges;
+#if ENABLED (JERRY_ESNEXT)
+  bool is_supplementary = cp > LIT_UTF16_CODE_UNIT_MAX;
+  if (is_supplementary)
+  {
+    number_of_case_ranges = NUM_OF_ELEMENTS (lit_unicode_character_case_ranges_sup);
+  }
+  else
+#endif /* ENABLED (JERRY_ESNEXT) */
+  {
+    number_of_case_ranges = NUM_OF_ELEMENTS (lit_unicode_character_case_ranges);
+  }
+
  int conv_counter = 0;

  for (int i = 0; i < number_of_case_ranges; i++)
@@ -538,54 +569,92 @@ search_in_bidirectional_conversion_tables (ecma_char_t character,        /**< co
      conv_counter++;
    }

-    int range_length = lit_character_case_range_lengths[conv_counter];
-    ecma_char_t start_point = lit_character_case_ranges[i];
+    size_t range_length;
+    lit_code_point_t start_point;
+#if ENABLED (JERRY_ESNEXT)
+    if (is_supplementary)
+    {
+      range_length = lit_unicode_character_case_range_lengths_sup[conv_counter];
+      start_point = lit_unicode_character_case_ranges_sup[i];
+    }
+    else
+#endif /* ENABLED (JERRY_ESNEXT) */
+    {
+      range_length = lit_unicode_character_case_range_lengths[conv_counter];
+      start_point = lit_unicode_character_case_ranges[i];
+    }

-    if (start_point > character || character >= start_point + range_length)
+    if (start_point > cp || cp >= start_point + range_length)
    {
      continue;
    }

-    int char_dist = character - start_point;
-
+    uint32_t char_dist = (uint32_t) cp - start_point;
+    int offset;
    if (i % 2 == 0)
    {
-      output_buffer_p[0] = is_lowercase ? (ecma_char_t) (lit_character_case_ranges[i + 1] + char_dist) : character;
+      if (!is_lowercase)
+      {
+        return cp;
+      }
+
+      offset = i + 1;
    }
    else
    {
-      output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (lit_character_case_ranges[i - 1] + char_dist);
+      if (is_lowercase)
+      {
+        return cp;
+      }
+
+      offset = i - 1;
    }

-    return 1;
+#if ENABLED (JERRY_ESNEXT)
+    if (is_supplementary)
+    {
+      start_point = lit_unicode_character_case_ranges_sup[offset];
+    }
+    else
+#endif /* ENABLED (JERRY_ESNEXT) */
+    {
+      start_point = lit_unicode_character_case_ranges[offset];
+    }
+
+    return (lit_code_point_t) (start_point + char_dist);
  }

+  /* Note: After this point based on the latest unicode standard(13.0.0.6) no conversion characters are
+     defined for supplementary planes */
+#if ENABLED (JERRY_ESNEXT)
+  if (is_supplementary)
+  {
+    return cp;
+  }
+#endif /* ENABLED (JERRY_ESNEXT) */
+
  /* 2, Check if the specified character is part of the character_pair_ranges table. */
  int bottom = 0;
-  int top = NUM_OF_ELEMENTS (lit_character_pair_ranges) - 1;
+  int top = NUM_OF_ELEMENTS (lit_unicode_character_pair_ranges) - 1;

  while (bottom <= top)
  {
    int middle = (bottom + top) / 2;
-    ecma_char_t current_sp = lit_character_pair_ranges[middle];
+    lit_code_point_t current_sp = lit_unicode_character_pair_ranges[middle];

-    if (current_sp <= character && character < current_sp + lit_character_pair_range_lengths[middle])
+    if (current_sp <= cp && cp < current_sp + lit_unicode_character_pair_range_lengths[middle])
    {
-      int char_dist = character - current_sp;
+      uint32_t char_dist = (uint32_t) (cp - current_sp);

-      if ((character - current_sp) % 2 == 0)
+      if ((cp - current_sp) % 2 == 0)
      {
-        output_buffer_p[0] = is_lowercase ? (ecma_char_t) (current_sp + char_dist + 1) : character;
-      }
-      else
-      {
-        output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (current_sp + char_dist - 1);
+        return is_lowercase ? (lit_code_point_t) (current_sp + char_dist + 1) : cp;
      }

-      return 1;
+      return is_lowercase ? cp : (lit_code_point_t) (current_sp + char_dist - 1);
    }

-    if (character > current_sp)
+    if (cp > current_sp)
    {
      bottom = middle + 1;
    }
@@ -596,41 +665,37 @@ search_in_bidirectional_conversion_tables (ecma_char_t character,        /**< co
  }

  /* 3, Check if the specified character is part of the character_pairs table. */
-  int number_of_character_pairs = NUM_OF_ELEMENTS (lit_character_pairs);
+  int number_of_character_pairs = NUM_OF_ELEMENTS (lit_unicode_character_pairs);

  for (int i = 0; i < number_of_character_pairs; i++)
  {
-    if (character != lit_character_pairs[i])
+    if (cp != lit_unicode_character_pairs[i])
    {
      continue;
    }

    if (i % 2 == 0)
    {
-      output_buffer_p[0] = is_lowercase ? lit_character_pairs[i + 1] : character;
-    }
-    else
-    {
-      output_buffer_p[0] = is_lowercase ? character : lit_character_pairs[i - 1];
+      return is_lowercase ? lit_unicode_character_pairs[i + 1] : cp;
    }

-    return 1;
+    return is_lowercase ? cp : lit_unicode_character_pairs[i - 1];
  }

-  return 0;
-} /* search_in_bidirectional_conversion_tables */
+  return LIT_INVALID_CP;
+} /* lit_search_in_bidirectional_conversion_tables */

 /**
 * Check if the specified character is in the given conversion table.
 *
- * @return the mapped character sequence of an ecma character, if it's in the table.
- *         0 - otherwise.
+ * @return LIT_MULTIPLE_CU if the converted character consists of more than a single code unit
+ *         converted code point - otherwise
 */
-static ecma_length_t
-search_in_conversion_table (ecma_char_t character,        /**< code unit */
-                            ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
-                            const ecma_char_t *array,     /**< array */
-                            const uint8_t *counters)      /**< case_values counter */
+static lit_code_point_t
+lit_search_in_conversion_table (ecma_char_t character,            /**< code unit */
+                                ecma_stringbuilder_t *builder_p,  /**< string builder */
+                                const ecma_char_t *array,         /**< array */
+                                const uint8_t *counters)          /**< case_values counter */
 {
  int end_point = 0;

@@ -653,28 +718,21 @@ search_in_conversion_table (ecma_char_t character,        /**< code unit */

      if (current == character)
      {
-        ecma_length_t char_sequence = 1;
-
-        switch (size_of_case_value)
+        if (builder_p != NULL)
        {
-          case 3:
+          ecma_stringbuilder_append_char (builder_p, array[middle + 1]);
+
+          if (size_of_case_value > 1)
          {
-            output_buffer_p[2] = array[middle + 3];
-            char_sequence++;
-            /* FALLTHRU */
+            ecma_stringbuilder_append_char (builder_p, array[middle + 2]);
          }
-          case 2:
+          if (size_of_case_value > 2)
          {
-            output_buffer_p[1] = array[middle + 2];
-            char_sequence++;
-            /* FALLTHRU */
-          }
-          default:
-          {
-            output_buffer_p[0] = array[middle + 1];
-            return char_sequence;
+            ecma_stringbuilder_append_char (builder_p, array[middle + 3]);
          }
        }
+
+        return size_of_case_value == 1 ? array[middle + 1]: LIT_MULTIPLE_CU;
      }

      if (character < current)
@@ -688,127 +746,151 @@ search_in_conversion_table (ecma_char_t character,        /**< code unit */
    }
  }

-  return 0;
-} /* search_in_conversion_table */
+  if (builder_p != NULL)
+  {
+    ecma_stringbuilder_append_char (builder_p, character);
+  }
+
+  return (lit_code_point_t) character;
+} /* lit_search_in_conversion_table */
 #endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */

 /**
- * Returns the lowercase character sequence of an ecma character.
+ * Append the converted lowercase code unit sequence of a given code point to the string builder, if one is provided.
 *
- * Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
- *
- * @return the length of the lowercase character sequence
- *         which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
+ * @return LIT_MULTIPLE_CU if the converted code point consists of more than a single code unit,
+ *         the converted code point otherwise
 */
-ecma_length_t
-lit_char_to_lower_case (ecma_char_t character, /**< input character value */
-                        ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
-                        ecma_length_t buffer_size) /**< buffer size */
+lit_code_point_t
+lit_char_to_lower_case (lit_code_point_t cp, /**< code point */
+                        ecma_stringbuilder_t *builder_p) /**< string builder, may be NULL (nothing is appended then) */
 {
-  JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
-
-  if (character >= LIT_CHAR_UPPERCASE_A && character <= LIT_CHAR_UPPERCASE_Z)
+  if (cp >= LIT_CHAR_UPPERCASE_A && cp <= LIT_CHAR_UPPERCASE_Z) /* fast path, no table lookup needed */
  {
-    output_buffer_p[0] = (ecma_char_t) (character + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
-    return 1;
+    lit_utf8_byte_t lowercase_char = (lit_utf8_byte_t) (cp + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
+
+    if (builder_p != NULL)
+    {
+      ecma_stringbuilder_append_byte (builder_p, lowercase_char);
+    }
+
+    return lowercase_char;
  }

 #if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
+  lit_code_point_t lowercase_cp = lit_search_in_bidirectional_conversion_tables (cp, true);

-  ecma_length_t lowercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, true);
-
-  if (lowercase_sequence != 0)
+  if (lowercase_cp != LIT_INVALID_CP) /* presumably LIT_INVALID_CP means "no bidirectional mapping" - TODO confirm */
  {
-    return lowercase_sequence;
+    if (builder_p != NULL)
+    {
+      ecma_stringbuilder_append_codepoint (builder_p, lowercase_cp);
+    }
+
+    return lowercase_cp;
  }

-  int num_of_lowercase_ranges = NUM_OF_ELEMENTS (lit_lower_case_ranges);
+  JERRY_ASSERT (cp < LIT_UTF8_4_BYTE_CODE_POINT_MIN); /* cp is narrowed to ecma_char_t for the final lookup below */
+
+  int num_of_lowercase_ranges = NUM_OF_ELEMENTS (lit_unicode_lower_case_ranges);

  for (int i = 0, j = 0; i < num_of_lowercase_ranges; i += 2, j++)
  {
-    int range_length = lit_lower_case_range_lengths[j] - 1;
-    ecma_char_t start_point = lit_lower_case_ranges[i];
+    JERRY_ASSERT (lit_unicode_lower_case_range_lengths[j] > 0);
+    uint32_t range_length = (uint32_t) (lit_unicode_lower_case_range_lengths[j] - 1); /* inclusive offset of the last code point */
+    lit_code_point_t start_point = lit_unicode_lower_case_ranges[i];

-    if (start_point <= character && character <= start_point + range_length)
+    if (start_point <= cp && cp <= start_point + range_length)
    {
-      output_buffer_p[0] = (ecma_char_t) (lit_lower_case_ranges[i + 1] + (character - start_point));
-      return 1;
+      lowercase_cp = lit_unicode_lower_case_ranges[i + 1] + (cp - start_point); /* keep the offset of cp inside the range */
+      if (builder_p != NULL)
+      {
+        ecma_stringbuilder_append_codepoint (builder_p, lowercase_cp);
+      }
+
+      return lowercase_cp;
    }
  }

-  lowercase_sequence = search_in_conversion_table (character,
-                                                   output_buffer_p,
-                                                   lit_lower_case_conversions,
-                                                   lit_lower_case_conversion_counters);
-
-  if (lowercase_sequence != 0)
+  return lit_search_in_conversion_table ((ecma_char_t) cp, /* the helper appends to builder_p itself when it is not NULL */
+                                         builder_p,
+                                         lit_unicode_lower_case_conversions,
+                                         lit_unicode_lower_case_conversion_counters);
+#else /* !ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
+  if (builder_p != NULL)
  {
-    return lowercase_sequence;
+    ecma_stringbuilder_append_codepoint (builder_p, cp); /* no conversion tables: cp is passed through unchanged */
  }

+  return cp;
 #endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
-
-  output_buffer_p[0] = character;
-  return 1;
 } /* lit_char_to_lower_case */

 /**
- * Returns the uppercase character sequence of an ecma character.
+ * Append the converted uppercase code unit sequence of a given code point to the string builder, if one is provided.
 *
- * Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
- *
- * @return the length of the uppercase character sequence
- *         which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
+ * @return LIT_MULTIPLE_CU if the converted code point consists of more than a single code unit,
+ *         the converted code point otherwise
 */
-ecma_length_t
-lit_char_to_upper_case (ecma_char_t character, /**< input character value */
-                        ecma_char_t *output_buffer_p, /**< buffer for the result characters */
-                        ecma_length_t buffer_size) /**< buffer size */
+lit_code_point_t
+lit_char_to_upper_case (lit_code_point_t cp, /**< code point */
+                        ecma_stringbuilder_t *builder_p) /**< string builder, may be NULL (nothing is appended then) */
 {
-  JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
-
-  if (character >= LIT_CHAR_LOWERCASE_A && character <= LIT_CHAR_LOWERCASE_Z)
+  if (cp >= LIT_CHAR_LOWERCASE_A && cp <= LIT_CHAR_LOWERCASE_Z) /* fast path, no table lookup needed */
  {
-    output_buffer_p[0] = (ecma_char_t) (character - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
-    return 1;
+    lit_utf8_byte_t uppercase_char = (lit_utf8_byte_t) (cp - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
+
+    if (builder_p != NULL)
+    {
+      ecma_stringbuilder_append_byte (builder_p, uppercase_char);
+    }
+
+    return uppercase_char;
  }

 #if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
+  lit_code_point_t uppercase_cp = lit_search_in_bidirectional_conversion_tables (cp, false);

-  ecma_length_t uppercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, false);
-
-  if (uppercase_sequence != 0)
+  if (uppercase_cp != LIT_INVALID_CP) /* presumably LIT_INVALID_CP means "no bidirectional mapping" - TODO confirm */
  {
-    return uppercase_sequence;
+    if (builder_p != NULL)
+    {
+      ecma_stringbuilder_append_codepoint (builder_p, uppercase_cp);
+    }
+
+    return uppercase_cp;
  }

-  int num_of_upper_case_special_ranges = NUM_OF_ELEMENTS (lit_upper_case_special_ranges);
+  int num_of_upper_case_special_ranges = NUM_OF_ELEMENTS (lit_unicode_upper_case_special_ranges);

  for (int i = 0, j = 0; i < num_of_upper_case_special_ranges; i += 3, j++)
  {
-    int range_length = lit_upper_case_special_range_lengths[j];
-    ecma_char_t start_point = lit_upper_case_special_ranges[i];
+    uint32_t range_length = lit_unicode_upper_case_special_range_lengths[j]; /* used as an inclusive offset, no "- 1" here */
+    ecma_char_t start_point = lit_unicode_upper_case_special_ranges[i];

-    if (start_point <= character && character <= start_point + range_length)
+    if (start_point <= cp && cp <= start_point + range_length)
    {
-      output_buffer_p[0] = (ecma_char_t) (lit_upper_case_special_ranges[i + 1] + (character - start_point));
-      output_buffer_p[1] = (ecma_char_t) (lit_upper_case_special_ranges[i + 2]);
-      return 2;
+      if (builder_p != NULL)
+      {
+        uppercase_cp = lit_unicode_upper_case_special_ranges[i + 1] + (cp - start_point); /* first unit keeps the offset of cp */
+        ecma_stringbuilder_append_codepoint (builder_p, uppercase_cp);
+        ecma_stringbuilder_append_codepoint (builder_p, lit_unicode_upper_case_special_ranges[i + 2]); /* fixed second unit */
+      }
+
+      return LIT_MULTIPLE_CU; /* matches in this table always produce two appended values */
    }
  }

-  uppercase_sequence = search_in_conversion_table (character,
-                                                   output_buffer_p,
-                                                   lit_upper_case_conversions,
-                                                   lit_upper_case_conversion_counters);
-
-  if (uppercase_sequence != 0)
+  return lit_search_in_conversion_table ((ecma_char_t) cp, /* NOTE(review): cp is narrowed here without the range assert used in lit_char_to_lower_case - confirm */
+                                         builder_p,
+                                         lit_unicode_upper_case_conversions,
+                                         lit_unicode_upper_case_conversion_counters);
+#else /* !ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
+  if (builder_p != NULL)
  {
-    return uppercase_sequence;
+    ecma_stringbuilder_append_codepoint (builder_p, cp); /* no conversion tables: cp is passed through unchanged */
  }

+  return cp;
 #endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
-
-  output_buffer_p[0] = character;
-  return 1;
 } /* lit_char_to_upper_case */
@@ -18,6 +18,16 @@

 #include "lit-globals.h"

+/**
+ * Invalid character code point
+ */
+#define LIT_INVALID_CP 0xFFFFFFFF
+
+/**
+ * Result of lit_char_to_lower_case/lit_char_to_upper_case consists of more than a single code unit
+ */
+#define LIT_MULTIPLE_CU 0xFFFFFFFE
+
 /*
 * Format control characters (ECMA-262 v5, Table 1)
 */
@@ -234,12 +244,7 @@ bool lit_char_is_word_char (lit_code_point_t c);
 * Utility functions for uppercasing / lowercasing
 */

-/**
- * Minimum buffer size for lit_char_to_lower_case / lit_char_to_upper_case functions.
- */
-#define LIT_MAXIMUM_OTHER_CASE_LENGTH (3)
-
-ecma_length_t lit_char_to_lower_case (ecma_char_t character, ecma_char_t *output_buffer_p, ecma_length_t buffer_size);
-ecma_length_t lit_char_to_upper_case (ecma_char_t character, ecma_char_t *output_buffer_p, ecma_length_t buffer_size);
+lit_code_point_t lit_char_to_lower_case (lit_code_point_t cp, ecma_stringbuilder_t *builder_p);
+lit_code_point_t lit_char_to_upper_case (lit_code_point_t cp, ecma_stringbuilder_t *builder_p);

 #endif /* !LIT_CHAR_HELPERS_H */
@@ -0,0 +1,30 @@
+/* Copyright JS Foundation and other contributors, http://js.foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This file is automatically generated by the gen-unicode.py script
+ * from UnicodeData.txt and SpecialCasing.txt files. Do not edit! */
+
+/* Contains start points of character case ranges (these are bidirectional conversions). */
+static const uint32_t lit_unicode_character_case_ranges_sup[] JERRY_ATTR_CONST_DATA =
+{
+  0x010400, 0x010428, 0x0104b0, 0x0104d8, 0x010c80, 0x010cc0, 0x0118a0, 0x0118c0, 0x016e40, 0x016e60,
+  0x01e900, 0x01e922
+};
+
+/* Interval lengths of start points in `character_case_ranges` table. */
+static const uint16_t lit_unicode_character_case_range_lengths_sup[] JERRY_ATTR_CONST_DATA =
+{
+  0x000028, 0x000024, 0x000033, 0x000020, 0x000020, 0x000022
+};
@@ -14,10 +14,10 @@
 */

 /* This file is automatically generated by the gen-unicode.py script
- * from UnicodeData-13.0.0d6.txt and SpecialCasing-13.0.0d1.txt files. Do not edit! */
+ * from UnicodeData.txt and SpecialCasing.txt files. Do not edit! */

 /* Contains start points of character case ranges (these are bidirectional conversions). */
-static const uint16_t lit_character_case_ranges[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_character_case_ranges[] JERRY_ATTR_CONST_DATA =
 {
  0x00c0, 0x00e0, 0x00d8, 0x00f8, 0x0189, 0x0256, 0x01b1, 0x028a, 0x0388, 0x03ad,
  0x038e, 0x03cd, 0x0391, 0x03b1, 0x03a3, 0x03c3, 0x03fd, 0x037b, 0x0400, 0x0450,
@@ -30,7 +30,7 @@ static const uint16_t lit_character_case_ranges[] JERRY_ATTR_CONST_DATA =
 };

 /* Interval lengths of start points in `character_case_ranges` table. */
-static const uint8_t lit_character_case_range_lengths[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_character_case_range_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x0017, 0x0007, 0x0002, 0x0002, 0x0003, 0x0002, 0x0011, 0x0009, 0x0003, 0x0010,
  0x0020, 0x0026, 0x0026, 0x0050, 0x0006, 0x002b, 0x0003, 0x0008, 0x0006, 0x0008,
@@ -39,7 +39,7 @@ static const uint8_t lit_character_case_range_lengths[] JERRY_ATTR_CONST_DATA =
 };

 /* Contains the start points of bidirectional conversion ranges. */
-static const uint16_t lit_character_pair_ranges[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_character_pair_ranges[] JERRY_ATTR_CONST_DATA =
 {
  0x0100, 0x0132, 0x0139, 0x014a, 0x0179, 0x0182, 0x0187, 0x018b, 0x0191, 0x0198,
  0x01a0, 0x01a7, 0x01ac, 0x01af, 0x01b3, 0x01b8, 0x01bc, 0x01cd, 0x01de, 0x01f4,
@@ -50,7 +50,7 @@ static const uint16_t lit_character_pair_ranges[] JERRY_ATTR_CONST_DATA =
 };

 /* Interval lengths of start points in `character_pair_ranges` table. */
-static const uint8_t lit_character_pair_range_lengths[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_character_pair_range_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x0030, 0x0006, 0x0010, 0x002e, 0x0006, 0x0004, 0x0002, 0x0002, 0x0002, 0x0002,
  0x0006, 0x0002, 0x0002, 0x0002, 0x0004, 0x0002, 0x0002, 0x0010, 0x0012, 0x0002,
@@ -61,7 +61,7 @@ static const uint8_t lit_character_pair_range_lengths[] JERRY_ATTR_CONST_DATA =
 };

 /* Contains lower/upper case bidirectional conversion pairs. */
-static const uint16_t lit_character_pairs[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_character_pairs[] JERRY_ATTR_CONST_DATA =
 {
  0x0178, 0x00ff, 0x0181, 0x0253, 0x0186, 0x0254, 0x018e, 0x01dd, 0x018f, 0x0259,
  0x0190, 0x025b, 0x0193, 0x0260, 0x0194, 0x0263, 0x0196, 0x0269, 0x0197, 0x0268,
@@ -81,20 +81,20 @@ static const uint16_t lit_character_pairs[] JERRY_ATTR_CONST_DATA =
 /* Contains start points of one-to-two uppercase ranges where the second character
 * is always the same.
 */
-static const uint16_t lit_upper_case_special_ranges[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_upper_case_special_ranges[] JERRY_ATTR_CONST_DATA =
 {
  0x1f80, 0x1f08, 0x0399, 0x1f88, 0x1f08, 0x0399, 0x1f90, 0x1f28, 0x0399, 0x1f98,
  0x1f28, 0x0399, 0x1fa0, 0x1f68, 0x0399, 0x1fa8, 0x1f68, 0x0399
 };

 /* Interval lengths for start points in `upper_case_special_ranges` table. */
-static const uint8_t lit_upper_case_special_range_lengths[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_upper_case_special_range_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x0007, 0x0007, 0x0007, 0x0007, 0x0007, 0x0007
 };

 /* Contains start points of lowercase ranges. */
-static const uint16_t lit_lower_case_ranges[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_lower_case_ranges[] JERRY_ATTR_CONST_DATA =
 {
  0x1e96, 0x1e96, 0x1f80, 0x1f80, 0x1f88, 0x1f80, 0x1f90, 0x1f90, 0x1f98, 0x1f90,
  0x1fa0, 0x1fa0, 0x1fa8, 0x1fa0, 0x1fb2, 0x1fb2, 0x1fb6, 0x1fb6, 0x1fc2, 0x1fc2,
@@ -103,14 +103,14 @@ static const uint16_t lit_lower_case_ranges[] JERRY_ATTR_CONST_DATA =
 };

 /* Interval lengths for start points in `lower_case_ranges` table. */
-static const uint8_t lit_lower_case_range_lengths[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_lower_case_range_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x0005, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0003, 0x0002, 0x0003,
  0x0002, 0x0002, 0x0002, 0x0003, 0x0002, 0x0003, 0x0002, 0x0007, 0x0005
 };

 /* The remaining lowercase conversions. The lowercase variant can be one-to-three character long. */
-static const uint16_t lit_lower_case_conversions[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_lower_case_conversions[] JERRY_ATTR_CONST_DATA =
 {
  0x00df, 0x00df, 0x0149, 0x0149, 0x01c5, 0x01c6, 0x01c8, 0x01c9, 0x01cb, 0x01cc,
  0x01f0, 0x01f0, 0x01f2, 0x01f3, 0x0390, 0x0390, 0x03b0, 0x03b0, 0x03f4, 0x03b8,
@@ -120,13 +120,13 @@ static const uint16_t lit_lower_case_conversions[] JERRY_ATTR_CONST_DATA =
 };

 /* Number of one-to-one, one-to-two, and one-to-three lowercase conversions. */
-static const uint8_t lit_lower_case_conversion_counters[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_lower_case_conversion_counters[] JERRY_ATTR_CONST_DATA =
 {
  0x0016, 0x0001, 0x0000
 };

 /* The remaining uppercase conversions. The uppercase variant can be one-to-three character long. */
-static const uint16_t lit_upper_case_conversions[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_upper_case_conversions[] JERRY_ATTR_CONST_DATA =
 {
  0x00b5, 0x039c, 0x0130, 0x0130, 0x0131, 0x0049, 0x017f, 0x0053, 0x01c5, 0x01c4,
  0x01c8, 0x01c7, 0x01cb, 0x01ca, 0x01f2, 0x01f1, 0x0345, 0x0399, 0x03c2, 0x03a3,
@@ -157,7 +157,7 @@ static const uint16_t lit_upper_case_conversions[] JERRY_ATTR_CONST_DATA =
 };

 /* Number of one-to-one, one-to-two, and one-to-three uppercase conversions. */
-static const uint8_t lit_upper_case_conversion_counters[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_upper_case_conversion_counters[] JERRY_ATTR_CONST_DATA =
 {
  0x001c, 0x002c, 0x0010
 };
@@ -0,0 +1,129 @@
+/* Copyright JS Foundation and other contributors, http://js.foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This file is automatically generated by the gen-unicode.py script
+ * from DerivedCoreProperties.txt. Do not edit! */
+
+/**
+ * Character interval starting points for ID_Start.
+ */
+static const uint32_t lit_unicode_id_start_interval_starts_sup[] JERRY_ATTR_CONST_DATA =
+{
+  0x010000, 0x01000d, 0x010028, 0x01003c, 0x01003f, 0x010050, 0x010080, 0x010140, 0x010280, 0x0102a0,
+  0x010300, 0x01032d, 0x010350, 0x010380, 0x0103a0, 0x0103c8, 0x0103d1, 0x010400, 0x0104b0, 0x0104d8,
+  0x010500, 0x010530, 0x010600, 0x010740, 0x010760, 0x010800, 0x01080a, 0x010837, 0x01083f, 0x010860,
+  0x010880, 0x0108e0, 0x0108f4, 0x010900, 0x010920, 0x010980, 0x0109be, 0x010a10, 0x010a15, 0x010a19,
+  0x010a60, 0x010a80, 0x010ac0, 0x010ac9, 0x010b00, 0x010b40, 0x010b60, 0x010b80, 0x010c00, 0x010c80,
+  0x010cc0, 0x010d00, 0x010e80, 0x010eb0, 0x010f00, 0x010f30, 0x010fb0, 0x010fe0, 0x011003, 0x011083,
+  0x0110d0, 0x011103, 0x011150, 0x011183, 0x0111c1, 0x011200, 0x011213, 0x011280, 0x01128a, 0x01128f,
+  0x01129f, 0x0112b0, 0x011305, 0x01130f, 0x011313, 0x01132a, 0x011332, 0x011335, 0x01135d, 0x011400,
+  0x011447, 0x01145f, 0x011480, 0x0114c4, 0x011580, 0x0115d8, 0x011600, 0x011680, 0x011700, 0x011800,
+  0x0118a0, 0x0118ff, 0x01190c, 0x011915, 0x011918, 0x0119a0, 0x0119aa, 0x011a0b, 0x011a5c, 0x011ac0,
+  0x011c00, 0x011c0a, 0x011c72, 0x011d00, 0x011d08, 0x011d0b, 0x011d60, 0x011d67, 0x011d6a, 0x011ee0,
+  0x012000, 0x012400, 0x012480, 0x013000, 0x014400, 0x016800, 0x016a40, 0x016ad0, 0x016b00, 0x016b40,
+  0x016b63, 0x016b7d, 0x016e40, 0x016f00, 0x016f93, 0x016fe0, 0x017000, 0x018800, 0x018d00, 0x01b000,
+  0x01b150, 0x01b164, 0x01b170, 0x01bc00, 0x01bc70, 0x01bc80, 0x01bc90, 0x01d400, 0x01d456, 0x01d49e,
+  0x01d4a5, 0x01d4a9, 0x01d4ae, 0x01d4bd, 0x01d4c5, 0x01d507, 0x01d50d, 0x01d516, 0x01d51e, 0x01d53b,
+  0x01d540, 0x01d54a, 0x01d552, 0x01d6a8, 0x01d6c2, 0x01d6dc, 0x01d6fc, 0x01d716, 0x01d736, 0x01d750,
+  0x01d770, 0x01d78a, 0x01d7aa, 0x01d7c4, 0x01e100, 0x01e137, 0x01e2c0, 0x01e800, 0x01e900, 0x01ee00,
+  0x01ee05, 0x01ee21, 0x01ee29, 0x01ee34, 0x01ee4d, 0x01ee51, 0x01ee61, 0x01ee67, 0x01ee6c, 0x01ee74,
+  0x01ee79, 0x01ee80, 0x01ee8b, 0x01eea1, 0x01eea5, 0x01eeab, 0x020000, 0x02a700, 0x02b740, 0x02b820,
+  0x02ceb0, 0x02f800, 0x030000
+};
+
+/**
+ * Character interval lengths for ID_Start.
+ */
+static const uint16_t lit_unicode_id_start_interval_lengths_sup[] JERRY_ATTR_CONST_DATA =
+{
+  0x00000b, 0x000019, 0x000012, 0x000001, 0x00000e, 0x00000d, 0x00007a, 0x000034, 0x00001c, 0x000030,
+  0x00001f, 0x00001d, 0x000025, 0x00001d, 0x000023, 0x000007, 0x000004, 0x00009d, 0x000023, 0x000023,
+  0x000027, 0x000033, 0x000136, 0x000015, 0x000007, 0x000005, 0x00002b, 0x000001, 0x000016, 0x000016,
+  0x00001e, 0x000012, 0x000001, 0x000015, 0x000019, 0x000037, 0x000001, 0x000003, 0x000002, 0x00001c,
+  0x00001c, 0x00001c, 0x000007, 0x00001b, 0x000035, 0x000015, 0x000012, 0x000011, 0x000048, 0x000032,
+  0x000032, 0x000023, 0x000029, 0x000001, 0x00001c, 0x000015, 0x000014, 0x000016, 0x000034, 0x00002c,
+  0x000018, 0x000023, 0x000022, 0x00002f, 0x000003, 0x000011, 0x000018, 0x000006, 0x000003, 0x00000e,
+  0x000009, 0x00002e, 0x000007, 0x000001, 0x000015, 0x000006, 0x000001, 0x000004, 0x000004, 0x000034,
+  0x000003, 0x000002, 0x00002f, 0x000001, 0x00002e, 0x000003, 0x00002f, 0x00002a, 0x00001a, 0x00002b,
+  0x00003f, 0x000007, 0x000007, 0x000001, 0x000017, 0x000007, 0x000026, 0x000027, 0x00002d, 0x000038,
+  0x000008, 0x000024, 0x00001d, 0x000006, 0x000001, 0x000025, 0x000005, 0x000001, 0x00001f, 0x000012,
+  0x000399, 0x00006e, 0x0000c3, 0x00042e, 0x000246, 0x000238, 0x00001e, 0x00001d, 0x00002f, 0x000003,
+  0x000014, 0x000012, 0x00003f, 0x00004a, 0x00000c, 0x000001, 0x0017f7, 0x0004d5, 0x000008, 0x00011e,
+  0x000002, 0x000003, 0x00018b, 0x00006a, 0x00000c, 0x000008, 0x000009, 0x000054, 0x000046, 0x000001,
+  0x000001, 0x000003, 0x00000b, 0x000006, 0x000040, 0x000003, 0x000007, 0x000006, 0x00001b, 0x000003,
+  0x000004, 0x000006, 0x000153, 0x000018, 0x000018, 0x00001e, 0x000018, 0x00001e, 0x000018, 0x00001e,
+  0x000018, 0x00001e, 0x000018, 0x000007, 0x00002c, 0x000006, 0x00002b, 0x0000c4, 0x000043, 0x000003,
+  0x00001a, 0x000001, 0x000009, 0x000003, 0x000002, 0x000001, 0x000001, 0x000003, 0x000006, 0x000003,
+  0x000003, 0x000009, 0x000010, 0x000002, 0x000004, 0x000010, 0x00a6dd, 0x001034, 0x0000dd, 0x001681,
+  0x001d30, 0x00021d, 0x00134a
+};
+
+/**
+ * Non-interval characters for ID_Start.
+ */
+static const uint32_t lit_unicode_id_start_chars_sup[] JERRY_ATTR_CONST_DATA =
+{
+  0x010808, 0x01083c, 0x010a00, 0x010f27, 0x011144, 0x011147, 0x011176, 0x0111da, 0x0111dc, 0x011288,
+  0x01133d, 0x011350, 0x0114c7, 0x011644, 0x0116b8, 0x011909, 0x01193f, 0x011941, 0x0119e1, 0x0119e3,
+  0x011a00, 0x011a3a, 0x011a50, 0x011a9d, 0x011c40, 0x011d46, 0x011d98, 0x011fb0, 0x016f50, 0x016fe3,
+  0x01d4a2, 0x01d4bb, 0x01d546, 0x01e14e, 0x01e94b, 0x01ee24, 0x01ee27, 0x01ee39, 0x01ee3b, 0x01ee42,
+  0x01ee47, 0x01ee49, 0x01ee4b, 0x01ee54, 0x01ee57, 0x01ee59, 0x01ee5b, 0x01ee5d, 0x01ee5f, 0x01ee64,
+  0x01ee7e
+};
+
+/**
+ * Character interval starting points for ID_Continue.
+ */
+static const uint32_t lit_unicode_id_continue_interval_starts_sup[] JERRY_ATTR_CONST_DATA =
+{
+  0x010376, 0x0104a0, 0x010a01, 0x010a05, 0x010a0c, 0x010a38, 0x010ae5, 0x010d24, 0x010d30, 0x010eab,
+  0x010f46, 0x011000, 0x011038, 0x011066, 0x01107f, 0x0110b0, 0x0110f0, 0x011100, 0x011127, 0x011136,
+  0x011145, 0x011180, 0x0111b3, 0x0111c9, 0x0111ce, 0x01122c, 0x0112df, 0x0112f0, 0x011300, 0x01133b,
+  0x01133e, 0x011347, 0x01134b, 0x011362, 0x011366, 0x011370, 0x011435, 0x011450, 0x0114b0, 0x0114d0,
+  0x0115af, 0x0115b8, 0x0115dc, 0x011630, 0x011650, 0x0116ab, 0x0116c0, 0x01171d, 0x011730, 0x01182c,
+  0x0118e0, 0x011930, 0x011937, 0x01193b, 0x011942, 0x011950, 0x0119d1, 0x0119da, 0x011a01, 0x011a33,
+  0x011a3b, 0x011a51, 0x011a8a, 0x011c2f, 0x011c38, 0x011c50, 0x011c92, 0x011ca9, 0x011d31, 0x011d3c,
+  0x011d3f, 0x011d50, 0x011d8a, 0x011d90, 0x011d93, 0x011da0, 0x011ef3, 0x016a60, 0x016af0, 0x016b30,
+  0x016b50, 0x016f51, 0x016f8f, 0x016ff0, 0x01bc9d, 0x01d165, 0x01d16d, 0x01d17b, 0x01d185, 0x01d1aa,
+  0x01d242, 0x01d7ce, 0x01da00, 0x01da3b, 0x01da9b, 0x01daa1, 0x01e000, 0x01e008, 0x01e01b, 0x01e023,
+  0x01e026, 0x01e130, 0x01e140, 0x01e2ec, 0x01e8d0, 0x01e944, 0x01e950, 0x01fbf0, 0x0e0100
+};
+
+/**
+ * Character interval lengths for ID_Continue.
+ */
+static const uint16_t lit_unicode_id_continue_interval_lengths_sup[] JERRY_ATTR_CONST_DATA =
+{
+  0x000004, 0x000009, 0x000002, 0x000001, 0x000003, 0x000002, 0x000001, 0x000003, 0x000009, 0x000001,
+  0x00000a, 0x000002, 0x00000e, 0x000009, 0x000003, 0x00000a, 0x000009, 0x000002, 0x00000d, 0x000009,
+  0x000001, 0x000002, 0x00000d, 0x000003, 0x00000b, 0x00000b, 0x00000b, 0x000009, 0x000003, 0x000001,
+  0x000006, 0x000001, 0x000002, 0x000001, 0x000006, 0x000004, 0x000011, 0x000009, 0x000013, 0x000009,
+  0x000006, 0x000008, 0x000001, 0x000010, 0x000009, 0x00000c, 0x000009, 0x00000e, 0x000009, 0x00000e,
+  0x000009, 0x000005, 0x000001, 0x000003, 0x000001, 0x000009, 0x000006, 0x000006, 0x000009, 0x000006,
+  0x000003, 0x00000a, 0x00000f, 0x000007, 0x000007, 0x000009, 0x000015, 0x00000d, 0x000005, 0x000001,
+  0x000006, 0x000009, 0x000004, 0x000001, 0x000004, 0x000009, 0x000003, 0x000009, 0x000004, 0x000006,
+  0x000009, 0x000036, 0x000003, 0x000001, 0x000001, 0x000004, 0x000005, 0x000007, 0x000006, 0x000003,
+  0x000002, 0x000031, 0x000036, 0x000031, 0x000004, 0x00000e, 0x000006, 0x000010, 0x000006, 0x000001,
+  0x000004, 0x000006, 0x000009, 0x00000d, 0x000006, 0x000006, 0x000009, 0x000009, 0x0000ef
+};
+
+/**
+ * Non-interval characters for ID_Continue.
+ */
+static const uint32_t lit_unicode_id_continue_chars_sup[] JERRY_ATTR_CONST_DATA =
+{
+  0x0101fd, 0x0102e0, 0x010a3f, 0x011173, 0x01123e, 0x011357, 0x01145e, 0x011940, 0x0119e4, 0x011a47,
+  0x011d3a, 0x011d47, 0x016f4f, 0x016fe4, 0x01da75, 0x01da84
+};
@@ -14,15 +14,12 @@
 */

 /* This file is automatically generated by the gen-unicode.py script
- * from UnicodeData-13.0.0d6.txt. Do not edit! */
+ * from DerivedCoreProperties.txt. Do not edit! */

 /**
- * Character interval starting points for the unicode letters.
- *
- * The characters covered by these intervals are from
- * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl
+ * Character interval starting points for ID_Start.
 */
-static const uint16_t lit_unicode_letter_interval_sps[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_id_start_interval_starts[] JERRY_ATTR_CONST_DATA =
 {
  0x00c0, 0x00d8, 0x00f8, 0x01f8, 0x02c6, 0x02e0, 0x0370, 0x0376, 0x037a, 0x0388,
  0x038e, 0x03a3, 0x03f7, 0x048a, 0x0531, 0x0560, 0x05d0, 0x05ef, 0x0620, 0x066e,
@@ -39,46 +36,43 @@ static const uint16_t lit_unicode_letter_interval_sps[] JERRY_ATTR_CONST_DATA =
  0x10fc, 0x11fc, 0x124a, 0x1250, 0x125a, 0x1260, 0x128a, 0x1290, 0x12b2, 0x12b8,
  0x12c2, 0x12c8, 0x12d8, 0x1312, 0x1318, 0x1380, 0x13a0, 0x13f8, 0x1401, 0x1501,
  0x1601, 0x166f, 0x1681, 0x16a0, 0x16ee, 0x1700, 0x170e, 0x1720, 0x1740, 0x1760,
-  0x176e, 0x1780, 0x1820, 0x1880, 0x1887, 0x18b0, 0x1900, 0x1950, 0x1970, 0x1980,
-  0x19b0, 0x1a00, 0x1a20, 0x1b05, 0x1b45, 0x1b83, 0x1bae, 0x1bba, 0x1c00, 0x1c4d,
-  0x1c5a, 0x1c80, 0x1c90, 0x1cbd, 0x1ce9, 0x1cee, 0x1cf5, 0x1d00, 0x1e00, 0x1f00,
-  0x1f18, 0x1f20, 0x1f48, 0x1f50, 0x1f5f, 0x1f80, 0x1fb6, 0x1fc2, 0x1fc6, 0x1fd0,
-  0x1fd6, 0x1fe0, 0x1ff2, 0x1ff6, 0x2090, 0x210a, 0x2119, 0x212a, 0x212f, 0x213c,
-  0x2145, 0x2160, 0x2c00, 0x2c30, 0x2c60, 0x2ceb, 0x2cf2, 0x2d00, 0x2d30, 0x2d80,
-  0x2da0, 0x2da8, 0x2db0, 0x2db8, 0x2dc0, 0x2dc8, 0x2dd0, 0x2dd8, 0x3005, 0x3021,
-  0x3031, 0x3038, 0x3041, 0x309d, 0x30a1, 0x30fc, 0x3105, 0x3131, 0x31a0, 0x31f0,
-  0x3400, 0x3500, 0x3600, 0x3700, 0x3800, 0x3900, 0x3a00, 0x3b00, 0x3c00, 0x3d00,
-  0x3e00, 0x3f00, 0x4000, 0x4100, 0x4200, 0x4300, 0x4400, 0x4500, 0x4600, 0x4700,
-  0x4800, 0x4900, 0x4a00, 0x4b00, 0x4c00, 0x4d00, 0x4e00, 0x4f00, 0x5000, 0x5100,
-  0x5200, 0x5300, 0x5400, 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00,
-  0x5c00, 0x5d00, 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500,
-  0x6600, 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
-  0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, 0x7900,
-  0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00, 0x8000, 0x8100, 0x8200, 0x8300,
-  0x8400, 0x8500, 0x8600, 0x8700, 0x8800, 0x8900, 0x8a00, 0x8b00, 0x8c00, 0x8d00,
-  0x8e00, 0x8f00, 0x9000, 0x9100, 0x9200, 0x9300, 0x9400, 0x9500, 0x9600, 0x9700,
-  0x9800, 0x9900, 0x9a00, 0x9b00, 0x9c00, 0x9d00, 0x9e00, 0x9f00, 0xa000, 0xa100,
-  0xa200, 0xa300, 0xa400, 0xa4d0, 0xa500, 0xa600, 0xa610, 0xa62a, 0xa640, 0xa67f,
-  0xa6a0, 0xa717, 0xa722, 0xa78b, 0xa7c2, 0xa7f5, 0xa803, 0xa807, 0xa80c, 0xa840,
-  0xa882, 0xa8f2, 0xa8fd, 0xa90a, 0xa930, 0xa960, 0xa984, 0xa9e0, 0xa9e6, 0xa9fa,
-  0xaa00, 0xaa40, 0xaa44, 0xaa60, 0xaa7e, 0xaab5, 0xaab9, 0xaadb, 0xaae0, 0xaaf2,
-  0xab01, 0xab09, 0xab11, 0xab20, 0xab28, 0xab30, 0xab5c, 0xab70, 0xac00, 0xad00,
-  0xae00, 0xaf00, 0xb000, 0xb100, 0xb200, 0xb300, 0xb400, 0xb500, 0xb600, 0xb700,
-  0xb800, 0xb900, 0xba00, 0xbb00, 0xbc00, 0xbd00, 0xbe00, 0xbf00, 0xc000, 0xc100,
-  0xc200, 0xc300, 0xc400, 0xc500, 0xc600, 0xc700, 0xc800, 0xc900, 0xca00, 0xcb00,
-  0xcc00, 0xcd00, 0xce00, 0xcf00, 0xd000, 0xd100, 0xd200, 0xd300, 0xd400, 0xd500,
-  0xd600, 0xd700, 0xd7b0, 0xd7cb, 0xf900, 0xfa00, 0xfa70, 0xfb00, 0xfb13, 0xfb1f,
-  0xfb2a, 0xfb38, 0xfb40, 0xfb43, 0xfb46, 0xfbd3, 0xfcd3, 0xfd50, 0xfd92, 0xfdf0,
-  0xfe70, 0xfe76, 0xff21, 0xff41, 0xff66, 0xffc2, 0xffca, 0xffd2, 0xffda
+  0x176e, 0x1780, 0x1820, 0x1880, 0x18b0, 0x1900, 0x1950, 0x1970, 0x1980, 0x19b0,
+  0x1a00, 0x1a20, 0x1b05, 0x1b45, 0x1b83, 0x1bae, 0x1bba, 0x1c00, 0x1c4d, 0x1c5a,
+  0x1c80, 0x1c90, 0x1cbd, 0x1ce9, 0x1cee, 0x1cf5, 0x1d00, 0x1e00, 0x1f00, 0x1f18,
+  0x1f20, 0x1f48, 0x1f50, 0x1f5f, 0x1f80, 0x1fb6, 0x1fc2, 0x1fc6, 0x1fd0, 0x1fd6,
+  0x1fe0, 0x1ff2, 0x1ff6, 0x2090, 0x210a, 0x2118, 0x212a, 0x213c, 0x2145, 0x2160,
+  0x2c00, 0x2c30, 0x2c60, 0x2ceb, 0x2cf2, 0x2d00, 0x2d30, 0x2d80, 0x2da0, 0x2da8,
+  0x2db0, 0x2db8, 0x2dc0, 0x2dc8, 0x2dd0, 0x2dd8, 0x3005, 0x3021, 0x3031, 0x3038,
+  0x3041, 0x309b, 0x30a1, 0x30fc, 0x3105, 0x3131, 0x31a0, 0x31f0, 0x3400, 0x3500,
+  0x3600, 0x3700, 0x3800, 0x3900, 0x3a00, 0x3b00, 0x3c00, 0x3d00, 0x3e00, 0x3f00,
+  0x4000, 0x4100, 0x4200, 0x4300, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900,
+  0x4a00, 0x4b00, 0x4c00, 0x4d00, 0x4e00, 0x4f00, 0x5000, 0x5100, 0x5200, 0x5300,
+  0x5400, 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
+  0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600, 0x6700,
+  0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00, 0x7000, 0x7100,
+  0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, 0x7900, 0x7a00, 0x7b00,
+  0x7c00, 0x7d00, 0x7e00, 0x7f00, 0x8000, 0x8100, 0x8200, 0x8300, 0x8400, 0x8500,
+  0x8600, 0x8700, 0x8800, 0x8900, 0x8a00, 0x8b00, 0x8c00, 0x8d00, 0x8e00, 0x8f00,
+  0x9000, 0x9100, 0x9200, 0x9300, 0x9400, 0x9500, 0x9600, 0x9700, 0x9800, 0x9900,
+  0x9a00, 0x9b00, 0x9c00, 0x9d00, 0x9e00, 0x9f00, 0xa000, 0xa100, 0xa200, 0xa300,
+  0xa400, 0xa4d0, 0xa500, 0xa600, 0xa610, 0xa62a, 0xa640, 0xa67f, 0xa6a0, 0xa717,
+  0xa722, 0xa78b, 0xa7c2, 0xa7f5, 0xa803, 0xa807, 0xa80c, 0xa840, 0xa882, 0xa8f2,
+  0xa8fd, 0xa90a, 0xa930, 0xa960, 0xa984, 0xa9e0, 0xa9e6, 0xa9fa, 0xaa00, 0xaa40,
+  0xaa44, 0xaa60, 0xaa7e, 0xaab5, 0xaab9, 0xaadb, 0xaae0, 0xaaf2, 0xab01, 0xab09,
+  0xab11, 0xab20, 0xab28, 0xab30, 0xab5c, 0xab70, 0xac00, 0xad00, 0xae00, 0xaf00,
+  0xb000, 0xb100, 0xb200, 0xb300, 0xb400, 0xb500, 0xb600, 0xb700, 0xb800, 0xb900,
+  0xba00, 0xbb00, 0xbc00, 0xbd00, 0xbe00, 0xbf00, 0xc000, 0xc100, 0xc200, 0xc300,
+  0xc400, 0xc500, 0xc600, 0xc700, 0xc800, 0xc900, 0xca00, 0xcb00, 0xcc00, 0xcd00,
+  0xce00, 0xcf00, 0xd000, 0xd100, 0xd200, 0xd300, 0xd400, 0xd500, 0xd600, 0xd700,
+  0xd7b0, 0xd7cb, 0xf900, 0xfa00, 0xfa70, 0xfb00, 0xfb13, 0xfb1f, 0xfb2a, 0xfb38,
+  0xfb40, 0xfb43, 0xfb46, 0xfbd3, 0xfcd3, 0xfd50, 0xfd92, 0xfdf0, 0xfe70, 0xfe76,
+  0xff21, 0xff41, 0xff66, 0xffc2, 0xffca, 0xffd2, 0xffda
 };

 /**
- * Character lengths for the unicode letters.
- *
- * The characters covered by these intervals are from
- * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl
+ * Character interval lengths for ID_Start.
 */
-static const uint8_t lit_unicode_letter_interval_lengths[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_id_start_interval_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x0016, 0x001e, 0x00ff, 0x00c9, 0x000b, 0x0004, 0x0004, 0x0001, 0x0003, 0x0002,
  0x0013, 0x0052, 0x008a, 0x00a5, 0x0025, 0x0028, 0x001a, 0x0003, 0x002a, 0x0001,
@@ -95,17 +89,17 @@ static const uint8_t lit_unicode_letter_interval_lengths[] JERRY_ATTR_CONST_DATA
  0x00ff, 0x004c, 0x0003, 0x0006, 0x0003, 0x0028, 0x0003, 0x0020, 0x0003, 0x0006,
  0x0003, 0x000e, 0x0038, 0x0003, 0x0042, 0x000f, 0x0055, 0x0005, 0x00ff, 0x00ff,
  0x006b, 0x0010, 0x0019, 0x004a, 0x000a, 0x000c, 0x0003, 0x0011, 0x0011, 0x000c,
-  0x0002, 0x0033, 0x0058, 0x0004, 0x0021, 0x0045, 0x001e, 0x001d, 0x0004, 0x002b,
-  0x0019, 0x0016, 0x0034, 0x002e, 0x0006, 0x001d, 0x0001, 0x002b, 0x0023, 0x0002,
-  0x0023, 0x0008, 0x002a, 0x0002, 0x0003, 0x0005, 0x0001, 0x00bf, 0x00ff, 0x0015,
-  0x0005, 0x0025, 0x0005, 0x0007, 0x001e, 0x0034, 0x0006, 0x0002, 0x0006, 0x0003,
-  0x0005, 0x000c, 0x0002, 0x0006, 0x000c, 0x0009, 0x0004, 0x0003, 0x000a, 0x0003,
-  0x0004, 0x0028, 0x002e, 0x002e, 0x0084, 0x0003, 0x0001, 0x0025, 0x0037, 0x0016,
-  0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0002, 0x0008,
-  0x0004, 0x0004, 0x0055, 0x0002, 0x0059, 0x0003, 0x002a, 0x005d, 0x001f, 0x000f,
+  0x0002, 0x0033, 0x0058, 0x0028, 0x0045, 0x001e, 0x001d, 0x0004, 0x002b, 0x0019,
+  0x0016, 0x0034, 0x002e, 0x0006, 0x001d, 0x0001, 0x002b, 0x0023, 0x0002, 0x0023,
+  0x0008, 0x002a, 0x0002, 0x0003, 0x0005, 0x0001, 0x00bf, 0x00ff, 0x0015, 0x0005,
+  0x0025, 0x0005, 0x0007, 0x001e, 0x0034, 0x0006, 0x0002, 0x0006, 0x0003, 0x0005,
+  0x000c, 0x0002, 0x0006, 0x000c, 0x0009, 0x0005, 0x000f, 0x0003, 0x0004, 0x0028,
+  0x002e, 0x002e, 0x0084, 0x0003, 0x0001, 0x0025, 0x0037, 0x0016, 0x0006, 0x0006,
+  0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0002, 0x0008, 0x0004, 0x0004,
+  0x0055, 0x0004, 0x0059, 0x0003, 0x002a, 0x005d, 0x001f, 0x000f, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
-  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00bf, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
+  0x00ff, 0x00ff, 0x00ff, 0x00bf, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
@@ -113,29 +107,25 @@ static const uint8_t lit_unicode_letter_interval_lengths[] JERRY_ATTR_CONST_DATA
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
-  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00fc, 0x00ff, 0x00ff,
-  0x00ff, 0x00ff, 0x008c, 0x002d, 0x00ff, 0x000c, 0x000f, 0x0001, 0x002e, 0x001e,
-  0x004f, 0x0008, 0x0066, 0x0034, 0x0008, 0x000c, 0x0002, 0x0003, 0x0016, 0x0033,
-  0x0031, 0x0005, 0x0001, 0x001b, 0x0016, 0x001c, 0x002e, 0x0004, 0x0009, 0x0004,
-  0x0028, 0x0002, 0x0007, 0x0016, 0x0031, 0x0001, 0x0004, 0x0002, 0x000a, 0x0002,
-  0x0005, 0x0005, 0x0005, 0x0006, 0x0006, 0x002a, 0x000d, 0x0072, 0x00ff, 0x00ff,
+  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00fc, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
+  0x008c, 0x002d, 0x00ff, 0x000c, 0x000f, 0x0001, 0x002e, 0x001e, 0x004f, 0x0008,
+  0x0066, 0x0034, 0x0008, 0x000c, 0x0002, 0x0003, 0x0016, 0x0033, 0x0031, 0x0005,
+  0x0001, 0x001b, 0x0016, 0x001c, 0x002e, 0x0004, 0x0009, 0x0004, 0x0028, 0x0002,
+  0x0007, 0x0016, 0x0031, 0x0001, 0x0004, 0x0002, 0x000a, 0x0002, 0x0005, 0x0005,
+  0x0005, 0x0006, 0x0006, 0x002a, 0x000d, 0x0072, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
-  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
-  0x00ff, 0x00a3, 0x0016, 0x0030, 0x00ff, 0x006d, 0x0069, 0x0006, 0x0004, 0x0009,
-  0x000c, 0x0004, 0x0001, 0x0001, 0x006b, 0x00ff, 0x006a, 0x003f, 0x0035, 0x000b,
-  0x0004, 0x0086, 0x0019, 0x0019, 0x0058, 0x0005, 0x0005, 0x0005, 0x0002
+  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00a3,
+  0x0016, 0x0030, 0x00ff, 0x006d, 0x0069, 0x0006, 0x0004, 0x0009, 0x000c, 0x0004,
+  0x0001, 0x0001, 0x006b, 0x00ff, 0x006a, 0x003f, 0x0035, 0x000b, 0x0004, 0x0086,
+  0x0019, 0x0019, 0x0058, 0x0005, 0x0005, 0x0005, 0x0002
 };

 /**
- * Those unicode letter characters that are not inside any of
- * the intervals specified in lit_unicode_letter_interval_sps array.
- *
- * The characters are from the following Unicode categories:
- * Lu, Ll, Lt, Lm, Lo, Nl
+ * Non-interval characters for ID_Start.
 */
-static const uint16_t lit_unicode_letter_chars[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_id_start_chars[] JERRY_ATTR_CONST_DATA =
 {
  0x00aa, 0x00b5, 0x00ba, 0x02ec, 0x02ee, 0x037f, 0x0386, 0x038c, 0x0559, 0x06d5,
  0x06ff, 0x0710, 0x07b1, 0x07fa, 0x081a, 0x0824, 0x0828, 0x093d, 0x0950, 0x09b2,
@@ -144,18 +134,13 @@ static const uint16_t lit_unicode_letter_chars[] JERRY_ATTR_CONST_DATA =
  0x0ea5, 0x0ebd, 0x0ec6, 0x0f00, 0x103f, 0x1061, 0x108e, 0x10c7, 0x10cd, 0x1258,
  0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7, 0x1cfa, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe,
  0x2071, 0x207f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x214e, 0x2d27,
-  0x2d2d, 0x2d6f, 0x2e2f, 0x3400, 0x4e00, 0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaac0,
-  0xaac2, 0xac00, 0xfb1d, 0xfb3e
+  0x2d2d, 0x2d6f, 0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaac0, 0xaac2, 0xfb1d, 0xfb3e
 };

 /**
- * Character interval starting points for non-letter character
- * that can be used as a non-first character of an identifier.
- *
- * The characters covered by these intervals are from
- * the following Unicode categories: Nd, Mn, Mc, Pc
+ * Character interval starting points for ID_Continue.
 */
-static const uint16_t lit_unicode_non_letter_ident_part_interval_sps[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_id_continue_interval_starts[] JERRY_ATTR_CONST_DATA =
 {
  0x0300, 0x0483, 0x0591, 0x05c1, 0x05c4, 0x0610, 0x064b, 0x06d6, 0x06df, 0x06e7,
  0x06ea, 0x06f0, 0x0730, 0x07a6, 0x07c0, 0x07eb, 0x0816, 0x081b, 0x0825, 0x0829,
@@ -167,8 +152,8 @@ static const uint16_t lit_unicode_non_letter_ident_part_interval_sps[] JERRY_ATT
  0x0ce6, 0x0d00, 0x0d3b, 0x0d3e, 0x0d46, 0x0d4a, 0x0d62, 0x0d66, 0x0d81, 0x0dcf,
  0x0dd8, 0x0de6, 0x0df2, 0x0e34, 0x0e47, 0x0e50, 0x0eb4, 0x0ec8, 0x0ed0, 0x0f18,
  0x0f20, 0x0f3e, 0x0f71, 0x0f86, 0x0f8d, 0x0f99, 0x102b, 0x1040, 0x1056, 0x105e,
-  0x1062, 0x1067, 0x1071, 0x1082, 0x108f, 0x135d, 0x1712, 0x1732, 0x1752, 0x1772,
-  0x17b4, 0x17e0, 0x180b, 0x1810, 0x1885, 0x1920, 0x1930, 0x1946, 0x19d0, 0x1a17,
+  0x1062, 0x1067, 0x1071, 0x1082, 0x108f, 0x135d, 0x1369, 0x1712, 0x1732, 0x1752,
+  0x1772, 0x17b4, 0x17e0, 0x180b, 0x1810, 0x1920, 0x1930, 0x1946, 0x19d0, 0x1a17,
  0x1a55, 0x1a60, 0x1a7f, 0x1a90, 0x1ab0, 0x1abf, 0x1b00, 0x1b34, 0x1b50, 0x1b6b,
  0x1b80, 0x1ba1, 0x1bb0, 0x1be6, 0x1c24, 0x1c40, 0x1c50, 0x1cd0, 0x1cd4, 0x1cf7,
  0x1dc0, 0x1dfb, 0x200c, 0x203f, 0x20d0, 0x20e5, 0x2cef, 0x2de0, 0x302a, 0x3099,
@@ -179,13 +164,9 @@ static const uint16_t lit_unicode_non_letter_ident_part_interval_sps[] JERRY_ATT
 };

 /**
- * Character interval lengths for non-letter character
- * that can be used as a non-first character of an identifier.
- *
- * The characters covered by these intervals are from
- * the following Unicode categories: Nd, Mn, Mc, Pc
+ * Character interval lengths for ID_Continue.
 */
-static const uint8_t lit_unicode_non_letter_ident_part_interval_lengths[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_id_continue_interval_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x006f, 0x0004, 0x002c, 0x0001, 0x0001, 0x000a, 0x001e, 0x0006, 0x0005, 0x0001,
  0x0003, 0x0009, 0x001a, 0x000a, 0x0009, 0x0008, 0x0003, 0x0008, 0x0002, 0x0004,
@@ -197,8 +178,8 @@ static const uint8_t lit_unicode_non_letter_ident_part_interval_lengths[] JERRY_
  0x0009, 0x0003, 0x0001, 0x0006, 0x0002, 0x0003, 0x0001, 0x0009, 0x0002, 0x0005,
  0x0007, 0x0009, 0x0001, 0x0006, 0x0007, 0x0009, 0x0008, 0x0005, 0x0009, 0x0001,
  0x0009, 0x0001, 0x0013, 0x0001, 0x000a, 0x0023, 0x0013, 0x0009, 0x0003, 0x0002,
-  0x0002, 0x0006, 0x0003, 0x000b, 0x000e, 0x0002, 0x0002, 0x0002, 0x0001, 0x0001,
-  0x001f, 0x0009, 0x0002, 0x0009, 0x0001, 0x000b, 0x000b, 0x0009, 0x0009, 0x0004,
+  0x0002, 0x0006, 0x0003, 0x000b, 0x000e, 0x0002, 0x0008, 0x0002, 0x0002, 0x0001,
+  0x0001, 0x001f, 0x0009, 0x0002, 0x0009, 0x000b, 0x000b, 0x0009, 0x000a, 0x0004,
  0x0009, 0x001c, 0x000a, 0x0009, 0x000d, 0x0001, 0x0004, 0x0010, 0x0009, 0x0008,
  0x0002, 0x000c, 0x0009, 0x000d, 0x0013, 0x0009, 0x0009, 0x0002, 0x0014, 0x0002,
  0x0039, 0x0004, 0x0001, 0x0001, 0x000c, 0x000b, 0x0002, 0x001f, 0x0005, 0x0001,
@@ -209,45 +190,65 @@ static const uint8_t lit_unicode_non_letter_ident_part_interval_lengths[] JERRY_
 };

 /**
- * Those non-letter characters that can be used as a non-first
- * character of an identifier and not included in any of the intervals
- * specified in lit_unicode_non_letter_ident_part_interval_sps array.
- *
- * The characters are from the following Unicode categories:
- * Nd, Mn, Mc, Pc
+ * Non-interval characters for ID_Continue.
 */
-static const uint16_t lit_unicode_non_letter_ident_part_chars[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_id_continue_chars[] JERRY_ATTR_CONST_DATA =
 {
-  0x05bf, 0x05c7, 0x0670, 0x0711, 0x07fd, 0x09bc, 0x09d7, 0x09fe, 0x0a3c, 0x0a51,
-  0x0a75, 0x0abc, 0x0b3c, 0x0b82, 0x0bd7, 0x0cbc, 0x0d57, 0x0dca, 0x0dd6, 0x0e31,
-  0x0eb1, 0x0f35, 0x0f37, 0x0f39, 0x0fc6, 0x17dd, 0x18a9, 0x1ced, 0x1cf4, 0x2054,
-  0x20e1, 0x2d7f, 0xa66f, 0xa802, 0xa806, 0xa80b, 0xa82c, 0xa9e5, 0xaa43, 0xaab0,
-  0xaac1, 0xfb1e, 0xff3f
+  0x00b7, 0x0387, 0x05bf, 0x05c7, 0x0670, 0x0711, 0x07fd, 0x09bc, 0x09d7, 0x09fe,
+  0x0a3c, 0x0a51, 0x0a75, 0x0abc, 0x0b3c, 0x0b82, 0x0bd7, 0x0cbc, 0x0d57, 0x0dca,
+  0x0dd6, 0x0e31, 0x0eb1, 0x0f35, 0x0f37, 0x0f39, 0x0fc6, 0x17dd, 0x18a9, 0x1ced,
+  0x1cf4, 0x2054, 0x20e1, 0x2d7f, 0xa66f, 0xa802, 0xa806, 0xa80b, 0xa82c, 0xa9e5,
+  0xaa43, 0xaab0, 0xaac1, 0xfb1e, 0xff3f
 };

+#if ENABLED (JERRY_ESNEXT)
 /**
- * Unicode separator character interval starting points from Unicode category: Zs
+ * Character interval starting points for White_Space.
 */
-static const uint16_t lit_unicode_separator_char_interval_sps[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_white_space_interval_starts[] JERRY_ATTR_CONST_DATA =
 {
  0x2000
 };

 /**
- * Unicode separator character interval lengths from Unicode category: Zs
+ * Character interval lengths for White_Space.
 */
-static const uint8_t lit_unicode_separator_char_interval_lengths[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_white_space_interval_lengths[] JERRY_ATTR_CONST_DATA =
+{
+  0x000a
+};
+
+/**
+ * Non-interval characters for White_Space.
+ */
+static const uint16_t lit_unicode_white_space_chars[] JERRY_ATTR_CONST_DATA =
+{
+  0x00a0, 0x1680, 0x202f, 0x205f, 0x3000
+};
+
+#else /* !ENABLED (JERRY_ESNEXT) */
+/**
+ * Character interval starting points for White_Space.
+ */
+static const uint16_t lit_unicode_white_space_interval_starts[] JERRY_ATTR_CONST_DATA =
+{
+  0x2000
+};
+
+/**
+ * Character interval lengths for White_Space.
+ */
+static const uint8_t lit_unicode_white_space_interval_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x000b
 };

 /**
- * Unicode separator characters that are not in the
- * lit_unicode_separator_char_intervals array.
- *
- * Unicode category: Zs
+ * Non-interval characters for White_Space.
 */
-static const uint16_t lit_unicode_separator_chars[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_white_space_chars[] JERRY_ATTR_CONST_DATA =
 {
  0x1680, 0x180e, 0x202f, 0x205f, 0x3000
 };
+
+#endif /* ENABLED (JERRY_ESNEXT) */