Support Unicode supplementary planes (#3928)

JerryScript-DCO-1.0-Signed-off-by: Robert Fancsik frobert@inf.u-szeged.hu
2020-07-06 14:21:13 +02:00
parent 7353b253ab
commit c1e90da0b4
16 changed files with 1105 additions and 861 deletions
@@ -2605,6 +2605,19 @@ ecma_stringbuilder_append_raw (ecma_stringbuilder_t *builder_p, /**< string buil
  memcpy (dest_p, data_p, data_size);
 } /* ecma_stringbuilder_append_raw */
 /**
 * Append the CESU-8 encoded form of a code point to a string builder
 */
 void
 ecma_stringbuilder_append_codepoint (ecma_stringbuilder_t *builder_p, /**< string builder */
                                      lit_code_point_t cp) /**< code point */
 {
  /* Grow the builder by the encoded length first, then encode straight into the returned area. */
  lit_utf8_byte_t *const out_p = ecma_stringbuilder_grow (builder_p,
                                                          (lit_utf8_size_t) lit_code_point_get_cesu8_length (cp));
  lit_code_point_to_cesu8_bytes (out_p, cp);
 } /* ecma_stringbuilder_append_codepoint */
 /**
 * Append an ecma_char_t to a string builder
 */
@@ -2612,10 +2625,7 @@ void
 ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, /**< string builder */
                                const ecma_char_t c) /**< ecma char */
 {
-  const lit_utf8_size_t size = (lit_utf8_size_t) lit_code_point_get_cesu8_length (c);
+  ecma_stringbuilder_append_codepoint (builder_p, c);
  lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, size);
  lit_code_point_to_cesu8_bytes (dest_p, c);
 } /* ecma_stringbuilder_append_char */
 /**
@@ -393,6 +393,7 @@ void ecma_stringbuilder_append_magic (ecma_stringbuilder_t *builder_p, const lit
 void ecma_stringbuilder_append_raw (ecma_stringbuilder_t *builder_p,
                                    const lit_utf8_byte_t *data_p,
                                    const lit_utf8_size_t data_size);
 void ecma_stringbuilder_append_codepoint (ecma_stringbuilder_t *builder_p, lit_code_point_t cp);
 void ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, const ecma_char_t c);
 void ecma_stringbuilder_append_byte (ecma_stringbuilder_t *builder_p, const lit_utf8_byte_t);
 ecma_string_t *ecma_stringbuilder_finalize (ecma_stringbuilder_t *builder_p);
@@ -988,96 +988,42 @@ ecma_builtin_string_prototype_object_conversion_helper (ecma_string_t *input_str
                                                        bool lower_case) /**< convert to lower (true)
                                                                          *   or upper (false) case */
 {
-  ecma_value_t ret_value = ECMA_VALUE_EMPTY;
+  ecma_stringbuilder_t builder = ecma_stringbuilder_create ();
  /* 3. */
  ECMA_STRING_TO_UTF8_STRING (input_string_p, input_start_p, input_start_size);
-  /*
+  const lit_utf8_byte_t *input_curr_p = input_start_p;
   * The URI encoding has two major phases: first we compute
   * the length of the lower case string, then we encode it.
   */
  lit_utf8_size_t output_length = 0;
  const lit_utf8_byte_t *input_str_curr_p = input_start_p;
  const lit_utf8_byte_t *input_str_end_p = input_start_p + input_start_size;
-  while (input_str_curr_p < input_str_end_p)
+  while (input_curr_p < input_str_end_p)
  {
-    ecma_char_t character = lit_cesu8_read_next (&input_str_curr_p);
+    lit_code_point_t cp = lit_cesu8_read_next (&input_curr_p);
-    ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
+
-    ecma_length_t character_length;
+#if ENABLED (JERRY_ESNEXT)
-    lit_utf8_byte_t utf8_byte_buffer[LIT_CESU8_MAX_BYTES_IN_CODE_POINT];
+    if (lit_is_code_point_utf16_high_surrogate (cp))
    {
      const ecma_char_t next_ch = lit_cesu8_peek_next (input_curr_p);
      if (lit_is_code_point_utf16_low_surrogate (next_ch))
      {
        cp = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) cp, next_ch);
        input_curr_p += LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;
      }
    }
 #endif /* ENABLED (JERRY_ESNEXT) */
    if (lower_case)
    {
-      character_length = lit_char_to_lower_case (character,
+      lit_char_to_lower_case (cp, &builder);
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }
    else
    {
-      character_length = lit_char_to_upper_case (character,
+      lit_char_to_upper_case (cp, &builder);
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }
    JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
    for (ecma_length_t i = 0; i < character_length; i++)
    {
      output_length += lit_code_unit_to_utf8 (character_buffer[i], utf8_byte_buffer);
    }
  }
  /* Second phase. */
  JMEM_DEFINE_LOCAL_ARRAY (output_start_p,
                           output_length,
                           lit_utf8_byte_t);
  lit_utf8_byte_t *output_char_p = output_start_p;
  /* Encoding the output. */
  input_str_curr_p = input_start_p;
  while (input_str_curr_p < input_str_end_p)
  {
    ecma_char_t character = lit_cesu8_read_next (&input_str_curr_p);
    ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
    ecma_length_t character_length;
    if (lower_case)
    {
      character_length = lit_char_to_lower_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }
    else
    {
      character_length = lit_char_to_upper_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }
    JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
    for (ecma_length_t i = 0; i < character_length; i++)
    {
      output_char_p += lit_code_unit_to_utf8 (character_buffer[i], output_char_p);
    }
  }
  JERRY_ASSERT (output_start_p + output_length == output_char_p);
  ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_length);
  ret_value = ecma_make_string_value (output_string_p);
  JMEM_FINALIZE_LOCAL_ARRAY (output_start_p);
  ECMA_FINALIZE_UTF8_STRING (input_start_p, input_start_size);
-  return ret_value;
+  return ecma_make_string_value (ecma_stringbuilder_finalize (&builder));
 } /* ecma_builtin_string_prototype_object_conversion_helper */
 /**
@@ -413,23 +413,13 @@ ecma_regexp_canonicalize_char (lit_code_point_t ch, /**< character */
    return ch;
  }
-#if ENABLED (JERRY_ESNEXT)
+  lit_code_point_t cu = lit_char_to_upper_case (ch, NULL);
  /* TODO: Implement case folding for code points in the upper planes. */
  if (JERRY_UNLIKELY (ch > LIT_UTF16_CODE_UNIT_MAX))
  {
    return ch;
  }
 #endif /* ENABLED (JERRY_ESNEXT) */
-  ecma_char_t u[LIT_MAXIMUM_OTHER_CASE_LENGTH];
+  if (cu == LIT_MULTIPLE_CU)
  const ecma_length_t size = lit_char_to_upper_case ((ecma_char_t) ch, u, LIT_MAXIMUM_OTHER_CASE_LENGTH);
  if (size != 1)
  {
    return ch;
  }
  const ecma_char_t cu = u[0];
  if (cu <= LIT_UTF8_1_BYTE_CODE_POINT_MAX && !unicode)
  {
    /* 6. */
@@ -14,12 +14,15 @@
 */
 #include "config.h"
 #include "ecma-helpers.h"
 #include "lit-char-helpers.h"
 #include "lit-unicode-ranges.inc.h"
 #include "lit-unicode-ranges-sup.inc.h"
 #include "lit-strings.h"
 #if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
 #include "lit-unicode-conversions.inc.h"
 #include "lit-unicode-conversions-sup.inc.h"
 #endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
 #define NUM_OF_ELEMENTS(array) (sizeof (array) / sizeof ((array)[0]))
@@ -31,36 +34,43 @@
 * @return true - if the character is in the given array
 *         false - otherwise
 */
-static bool
+#define LIT_SEARCH_CHAR_IN_ARRAY_FN(function_name, char_type, array_type) \
-search_char_in_char_array (ecma_char_t c,               /**< code unit */
+static bool \
-                           const ecma_char_t *array,    /**< array */
+function_name (char_type c,               /**< code unit */ \
-                           int size_of_array)           /**< length of the array */
+               const array_type *array,   /**< array */ \
-{
+               int size_of_array)         /**< length of the array */\
-  int bottom = 0;
+{ \
-  int top = size_of_array - 1;
+  int bottom = 0; \
  int top = size_of_array - 1; \
  \
  while (bottom <= top) \
  { \
    int middle = (bottom + top) / 2; \
    char_type current = array[middle]; \
    \
    if (current == c) \
    { \
      return true; \
    } \
    \
    if (c < current) \
    { \
      top = middle - 1; \
    } \
    else \
    { \
      bottom = middle + 1; \
    } \
  } \
  \
  return false; \
 } /* __function_name */
-  while (bottom <= top)
+LIT_SEARCH_CHAR_IN_ARRAY_FN (lit_search_char_in_array, ecma_char_t, uint16_t)
  {
    int middle = (bottom + top) / 2;
    ecma_char_t current = array[middle];
-    if (current == c)
+#if ENABLED (JERRY_ESNEXT)
-    {
+LIT_SEARCH_CHAR_IN_ARRAY_FN (lit_search_codepoint_in_array, lit_code_point_t, uint32_t)
-      return true;
+#endif /* ENABLED (JERRY_ESNEXT) */
    }
    if (c < current)
    {
      top = middle - 1;
    }
    else
    {
      bottom = middle + 1;
    }
  }
  return false;
 } /* search_char_in_char_array */
 /**
 * Binary search algorithm that searches a character in the given intervals.
@@ -70,37 +80,44 @@ search_char_in_char_array (ecma_char_t c,               /**< code unit */
 * @return true - if the character is included (inclusively) in one of the intervals in the given array
 *         false - otherwise
 */
-static bool
+#define LIT_SEARCH_CHAR_IN_INTERVAL_ARRAY_FN(function_name, char_type, array_type, interval_type) \
-search_char_in_interval_array (ecma_char_t c,               /**< code unit */
+static bool \
-                               const ecma_char_t *array_sp, /**< array of interval starting points */
+function_name (char_type c,                  /**< code unit */ \
-                               const uint8_t *lengths,      /**< array of interval lengths */
+               const array_type *array_sp,   /**< array of interval starting points */ \
-                               int size_of_array)           /**< length of the array */
+               const interval_type *lengths, /**< array of interval lengths */ \
-{
+               int size_of_array)            /**< length of the array */ \
-  int bottom = 0;
+{ \
-  int top = size_of_array - 1;
+  int bottom = 0; \
  int top = size_of_array - 1; \
  \
  while (bottom <= top) \
  { \
    int middle = (bottom + top) / 2; \
    char_type current_sp = array_sp[middle]; \
    \
    if (current_sp <= c && c <= current_sp + lengths[middle]) \
    { \
      return true; \
    } \
    \
    if (c > current_sp) \
    { \
      bottom = middle + 1; \
    } \
    else \
    { \
      top = middle - 1; \
    } \
  } \
  \
  return false; \
 } /* function_name */
-  while (bottom <= top)
+LIT_SEARCH_CHAR_IN_INTERVAL_ARRAY_FN (lit_search_char_in_interval_array, ecma_char_t, uint16_t, uint8_t)
  {
    int middle = (bottom + top) / 2;
    ecma_char_t current_sp = array_sp[middle];
-    if (current_sp <= c && c <= current_sp + lengths[middle])
+#if ENABLED (JERRY_ESNEXT)
-    {
+LIT_SEARCH_CHAR_IN_INTERVAL_ARRAY_FN (lit_search_codepoint_in_interval_array, lit_code_point_t, uint32_t, uint16_t)
-      return true;
+#endif /* ENABLED (JERRY_ESNEXT) */
    }
    if (c > current_sp)
    {
      bottom = middle + 1;
    }
    else
    {
      top = middle - 1;
    }
  }
  return false;
 } /* search_char_in_interval_array */
 /**
 * Check if specified character is one of the Whitespace characters including those that fall into
@@ -116,20 +133,18 @@ lit_char_is_white_space (lit_code_point_t c) /**< code point */
  {
    return (c == LIT_CHAR_SP || (c >= LIT_CHAR_TAB && c <= LIT_CHAR_CR));
  }
-  else
+
  {
  if (c == LIT_CHAR_NBSP || c == LIT_CHAR_BOM || c == LIT_CHAR_LS || c == LIT_CHAR_PS)
  {
    return true;
  }
  return (c <= LIT_UTF16_CODE_UNIT_MAX
-            && ((c >= lit_unicode_separator_char_interval_sps[0]
+          && ((c >= lit_unicode_white_space_interval_starts[0]
-                 && c < lit_unicode_separator_char_interval_sps[0] + lit_unicode_separator_char_interval_lengths[0])
+                 && c < lit_unicode_white_space_interval_starts[0] + lit_unicode_white_space_interval_lengths[0])
-                || search_char_in_char_array ((ecma_char_t) c,
+              || lit_search_char_in_array ((ecma_char_t) c,
-                                              lit_unicode_separator_chars,
+                                            lit_unicode_white_space_chars,
-                                              NUM_OF_ELEMENTS (lit_unicode_separator_chars))));
+                                            NUM_OF_ELEMENTS (lit_unicode_white_space_chars))));
  }
 } /* lit_char_is_white_space */
 /**
@@ -148,58 +163,84 @@ lit_char_is_line_terminator (ecma_char_t c) /**< code unit */
 } /* lit_char_is_line_terminator */
 /**
- * Check if specified character is a unicode letter
+ * Check if specified character is a Unicode ID_Start
 *
 * Note:
 *      Unicode letter is a character, included into one of the following categories:
 *       - Uppercase letter (Lu);
 *       - Lowercase letter (Ll);
 *       - Titlecase letter (Lt);
 *       - Modifier letter (Lm);
 *       - Other letter (Lo);
 *       - Letter number (Nl).
 *
 * See also:
- *          ECMA-262 v5, 7.6
+ *          ECMA-262 v6, 11.6: UnicodeIDStart
 *
- * @return true - if specified character falls into one of the listed categories,
+ * @return true - if the codepoint has Unicode property "ID_Start"
 *         false - otherwise
 */
 static bool
-lit_char_is_unicode_letter (ecma_char_t c) /**< code unit */
+lit_char_is_unicode_id_start (lit_code_point_t code_point) /**< code point */
 {
-  return (search_char_in_interval_array (c,
+#if ENABLED (JERRY_ESNEXT)
-                                         lit_unicode_letter_interval_sps,
+  if (JERRY_UNLIKELY (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN))
-                                         lit_unicode_letter_interval_lengths,
+  {
-                                         NUM_OF_ELEMENTS (lit_unicode_letter_interval_sps))
+    return (lit_search_codepoint_in_interval_array (code_point,
-          || search_char_in_char_array (c, lit_unicode_letter_chars, NUM_OF_ELEMENTS (lit_unicode_letter_chars)));
+                                                    lit_unicode_id_start_interval_starts_sup,
-} /* lit_char_is_unicode_letter */
+                                                    lit_unicode_id_start_interval_lengths_sup,
                                                    NUM_OF_ELEMENTS (lit_unicode_id_start_interval_starts_sup))
            || lit_search_codepoint_in_array (code_point,
                                              lit_unicode_id_start_chars_sup,
                                              NUM_OF_ELEMENTS (lit_unicode_id_start_chars_sup)));
  }
 #else /* !ENABLED (JERRY_ESNEXT) */
  JERRY_ASSERT (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN);
 #endif /* ENABLED (JERRY_ESNEXT) */
  /* Values below LIT_UTF8_4_BYTE_CODE_POINT_MIN are searched as a code unit in the non-"_sup" tables. */
  ecma_char_t c = (ecma_char_t) code_point;
  return (lit_search_char_in_interval_array (c,
                                             lit_unicode_id_start_interval_starts,
                                             lit_unicode_id_start_interval_lengths,
                                             NUM_OF_ELEMENTS (lit_unicode_id_start_interval_starts))
          || lit_search_char_in_array (c, lit_unicode_id_start_chars, NUM_OF_ELEMENTS (lit_unicode_id_start_chars)));
 } /* lit_char_is_unicode_id_start */
 /**
- * Check if specified character is a non-letter character and can be used as a
+ * Check if specified character is a Unicode ID_Continue
 * non-first character of an identifier.
 * These characters are covered by the following Unicode categories:
 *  - digit (Nd)
 *  - punctuation mark (Mn, Mc)
 *  - connector punctuation (Pc)
 *
 * See also:
- *          ECMA-262 v5, 7.6
+ *          ECMA-262 v6, 11.6: UnicodeIDContinue
 *
- * @return true - if specified character falls into one of the listed categories,
+ * @return true - if the codepoint has Unicode property "ID_Continue"
 *         false - otherwise
 */
 static bool
-lit_char_is_unicode_non_letter_ident_part (ecma_char_t c) /**< code unit */
+lit_char_is_unicode_id_continue (lit_code_point_t code_point) /**< code point */
 {
-  return (search_char_in_interval_array (c,
+  /* Each ID_Start codepoint is ID_Continue as well. */
-                                         lit_unicode_non_letter_ident_part_interval_sps,
+  if (lit_char_is_unicode_id_start (code_point))
-                                         lit_unicode_non_letter_ident_part_interval_lengths,
+  {
-                                         NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_interval_sps))
+    return true;
-          || search_char_in_char_array (c,
+  }
-                                        lit_unicode_non_letter_ident_part_chars,
+
-                                        NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_chars)));
+#if ENABLED (JERRY_ESNEXT)
-} /* lit_char_is_unicode_non_letter_ident_part */
+  if (JERRY_UNLIKELY (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN))
  {
    return (lit_search_codepoint_in_interval_array (code_point,
                                                    lit_unicode_id_continue_interval_starts_sup,
                                                    lit_unicode_id_continue_interval_lengths_sup,
                                                    NUM_OF_ELEMENTS (lit_unicode_id_continue_interval_starts_sup))
            || lit_search_codepoint_in_array (code_point,
                                              lit_unicode_id_continue_chars_sup,
                                              NUM_OF_ELEMENTS (lit_unicode_id_continue_chars_sup)));
  }
 #else /* !ENABLED (JERRY_ESNEXT) */
  JERRY_ASSERT (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN);
 #endif /* ENABLED (JERRY_ESNEXT) */
  /* Values below LIT_UTF8_4_BYTE_CODE_POINT_MIN are searched as a code unit in the non-"_sup" tables. */
  ecma_char_t c = (ecma_char_t) code_point;
  return (lit_search_char_in_interval_array (c,
                                             lit_unicode_id_continue_interval_starts,
                                             lit_unicode_id_continue_interval_lengths,
                                             NUM_OF_ELEMENTS (lit_unicode_id_continue_interval_starts))
          || lit_search_char_in_array (c,
                                       lit_unicode_id_continue_chars,
                                       NUM_OF_ELEMENTS (lit_unicode_id_continue_chars)));
 } /* lit_char_is_unicode_id_continue */
 /**
 * Checks whether the character is a valid identifier start.
@@ -218,17 +259,7 @@ lit_code_point_is_identifier_start (lit_code_point_t code_point) /**< code point
            || code_point == LIT_CHAR_UNDERSCORE);
  }
-#if ENABLED (JERRY_ESNEXT)
+  return lit_char_is_unicode_id_start (code_point);
  if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)
  {
    /* TODO: detect these ranges correctly. */
    return (code_point >= 0x10C80 && code_point <= 0x10CF2);
  }
 #else /* !ENABLED (JERRY_ESNEXT) */
  JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN);
 #endif /* ENABLED (JERRY_ESNEXT) */
  return lit_char_is_unicode_letter ((ecma_char_t) code_point);
 } /* lit_code_point_is_identifier_start */
 /**
@@ -249,18 +280,7 @@ lit_code_point_is_identifier_part (lit_code_point_t code_point) /**< code point
            || code_point == LIT_CHAR_UNDERSCORE);
  }
-#if ENABLED (JERRY_ESNEXT)
+  return lit_char_is_unicode_id_continue (code_point);
  if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)
  {
    /* TODO: detect these ranges correctly. */
    return (code_point >= 0x10C80 && code_point <= 0x10CF2);
  }
 #else /* !ENABLED (JERRY_ESNEXT) */
  JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN);
 #endif /* ENABLED (JERRY_ESNEXT) */
  return (lit_char_is_unicode_letter ((ecma_char_t) code_point)
          || lit_char_is_unicode_non_letter_ident_part ((ecma_char_t) code_point));
 } /* lit_code_point_is_identifier_part */
 /**
@@ -519,16 +539,27 @@ lit_char_is_word_char (lit_code_point_t c) /**< code point */
 /**
 * Check if the specified character is in one of those tables which contain bidirectional conversions.
 *
- * @return the mapped character sequence of an ecma character, if it's in the table.
+ * @return codepoint of the converted character if it is found in the tables
- *         0 - otherwise.
+ *         LIT_INVALID_CP - otherwise.
 */
-static ecma_length_t
+static lit_code_point_t
-search_in_bidirectional_conversion_tables (ecma_char_t character,        /**< code unit */
+lit_search_in_bidirectional_conversion_tables (lit_code_point_t cp,   /**< code point */
                                           ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
                                               bool is_lowercase)     /**< is lowercase conversion */
 {
-  /* 1, Check if the specified character is part of the lit_character_case_ranges table. */
+  /* 1, Check if the specified character is part of the lit_unicode_character_case_ranges_{sup} table. */
-  int number_of_case_ranges = NUM_OF_ELEMENTS (lit_character_case_ranges);
+  int number_of_case_ranges;
 #if ENABLED (JERRY_ESNEXT)
  bool is_supplementary = cp > LIT_UTF16_CODE_UNIT_MAX;
  if (is_supplementary)
  {
    number_of_case_ranges = NUM_OF_ELEMENTS (lit_unicode_character_case_ranges_sup);
  }
  else
 #endif /* ENABLED (JERRY_ESNEXT) */
  {
    number_of_case_ranges = NUM_OF_ELEMENTS (lit_unicode_character_case_ranges);
  }
  int conv_counter = 0;
  for (int i = 0; i < number_of_case_ranges; i++)
@@ -538,54 +569,92 @@ search_in_bidirectional_conversion_tables (ecma_char_t character,        /**< co
      conv_counter++;
    }
-    int range_length = lit_character_case_range_lengths[conv_counter];
+    size_t range_length;
-    ecma_char_t start_point = lit_character_case_ranges[i];
+    lit_code_point_t start_point;
 #if ENABLED (JERRY_ESNEXT)
    if (is_supplementary)
    {
      range_length = lit_unicode_character_case_range_lengths_sup[conv_counter];
      start_point = lit_unicode_character_case_ranges_sup[i];
    }
    else
 #endif /* ENABLED (JERRY_ESNEXT) */
    {
      range_length = lit_unicode_character_case_range_lengths[conv_counter];
      start_point = lit_unicode_character_case_ranges[i];
    }
-    if (start_point > character || character >= start_point + range_length)
+    if (start_point > cp || cp >= start_point + range_length)
    {
      continue;
    }
-    int char_dist = character - start_point;
+    uint32_t char_dist = (uint32_t) cp - start_point;
-
+    int offset;
    if (i % 2 == 0)
    {
-      output_buffer_p[0] = is_lowercase ? (ecma_char_t) (lit_character_case_ranges[i + 1] + char_dist) : character;
+      if (!is_lowercase)
      {
        return cp;
      }
      offset = i + 1;
    }
    else
    {
-      output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (lit_character_case_ranges[i - 1] + char_dist);
+      if (is_lowercase)
      {
        return cp;
      }
-    return 1;
+      offset = i - 1;
    }
 #if ENABLED (JERRY_ESNEXT)
    if (is_supplementary)
    {
      start_point = lit_unicode_character_case_ranges_sup[offset];
    }
    else
 #endif /* ENABLED (JERRY_ESNEXT) */
    {
      start_point = lit_unicode_character_case_ranges[offset];
    }
    return (lit_code_point_t) (start_point + char_dist);
  }
  /* Note: based on the latest Unicode standard (13.0.0.6), no conversion characters are defined for
     supplementary planes after this point. */
 #if ENABLED (JERRY_ESNEXT)
  if (is_supplementary)
  {
    return cp;
  }
 #endif /* ENABLED (JERRY_ESNEXT) */
  /* 2, Check if the specified character is part of the character_pair_ranges table. */
  int bottom = 0;
-  int top = NUM_OF_ELEMENTS (lit_character_pair_ranges) - 1;
+  int top = NUM_OF_ELEMENTS (lit_unicode_character_pair_ranges) - 1;
  while (bottom <= top)
  {
    int middle = (bottom + top) / 2;
-    ecma_char_t current_sp = lit_character_pair_ranges[middle];
+    lit_code_point_t current_sp = lit_unicode_character_pair_ranges[middle];
-    if (current_sp <= character && character < current_sp + lit_character_pair_range_lengths[middle])
+    if (current_sp <= cp && cp < current_sp + lit_unicode_character_pair_range_lengths[middle])
    {
-      int char_dist = character - current_sp;
+      uint32_t char_dist = (uint32_t) (cp - current_sp);
-      if ((character - current_sp) % 2 == 0)
+      if ((cp - current_sp) % 2 == 0)
      {
-        output_buffer_p[0] = is_lowercase ? (ecma_char_t) (current_sp + char_dist + 1) : character;
+        return is_lowercase ? (lit_code_point_t) (current_sp + char_dist + 1) : cp;
      }
      else
      {
        output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (current_sp + char_dist - 1);
      }
-      return 1;
+      return is_lowercase ? cp : (lit_code_point_t) (current_sp + char_dist - 1);
    }
-    if (character > current_sp)
+    if (cp > current_sp)
    {
      bottom = middle + 1;
    }
@@ -596,39 +665,35 @@ search_in_bidirectional_conversion_tables (ecma_char_t character,        /**< co
  }
  /* 3, Check if the specified character is part of the character_pairs table. */
-  int number_of_character_pairs = NUM_OF_ELEMENTS (lit_character_pairs);
+  int number_of_character_pairs = NUM_OF_ELEMENTS (lit_unicode_character_pairs);
  for (int i = 0; i < number_of_character_pairs; i++)
  {
-    if (character != lit_character_pairs[i])
+    if (cp != lit_unicode_character_pairs[i])
    {
      continue;
    }
    if (i % 2 == 0)
    {
-      output_buffer_p[0] = is_lowercase ? lit_character_pairs[i + 1] : character;
+      return is_lowercase ? lit_unicode_character_pairs[i + 1] : cp;
    }
    else
    {
      output_buffer_p[0] = is_lowercase ? character : lit_character_pairs[i - 1];
    }
-    return 1;
+    return is_lowercase ? cp : lit_unicode_character_pairs[i - 1];
  }
-  return 0;
+  return LIT_INVALID_CP;
-} /* search_in_bidirectional_conversion_tables */
+} /* lit_search_in_bidirectional_conversion_tables */
 /**
 * Check if the specified character is in the given conversion table.
 *
- * @return the mapped character sequence of an ecma character, if it's in the table.
+ * @return LIT_MULTIPLE_CU if the converted character consists of more than a single code unit
- *         0 - otherwise.
+ *         converted code point - otherwise
 */
-static ecma_length_t
+static lit_code_point_t
-search_in_conversion_table (ecma_char_t character,        /**< code unit */
+lit_search_in_conversion_table (ecma_char_t character,            /**< code unit */
-                            ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
+                                ecma_stringbuilder_t *builder_p,  /**< string builder */
                                const ecma_char_t *array,         /**< array */
                                const uint8_t *counters)          /**< case_values counter */
 {
@@ -653,28 +718,21 @@ search_in_conversion_table (ecma_char_t character,        /**< code unit */
      if (current == character)
      {
-        ecma_length_t char_sequence = 1;
+        if (builder_p != NULL)
        {
          ecma_stringbuilder_append_char (builder_p, array[middle + 1]);
-        switch (size_of_case_value)
+          if (size_of_case_value > 1)
          {
-          case 3:
+            ecma_stringbuilder_append_char (builder_p, array[middle + 2]);
          {
            output_buffer_p[2] = array[middle + 3];
            char_sequence++;
            /* FALLTHRU */
          }
-          case 2:
+          if (size_of_case_value > 2)
          {
-            output_buffer_p[1] = array[middle + 2];
+            ecma_stringbuilder_append_char (builder_p, array[middle + 3]);
            char_sequence++;
            /* FALLTHRU */
          }
          default:
          {
            output_buffer_p[0] = array[middle + 1];
            return char_sequence;
          }
        }
        return size_of_case_value == 1 ? array[middle + 1]: LIT_MULTIPLE_CU;
      }
      if (character < current)
@@ -688,127 +746,151 @@ search_in_conversion_table (ecma_char_t character,        /**< code unit */
    }
  }
-  return 0;
+  if (builder_p != NULL)
-} /* search_in_conversion_table */
+  {
    ecma_stringbuilder_append_char (builder_p, character);
  }
  return (lit_code_point_t) character;
 } /* lit_search_in_conversion_table */
 #endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
 /**
- * Returns the lowercase character sequence of an ecma character.
+ * Append the converted lowercase code unit sequence of a given code point to the string builder, if one is provided.
 *
 * Code points between LIT_CHAR_UPPERCASE_A and LIT_CHAR_UPPERCASE_Z are shifted by
 * (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A) and appended as a single byte.
 * With JERRY_UNICODE_CASE_CONVERSION enabled, the bidirectional conversion tables are searched next,
 * then the lit_unicode_lower_case_ranges table, and finally lit_search_in_conversion_table is used;
 * without it the code point is appended and returned unchanged.
 * When builder_p is NULL nothing is appended and only the result is returned.
 *
- * Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
+ * @return LIT_MULTIPLE_CU if the converted code point consists of more than a single code unit
- *
+ *         converted code point - otherwise
 *
 * NOTE(review): the '@return the length ...' text below appears to describe the removed
 *               ecma_length_t / output_buffer_p variant - confirm before relying on it.
 * @return the length of the lowercase character sequence
 *         which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
 */
-ecma_length_t
+lit_code_point_t
-lit_char_to_lower_case (ecma_char_t character, /**< input character value */
+lit_char_to_lower_case (lit_code_point_t cp, /**< code point */
-                        ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
+                        ecma_stringbuilder_t *builder_p) /**< string builder */
                        ecma_length_t buffer_size) /**< buffer size */
 {
-  JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
+  if (cp >= LIT_CHAR_UPPERCASE_A && cp <= LIT_CHAR_UPPERCASE_Z)
  if (character >= LIT_CHAR_UPPERCASE_A && character <= LIT_CHAR_UPPERCASE_Z)
  {
-    output_buffer_p[0] = (ecma_char_t) (character + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
+    lit_utf8_byte_t lowercase_char = (lit_utf8_byte_t) (cp + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
-    return 1;
+
    if (builder_p != NULL)
    {
      ecma_stringbuilder_append_byte (builder_p, lowercase_char);
    }
    return lowercase_char;
  }
 #if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
  lit_code_point_t lowercase_cp = lit_search_in_bidirectional_conversion_tables (cp, true);
-  ecma_length_t lowercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, true);
+  if (lowercase_cp != LIT_INVALID_CP)
  if (lowercase_sequence != 0)
  {
-    return lowercase_sequence;
+    if (builder_p != NULL)
    {
      ecma_stringbuilder_append_codepoint (builder_p, lowercase_cp);
    }
-  int num_of_lowercase_ranges = NUM_OF_ELEMENTS (lit_lower_case_ranges);
+    return lowercase_cp;
  }
  JERRY_ASSERT (cp < LIT_UTF8_4_BYTE_CODE_POINT_MIN);
  int num_of_lowercase_ranges = NUM_OF_ELEMENTS (lit_unicode_lower_case_ranges);
  for (int i = 0, j = 0; i < num_of_lowercase_ranges; i += 2, j++)
  {
-    int range_length = lit_lower_case_range_lengths[j] - 1;
+    JERRY_ASSERT (lit_unicode_lower_case_range_lengths[j] > 0);
-    ecma_char_t start_point = lit_lower_case_ranges[i];
+    uint32_t range_length = (uint32_t) (lit_unicode_lower_case_range_lengths[j] - 1);
    lit_code_point_t start_point = lit_unicode_lower_case_ranges[i];
-    if (start_point <= character && character <= start_point + range_length)
+    if (start_point <= cp && cp <= start_point + range_length)
    {
-      output_buffer_p[0] = (ecma_char_t) (lit_lower_case_ranges[i + 1] + (character - start_point));
+      lowercase_cp = lit_unicode_lower_case_ranges[i + 1] + (cp - start_point);
-      return 1;
+      if (builder_p != NULL)
    }
  }
  lowercase_sequence = search_in_conversion_table (character,
                                                   output_buffer_p,
                                                   lit_lower_case_conversions,
                                                   lit_lower_case_conversion_counters);
  if (lowercase_sequence != 0)
      {
-    return lowercase_sequence;
+        ecma_stringbuilder_append_codepoint (builder_p, lowercase_cp);
      }
      return lowercase_cp;
    }
  }
  return lit_search_in_conversion_table ((ecma_char_t) cp,
                                         builder_p,
                                         lit_unicode_lower_case_conversions,
                                         lit_unicode_lower_case_conversion_counters);
 #else /* !ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
  if (builder_p != NULL)
  {
    ecma_stringbuilder_append_codepoint (builder_p, cp);
  }
  return cp;
 #endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
  output_buffer_p[0] = character;
  return 1;
 } /* lit_char_to_lower_case */
 /**
- * Returns the uppercase character sequence of an ecma character.
+ * Append the converted uppercase code unit sequence of a given code point to the string builder, if one is provided.
 *
 * Code points between LIT_CHAR_LOWERCASE_A and LIT_CHAR_LOWERCASE_Z are shifted by
 * (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A) and appended as a single byte.
 * With JERRY_UNICODE_CASE_CONVERSION enabled, the bidirectional conversion tables are searched next,
 * then the lit_unicode_upper_case_special_ranges table (a hit appends two code points and yields
 * LIT_MULTIPLE_CU), and finally lit_search_in_conversion_table is used;
 * without it the code point is appended and returned unchanged.
 * When builder_p is NULL nothing is appended and only the result is returned.
 *
 * NOTE(review): unlike lit_char_to_lower_case, no assertion that cp < LIT_UTF8_4_BYTE_CODE_POINT_MIN
 *               is visible here before the (ecma_char_t) cast of cp in the final lookup - confirm that
 *               lit_search_in_bidirectional_conversion_tables already handles every larger code point.
 *
- * Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
+ * @return LIT_MULTIPLE_CU if the converted code point consists of more than a single code unit
- *
+ *         converted code point - otherwise
 *
 * NOTE(review): the '@return the length ...' text below appears to describe the removed
 *               ecma_length_t / output_buffer_p variant - confirm before relying on it.
 * @return the length of the uppercase character sequence
 *         which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
 */
-ecma_length_t
+lit_code_point_t
-lit_char_to_upper_case (ecma_char_t character, /**< input character value */
+lit_char_to_upper_case (lit_code_point_t cp, /**< code point */
-                        ecma_char_t *output_buffer_p, /**< buffer for the result characters */
+                        ecma_stringbuilder_t *builder_p) /**< string builder */
                        ecma_length_t buffer_size) /**< buffer size */
 {
-  JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
+  if (cp >= LIT_CHAR_LOWERCASE_A && cp <= LIT_CHAR_LOWERCASE_Z)
  if (character >= LIT_CHAR_LOWERCASE_A && character <= LIT_CHAR_LOWERCASE_Z)
  {
-    output_buffer_p[0] = (ecma_char_t) (character - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
+    lit_utf8_byte_t uppercase_char = (lit_utf8_byte_t) (cp - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
-    return 1;
+
    if (builder_p != NULL)
    {
      ecma_stringbuilder_append_byte (builder_p, uppercase_char);
    }
    return uppercase_char;
  }
 #if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
  lit_code_point_t uppercase_cp = lit_search_in_bidirectional_conversion_tables (cp, false);
-  ecma_length_t uppercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, false);
+  if (uppercase_cp != LIT_INVALID_CP)
  if (uppercase_sequence != 0)
  {
-    return uppercase_sequence;
+    if (builder_p != NULL)
    {
      ecma_stringbuilder_append_codepoint (builder_p, uppercase_cp);
    }
-  int num_of_upper_case_special_ranges = NUM_OF_ELEMENTS (lit_upper_case_special_ranges);
+    return uppercase_cp;
  }
  int num_of_upper_case_special_ranges = NUM_OF_ELEMENTS (lit_unicode_upper_case_special_ranges);
  for (int i = 0, j = 0; i < num_of_upper_case_special_ranges; i += 3, j++)
  {
-    int range_length = lit_upper_case_special_range_lengths[j];
+    uint32_t range_length = lit_unicode_upper_case_special_range_lengths[j];
-    ecma_char_t start_point = lit_upper_case_special_ranges[i];
+    ecma_char_t start_point = lit_unicode_upper_case_special_ranges[i];
-    if (start_point <= character && character <= start_point + range_length)
+    if (start_point <= cp && cp <= start_point + range_length)
    {
-      output_buffer_p[0] = (ecma_char_t) (lit_upper_case_special_ranges[i + 1] + (character - start_point));
+      if (builder_p != NULL)
      output_buffer_p[1] = (ecma_char_t) (lit_upper_case_special_ranges[i + 2]);
      return 2;
    }
  }
  uppercase_sequence = search_in_conversion_table (character,
                                                   output_buffer_p,
                                                   lit_upper_case_conversions,
                                                   lit_upper_case_conversion_counters);
  if (uppercase_sequence != 0)
      {
-    return uppercase_sequence;
+        uppercase_cp = lit_unicode_upper_case_special_ranges[i + 1] + (cp - start_point);
        ecma_stringbuilder_append_codepoint (builder_p, uppercase_cp);
        ecma_stringbuilder_append_codepoint (builder_p, lit_unicode_upper_case_special_ranges[i + 2]);
      }
      return LIT_MULTIPLE_CU;
    }
  }
  return lit_search_in_conversion_table ((ecma_char_t) cp,
                                         builder_p,
                                         lit_unicode_upper_case_conversions,
                                         lit_unicode_upper_case_conversion_counters);
 #else /* !ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
  if (builder_p != NULL)
  {
    ecma_stringbuilder_append_codepoint (builder_p, cp);
  }
  return cp;
 #endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
  output_buffer_p[0] = character;
  return 1;
 } /* lit_char_to_upper_case */
@@ -18,6 +18,16 @@
 #include "lit-globals.h"
 /**
 * Invalid character code point
 */
 #define LIT_INVALID_CP 0xFFFFFFFF
 /**
 * Result of lit_char_to_lower_case/lit_char_to_upper_case consists of more than a single code unit
 */
 #define LIT_MULTIPLE_CU 0xFFFFFFFE
 /*
 * Format control characters (ECMA-262 v5, Table 1)
 */
@@ -234,12 +244,7 @@ bool lit_char_is_word_char (lit_code_point_t c);
 * Utility functions for uppercasing / lowercasing
 */
-/**
+lit_code_point_t lit_char_to_lower_case (lit_code_point_t cp, ecma_stringbuilder_t *builder_p);
- * Minimum buffer size for lit_char_to_lower_case / lit_char_to_upper_case functions.
+lit_code_point_t lit_char_to_upper_case (lit_code_point_t cp, ecma_stringbuilder_t *builder_p);
 */
 #define LIT_MAXIMUM_OTHER_CASE_LENGTH (3)
 ecma_length_t lit_char_to_lower_case (ecma_char_t character, ecma_char_t *output_buffer_p, ecma_length_t buffer_size);
 ecma_length_t lit_char_to_upper_case (ecma_char_t character, ecma_char_t *output_buffer_p, ecma_length_t buffer_size);
 #endif /* !LIT_CHAR_HELPERS_H */
@@ -0,0 +1,30 @@
 /* Copyright JS Foundation and other contributors, http://js.foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* This file is automatically generated by the gen-unicode.py script
 * from UnicodeData.txt and SpecialCasing.txt files. Do not edit! */
 /* Contains start points of character case ranges (these are bidirectional conversions). */
 static const uint32_t lit_unicode_character_case_ranges_sup[] JERRY_ATTR_CONST_DATA =
 {
  0x010400, 0x010428, 0x0104b0, 0x0104d8, 0x010c80, 0x010cc0, 0x0118a0, 0x0118c0, 0x016e40, 0x016e60,
  0x01e900, 0x01e922
 };
 /* Interval lengths of start points in `character_case_ranges` table. */
 static const uint16_t lit_unicode_character_case_range_lengths_sup[] JERRY_ATTR_CONST_DATA =
 {
  0x000028, 0x000024, 0x000033, 0x000020, 0x000020, 0x000022
 };
@@ -14,10 +14,10 @@
 */
 /* This file is automatically generated by the gen-unicode.py script
- * from UnicodeData-13.0.0d6.txt and SpecialCasing-13.0.0d1.txt files. Do not edit! */
+ * from UnicodeData.txt and SpecialCasing.txt files. Do not edit! */
 /* Contains start points of character case ranges (these are bidirectional conversions). */
-static const uint16_t lit_character_case_ranges[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_character_case_ranges[] JERRY_ATTR_CONST_DATA =
 {
  0x00c0, 0x00e0, 0x00d8, 0x00f8, 0x0189, 0x0256, 0x01b1, 0x028a, 0x0388, 0x03ad,
  0x038e, 0x03cd, 0x0391, 0x03b1, 0x03a3, 0x03c3, 0x03fd, 0x037b, 0x0400, 0x0450,
@@ -30,7 +30,7 @@ static const uint16_t lit_character_case_ranges[] JERRY_ATTR_CONST_DATA =
 };
 /* Interval lengths of start points in `character_case_ranges` table. */
-static const uint8_t lit_character_case_range_lengths[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_character_case_range_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x0017, 0x0007, 0x0002, 0x0002, 0x0003, 0x0002, 0x0011, 0x0009, 0x0003, 0x0010,
  0x0020, 0x0026, 0x0026, 0x0050, 0x0006, 0x002b, 0x0003, 0x0008, 0x0006, 0x0008,
@@ -39,7 +39,7 @@ static const uint8_t lit_character_case_range_lengths[] JERRY_ATTR_CONST_DATA =
 };
 /* Contains the start points of bidirectional conversion ranges. */
-static const uint16_t lit_character_pair_ranges[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_character_pair_ranges[] JERRY_ATTR_CONST_DATA =
 {
  0x0100, 0x0132, 0x0139, 0x014a, 0x0179, 0x0182, 0x0187, 0x018b, 0x0191, 0x0198,
  0x01a0, 0x01a7, 0x01ac, 0x01af, 0x01b3, 0x01b8, 0x01bc, 0x01cd, 0x01de, 0x01f4,
@@ -50,7 +50,7 @@ static const uint16_t lit_character_pair_ranges[] JERRY_ATTR_CONST_DATA =
 };
 /* Interval lengths of start points in `character_pair_ranges` table. */
-static const uint8_t lit_character_pair_range_lengths[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_character_pair_range_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x0030, 0x0006, 0x0010, 0x002e, 0x0006, 0x0004, 0x0002, 0x0002, 0x0002, 0x0002,
  0x0006, 0x0002, 0x0002, 0x0002, 0x0004, 0x0002, 0x0002, 0x0010, 0x0012, 0x0002,
@@ -61,7 +61,7 @@ static const uint8_t lit_character_pair_range_lengths[] JERRY_ATTR_CONST_DATA =
 };
 /* Contains lower/upper case bidirectional conversion pairs. */
-static const uint16_t lit_character_pairs[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_character_pairs[] JERRY_ATTR_CONST_DATA =
 {
  0x0178, 0x00ff, 0x0181, 0x0253, 0x0186, 0x0254, 0x018e, 0x01dd, 0x018f, 0x0259,
  0x0190, 0x025b, 0x0193, 0x0260, 0x0194, 0x0263, 0x0196, 0x0269, 0x0197, 0x0268,
@@ -81,20 +81,20 @@ static const uint16_t lit_character_pairs[] JERRY_ATTR_CONST_DATA =
 /* Contains start points of one-to-two uppercase ranges where the second character
 * is always the same.
 */
-static const uint16_t lit_upper_case_special_ranges[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_upper_case_special_ranges[] JERRY_ATTR_CONST_DATA =
 {
  0x1f80, 0x1f08, 0x0399, 0x1f88, 0x1f08, 0x0399, 0x1f90, 0x1f28, 0x0399, 0x1f98,
  0x1f28, 0x0399, 0x1fa0, 0x1f68, 0x0399, 0x1fa8, 0x1f68, 0x0399
 };
 /* Interval lengths for start points in `upper_case_special_ranges` table. */
-static const uint8_t lit_upper_case_special_range_lengths[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_upper_case_special_range_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x0007, 0x0007, 0x0007, 0x0007, 0x0007, 0x0007
 };
 /* Contains start points of lowercase ranges. */
-static const uint16_t lit_lower_case_ranges[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_lower_case_ranges[] JERRY_ATTR_CONST_DATA =
 {
  0x1e96, 0x1e96, 0x1f80, 0x1f80, 0x1f88, 0x1f80, 0x1f90, 0x1f90, 0x1f98, 0x1f90,
  0x1fa0, 0x1fa0, 0x1fa8, 0x1fa0, 0x1fb2, 0x1fb2, 0x1fb6, 0x1fb6, 0x1fc2, 0x1fc2,
@@ -103,14 +103,14 @@ static const uint16_t lit_lower_case_ranges[] JERRY_ATTR_CONST_DATA =
 };
 /* Interval lengths for start points in `lower_case_ranges` table. */
-static const uint8_t lit_lower_case_range_lengths[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_lower_case_range_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x0005, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0003, 0x0002, 0x0003,
  0x0002, 0x0002, 0x0002, 0x0003, 0x0002, 0x0003, 0x0002, 0x0007, 0x0005
 };
 /* The remaining lowercase conversions. The lowercase variant can be one-to-three character long. */
-static const uint16_t lit_lower_case_conversions[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_lower_case_conversions[] JERRY_ATTR_CONST_DATA =
 {
  0x00df, 0x00df, 0x0149, 0x0149, 0x01c5, 0x01c6, 0x01c8, 0x01c9, 0x01cb, 0x01cc,
  0x01f0, 0x01f0, 0x01f2, 0x01f3, 0x0390, 0x0390, 0x03b0, 0x03b0, 0x03f4, 0x03b8,
@@ -120,13 +120,13 @@ static const uint16_t lit_lower_case_conversions[] JERRY_ATTR_CONST_DATA =
 };
 /* Number of one-to-one, one-to-two, and one-to-three lowercase conversions. */
-static const uint8_t lit_lower_case_conversion_counters[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_lower_case_conversion_counters[] JERRY_ATTR_CONST_DATA =
 {
  0x0016, 0x0001, 0x0000
 };
 /* The remaining uppercase conversions. The uppercase variant can be one-to-three character long. */
-static const uint16_t lit_upper_case_conversions[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_upper_case_conversions[] JERRY_ATTR_CONST_DATA =
 {
  0x00b5, 0x039c, 0x0130, 0x0130, 0x0131, 0x0049, 0x017f, 0x0053, 0x01c5, 0x01c4,
  0x01c8, 0x01c7, 0x01cb, 0x01ca, 0x01f2, 0x01f1, 0x0345, 0x0399, 0x03c2, 0x03a3,
@@ -157,7 +157,7 @@ static const uint16_t lit_upper_case_conversions[] JERRY_ATTR_CONST_DATA =
 };
 /* Number of one-to-one, one-to-two, and one-to-three uppercase conversions. */
-static const uint8_t lit_upper_case_conversion_counters[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_upper_case_conversion_counters[] JERRY_ATTR_CONST_DATA =
 {
  0x001c, 0x002c, 0x0010
 };
@@ -0,0 +1,129 @@
 /* Copyright JS Foundation and other contributors, http://js.foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* This file is automatically generated by the gen-unicode.py script
 * from DerivedCoreProperties.txt. Do not edit! */
 /**
 * Character interval starting points for ID_Start.
 */
 static const uint32_t lit_unicode_id_start_interval_starts_sup[] JERRY_ATTR_CONST_DATA =
 {
  0x010000, 0x01000d, 0x010028, 0x01003c, 0x01003f, 0x010050, 0x010080, 0x010140, 0x010280, 0x0102a0,
  0x010300, 0x01032d, 0x010350, 0x010380, 0x0103a0, 0x0103c8, 0x0103d1, 0x010400, 0x0104b0, 0x0104d8,
  0x010500, 0x010530, 0x010600, 0x010740, 0x010760, 0x010800, 0x01080a, 0x010837, 0x01083f, 0x010860,
  0x010880, 0x0108e0, 0x0108f4, 0x010900, 0x010920, 0x010980, 0x0109be, 0x010a10, 0x010a15, 0x010a19,
  0x010a60, 0x010a80, 0x010ac0, 0x010ac9, 0x010b00, 0x010b40, 0x010b60, 0x010b80, 0x010c00, 0x010c80,
  0x010cc0, 0x010d00, 0x010e80, 0x010eb0, 0x010f00, 0x010f30, 0x010fb0, 0x010fe0, 0x011003, 0x011083,
  0x0110d0, 0x011103, 0x011150, 0x011183, 0x0111c1, 0x011200, 0x011213, 0x011280, 0x01128a, 0x01128f,
  0x01129f, 0x0112b0, 0x011305, 0x01130f, 0x011313, 0x01132a, 0x011332, 0x011335, 0x01135d, 0x011400,
  0x011447, 0x01145f, 0x011480, 0x0114c4, 0x011580, 0x0115d8, 0x011600, 0x011680, 0x011700, 0x011800,
  0x0118a0, 0x0118ff, 0x01190c, 0x011915, 0x011918, 0x0119a0, 0x0119aa, 0x011a0b, 0x011a5c, 0x011ac0,
  0x011c00, 0x011c0a, 0x011c72, 0x011d00, 0x011d08, 0x011d0b, 0x011d60, 0x011d67, 0x011d6a, 0x011ee0,
  0x012000, 0x012400, 0x012480, 0x013000, 0x014400, 0x016800, 0x016a40, 0x016ad0, 0x016b00, 0x016b40,
  0x016b63, 0x016b7d, 0x016e40, 0x016f00, 0x016f93, 0x016fe0, 0x017000, 0x018800, 0x018d00, 0x01b000,
  0x01b150, 0x01b164, 0x01b170, 0x01bc00, 0x01bc70, 0x01bc80, 0x01bc90, 0x01d400, 0x01d456, 0x01d49e,
  0x01d4a5, 0x01d4a9, 0x01d4ae, 0x01d4bd, 0x01d4c5, 0x01d507, 0x01d50d, 0x01d516, 0x01d51e, 0x01d53b,
  0x01d540, 0x01d54a, 0x01d552, 0x01d6a8, 0x01d6c2, 0x01d6dc, 0x01d6fc, 0x01d716, 0x01d736, 0x01d750,
  0x01d770, 0x01d78a, 0x01d7aa, 0x01d7c4, 0x01e100, 0x01e137, 0x01e2c0, 0x01e800, 0x01e900, 0x01ee00,
  0x01ee05, 0x01ee21, 0x01ee29, 0x01ee34, 0x01ee4d, 0x01ee51, 0x01ee61, 0x01ee67, 0x01ee6c, 0x01ee74,
  0x01ee79, 0x01ee80, 0x01ee8b, 0x01eea1, 0x01eea5, 0x01eeab, 0x020000, 0x02a700, 0x02b740, 0x02b820,
  0x02ceb0, 0x02f800, 0x030000
 };
 /**
 * Character interval lengths for ID_Start.
 */
 static const uint16_t lit_unicode_id_start_interval_lengths_sup[] JERRY_ATTR_CONST_DATA =
 {
  0x00000b, 0x000019, 0x000012, 0x000001, 0x00000e, 0x00000d, 0x00007a, 0x000034, 0x00001c, 0x000030,
  0x00001f, 0x00001d, 0x000025, 0x00001d, 0x000023, 0x000007, 0x000004, 0x00009d, 0x000023, 0x000023,
  0x000027, 0x000033, 0x000136, 0x000015, 0x000007, 0x000005, 0x00002b, 0x000001, 0x000016, 0x000016,
  0x00001e, 0x000012, 0x000001, 0x000015, 0x000019, 0x000037, 0x000001, 0x000003, 0x000002, 0x00001c,
  0x00001c, 0x00001c, 0x000007, 0x00001b, 0x000035, 0x000015, 0x000012, 0x000011, 0x000048, 0x000032,
  0x000032, 0x000023, 0x000029, 0x000001, 0x00001c, 0x000015, 0x000014, 0x000016, 0x000034, 0x00002c,
  0x000018, 0x000023, 0x000022, 0x00002f, 0x000003, 0x000011, 0x000018, 0x000006, 0x000003, 0x00000e,
  0x000009, 0x00002e, 0x000007, 0x000001, 0x000015, 0x000006, 0x000001, 0x000004, 0x000004, 0x000034,
  0x000003, 0x000002, 0x00002f, 0x000001, 0x00002e, 0x000003, 0x00002f, 0x00002a, 0x00001a, 0x00002b,
  0x00003f, 0x000007, 0x000007, 0x000001, 0x000017, 0x000007, 0x000026, 0x000027, 0x00002d, 0x000038,
  0x000008, 0x000024, 0x00001d, 0x000006, 0x000001, 0x000025, 0x000005, 0x000001, 0x00001f, 0x000012,
  0x000399, 0x00006e, 0x0000c3, 0x00042e, 0x000246, 0x000238, 0x00001e, 0x00001d, 0x00002f, 0x000003,
  0x000014, 0x000012, 0x00003f, 0x00004a, 0x00000c, 0x000001, 0x0017f7, 0x0004d5, 0x000008, 0x00011e,
  0x000002, 0x000003, 0x00018b, 0x00006a, 0x00000c, 0x000008, 0x000009, 0x000054, 0x000046, 0x000001,
  0x000001, 0x000003, 0x00000b, 0x000006, 0x000040, 0x000003, 0x000007, 0x000006, 0x00001b, 0x000003,
  0x000004, 0x000006, 0x000153, 0x000018, 0x000018, 0x00001e, 0x000018, 0x00001e, 0x000018, 0x00001e,
  0x000018, 0x00001e, 0x000018, 0x000007, 0x00002c, 0x000006, 0x00002b, 0x0000c4, 0x000043, 0x000003,
  0x00001a, 0x000001, 0x000009, 0x000003, 0x000002, 0x000001, 0x000001, 0x000003, 0x000006, 0x000003,
  0x000003, 0x000009, 0x000010, 0x000002, 0x000004, 0x000010, 0x00a6dd, 0x001034, 0x0000dd, 0x001681,
  0x001d30, 0x00021d, 0x00134a
 };
 /**
 * Non-interval characters for ID_Start.
 */
 static const uint32_t lit_unicode_id_start_chars_sup[] JERRY_ATTR_CONST_DATA =
 {
  0x010808, 0x01083c, 0x010a00, 0x010f27, 0x011144, 0x011147, 0x011176, 0x0111da, 0x0111dc, 0x011288,
  0x01133d, 0x011350, 0x0114c7, 0x011644, 0x0116b8, 0x011909, 0x01193f, 0x011941, 0x0119e1, 0x0119e3,
  0x011a00, 0x011a3a, 0x011a50, 0x011a9d, 0x011c40, 0x011d46, 0x011d98, 0x011fb0, 0x016f50, 0x016fe3,
  0x01d4a2, 0x01d4bb, 0x01d546, 0x01e14e, 0x01e94b, 0x01ee24, 0x01ee27, 0x01ee39, 0x01ee3b, 0x01ee42,
  0x01ee47, 0x01ee49, 0x01ee4b, 0x01ee54, 0x01ee57, 0x01ee59, 0x01ee5b, 0x01ee5d, 0x01ee5f, 0x01ee64,
  0x01ee7e
 };
 /**
 * Character interval starting points for ID_Continue.
 */
 static const uint32_t lit_unicode_id_continue_interval_starts_sup[] JERRY_ATTR_CONST_DATA =
 {
  0x010376, 0x0104a0, 0x010a01, 0x010a05, 0x010a0c, 0x010a38, 0x010ae5, 0x010d24, 0x010d30, 0x010eab,
  0x010f46, 0x011000, 0x011038, 0x011066, 0x01107f, 0x0110b0, 0x0110f0, 0x011100, 0x011127, 0x011136,
  0x011145, 0x011180, 0x0111b3, 0x0111c9, 0x0111ce, 0x01122c, 0x0112df, 0x0112f0, 0x011300, 0x01133b,
  0x01133e, 0x011347, 0x01134b, 0x011362, 0x011366, 0x011370, 0x011435, 0x011450, 0x0114b0, 0x0114d0,
  0x0115af, 0x0115b8, 0x0115dc, 0x011630, 0x011650, 0x0116ab, 0x0116c0, 0x01171d, 0x011730, 0x01182c,
  0x0118e0, 0x011930, 0x011937, 0x01193b, 0x011942, 0x011950, 0x0119d1, 0x0119da, 0x011a01, 0x011a33,
  0x011a3b, 0x011a51, 0x011a8a, 0x011c2f, 0x011c38, 0x011c50, 0x011c92, 0x011ca9, 0x011d31, 0x011d3c,
  0x011d3f, 0x011d50, 0x011d8a, 0x011d90, 0x011d93, 0x011da0, 0x011ef3, 0x016a60, 0x016af0, 0x016b30,
  0x016b50, 0x016f51, 0x016f8f, 0x016ff0, 0x01bc9d, 0x01d165, 0x01d16d, 0x01d17b, 0x01d185, 0x01d1aa,
  0x01d242, 0x01d7ce, 0x01da00, 0x01da3b, 0x01da9b, 0x01daa1, 0x01e000, 0x01e008, 0x01e01b, 0x01e023,
  0x01e026, 0x01e130, 0x01e140, 0x01e2ec, 0x01e8d0, 0x01e944, 0x01e950, 0x01fbf0, 0x0e0100
 };
 /**
 * Character interval lengths for ID_Continue.
 */
 static const uint16_t lit_unicode_id_continue_interval_lengths_sup[] JERRY_ATTR_CONST_DATA =
 {
  0x000004, 0x000009, 0x000002, 0x000001, 0x000003, 0x000002, 0x000001, 0x000003, 0x000009, 0x000001,
  0x00000a, 0x000002, 0x00000e, 0x000009, 0x000003, 0x00000a, 0x000009, 0x000002, 0x00000d, 0x000009,
  0x000001, 0x000002, 0x00000d, 0x000003, 0x00000b, 0x00000b, 0x00000b, 0x000009, 0x000003, 0x000001,
  0x000006, 0x000001, 0x000002, 0x000001, 0x000006, 0x000004, 0x000011, 0x000009, 0x000013, 0x000009,
  0x000006, 0x000008, 0x000001, 0x000010, 0x000009, 0x00000c, 0x000009, 0x00000e, 0x000009, 0x00000e,
  0x000009, 0x000005, 0x000001, 0x000003, 0x000001, 0x000009, 0x000006, 0x000006, 0x000009, 0x000006,
  0x000003, 0x00000a, 0x00000f, 0x000007, 0x000007, 0x000009, 0x000015, 0x00000d, 0x000005, 0x000001,
  0x000006, 0x000009, 0x000004, 0x000001, 0x000004, 0x000009, 0x000003, 0x000009, 0x000004, 0x000006,
  0x000009, 0x000036, 0x000003, 0x000001, 0x000001, 0x000004, 0x000005, 0x000007, 0x000006, 0x000003,
  0x000002, 0x000031, 0x000036, 0x000031, 0x000004, 0x00000e, 0x000006, 0x000010, 0x000006, 0x000001,
  0x000004, 0x000006, 0x000009, 0x00000d, 0x000006, 0x000006, 0x000009, 0x000009, 0x0000ef
 };
 /**
 * Non-interval characters for ID_Continue.
 */
 static const uint32_t lit_unicode_id_continue_chars_sup[] JERRY_ATTR_CONST_DATA =
 {
  0x0101fd, 0x0102e0, 0x010a3f, 0x011173, 0x01123e, 0x011357, 0x01145e, 0x011940, 0x0119e4, 0x011a47,
  0x011d3a, 0x011d47, 0x016f4f, 0x016fe4, 0x01da75, 0x01da84
 };
@@ -14,15 +14,12 @@
 */
 /* This file is automatically generated by the gen-unicode.py script
- * from UnicodeData-13.0.0d6.txt. Do not edit! */
+ * from DerivedCoreProperties.txt. Do not edit! */
 /**
- * Character interval starting points for the unicode letters.
+ * Character interval starting points for ID_Start.
 *
 * The characters covered by these intervals are from
 * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl
 */
-static const uint16_t lit_unicode_letter_interval_sps[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_id_start_interval_starts[] JERRY_ATTR_CONST_DATA =
 {
  0x00c0, 0x00d8, 0x00f8, 0x01f8, 0x02c6, 0x02e0, 0x0370, 0x0376, 0x037a, 0x0388,
  0x038e, 0x03a3, 0x03f7, 0x048a, 0x0531, 0x0560, 0x05d0, 0x05ef, 0x0620, 0x066e,
@@ -39,46 +36,43 @@ static const uint16_t lit_unicode_letter_interval_sps[] JERRY_ATTR_CONST_DATA =
  0x10fc, 0x11fc, 0x124a, 0x1250, 0x125a, 0x1260, 0x128a, 0x1290, 0x12b2, 0x12b8,
  0x12c2, 0x12c8, 0x12d8, 0x1312, 0x1318, 0x1380, 0x13a0, 0x13f8, 0x1401, 0x1501,
  0x1601, 0x166f, 0x1681, 0x16a0, 0x16ee, 0x1700, 0x170e, 0x1720, 0x1740, 0x1760,
-  0x176e, 0x1780, 0x1820, 0x1880, 0x1887, 0x18b0, 0x1900, 0x1950, 0x1970, 0x1980,
+  0x176e, 0x1780, 0x1820, 0x1880, 0x18b0, 0x1900, 0x1950, 0x1970, 0x1980, 0x19b0,
-  0x19b0, 0x1a00, 0x1a20, 0x1b05, 0x1b45, 0x1b83, 0x1bae, 0x1bba, 0x1c00, 0x1c4d,
+  0x1a00, 0x1a20, 0x1b05, 0x1b45, 0x1b83, 0x1bae, 0x1bba, 0x1c00, 0x1c4d, 0x1c5a,
-  0x1c5a, 0x1c80, 0x1c90, 0x1cbd, 0x1ce9, 0x1cee, 0x1cf5, 0x1d00, 0x1e00, 0x1f00,
+  0x1c80, 0x1c90, 0x1cbd, 0x1ce9, 0x1cee, 0x1cf5, 0x1d00, 0x1e00, 0x1f00, 0x1f18,
-  0x1f18, 0x1f20, 0x1f48, 0x1f50, 0x1f5f, 0x1f80, 0x1fb6, 0x1fc2, 0x1fc6, 0x1fd0,
+  0x1f20, 0x1f48, 0x1f50, 0x1f5f, 0x1f80, 0x1fb6, 0x1fc2, 0x1fc6, 0x1fd0, 0x1fd6,
-  0x1fd6, 0x1fe0, 0x1ff2, 0x1ff6, 0x2090, 0x210a, 0x2119, 0x212a, 0x212f, 0x213c,
+  0x1fe0, 0x1ff2, 0x1ff6, 0x2090, 0x210a, 0x2118, 0x212a, 0x213c, 0x2145, 0x2160,
-  0x2145, 0x2160, 0x2c00, 0x2c30, 0x2c60, 0x2ceb, 0x2cf2, 0x2d00, 0x2d30, 0x2d80,
+  0x2c00, 0x2c30, 0x2c60, 0x2ceb, 0x2cf2, 0x2d00, 0x2d30, 0x2d80, 0x2da0, 0x2da8,
-  0x2da0, 0x2da8, 0x2db0, 0x2db8, 0x2dc0, 0x2dc8, 0x2dd0, 0x2dd8, 0x3005, 0x3021,
+  0x2db0, 0x2db8, 0x2dc0, 0x2dc8, 0x2dd0, 0x2dd8, 0x3005, 0x3021, 0x3031, 0x3038,
-  0x3031, 0x3038, 0x3041, 0x309d, 0x30a1, 0x30fc, 0x3105, 0x3131, 0x31a0, 0x31f0,
+  0x3041, 0x309b, 0x30a1, 0x30fc, 0x3105, 0x3131, 0x31a0, 0x31f0, 0x3400, 0x3500,
-  0x3400, 0x3500, 0x3600, 0x3700, 0x3800, 0x3900, 0x3a00, 0x3b00, 0x3c00, 0x3d00,
+  0x3600, 0x3700, 0x3800, 0x3900, 0x3a00, 0x3b00, 0x3c00, 0x3d00, 0x3e00, 0x3f00,
-  0x3e00, 0x3f00, 0x4000, 0x4100, 0x4200, 0x4300, 0x4400, 0x4500, 0x4600, 0x4700,
+  0x4000, 0x4100, 0x4200, 0x4300, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900,
-  0x4800, 0x4900, 0x4a00, 0x4b00, 0x4c00, 0x4d00, 0x4e00, 0x4f00, 0x5000, 0x5100,
+  0x4a00, 0x4b00, 0x4c00, 0x4d00, 0x4e00, 0x4f00, 0x5000, 0x5100, 0x5200, 0x5300,
-  0x5200, 0x5300, 0x5400, 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00,
+  0x5400, 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
-  0x5c00, 0x5d00, 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500,
+  0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600, 0x6700,
-  0x6600, 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
+  0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00, 0x7000, 0x7100,
-  0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, 0x7900,
+  0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, 0x7900, 0x7a00, 0x7b00,
-  0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00, 0x8000, 0x8100, 0x8200, 0x8300,
+  0x7c00, 0x7d00, 0x7e00, 0x7f00, 0x8000, 0x8100, 0x8200, 0x8300, 0x8400, 0x8500,
-  0x8400, 0x8500, 0x8600, 0x8700, 0x8800, 0x8900, 0x8a00, 0x8b00, 0x8c00, 0x8d00,
+  0x8600, 0x8700, 0x8800, 0x8900, 0x8a00, 0x8b00, 0x8c00, 0x8d00, 0x8e00, 0x8f00,
-  0x8e00, 0x8f00, 0x9000, 0x9100, 0x9200, 0x9300, 0x9400, 0x9500, 0x9600, 0x9700,
+  0x9000, 0x9100, 0x9200, 0x9300, 0x9400, 0x9500, 0x9600, 0x9700, 0x9800, 0x9900,
-  0x9800, 0x9900, 0x9a00, 0x9b00, 0x9c00, 0x9d00, 0x9e00, 0x9f00, 0xa000, 0xa100,
+  0x9a00, 0x9b00, 0x9c00, 0x9d00, 0x9e00, 0x9f00, 0xa000, 0xa100, 0xa200, 0xa300,
-  0xa200, 0xa300, 0xa400, 0xa4d0, 0xa500, 0xa600, 0xa610, 0xa62a, 0xa640, 0xa67f,
+  0xa400, 0xa4d0, 0xa500, 0xa600, 0xa610, 0xa62a, 0xa640, 0xa67f, 0xa6a0, 0xa717,
-  0xa6a0, 0xa717, 0xa722, 0xa78b, 0xa7c2, 0xa7f5, 0xa803, 0xa807, 0xa80c, 0xa840,
+  0xa722, 0xa78b, 0xa7c2, 0xa7f5, 0xa803, 0xa807, 0xa80c, 0xa840, 0xa882, 0xa8f2,
-  0xa882, 0xa8f2, 0xa8fd, 0xa90a, 0xa930, 0xa960, 0xa984, 0xa9e0, 0xa9e6, 0xa9fa,
+  0xa8fd, 0xa90a, 0xa930, 0xa960, 0xa984, 0xa9e0, 0xa9e6, 0xa9fa, 0xaa00, 0xaa40,
-  0xaa00, 0xaa40, 0xaa44, 0xaa60, 0xaa7e, 0xaab5, 0xaab9, 0xaadb, 0xaae0, 0xaaf2,
+  0xaa44, 0xaa60, 0xaa7e, 0xaab5, 0xaab9, 0xaadb, 0xaae0, 0xaaf2, 0xab01, 0xab09,
-  0xab01, 0xab09, 0xab11, 0xab20, 0xab28, 0xab30, 0xab5c, 0xab70, 0xac00, 0xad00,
+  0xab11, 0xab20, 0xab28, 0xab30, 0xab5c, 0xab70, 0xac00, 0xad00, 0xae00, 0xaf00,
-  0xae00, 0xaf00, 0xb000, 0xb100, 0xb200, 0xb300, 0xb400, 0xb500, 0xb600, 0xb700,
+  0xb000, 0xb100, 0xb200, 0xb300, 0xb400, 0xb500, 0xb600, 0xb700, 0xb800, 0xb900,
-  0xb800, 0xb900, 0xba00, 0xbb00, 0xbc00, 0xbd00, 0xbe00, 0xbf00, 0xc000, 0xc100,
+  0xba00, 0xbb00, 0xbc00, 0xbd00, 0xbe00, 0xbf00, 0xc000, 0xc100, 0xc200, 0xc300,
-  0xc200, 0xc300, 0xc400, 0xc500, 0xc600, 0xc700, 0xc800, 0xc900, 0xca00, 0xcb00,
+  0xc400, 0xc500, 0xc600, 0xc700, 0xc800, 0xc900, 0xca00, 0xcb00, 0xcc00, 0xcd00,
-  0xcc00, 0xcd00, 0xce00, 0xcf00, 0xd000, 0xd100, 0xd200, 0xd300, 0xd400, 0xd500,
+  0xce00, 0xcf00, 0xd000, 0xd100, 0xd200, 0xd300, 0xd400, 0xd500, 0xd600, 0xd700,
-  0xd600, 0xd700, 0xd7b0, 0xd7cb, 0xf900, 0xfa00, 0xfa70, 0xfb00, 0xfb13, 0xfb1f,
+  0xd7b0, 0xd7cb, 0xf900, 0xfa00, 0xfa70, 0xfb00, 0xfb13, 0xfb1f, 0xfb2a, 0xfb38,
-  0xfb2a, 0xfb38, 0xfb40, 0xfb43, 0xfb46, 0xfbd3, 0xfcd3, 0xfd50, 0xfd92, 0xfdf0,
+  0xfb40, 0xfb43, 0xfb46, 0xfbd3, 0xfcd3, 0xfd50, 0xfd92, 0xfdf0, 0xfe70, 0xfe76,
-  0xfe70, 0xfe76, 0xff21, 0xff41, 0xff66, 0xffc2, 0xffca, 0xffd2, 0xffda
+  0xff21, 0xff41, 0xff66, 0xffc2, 0xffca, 0xffd2, 0xffda
 };
 /**
- * Character lengths for the unicode letters.
+ * Character interval lengths for ID_Start.
 *
 * The characters covered by these intervals are from
 * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl
 */
-static const uint8_t lit_unicode_letter_interval_lengths[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_id_start_interval_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x0016, 0x001e, 0x00ff, 0x00c9, 0x000b, 0x0004, 0x0004, 0x0001, 0x0003, 0x0002,
  0x0013, 0x0052, 0x008a, 0x00a5, 0x0025, 0x0028, 0x001a, 0x0003, 0x002a, 0x0001,
@@ -95,17 +89,17 @@ static const uint8_t lit_unicode_letter_interval_lengths[] JERRY_ATTR_CONST_DATA
  0x00ff, 0x004c, 0x0003, 0x0006, 0x0003, 0x0028, 0x0003, 0x0020, 0x0003, 0x0006,
  0x0003, 0x000e, 0x0038, 0x0003, 0x0042, 0x000f, 0x0055, 0x0005, 0x00ff, 0x00ff,
  0x006b, 0x0010, 0x0019, 0x004a, 0x000a, 0x000c, 0x0003, 0x0011, 0x0011, 0x000c,
-  0x0002, 0x0033, 0x0058, 0x0004, 0x0021, 0x0045, 0x001e, 0x001d, 0x0004, 0x002b,
+  0x0002, 0x0033, 0x0058, 0x0028, 0x0045, 0x001e, 0x001d, 0x0004, 0x002b, 0x0019,
-  0x0019, 0x0016, 0x0034, 0x002e, 0x0006, 0x001d, 0x0001, 0x002b, 0x0023, 0x0002,
+  0x0016, 0x0034, 0x002e, 0x0006, 0x001d, 0x0001, 0x002b, 0x0023, 0x0002, 0x0023,
-  0x0023, 0x0008, 0x002a, 0x0002, 0x0003, 0x0005, 0x0001, 0x00bf, 0x00ff, 0x0015,
+  0x0008, 0x002a, 0x0002, 0x0003, 0x0005, 0x0001, 0x00bf, 0x00ff, 0x0015, 0x0005,
-  0x0005, 0x0025, 0x0005, 0x0007, 0x001e, 0x0034, 0x0006, 0x0002, 0x0006, 0x0003,
+  0x0025, 0x0005, 0x0007, 0x001e, 0x0034, 0x0006, 0x0002, 0x0006, 0x0003, 0x0005,
-  0x0005, 0x000c, 0x0002, 0x0006, 0x000c, 0x0009, 0x0004, 0x0003, 0x000a, 0x0003,
+  0x000c, 0x0002, 0x0006, 0x000c, 0x0009, 0x0005, 0x000f, 0x0003, 0x0004, 0x0028,
-  0x0004, 0x0028, 0x002e, 0x002e, 0x0084, 0x0003, 0x0001, 0x0025, 0x0037, 0x0016,
+  0x002e, 0x002e, 0x0084, 0x0003, 0x0001, 0x0025, 0x0037, 0x0016, 0x0006, 0x0006,
-  0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0002, 0x0008,
+  0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0002, 0x0008, 0x0004, 0x0004,
-  0x0004, 0x0004, 0x0055, 0x0002, 0x0059, 0x0003, 0x002a, 0x005d, 0x001f, 0x000f,
+  0x0055, 0x0004, 0x0059, 0x0003, 0x002a, 0x005d, 0x001f, 0x000f, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
-  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00bf, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
+  0x00ff, 0x00ff, 0x00ff, 0x00bf, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
@@ -113,29 +107,25 @@ static const uint8_t lit_unicode_letter_interval_lengths[] JERRY_ATTR_CONST_DATA
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
-  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00fc, 0x00ff, 0x00ff,
+  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00fc, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
-  0x00ff, 0x00ff, 0x008c, 0x002d, 0x00ff, 0x000c, 0x000f, 0x0001, 0x002e, 0x001e,
+  0x008c, 0x002d, 0x00ff, 0x000c, 0x000f, 0x0001, 0x002e, 0x001e, 0x004f, 0x0008,
-  0x004f, 0x0008, 0x0066, 0x0034, 0x0008, 0x000c, 0x0002, 0x0003, 0x0016, 0x0033,
+  0x0066, 0x0034, 0x0008, 0x000c, 0x0002, 0x0003, 0x0016, 0x0033, 0x0031, 0x0005,
-  0x0031, 0x0005, 0x0001, 0x001b, 0x0016, 0x001c, 0x002e, 0x0004, 0x0009, 0x0004,
+  0x0001, 0x001b, 0x0016, 0x001c, 0x002e, 0x0004, 0x0009, 0x0004, 0x0028, 0x0002,
-  0x0028, 0x0002, 0x0007, 0x0016, 0x0031, 0x0001, 0x0004, 0x0002, 0x000a, 0x0002,
+  0x0007, 0x0016, 0x0031, 0x0001, 0x0004, 0x0002, 0x000a, 0x0002, 0x0005, 0x0005,
-  0x0005, 0x0005, 0x0005, 0x0006, 0x0006, 0x002a, 0x000d, 0x0072, 0x00ff, 0x00ff,
+  0x0005, 0x0006, 0x0006, 0x002a, 0x000d, 0x0072, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
-  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
+  0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00a3,
-  0x00ff, 0x00a3, 0x0016, 0x0030, 0x00ff, 0x006d, 0x0069, 0x0006, 0x0004, 0x0009,
+  0x0016, 0x0030, 0x00ff, 0x006d, 0x0069, 0x0006, 0x0004, 0x0009, 0x000c, 0x0004,
-  0x000c, 0x0004, 0x0001, 0x0001, 0x006b, 0x00ff, 0x006a, 0x003f, 0x0035, 0x000b,
+  0x0001, 0x0001, 0x006b, 0x00ff, 0x006a, 0x003f, 0x0035, 0x000b, 0x0004, 0x0086,
-  0x0004, 0x0086, 0x0019, 0x0019, 0x0058, 0x0005, 0x0005, 0x0005, 0x0002
+  0x0019, 0x0019, 0x0058, 0x0005, 0x0005, 0x0005, 0x0002
 };
 /**
- * Those unicode letter characters that are not inside any of
+ * Non-interval characters for ID_Start.
 * the intervals specified in lit_unicode_letter_interval_sps array.
 *
 * The characters are from the following Unicode categories:
 * Lu, Ll, Lt, Lm, Lo, Nl
 */
-static const uint16_t lit_unicode_letter_chars[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_id_start_chars[] JERRY_ATTR_CONST_DATA =
 {
  0x00aa, 0x00b5, 0x00ba, 0x02ec, 0x02ee, 0x037f, 0x0386, 0x038c, 0x0559, 0x06d5,
  0x06ff, 0x0710, 0x07b1, 0x07fa, 0x081a, 0x0824, 0x0828, 0x093d, 0x0950, 0x09b2,
@@ -144,18 +134,13 @@ static const uint16_t lit_unicode_letter_chars[] JERRY_ATTR_CONST_DATA =
  0x0ea5, 0x0ebd, 0x0ec6, 0x0f00, 0x103f, 0x1061, 0x108e, 0x10c7, 0x10cd, 0x1258,
  0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7, 0x1cfa, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe,
  0x2071, 0x207f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x214e, 0x2d27,
-  0x2d2d, 0x2d6f, 0x2e2f, 0x3400, 0x4e00, 0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaac0,
+  0x2d2d, 0x2d6f, 0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaac0, 0xaac2, 0xfb1d, 0xfb3e
  0xaac2, 0xac00, 0xfb1d, 0xfb3e
 };
 /**
- * Character interval starting points for non-letter character
+ * Character interval starting points for ID_Continue.
 * that can be used as a non-first character of an identifier.
 *
 * The characters covered by these intervals are from
 * the following Unicode categories: Nd, Mn, Mc, Pc
 */
-static const uint16_t lit_unicode_non_letter_ident_part_interval_sps[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_id_continue_interval_starts[] JERRY_ATTR_CONST_DATA =
 {
  0x0300, 0x0483, 0x0591, 0x05c1, 0x05c4, 0x0610, 0x064b, 0x06d6, 0x06df, 0x06e7,
  0x06ea, 0x06f0, 0x0730, 0x07a6, 0x07c0, 0x07eb, 0x0816, 0x081b, 0x0825, 0x0829,
@@ -167,8 +152,8 @@ static const uint16_t lit_unicode_non_letter_ident_part_interval_sps[] JERRY_ATT
  0x0ce6, 0x0d00, 0x0d3b, 0x0d3e, 0x0d46, 0x0d4a, 0x0d62, 0x0d66, 0x0d81, 0x0dcf,
  0x0dd8, 0x0de6, 0x0df2, 0x0e34, 0x0e47, 0x0e50, 0x0eb4, 0x0ec8, 0x0ed0, 0x0f18,
  0x0f20, 0x0f3e, 0x0f71, 0x0f86, 0x0f8d, 0x0f99, 0x102b, 0x1040, 0x1056, 0x105e,
-  0x1062, 0x1067, 0x1071, 0x1082, 0x108f, 0x135d, 0x1712, 0x1732, 0x1752, 0x1772,
+  0x1062, 0x1067, 0x1071, 0x1082, 0x108f, 0x135d, 0x1369, 0x1712, 0x1732, 0x1752,
-  0x17b4, 0x17e0, 0x180b, 0x1810, 0x1885, 0x1920, 0x1930, 0x1946, 0x19d0, 0x1a17,
+  0x1772, 0x17b4, 0x17e0, 0x180b, 0x1810, 0x1920, 0x1930, 0x1946, 0x19d0, 0x1a17,
  0x1a55, 0x1a60, 0x1a7f, 0x1a90, 0x1ab0, 0x1abf, 0x1b00, 0x1b34, 0x1b50, 0x1b6b,
  0x1b80, 0x1ba1, 0x1bb0, 0x1be6, 0x1c24, 0x1c40, 0x1c50, 0x1cd0, 0x1cd4, 0x1cf7,
  0x1dc0, 0x1dfb, 0x200c, 0x203f, 0x20d0, 0x20e5, 0x2cef, 0x2de0, 0x302a, 0x3099,
@@ -179,13 +164,9 @@ static const uint16_t lit_unicode_non_letter_ident_part_interval_sps[] JERRY_ATT
 };
 /**
- * Character interval lengths for non-letter character
+ * Character interval lengths for ID_Continue.
 * that can be used as a non-first character of an identifier.
 *
 * The characters covered by these intervals are from
 * the following Unicode categories: Nd, Mn, Mc, Pc
 */
-static const uint8_t lit_unicode_non_letter_ident_part_interval_lengths[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_id_continue_interval_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x006f, 0x0004, 0x002c, 0x0001, 0x0001, 0x000a, 0x001e, 0x0006, 0x0005, 0x0001,
  0x0003, 0x0009, 0x001a, 0x000a, 0x0009, 0x0008, 0x0003, 0x0008, 0x0002, 0x0004,
@@ -197,8 +178,8 @@ static const uint8_t lit_unicode_non_letter_ident_part_interval_lengths[] JERRY_
  0x0009, 0x0003, 0x0001, 0x0006, 0x0002, 0x0003, 0x0001, 0x0009, 0x0002, 0x0005,
  0x0007, 0x0009, 0x0001, 0x0006, 0x0007, 0x0009, 0x0008, 0x0005, 0x0009, 0x0001,
  0x0009, 0x0001, 0x0013, 0x0001, 0x000a, 0x0023, 0x0013, 0x0009, 0x0003, 0x0002,
-  0x0002, 0x0006, 0x0003, 0x000b, 0x000e, 0x0002, 0x0002, 0x0002, 0x0001, 0x0001,
+  0x0002, 0x0006, 0x0003, 0x000b, 0x000e, 0x0002, 0x0008, 0x0002, 0x0002, 0x0001,
-  0x001f, 0x0009, 0x0002, 0x0009, 0x0001, 0x000b, 0x000b, 0x0009, 0x0009, 0x0004,
+  0x0001, 0x001f, 0x0009, 0x0002, 0x0009, 0x000b, 0x000b, 0x0009, 0x000a, 0x0004,
  0x0009, 0x001c, 0x000a, 0x0009, 0x000d, 0x0001, 0x0004, 0x0010, 0x0009, 0x0008,
  0x0002, 0x000c, 0x0009, 0x000d, 0x0013, 0x0009, 0x0009, 0x0002, 0x0014, 0x0002,
  0x0039, 0x0004, 0x0001, 0x0001, 0x000c, 0x000b, 0x0002, 0x001f, 0x0005, 0x0001,
@@ -209,45 +190,65 @@ static const uint8_t lit_unicode_non_letter_ident_part_interval_lengths[] JERRY_
 };
 /**
- * Those non-letter characters that can be used as a non-first
+ * Non-interval characters for ID_Continue.
 * character of an identifier and not included in any of the intervals
 * specified in lit_unicode_non_letter_ident_part_interval_sps array.
 *
 * The characters are from the following Unicode categories:
 * Nd, Mn, Mc, Pc
 */
-static const uint16_t lit_unicode_non_letter_ident_part_chars[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_id_continue_chars[] JERRY_ATTR_CONST_DATA =
 {
-  0x05bf, 0x05c7, 0x0670, 0x0711, 0x07fd, 0x09bc, 0x09d7, 0x09fe, 0x0a3c, 0x0a51,
+  0x00b7, 0x0387, 0x05bf, 0x05c7, 0x0670, 0x0711, 0x07fd, 0x09bc, 0x09d7, 0x09fe,
-  0x0a75, 0x0abc, 0x0b3c, 0x0b82, 0x0bd7, 0x0cbc, 0x0d57, 0x0dca, 0x0dd6, 0x0e31,
+  0x0a3c, 0x0a51, 0x0a75, 0x0abc, 0x0b3c, 0x0b82, 0x0bd7, 0x0cbc, 0x0d57, 0x0dca,
-  0x0eb1, 0x0f35, 0x0f37, 0x0f39, 0x0fc6, 0x17dd, 0x18a9, 0x1ced, 0x1cf4, 0x2054,
+  0x0dd6, 0x0e31, 0x0eb1, 0x0f35, 0x0f37, 0x0f39, 0x0fc6, 0x17dd, 0x18a9, 0x1ced,
-  0x20e1, 0x2d7f, 0xa66f, 0xa802, 0xa806, 0xa80b, 0xa82c, 0xa9e5, 0xaa43, 0xaab0,
+  0x1cf4, 0x2054, 0x20e1, 0x2d7f, 0xa66f, 0xa802, 0xa806, 0xa80b, 0xa82c, 0xa9e5,
-  0xaac1, 0xfb1e, 0xff3f
+  0xaa43, 0xaab0, 0xaac1, 0xfb1e, 0xff3f
 };
 #if ENABLED (JERRY_ESNEXT)
 /**
- * Unicode separator character interval starting points from Unicode category: Zs
+ * Character interval starting points for White_Space.
 */
-static const uint16_t lit_unicode_separator_char_interval_sps[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_white_space_interval_starts[] JERRY_ATTR_CONST_DATA =
 {
  0x2000
 };
 /**
- * Unicode separator character interval lengths from Unicode category: Zs
+ * Character interval lengths for White_Space.
 */
-static const uint8_t lit_unicode_separator_char_interval_lengths[] JERRY_ATTR_CONST_DATA =
+static const uint8_t lit_unicode_white_space_interval_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x000a
 };
 /**
 * Non-interval characters for White_Space.
 *
 * Variant used when JERRY_ESNEXT is enabled. It lists the White_Space code
 * units that stand alone, i.e. that are not covered by an interval of
 * lit_unicode_white_space_interval_starts. Compared with the non-ESNEXT
 * table of the same name, 0x00a0 is present and 0x180e is absent.
 */
 static const uint16_t lit_unicode_white_space_chars[] JERRY_ATTR_CONST_DATA =
 {
  0x00a0, 0x1680, 0x202f, 0x205f, 0x3000
 };
 #else /* !ENABLED (JERRY_ESNEXT) */
 /**
 * Character interval starting points for White_Space.
 *
 * Variant used when JERRY_ESNEXT is disabled. Each entry is the first code
 * unit of an interval; the length of that interval is stored at the same
 * index in lit_unicode_white_space_interval_lengths.
 */
 static const uint16_t lit_unicode_white_space_interval_starts[] JERRY_ATTR_CONST_DATA =
 {
  0x2000
 };
 /**
 * Character interval lengths for White_Space.
 *
 * Variant used when JERRY_ESNEXT is disabled. A length is the distance between
 * the first and the last code unit of an interval, so the single entry 0x000b
 * paired with the start 0x2000 covers 0x2000..0x200b inclusive.
 */
 static const uint8_t lit_unicode_white_space_interval_lengths[] JERRY_ATTR_CONST_DATA =
 {
  0x000b
 };
 /**
- * Unicode separator characters that are not in the
+ * Non-interval characters for White_Space.
 * lit_unicode_separator_char_intervals array.
 *
 * Unicode category: Zs
 */
-static const uint16_t lit_unicode_separator_chars[] JERRY_ATTR_CONST_DATA =
+static const uint16_t lit_unicode_white_space_chars[] JERRY_ATTR_CONST_DATA =
 {
  0x1680, 0x180e, 0x202f, 0x205f, 0x3000
 };
 #endif /* ENABLED (JERRY_ESNEXT) */
@@ -0,0 +1,66 @@
 // Copyright JS Foundation and other contributors, http://js.foundation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 // First and last code point of the Unicode supplementary planes.
 // NOTE(review): neither variable is read by any statement of this test - confirm they can be dropped.
 let start = 0x10000
 let end = 0x10FFFF
 // Supplementary-plane code points that toLowerCase() is expected to change:
 // the loop over this list asserts that the converted string differs from the input.
 const lower_expected = [66560, 66561, 66562, 66563, 66564, 66565, 66566, 66567, 66568, 66569, 66570, 66571, 66572,
                        66573, 66574, 66575, 66576, 66577, 66578, 66579, 66580, 66581, 66582, 66583, 66584, 66585,
                        66586, 66587, 66588, 66589, 66590, 66591, 66592, 66593, 66594, 66595, 66596, 66597, 66598,
                        66599, 66736, 66737, 66738, 66739, 66740, 66741, 66742, 66743, 66744, 66745, 66746, 66747,
                        66748, 66749, 66750, 66751, 66752, 66753, 66754, 66755, 66756, 66757, 66758, 66759, 66760,
                        66761, 66762, 66763, 66764, 66765, 66766, 66767, 66768, 66769, 66770, 66771, 68736, 68737,
                        68738, 68739, 68740, 68741, 68742, 68743, 68744, 68745, 68746, 68747, 68748, 68749, 68750,
                        68751, 68752, 68753, 68754, 68755, 68756, 68757, 68758, 68759, 68760, 68761, 68762, 68763,
                        68764, 68765, 68766, 68767, 68768, 68769, 68770, 68771, 68772, 68773, 68774, 68775, 68776,
                        68777, 68778, 68779, 68780, 68781, 68782, 68783, 68784, 68785, 68786, 71840, 71841, 71842,
                        71843, 71844, 71845, 71846, 71847, 71848, 71849, 71850, 71851, 71852, 71853, 71854, 71855,
                        71856, 71857, 71858, 71859, 71860, 71861, 71862, 71863, 71864, 71865, 71866, 71867, 71868,
                        71869, 71870, 71871, 93760, 93761, 93762, 93763, 93764, 93765, 93766, 93767, 93768, 93769,
                        93770, 93771, 93772, 93773, 93774, 93775, 93776, 93777, 93778, 93779, 93780, 93781, 93782,
                        93783, 93784, 93785, 93786, 93787, 93788, 93789, 93790, 93791, 125184, 125185, 125186, 125187,
                        125188, 125189, 125190, 125191, 125192, 125193, 125194, 125195, 125196, 125197, 125198, 125199,
                        125200, 125201, 125202, 125203, 125204, 125205, 125206, 125207, 125208, 125209, 125210, 125211,
                        125212, 125213, 125214, 125215, 125216, 125217];
 // Supplementary-plane code points that toUpperCase() is expected to change:
 // the loop over this list asserts that the converted string differs from the input.
 const upper_expected = [66600, 66601, 66602, 66603, 66604, 66605, 66606, 66607, 66608, 66609, 66610, 66611, 66612,
                        66613, 66614, 66615, 66616, 66617, 66618, 66619, 66620, 66621, 66622, 66623, 66624, 66625,
                        66626, 66627, 66628, 66629, 66630, 66631, 66632, 66633, 66634, 66635, 66636, 66637, 66638,
                        66639, 66776, 66777, 66778, 66779, 66780, 66781, 66782, 66783, 66784, 66785, 66786, 66787,
                        66788, 66789, 66790, 66791, 66792, 66793, 66794, 66795, 66796, 66797, 66798, 66799, 66800,
                        66801, 66802, 66803, 66804, 66805, 66806, 66807, 66808, 66809, 66810, 66811, 68800, 68801,
                        68802, 68803, 68804, 68805, 68806, 68807, 68808, 68809, 68810, 68811, 68812, 68813, 68814,
                        68815, 68816, 68817, 68818, 68819, 68820, 68821, 68822, 68823, 68824, 68825, 68826, 68827,
                        68828, 68829, 68830, 68831, 68832, 68833, 68834, 68835, 68836, 68837, 68838, 68839, 68840,
                        68841, 68842, 68843, 68844, 68845, 68846, 68847, 68848, 68849, 68850, 71872, 71873, 71874,
                        71875, 71876, 71877, 71878, 71879, 71880, 71881, 71882, 71883, 71884, 71885, 71886, 71887,
                        71888, 71889, 71890, 71891, 71892, 71893, 71894, 71895, 71896, 71897, 71898, 71899, 71900,
                        71901, 71902, 71903, 93792, 93793, 93794, 93795, 93796, 93797, 93798, 93799, 93800, 93801,
                        93802, 93803, 93804, 93805, 93806, 93807, 93808, 93809, 93810, 93811, 93812, 93813, 93814,
                        93815, 93816, 93817, 93818, 93819, 93820, 93821, 93822, 93823, 125218, 125219, 125220, 125221,
                        125222, 125223, 125224, 125225, 125226, 125227, 125228, 125229, 125230, 125231, 125232, 125233,
                        125234, 125235, 125236, 125237, 125238, 125239, 125240, 125241, 125242, 125243, 125244, 125245,
                        125246, 125247, 125248, 125249, 125250, 125251];
 // Each listed code point must be altered by the matching case conversion.
 lower_expected.forEach(function (codePoint) {
   const ch = String.fromCodePoint(codePoint);
   assert(ch.toLowerCase() !== ch);
 });
 upper_expected.forEach(function (codePoint) {
   const ch = String.fromCodePoint(codePoint);
   assert(ch.toUpperCase() !== ch);
 });
 // A lone high surrogate is kept unchanged while the ASCII letter following it is still lowercased.
 assert("\ud801A".toLowerCase() === "\ud801a");
@@ -0,0 +1,32 @@
 // Copyright JS Foundation and other contributors, http://js.foundation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 // A supplementary-plane code point spelled as a \u{...} escape must be accepted as a variable name.
 var \u{102C0} = 2;
 assert(\u{102C0} === 2);
 // The escaped identifier used as a property name must address the same property as the
 // string '\ud800\udec0', which is the surrogate pair encoding of U+102C0.
 var o1 = { \u{102C0} : 3 };
 assert(o1['\ud800\udec0'] === 3);
 var o2 = { '\ud800\udec0' : 4 };
 assert(o2.\u{102C0} === 4);
 // The character below must be rejected as an identifier start, so parsing has to throw.
 // NOTE(review): presumably U+2E2F - confirm.
 try {
  eval('var ⸯ');
  // Reached only when eval did not throw; the catch block then fails the instanceof check.
  assert(false);
 } catch(e) {
  assert(e instanceof SyntaxError);
 }
 // A supplementary-plane character written literally (unescaped) must be accepted as well.
 // NOTE(review): presumably the same code point as \u{102C0} above - confirm.
 var 𐋀 = 5;
 assert(𐋀 === 5);
@@ -0,0 +1,20 @@
 // Copyright JS Foundation and other contributors, http://js.foundation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 // Code points 0x10400 and 0x10428 form an upper/lowercase pair, but this test
 // expects them to be left unconverted. Each string is laid out as: a stray high
 // surrogate, a valid surrogate pair (0x10400 or 0x10428), a stray low surrogate.
 // The stray surrogates must be left untouched as well.
 // NOTE(review): presumably only valid for builds without supplementary-plane
 // case conversion - confirm.
 assert ("\ud801\ud801\udc00\udc00".toLowerCase() == "\ud801\ud801\udc00\udc00");
 assert ("\ud801\ud801\udc28\udc28".toUpperCase() == "\ud801\ud801\udc28\udc28");
@@ -84,13 +84,6 @@ assert ("0123456789abcdefghijklmnopqrstuvwxzyABCDEFGHIJKLMNOPQRSTUVWXYZ".toLower
 assert ("0123456789abcdefghijklmnopqrstuvwxzyABCDEFGHIJKLMNOPQRSTUVWXYZ".toUpperCase()
        == "0123456789ABCDEFGHIJKLMNOPQRSTUVWXZYABCDEFGHIJKLMNOPQRSTUVWXYZ");
 // Although codepoint 0x10400 and 0x10428 are an upper-lowercase pair,
 // we must not do their conversion in JavaScript. We must also ignore
 // stray surrogates.
 assert ("\ud801\ud801\udc00\udc00".toLowerCase() == "\ud801\ud801\udc00\udc00");
 assert ("\ud801\ud801\udc28\udc28".toUpperCase() == "\ud801\ud801\udc28\udc28");
 // Conversion of non-string objects.
 assert (String.prototype.toUpperCase.call(true) == "TRUE");
@@ -123,11 +123,7 @@
  <test id="built-ins/String/prototype/normalize/return-normalized-string.js"><reason></reason></test>
  <test id="built-ins/String/prototype/normalize/return-normalized-string-using-default-parameter.js"><reason></reason></test>
  <test id="built-ins/String/prototype/toLocaleLowerCase/special_casing_conditional.js"><reason></reason></test>
  <test id="built-ins/String/prototype/toLocaleLowerCase/supplementary_plane.js"><reason></reason></test>
  <test id="built-ins/String/prototype/toLocaleUpperCase/supplementary_plane.js"><reason></reason></test>
  <test id="built-ins/String/prototype/toLowerCase/special_casing_conditional.js"><reason></reason></test>
  <test id="built-ins/String/prototype/toLowerCase/supplementary_plane.js"><reason></reason></test>
  <test id="built-ins/String/prototype/toUpperCase/supplementary_plane.js"><reason></reason></test>
  <test id="intl402/6.2.2_a.js"><reason></reason></test>
  <test id="intl402/6.2.2_b.js"><reason></reason></test>
  <test id="intl402/6.2.2_c.js"><reason></reason></test>
@@ -377,4 +373,16 @@
  <test id="language/statements/generators/prototype-value.js"><reason></reason></test>
  <test id="language/statements/let/syntax/identifier-let-disallowed-as-boundname.js"><reason></reason></test>
  <test id="language/statements/try/S12.14_A16_T4.js"><reason>ES2019 change: catch without parameter is allowed</reason></test>
  <test id="built-ins/Number/S9.3.1_A2.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
  <test id="built-ins/Number/S9.3.1_A3_T1.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
  <test id="built-ins/Number/S9.3.1_A3_T2.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
  <test id="built-ins/RegExp/S15.10.2.12_A1_T1.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
  <test id="built-ins/RegExp/S15.10.2.12_A2_T1.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
  <test id="built-ins/String/prototype/trim/15.5.4.20-3-2.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
  <test id="built-ins/String/prototype/trim/15.5.4.20-3-3.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
  <test id="built-ins/String/prototype/trim/15.5.4.20-3-4.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
  <test id="built-ins/String/prototype/trim/15.5.4.20-3-5.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
  <test id="built-ins/String/prototype/trim/15.5.4.20-3-6.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
  <test id="built-ins/parseFloat/S15.1.2.3_A2_T10.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
  <test id="built-ins/parseInt/S15.1.2.2_A2_T10.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
 </excludeList>
@@ -17,10 +17,10 @@
 from __future__ import print_function
 import argparse
 import bisect
 import csv
 import itertools
 import os
 import re
 import warnings
 from gen_c_source import LICENSE, format_code
@@ -28,268 +28,286 @@ from settings import PROJECT_DIR
 RANGES_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-ranges.inc.h')
 RANGES_SUP_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-ranges-sup.inc.h')
 CONVERSIONS_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-conversions.inc.h')
 CONVERSIONS_SUP_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-conversions-sup.inc.h')
 UNICODE_PLANE_TYPE_BASIC = 0
 UNICODE_PLANE_TYPE_SUPPLEMENTARY = 1
 # Fixed White_Space subset used for the ES5.1 profile:
 # 0x1680, 0x180e, the contiguous run 0x2000..0x200b, then 0x202f, 0x205f and 0x3000.
 ES5_1_WHITE_SPACE_UNITS = [0x1680, 0x180e] + list(range(0x2000, 0x200c)) + [0x202f, 0x205f, 0x3000]
 # common code generation
 class UnicodeBasicSource(object):
    # pylint: disable=too-many-instance-attributes
    def __init__(self, filepath, character_type="uint16_t", length_type="uint8_t"):
        self._filepath = filepath
        self._header = [LICENSE, ""]
        self._data = []
        self._table_name_suffix = ""
        self.character_type = character_type
        self.length_type = length_type
-class UniCodeSource(object):
+        self._range_table_types = [self.character_type,
-    def __init__(self, filepath):
+                                   self.length_type,
-        self.__filepath = filepath
+                                   self.character_type]
-        self.__header = [LICENSE, ""]
+        self._range_table_names = ["interval_starts",
-        self.__data = []
+                                   "interval_lengths",
                                   "chars"]
        self._range_table_descriptions = ["Character interval starting points for",
                                          "Character interval lengths for",
                                          "Non-interval characters for"]
        self._conversion_range_types = [self.character_type,
                                        self.length_type]
        self._conversion_range_names = ["ranges",
                                        "range_lengths"]
    def complete_header(self, completion):
-        self.__header.append(completion)
+        self._header.append(completion)
-        self.__header.append("")  # for an extra empty line
+        self._header.append("")  # for an extra empty line
-    def add_table(self, table, table_name, table_type, table_descr):
+    def add_whitepace_range(self, category, categorizer, units):
-        self.__data.append(table_descr)
+        self._data.append("#if ENABLED (JERRY_ESNEXT)")
-        self.__data.append("static const %s lit_%s[] JERRY_ATTR_CONST_DATA =" % (table_type, table_name))
+        self.add_range(category, categorizer.create_tables(units))
-        self.__data.append("{")
+        self._data.append("#else /* !ENABLED (JERRY_ESNEXT) */")
-        self.__data.append(format_code(table, 1))
+        self.add_range(category, categorizer.create_tables(ES5_1_WHITE_SPACE_UNITS))
-        self.__data.append("};")
+        self._data.append("#endif /* ENABLED (JERRY_ESNEXT) */\n")
-        self.__data.append("")  # for an extra empty line
+
    def add_range(self, category, tables):
        """
        Emit one table per entry of `tables` for the given category.

        The table at position N is combined with the N-th element of
        self._range_table_descriptions, self._range_table_types and
        self._range_table_names, and the result is handed to add_table.

        :param category: category name, used in the generated comment and passed on to add_table.
        :param tables: iterable of tables, ordered like the per-position lists named above.
        """
        for position, table in enumerate(tables):
            doc_comment = "/**\n * %s %s.\n */" % (self._range_table_descriptions[position], category)
            self.add_table(table,
                           doc_comment,
                           self._range_table_types[position],
                           category,
                           self._range_table_names[position])
    def add_conversion_range(self, category, tables, descriptions):
        """
        Emit conversion tables under the default table names.

        Delegates to add_named_conversion_range, supplying
        self._conversion_range_names as the table names.
        """
        default_names = self._conversion_range_names
        self.add_named_conversion_range(category, tables, default_names, descriptions)
    def add_named_conversion_range(self, category, tables, table_names, descriptions):
        """
        Emit one conversion table per entry of `tables`.

        The table at position N is combined with descriptions[N],
        self._conversion_range_types[N] and table_names[N], and the result
        is handed to add_table.

        :param category: category name passed on to add_table.
        :param tables: iterable of tables to emit.
        :param table_names: per-position table names.
        :param descriptions: per-position description comments.
        """
        for position, table in enumerate(tables):
            self.add_table(table,
                           descriptions[position],
                           self._conversion_range_types[position],
                           category,
                           table_names[position])
    def add_table(self, table, description, table_type, category, table_name):
        """
        Append the C source of one constant table to self._data.

        Nothing is emitted when the table is empty or when all of its
        elements sum to zero.

        :param table: list of numbers forming the table body.
        :param description: comment text emitted right before the definition.
        :param table_type: C element type of the array.
        :param category: category name; lowercased to form part of the array name.
        :param table_name: optional name part appended after an underscore; may be empty.
        """
        if not table or sum(table) == 0:
            return

        name_part = "_" + table_name if table_name else ""
        # NOTE(review): the third format_code argument is presumably the printed
        # digit width of each value - confirm against gen_c_source.
        code_width = 6 if self._table_name_suffix else 4

        output = self._data
        output.append(description)
        output.append("static const %s lit_unicode_%s%s%s[] JERRY_ATTR_CONST_DATA ="
                      % (table_type, category.lower(), name_part, self._table_name_suffix))
        output.append("{")
        output.append(format_code(table, 1, code_width))
        output.append("};")
        output.append("")  # for an extra empty line
    def generate(self):
-        with open(self.__filepath, 'w') as generated_source:
+        with open(self._filepath, 'w') as generated_source:
-            generated_source.write("\n".join(self.__header))
+            generated_source.write("\n".join(self._header))
-            generated_source.write("\n".join(self.__data))
+            generated_source.write("\n".join(self._data))
-class UnicodeCategorizer(object):
+
 class UnicodeSupplementarySource(UnicodeBasicSource):
    """
    Source generator variant that uses uint32_t characters and uint16_t
    lengths and appends the "_sup" suffix to every generated table name.
    """
    def __init__(self, filepath):
        super(UnicodeSupplementarySource, self).__init__(filepath,
                                                         character_type="uint32_t",
                                                         length_type="uint16_t")
        self._table_name_suffix = "_sup"
    def add_whitepace_range(self, category, categorizer, units):
        """
        Emit the tables created from `units` as one plain range,
        without any preprocessor guard lines around it.
        """
        tables = categorizer.create_tables(units)
        self.add_range(category, tables)
 class UnicodeBasicCategorizer(object):
    def __init__(self):
-        # unicode categories:      Lu Ll Lt Mn Mc Me Nd Nl No Zs Zl Zp Cc Cf Cs
+        self._length_limit = 0xff
-        #                          Co Lm Lo Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So
+        self.extra_id_continue_units = set([0x200C, 0x200D])
        # letter:                  Lu Ll Lt Lm Lo Nl
        # non-letter-indent-part:
        #   digit:                 Nd
        #   punctuation mark:      Mn Mc
        #   connector punctuation: Pc
        # separators:              Zs
        self._unicode_categories = {
            'letters_category' : ["Lu", "Ll", "Lt", "Lm", "Lo", "Nl"],
            'non_letters_category' : ["Nd", "Mn", "Mc", "Pc"],
            'separators_category' : ["Zs"]
        }
-        self._categories = {
+    #pylint: disable=no-self-use
-            'letters' : [],
+    def in_range(self, i):
-            'non_letters' : [],
+        return i >= 0x80 and i < 0x10000
            'separators' : []
        }
-    def _store_by_category(self, unicode_id, category):
+    def _group_ranges(self, units):
        """
        Store the given unicode_id by its category
        """
        for target_category in self._categories:
            if category in self._unicode_categories[target_category + '_category']:
                self._categories[target_category].append(unicode_id)
    def read_categories(self, unicode_data_file):
        """
        Read the corresponding unicode values and store them in category lists.
        :return: List of letters, non_letter and separators.
        """
        range_start_id = 0
        with open(unicode_data_file) as unicode_data:
            for line in csv.reader(unicode_data, delimiter=';'):
                unicode_id = int(line[0], 16)
                # Skip supplementary planes and ascii chars
                if unicode_id >= 0x10000 or unicode_id < 128:
                    continue
                category = line[2]
                if range_start_id != 0:
                    while range_start_id <= unicode_id:
                        self._store_by_category(range_start_id, category)
                        range_start_id += 1
                    range_start_id = 0
                    continue
                if line[1].startswith('<'):
                    # Save the start position of the range
                    range_start_id = unicode_id
                self._store_by_category(unicode_id, category)
        # This separator char is handled separatly
        separators = self._categories['separators']
        non_breaking_space = 0x00A0
        if non_breaking_space in separators:
            separators.remove(int(non_breaking_space))
        # These separator chars are not in the unicode data file or not in Zs category
        mongolian_vowel_separator = 0x180E
        medium_mathematical_space = 0x205F
        zero_width_space = 0x200B
        if mongolian_vowel_separator not in separators:
            bisect.insort(separators, int(mongolian_vowel_separator))
        if medium_mathematical_space not in separators:
            bisect.insort(separators, int(medium_mathematical_space))
        if zero_width_space not in separators:
            bisect.insort(separators, int(zero_width_space))
        # https://www.ecma-international.org/ecma-262/5.1/#sec-7.1 format-control characters
        non_letters = self._categories['non_letters']
        zero_width_non_joiner = 0x200C
        zero_width_joiner = 0x200D
        bisect.insort(non_letters, int(zero_width_non_joiner))
        bisect.insort(non_letters, int(zero_width_joiner))
        return self._categories['letters'], self._categories['non_letters'], self._categories['separators']
 def group_ranges(i):
        """
        Convert an increasing list of integers into a range list
        :return: List of ranges.
        """
-    for _, group in itertools.groupby(enumerate(i), lambda q: (q[1] - q[0])):
+        for _, group in itertools.groupby(enumerate(units), lambda q: (q[1] - q[0])):
            group = list(group)
            yield group[0][1], group[-1][1]
-
+    def create_tables(self, units):
 def split_list(category_list):
        """
        Split list of ranges into intervals and single char lists.
-
+        :return: A tuple containing the following info:
-    :return: List of interval starting points, interval lengths and single chars
+            - list of interval starting points
            - list of interval lengths
            - list of single chars
        """
        interval_sps = []
        interval_lengths = []
        chars = []
-    for element in category_list:
+        for element in self._group_ranges(units):
            interval_length = element[1] - element[0]
            if interval_length == 0:
                chars.append(element[0])
-        elif interval_length > 255:
+            elif interval_length > self._length_limit:
-            for i in range(element[0], element[1], 256):
+                for i in range(element[0], element[1], self._length_limit + 1):
-                length = 255 if (element[1] - i > 255) else (element[1] - i)
+                    length = min(self._length_limit, element[1] - i)
                    interval_sps.append(i)
                    interval_lengths.append(length)
            else:
                interval_sps.append(element[0])
-            interval_lengths.append(element[1] - element[0])
+                interval_lengths.append(interval_length)
        return interval_sps, interval_lengths, chars
    def read_units(self, file_path, categories, subcategories=None):
        """
        Read a Unicode property file and extract the code points of the given categories.

        Only the code points accepted by `in_range` are stored.

        :param file_path: Path to the Unicode property file (e.g. "DerivedCoreProperties.txt").
        :param categories: A list of category strings to extract from the Unicode file.
        :param subcategories: A list of subcategory strings to restrict categories.
                              Every subcategory is accepted when it is empty or None.
        :return: A dictionary where each string from the :param categories: is a key and
                 for each key the list of matching code points is stored in file order.
        """
        # Create a dictionary in the format: { category[0]: [ ], ..., category[N]: [ ] }
        units = {category: [] for category in categories}

        # Formats to match:
        #  <HEX>     ; <category> # <subcategory>
        #  <HEX>..<HEX>     ; <category> # <subcategory>
        matcher = re.compile(
            r"(?P<start>[\dA-F]+)(?:\.\.(?P<end>[\dA-F]+))?\s+; (?P<category>[\w]+) # (?P<subcategory>[\w&]{2})")

        with open(file_path, "r") as src_file:
            for line in src_file:
                match = matcher.match(line)
                if not match:
                    continue

                category = match.group("category")
                if category not in units:
                    continue

                if subcategories and match.group("subcategory") not in subcategories:
                    continue

                start = int(match.group("start"), 16)
                # if no "end" found use the "start"
                end = int(match.group("end") or match.group("start"), 16)

                units[category].extend(
                    code_point for code_point in range(start, end + 1) if self.in_range(code_point))

        return units
    def read_case_mappings(self, unicode_data_file, special_casing_file):
        """
        Build the lower and upper case conversion tables for the code points accepted by `in_range`.

        :param unicode_data_file: Contains the default case mappings (one-to-one mappings).
        :param special_casing_file: Contains additional informative case mappings that are either not one-to-one
                                    or which are context-sensitive.
        :return: A (lower case mapping, upper case mapping) tuple of dictionaries keyed by code point.
        """
        to_lower = {}
        to_upper = {}

        # Default mappings: field 12 holds the upper case form, field 13 the lower case form
        with open(unicode_data_file) as data_file:
            for fields in csv.reader(data_file, delimiter=';'):
                code_point = int(fields[0], 16)

                if self.in_range(code_point):
                    upper_form, lower_form = fields[12], fields[13]

                    if upper_form:
                        to_upper[code_point] = parse_unicode_sequence(upper_form)

                    if lower_form:
                        to_lower[code_point] = parse_unicode_sequence(lower_form)

        # Special mappings are applied last, so they replace the default entries
        with open(special_casing_file) as casing_file:
            for fields in csv.reader(casing_file, delimiter=';'):
                # Empty lines and comment lines carry no mapping
                if not fields or fields[0].startswith('#'):
                    continue

                # A field that contains a '#' comment is treated as an empty field
                fields = ['' if '#' in field else field for field in fields]

                code_point = int(fields[0], 16)
                conditions = fields[4]

                # Entries with a non-empty condition list are not stored
                if self.in_range(code_point) and not conditions:
                    lower_form = parse_unicode_sequence(fields[1])
                    upper_form = parse_unicode_sequence(fields[3])

                    to_lower[code_point] = lower_form
                    to_upper[code_point] = upper_form

        return to_lower, to_upper
 class UnicodeSupplementaryCategorizer(UnicodeBasicCategorizer):
    """
    Categorizer that only accepts the code points at or above 0x10000.
    """

    def __init__(self):
        UnicodeBasicCategorizer.__init__(self)
        # Starts out empty; nothing in this class adds to it
        self.extra_id_continue_units = set()
        # Replaces the length limit set up by the base class initializer (if any)
        self._length_limit = 0xffff

    def in_range(self, i):
        """
        Check whether a code point is handled by this categorizer.

        :param i: Code point to check.
        :return: True if the code point is at or above 0x10000, False otherwise.
        """
        return 0x10000 <= i
 def generate_ranges(script_args, plane_type):
    if plane_type == UNICODE_PLANE_TYPE_SUPPLEMENTARY:
        c_source = UnicodeSupplementarySource(RANGES_SUP_C_SOURCE)
        categorizer = UnicodeSupplementaryCategorizer()
    else:
        c_source = UnicodeBasicSource(RANGES_C_SOURCE)
        categorizer = UnicodeBasicCategorizer()
    header_completion = ["/* This file is automatically generated by the %s script" % os.path.basename(__file__),
-                         " * from %s. Do not edit! */" % os.path.basename(script_args.unicode_data),
+                         " * from %s. Do not edit! */" % os.path.basename(script_args.derived_core_properties),
                         ""]
    c_source.complete_header("\n".join(header_completion))
-    c_source.add_table(letter_tables[0],
+    units = categorizer.read_units(script_args.derived_core_properties, ["ID_Start", "ID_Continue"])
                       "unicode_letter_interval_sps",
                       "uint16_t",
                       ("/**\n"
                        " * Character interval starting points for the unicode letters.\n"
                        " *\n"
                        " * The characters covered by these intervals are from\n"
                        " * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl\n"
                        " */"))
-    c_source.add_table(letter_tables[1],
+    units["ID_Continue"] = sorted(set(units["ID_Continue"]).union(categorizer.extra_id_continue_units)
-                       "unicode_letter_interval_lengths",
+                                  - set(units["ID_Start"]))
                       "uint8_t",
                       ("/**\n"
                        " * Character lengths for the unicode letters.\n"
                        " *\n"
                        " * The characters covered by these intervals are from\n"
                        " * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl\n"
                        " */"))
-    c_source.add_table(letter_tables[2],
+    for category, unit in units.items():
-                       "unicode_letter_chars",
+        c_source.add_range(category, categorizer.create_tables(unit))
                       "uint16_t",
                       ("/**\n"
                        " * Those unicode letter characters that are not inside any of\n"
                        " * the intervals specified in lit_unicode_letter_interval_sps array.\n"
                        " *\n"
                        " * The characters are from the following Unicode categories:\n"
                        " * Lu, Ll, Lt, Lm, Lo, Nl\n"
                        " */"))
-    c_source.add_table(non_letter_tables[0],
+    white_space_units = categorizer.read_units(script_args.prop_list, ["White_Space"], ["Zs"])["White_Space"]
                       "unicode_non_letter_ident_part_interval_sps",
                       "uint16_t",
                       ("/**\n"
                        " * Character interval starting points for non-letter character\n"
                        " * that can be used as a non-first character of an identifier.\n"
                        " *\n"
                        " * The characters covered by these intervals are from\n"
                        " * the following Unicode categories: Nd, Mn, Mc, Pc\n"
                        " */"))
-    c_source.add_table(non_letter_tables[1],
+    c_source.add_whitepace_range("White_Space", categorizer, white_space_units)
                       "unicode_non_letter_ident_part_interval_lengths",
                       "uint8_t",
                       ("/**\n"
                        " * Character interval lengths for non-letter character\n"
                        " * that can be used as a non-first character of an identifier.\n"
                        " *\n"
                        " * The characters covered by these intervals are from\n"
                        " * the following Unicode categories: Nd, Mn, Mc, Pc\n"
                        " */"))
    c_source.add_table(non_letter_tables[2],
                       "unicode_non_letter_ident_part_chars",
                       "uint16_t",
                       ("/**\n"
                        " * Those non-letter characters that can be used as a non-first\n"
                        " * character of an identifier and not included in any of the intervals\n"
                        " * specified in lit_unicode_non_letter_ident_part_interval_sps array.\n"
                        " *\n"
                        " * The characters are from the following Unicode categories:\n"
                        " * Nd, Mn, Mc, Pc\n"
                        " */"))
    c_source.add_table(separator_tables[0],
                       "unicode_separator_char_interval_sps",
                       "uint16_t",
                       ("/**\n"
                        " * Unicode separator character interval starting points from Unicode category: Zs\n"
                        " */"))
    c_source.add_table(separator_tables[1],
                       "unicode_separator_char_interval_lengths",
                       "uint8_t",
                       ("/**\n"
                        " * Unicode separator character interval lengths from Unicode category: Zs\n"
                        " */"))
    c_source.add_table(separator_tables[2],
                       "unicode_separator_chars",
                       "uint16_t",
                       ("/**\n"
                        " * Unicode separator characters that are not in the\n"
                        " * lit_unicode_separator_char_intervals array.\n"
                        " *\n"
                        " * Unicode category: Zs\n"
                        " */"))
    c_source.generate()
@@ -320,70 +338,6 @@ def parse_unicode_sequence(raw_data):
    return result
 def read_case_mappings(unicode_data_file, special_casing_file):
    """
    Build the lower and upper case conversion tables of the non-ascii basic plane characters.

    :param unicode_data_file: Contains the default case mappings (one-to-one mappings).
    :param special_casing_file: Contains additional informative case mappings that are either not one-to-one
                                or which are context-sensitive.
    :return: A (lower case mapping, upper case mapping) tuple of dictionaries keyed by code point.
    """
    to_lower = {}
    to_upper = {}

    # Default mappings: field 12 holds the upper case form, field 13 the lower case form
    with open(unicode_data_file) as data_file:
        for fields in csv.reader(data_file, delimiter=';'):
            code_point = int(fields[0], 16)

            # Only 128 .. 0xFFFF is kept (no ascii chars, no supplementary planes)
            if not 128 <= code_point < 0x10000:
                continue

            upper_form, lower_form = fields[12], fields[13]

            if upper_form:
                to_upper[code_point] = parse_unicode_sequence(upper_form)

            if lower_form:
                to_lower[code_point] = parse_unicode_sequence(lower_form)

    # Special mappings are applied last, so they replace the default entries
    with open(special_casing_file) as casing_file:
        for fields in csv.reader(casing_file, delimiter=';'):
            # Empty lines and comment lines carry no mapping
            if not fields or fields[0].startswith('#'):
                continue

            # A field that contains a '#' comment is treated as an empty field
            fields = ['' if '#' in field else field for field in fields]

            code_point = int(fields[0], 16)
            conditions = fields[4]

            # Same code point filter as above; entries with a condition list are not stored
            if 128 <= code_point < 0x10000 and not conditions:
                lower_form = parse_unicode_sequence(fields[1])
                upper_form = parse_unicode_sequence(fields[3])

                to_lower[code_point] = lower_form
                to_upper[code_point] = upper_form

    return to_lower, to_upper
 def extract_ranges(letter_case, reverse_letter_case=None):
    """
    Extract ranges from case mappings
@@ -675,27 +629,13 @@ def calculate_conversion_distance(letter_case, letter_id):
    return ord(letter_case[letter_id]) - letter_id
-def generate_conversions(script_args):
+def generate_conversions(script_args, plane_type):
-    # Read the corresponding unicode values of lower and upper case letters and store these in tables
+    if plane_type == UNICODE_PLANE_TYPE_SUPPLEMENTARY:
-    case_mappings = read_case_mappings(script_args.unicode_data, script_args.special_casing)
+        c_source = UnicodeSupplementarySource(CONVERSIONS_SUP_C_SOURCE)
-    lower_case = case_mappings[0]
+        categorizer = UnicodeSupplementaryCategorizer()
-    upper_case = case_mappings[1]
+    else:
-
+        c_source = UnicodeBasicSource(CONVERSIONS_C_SOURCE)
-    character_case_ranges = extract_ranges(lower_case, upper_case)
+        categorizer = UnicodeBasicCategorizer()
    character_pair_ranges = extract_character_pair_ranges(lower_case, upper_case)
    character_pairs = extract_character_pairs(lower_case, upper_case)
    upper_case_special_ranges = extract_special_ranges(upper_case)
    lower_case_ranges = extract_ranges(lower_case)
    lower_case_conversions = extract_conversions(lower_case)
    upper_case_conversions = extract_conversions(upper_case)
    if lower_case:
        warnings.warn('Not all elements extracted from the lowercase table!')
    if upper_case:
        warnings.warn('Not all elements extracted from the uppercase table!')
    # Generate conversions output
    c_source = UniCodeSource(CONVERSIONS_C_SOURCE)
    unicode_file = os.path.basename(script_args.unicode_data)
    spec_casing_file = os.path.basename(script_args.special_casing)
@@ -706,75 +646,58 @@ def generate_conversions(script_args):
    c_source.complete_header("\n".join(header_completion))
-    c_source.add_table(character_case_ranges[0],
+    # Read the corresponding unicode values of lower and upper case letters and store these in tables
-                       "character_case_ranges",
+    lower_case, upper_case = categorizer.read_case_mappings(script_args.unicode_data, script_args.special_casing)
                       "uint16_t",
                       ("/* Contains start points of character case ranges "
                        "(these are bidirectional conversions). */"))
-    c_source.add_table(character_case_ranges[1],
+    c_source.add_conversion_range("character_case",
-                       "character_case_range_lengths",
+                                  extract_ranges(lower_case, upper_case),
-                       "uint8_t",
+                                  [("/* Contains start points of character case ranges "
-                       "/* Interval lengths of start points in `character_case_ranges` table. */")
+                                    "(these are bidirectional conversions). */"),
                                   "/* Interval lengths of start points in `character_case_ranges` table. */"])
    c_source.add_conversion_range("character_pair",
                                  extract_character_pair_ranges(lower_case, upper_case),
                                  ["/* Contains the start points of bidirectional conversion ranges. */",
                                   "/* Interval lengths of start points in `character_pair_ranges` table. */"])
-    c_source.add_table(character_pair_ranges[0],
+    c_source.add_table(extract_character_pairs(lower_case, upper_case),
-                       "character_pair_ranges",
+                       "/* Contains lower/upper case bidirectional conversion pairs. */",
-                       "uint16_t",
+                       c_source.character_type,
                       "/* Contains the start points of bidirectional conversion ranges. */")
    c_source.add_table(character_pair_ranges[1],
                       "character_pair_range_lengths",
                       "uint8_t",
                       "/* Interval lengths of start points in `character_pair_ranges` table. */")
    c_source.add_table(character_pairs,
                       "character_pairs",
-                       "uint16_t",
+                       "")
                       "/* Contains lower/upper case bidirectional conversion pairs. */")
-    c_source.add_table(upper_case_special_ranges[0],
+    c_source.add_conversion_range("upper_case_special",
-                       "upper_case_special_ranges",
+                                  extract_special_ranges(upper_case),
-                       "uint16_t",
+                                  [("/* Contains start points of one-to-two uppercase ranges where the "
-                       ("/* Contains start points of one-to-two uppercase ranges where the second character\n"
+                                    "second character\n"
                                    " * is always the same.\n"
-                        " */"))
+                                    " */"),
                                   "/* Interval lengths for start points in `upper_case_special_ranges` table. */"])
-    c_source.add_table(upper_case_special_ranges[1],
+    c_source.add_conversion_range("lower_case",
-                       "upper_case_special_range_lengths",
+                                  extract_ranges(lower_case),
-                       "uint8_t",
+                                  ["/* Contains start points of lowercase ranges. */",
-                       "/* Interval lengths for start points in `upper_case_special_ranges` table. */")
+                                   "/* Interval lengths for start points in `lower_case_ranges` table. */"])
-    c_source.add_table(lower_case_ranges[0],
+    c_source.add_named_conversion_range("lower_case",
-                       "lower_case_ranges",
+                                        extract_conversions(lower_case),
-                       "uint16_t",
+                                        ["conversions", "conversion_counters"],
-                       "/* Contains start points of lowercase ranges. */")
+                                        [("/* The remaining lowercase conversions. The lowercase variant can "
                                          "be one-to-three character long. */"),
                                         ("/* Number of one-to-one, one-to-two, and one-to-three lowercase "
                                          "conversions. */")])
-    c_source.add_table(lower_case_ranges[1],
+    c_source.add_named_conversion_range("upper_case",
-                       "lower_case_range_lengths",
+                                        extract_conversions(upper_case),
-                       "uint8_t",
+                                        ["conversions", "conversion_counters"],
-                       "/* Interval lengths for start points in `lower_case_ranges` table. */")
+                                        [("/* The remaining uppercase conversions. The uppercase variant can "
                                          "be one-to-three character long. */"),
                                         ("/* Number of one-to-one, one-to-two, and one-to-three uppercase "
                                          "conversions. */")])
-    c_source.add_table(lower_case_conversions[0],
+    if lower_case:
-                       "lower_case_conversions",
+        warnings.warn('Not all elements extracted from the lowercase table!')
-                       "uint16_t",
+    if upper_case:
-                       ("/* The remaining lowercase conversions. The lowercase variant can "
+        warnings.warn('Not all elements extracted from the uppercase table!')
                        "be one-to-three character long. */"))
    c_source.add_table(lower_case_conversions[1],
                       "lower_case_conversion_counters",
                       "uint8_t",
                       "/* Number of one-to-one, one-to-two, and one-to-three lowercase conversions. */")
    c_source.add_table(upper_case_conversions[0],
                       "upper_case_conversions",
                       "uint16_t",
                       ("/* The remaining uppercase conversions. The uppercase variant can "
                        "be one-to-three character long. */"))
    c_source.add_table(upper_case_conversions[1],
                       "upper_case_conversion_counters",
                       "uint8_t",
                       "/* Number of one-to-one, one-to-two, and one-to-three uppercase conversions. */")
    c_source.generate()
@@ -783,29 +706,37 @@ def generate_conversions(script_args):
 def main():
-    parser = argparse.ArgumentParser(description='lit-unicode-{conversions,ranges}.inc.h generator',
+    parser = argparse.ArgumentParser(description='lit-unicode-{conversions,ranges}-{sup}.inc.h generator',
                                     epilog='''
-                                        The input files (UnicodeData.txt, SpecialCasing.txt)
+                                        The input files:
                                            - UnicodeData.txt
                                            - SpecialCasing.txt
                                            - DerivedCoreProperties.txt
                                            - PropList.txt
                                        must be retrieved from
                                        http://www.unicode.org/Public/<VERSION>/ucd/.
                                        The last known good version is 13.0.0.
                                        ''')
    def check_file(path):
        """
        Argument type checker: accept only the path of an existing, readable regular file.

        :param path: Path received from the command line.
        :return: The unchanged path.
        :raises argparse.ArgumentTypeError: If the file is missing or not readable.
        """
        if os.path.isfile(path) and os.access(path, os.R_OK):
            return path

        raise argparse.ArgumentTypeError('The %s file is missing or not readable!' % path)
    parser.add_argument('--unicode-data', metavar='FILE', action='store', required=True,
-                        help='specify the unicode data file')
+                        type=check_file, help='specify the unicode data file')
    parser.add_argument('--special-casing', metavar='FILE', action='store', required=True,
-                        help='specify the special casing file')
+                        type=check_file, help='specify the special casing file')
    parser.add_argument('--prop-list', metavar='FILE', action='store', required=True,
                        type=check_file, help='specify the prop list file')
    parser.add_argument('--derived-core-properties', metavar='FILE', action='store', required=True,
                        type=check_file, help='specify the DerivedCodeProperties file')
    script_args = parser.parse_args()
-    if not os.path.isfile(script_args.unicode_data) or not os.access(script_args.unicode_data, os.R_OK):
+    generate_ranges(script_args, UNICODE_PLANE_TYPE_BASIC)
-        parser.error('The %s file is missing or not readable!' % script_args.unicode_data)
+    generate_ranges(script_args, UNICODE_PLANE_TYPE_SUPPLEMENTARY)
-
+    generate_conversions(script_args, UNICODE_PLANE_TYPE_BASIC)
-    if not os.path.isfile(script_args.special_casing) or not os.access(script_args.special_casing, os.R_OK):
+    generate_conversions(script_args, UNICODE_PLANE_TYPE_SUPPLEMENTARY)
        parser.error('The %s file is missing or not readable!' % script_args.special_casing)
    generate_ranges(script_args)
    generate_conversions(script_args)
 if __name__ == "__main__":