Implement \u{hex} support. (#3447)

A large rework because surrogate pairs must be combined. Currently only the 0x10C80..0x10CF2 range is accepted as valid identifier characters from outside the basic plane.

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
2019-12-16 11:26:02 +01:00
parent 1db16c3a1c
commit 40d930d62c
22 changed files with 765 additions and 370 deletions
@@ -53,14 +53,13 @@ align_column_to_tab (parser_line_counter_t column) /**< current column */
 /**
 * Parse hexadecimal character sequence
 *
- * @return character value
+ * @return character value or UINT32_MAX on error
 */
-ecma_char_t
-lexer_hex_to_character (parser_context_t *context_p, /**< context */
-                        const uint8_t *source_p, /**< current source position */
-                        int length) /**< source length */
+static lit_code_point_t
+lexer_hex_to_code_point (const uint8_t *source_p, /**< current source position */
+                         parser_line_counter_t length) /**< source length */
 {
-  uint32_t result = 0;
+  lit_code_point_t result = 0;

  do
  {
@@ -81,29 +80,94 @@ lexer_hex_to_character (parser_context_t *context_p, /**< context */
      }
      else
      {
-        parser_raise_error (context_p, PARSER_ERR_INVALID_ESCAPE_SEQUENCE);
+        return UINT32_MAX;
      }
    }
  }
  while (--length > 0);

-  return (ecma_char_t) result;
-} /* lexer_hex_to_character */
+  return result;
+} /* lexer_hex_to_code_point */
+
+#if ENABLED (JERRY_ES2015)
+
+/**
+ * Parse the hexadecimal digits of a \u{...} escape sequence (source_p points right after the left brace)
+ *
+ * @return code point value, or UINT32_MAX on error (in that case *length_p is not written)
+ */
+static lit_code_point_t
+lexer_hex_in_braces_to_code_point (const uint8_t *source_p, /**< first byte after the left brace */
+                                   const uint8_t *source_end_p, /**< source end (never read at or past it) */
+                                   uint32_t *length_p) /**< [out] byte length of the whole \u{...} sequence */
+{
+  lit_code_point_t result = 0;
+  /* Four is the size of the \u{} frame (backslash, 'u' and both braces); each digit adds one. */
+  uint32_t length = 4;
+
+  JERRY_ASSERT (source_p[-1] == LIT_CHAR_LEFT_BRACE);
+  JERRY_ASSERT (source_p < source_end_p);
+
+  do
+  {
+    uint32_t byte = *source_p++;
+
+    result <<= 4; /* make room for the next hexadecimal digit */
+
+    if (byte >= LIT_CHAR_0 && byte <= LIT_CHAR_9)
+    {
+      result += byte - LIT_CHAR_0;
+    }
+    else
+    {
+      byte = LEXER_TO_ASCII_LOWERCASE (byte);
+      if (byte >= LIT_CHAR_LOWERCASE_A && byte <= LIT_CHAR_LOWERCASE_F)
+      {
+        result += byte - (LIT_CHAR_LOWERCASE_A - 10);
+      }
+      else
+      {
+        return UINT32_MAX; /* not a hexadecimal digit; an empty \u{} ends up here as well */
+      }
+    }
+
+    if (result >= (LIT_UNICODE_CODE_POINT_MAX + 1) || source_p >= source_end_p)
+    {
+      return UINT32_MAX; /* value above LIT_UNICODE_CODE_POINT_MAX, or source ended before a right brace */
+    }
+    length++;
+  }
+  while (*source_p != LIT_CHAR_RIGHT_BRACE);
+
+  *length_p = length;
+  return result;
+} /* lexer_hex_in_braces_to_code_point */
+
+#endif /* ENABLED (JERRY_ES2015) */

 /**
 * Parse hexadecimal character sequence
 *
 * @return character value
 */
-static ecma_char_t
-lexer_unchecked_hex_to_character (const uint8_t *source_p, /**< current source position */
-                                  int length) /**< source length */
+static lit_code_point_t
+lexer_unchecked_hex_to_character (const uint8_t **source_p) /**< [in, out] current source position */
 {
-  uint32_t result = 0;
+  lit_code_point_t result = 0;
+  const uint8_t *char_p = *source_p;
+  uint32_t length = (char_p[-1] == LIT_CHAR_LOWERCASE_U) ? 4 : 2;

-  do
+#if ENABLED (JERRY_ES2015)
+  if (char_p[0] == LIT_CHAR_LEFT_BRACE)
  {
-    uint32_t byte = *source_p++;
+    length = 0;
+    char_p++;
+  }
+#endif /* ENABLED (JERRY_ES2015) */
+
+  while (true)
+  {
+    uint32_t byte = *char_p++;

    result <<= 4;

@@ -118,10 +182,27 @@ lexer_unchecked_hex_to_character (const uint8_t *source_p, /**< current source p

      result += LEXER_TO_ASCII_LOWERCASE (byte) - (LIT_CHAR_LOWERCASE_A - 10);
    }
-  }
-  while (--length > 0);

-  return (ecma_char_t) result;
+    JERRY_ASSERT (result <= LIT_UNICODE_CODE_POINT_MAX);
+
+#if ENABLED (JERRY_ES2015)
+    if (length == 0)
+    {
+      if (*char_p != LIT_CHAR_RIGHT_BRACE)
+      {
+        continue;
+      }
+      *source_p = char_p + 1;
+      return result;
+    }
+#endif /* ENABLED (JERRY_ES2015) */
+
+    if (--length == 0)
+    {
+      *source_p = char_p;
+      return result;
+    }
+  }
 } /* lexer_unchecked_hex_to_character */

 /**
@@ -509,102 +590,188 @@ static const uint8_t keyword_lengths_list[] =
 #undef LEXER_KEYWORD_LIST_LENGTH

 /**
- * Parse identifier.
+ * Flags for lexer_parse_identifier.
 */
-static void
+typedef enum
+{
+  LEXER_PARSE_NO_OPTS = 0, /**< no options: parse the identifier, skip the keyword check */
+  LEXER_PARSE_CHECK_KEYWORDS = (1 << 0), /**< run the keyword check on the parsed identifier */
+  LEXER_PARSE_CHECK_START_AND_RETURN = (1 << 1), /**< test for identifier start or backslash, then return */
+  LEXER_PARSE_CHECK_PART_AND_RETURN = (1 << 2), /**< test for identifier part or backslash, then return */
+} lexer_parse_options_t;
+
+/**
+ * Parse identifier.
+ *
+ * @return true, if an identifier is parsed, false otherwise
+ */
+static bool
 lexer_parse_identifier (parser_context_t *context_p, /**< context */
-                        bool check_keywords) /**< check keywords */
+                        lexer_parse_options_t options) /**< check keywords */
 {
  /* Only very few identifiers contains \u escape sequences. */
  const uint8_t *source_p = context_p->source_p;
-  const uint8_t *ident_start_p = context_p->source_p;
  /* Note: newline or tab cannot be part of an identifier. */
  parser_line_counter_t column = context_p->column;
  const uint8_t *source_end_p = context_p->source_end_p;
  size_t length = 0;
-
-  context_p->token.type = LEXER_LITERAL;
-  context_p->token.ident_is_strict_keyword = false;
-  context_p->token.lit_location.type = LEXER_IDENT_LITERAL;
-  context_p->token.lit_location.has_escape = false;
+  uint8_t has_escape = false;

  do
  {
    if (*source_p == LIT_CHAR_BACKSLASH)
    {
-      uint16_t character;
+      /* After a backslash an identifier must start. */
+      lit_code_point_t code_point = UINT32_MAX;
+      uint32_t escape_length = 6;

-      context_p->token.lit_location.has_escape = true;
-      context_p->source_p = source_p;
-      context_p->token.column = column;
-
-      if ((source_p + 6 > source_end_p) || (source_p[1] != LIT_CHAR_LOWERCASE_U))
+      if (options & (LEXER_PARSE_CHECK_START_AND_RETURN | LEXER_PARSE_CHECK_PART_AND_RETURN))
      {
+        return true;
+      }
+
+      has_escape = true;
+
+#if ENABLED (JERRY_ES2015)
+      if (source_p + 5 <= source_end_p && source_p[1] == LIT_CHAR_LOWERCASE_U)
+      {
+        if (source_p[2] == LIT_CHAR_LEFT_BRACE)
+        {
+          code_point = lexer_hex_in_braces_to_code_point (source_p + 3, source_end_p, &escape_length);
+        }
+        else if (source_p + 6 <= source_end_p)
+        {
+          code_point = lexer_hex_to_code_point (source_p + 2, 4);
+        }
+      }
+#else /* !ENABLED (JERRY_ES2015) */
+      if (source_p + 6 <= source_end_p && source_p[1] == LIT_CHAR_LOWERCASE_U)
+      {
+        code_point = lexer_hex_to_code_point (source_p + 2, 4);
+      }
+#endif /* ENABLED (JERRY_ES2015) */
+
+      if (code_point == UINT32_MAX)
+      {
+        context_p->source_p = source_p;
+        context_p->token.column = column;
        parser_raise_error (context_p, PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE);
      }

-      character = lexer_hex_to_character (context_p, source_p + 2, 4);
-
      if (length == 0)
      {
-        if (!lit_char_is_identifier_start_character (character))
+        if (!lit_code_point_is_identifier_start (code_point))
        {
          parser_raise_error (context_p, PARSER_ERR_INVALID_IDENTIFIER_START);
        }
      }
      else
      {
-        if (!lit_char_is_identifier_part_character (character))
+        if (!lit_code_point_is_identifier_part (code_point))
        {
          parser_raise_error (context_p, PARSER_ERR_INVALID_IDENTIFIER_PART);
        }
      }

-      length += lit_char_get_utf8_length (character);
-      source_p += 6;
-      PARSER_PLUS_EQUAL_LC (column, 6);
+      length += lit_code_point_get_cesu8_length (code_point);
+      source_p += escape_length;
+      PARSER_PLUS_EQUAL_LC (column, escape_length);
      continue;
    }

-    /* Valid identifiers cannot contain 4 byte long utf-8
-     * characters, since those characters are represented
-     * by 2 ecmascript (UTF-16) characters, and those
-     * characters cannot be literal characters. */
-    JERRY_ASSERT (source_p[0] < LEXER_UTF8_4BYTE_START);
+    lit_code_point_t code_point = *source_p;
+    lit_utf8_size_t utf8_length = 1, decoded_length = 1, char_count = 1;

-    source_p++;
-    length++;
-    column++;
-
-    while (source_p < source_end_p
-           && IS_UTF8_INTERMEDIATE_OCTET (source_p[0]))
+    if (JERRY_UNLIKELY (code_point >= LIT_UTF8_2_BYTE_MARKER))
    {
-      source_p++;
-      length++;
-    }
-  }
-  while (source_p < source_end_p
-         && (lit_char_is_identifier_part (source_p) || *source_p == LIT_CHAR_BACKSLASH));
+      utf8_length = lit_read_code_point_from_utf8 (source_p,
+                                                   (lit_utf8_size_t) (source_end_p - source_p),
+                                                   &code_point);
+      decoded_length = utf8_length;
+
+#if ENABLED (JERRY_ES2015)
+      /* Only ES2015 supports code points outside of the basic plane which can be part of an identifier. */
+      if ((code_point >= LIT_UTF16_HIGH_SURROGATE_MIN && code_point <= LIT_UTF16_HIGH_SURROGATE_MAX)
+          && source_p + 3 < source_end_p)
+      {
+        lit_code_point_t low_surrogate;
+        lit_read_code_point_from_utf8 (source_p + 3,
+                                       (lit_utf8_size_t) (source_end_p - (source_p + 3)),
+                                       &low_surrogate);
+
+        if (low_surrogate >= LIT_UTF16_LOW_SURROGATE_MIN && low_surrogate <= LIT_UTF16_LOW_SURROGATE_MAX)
+        {
+          code_point = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) code_point,
+                                                                 (ecma_char_t) low_surrogate);
+          utf8_length = 2 * 3;
+          decoded_length = 2 * 3;
+          char_count = 2;
+        }
+      }
+      else if (source_p[0] >= LEXER_UTF8_4BYTE_START)
+      {
+        decoded_length = 2 * 3;
+        has_escape = true;
+      }
+#endif /* ENABLED (JERRY_ES2015) */
+    }
+
+    if (length == 0)
+    {
+      if (JERRY_UNLIKELY (options & (LEXER_PARSE_CHECK_START_AND_RETURN | LEXER_PARSE_CHECK_PART_AND_RETURN)))
+      {
+        if (options & LEXER_PARSE_CHECK_START_AND_RETURN)
+        {
+          return lit_code_point_is_identifier_start (code_point);
+        }
+        else
+        {
+          return lit_code_point_is_identifier_part (code_point);
+        }
+      }
+
+      if (!lit_code_point_is_identifier_start (code_point))
+      {
+        return false;
+      }
+    }
+    else if (!lit_code_point_is_identifier_part (code_point))
+    {
+      break;
+    }
+
+    source_p += utf8_length;
+    length += decoded_length;
+    PARSER_PLUS_EQUAL_LC (column, char_count);
+  }
+  while (source_p < source_end_p);
+
+  JERRY_ASSERT (length > 0);
+
+  context_p->token.type = LEXER_LITERAL;
+  context_p->token.ident_is_strict_keyword = false;
+  context_p->token.lit_location.type = LEXER_IDENT_LITERAL;
+  context_p->token.lit_location.has_escape = has_escape;

-  context_p->source_p = ident_start_p;
  context_p->token.column = context_p->column;
-  context_p->token.lit_location.char_p = ident_start_p;
+  context_p->token.lit_location.char_p = context_p->source_p;
  context_p->token.lit_location.length = (prop_length_t) length;

-  if (length > PARSER_MAXIMUM_IDENT_LENGTH)
+  if (JERRY_UNLIKELY (length > PARSER_MAXIMUM_IDENT_LENGTH))
  {
    parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_TOO_LONG);
  }

  /* Check keywords. */
-  if (check_keywords
+  if ((options & LEXER_PARSE_CHECK_KEYWORDS)
      && (length >= LEXER_KEYWORD_MIN_LENGTH && length <= LEXER_KEYWORD_MAX_LENGTH))
  {
+    const uint8_t *ident_start_p = context_p->source_p;
    uint8_t buffer_p[LEXER_KEYWORD_MAX_LENGTH];

    if (JERRY_UNLIKELY (context_p->token.lit_location.has_escape))
    {
-      lexer_convert_ident_to_cesu8 (ident_start_p, buffer_p, (prop_length_t) length);
+      lexer_convert_ident_to_cesu8 (buffer_p, ident_start_p, (prop_length_t) length);
      ident_start_p = buffer_p;
    }

@@ -690,6 +857,7 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */

  context_p->source_p = source_p;
  context_p->column = column;
+  return true;
 } /* lexer_parse_identifier */

 /**
@@ -840,20 +1008,40 @@ lexer_parse_string (parser_context_t *context_p) /**< context */

      if (*source_p == LIT_CHAR_LOWERCASE_X || *source_p == LIT_CHAR_LOWERCASE_U)
      {
-        uint8_t hex_part_length = (*source_p == LIT_CHAR_LOWERCASE_X) ? 2 : 4;
+        uint32_t escape_length = (*source_p == LIT_CHAR_LOWERCASE_X) ? 3 : 5;
+        lit_code_point_t code_point = UINT32_MAX;

        context_p->token.line = line;
        context_p->token.column = (parser_line_counter_t) (column - 1);
-        if (source_p + 1 + hex_part_length > source_end_p)
+
+#if ENABLED (JERRY_ES2015)
+        if (source_p + 4 <= source_end_p
+            && source_p[0] == LIT_CHAR_LOWERCASE_U
+            && source_p[1] == LIT_CHAR_LEFT_BRACE)
        {
-          parser_raise_error (context_p, PARSER_ERR_INVALID_ESCAPE_SEQUENCE);
+          code_point = lexer_hex_in_braces_to_code_point (source_p + 2, source_end_p, &escape_length);
+          escape_length--;
+        }
+        else
+        {
+#endif /* ENABLED (JERRY_ES2015) */
+          if (source_p + escape_length <= source_end_p)
+          {
+            code_point = lexer_hex_to_code_point (source_p + 1, escape_length - 1);
+          }
+#if ENABLED (JERRY_ES2015)
+        }
+#endif /* ENABLED (JERRY_ES2015) */
+
+        if (code_point == UINT32_MAX)
+        {
+          parser_raise_error (context_p, PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE);
        }

-        length += lit_char_get_utf8_length (lexer_hex_to_character (context_p,
-                                                                    source_p + 1,
-                                                                    hex_part_length));
-        source_p += hex_part_length + 1;
-        PARSER_PLUS_EQUAL_LC (column, hex_part_length + 1u);
+        length += lit_code_point_get_cesu8_length (code_point);
+
+        source_p += escape_length;
+        PARSER_PLUS_EQUAL_LC (column, escape_length);
        continue;
      }
    }
@@ -1120,12 +1308,6 @@ lexer_parse_number (parser_context_t *context_p) /**< context */
    }
  }

-  if (source_p < source_end_p
-      && (lit_char_is_identifier_start (source_p) || source_p[0] == LIT_CHAR_BACKSLASH))
-  {
-    parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_AFTER_NUMBER);
-  }
-
  length = (size_t) (source_p - context_p->source_p);
  if (length > PARSER_MAXIMUM_IDENT_LENGTH)
  {
@@ -1135,6 +1317,11 @@ lexer_parse_number (parser_context_t *context_p) /**< context */
  context_p->token.lit_location.length = (prop_length_t) length;
  PARSER_PLUS_EQUAL_LC (context_p->column, length);
  context_p->source_p = source_p;
+
+  if (source_p < source_end_p && lexer_parse_identifier (context_p, LEXER_PARSE_CHECK_START_AND_RETURN))
+  {
+    parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_AFTER_NUMBER);
+  }
 } /* lexer_parse_number */

 /**
@@ -1229,10 +1416,8 @@ lexer_next_token (parser_context_t *context_p) /**< context */
    return;
  }

-  if (lit_char_is_identifier_start (context_p->source_p)
-      || context_p->source_p[0] == LIT_CHAR_BACKSLASH)
+  if (lexer_parse_identifier (context_p, LEXER_PARSE_CHECK_KEYWORDS))
  {
-    lexer_parse_identifier (context_p, true);
    return;
  }

@@ -1723,8 +1908,8 @@ lexer_process_char_literal (parser_context_t *context_p, /**< context */
 * Convert an ident with escapes to a utf8 string.
 */
 void
-lexer_convert_ident_to_cesu8 (const uint8_t *source_p, /**< source string */
-                              uint8_t *destination_p, /**< destination string */
+lexer_convert_ident_to_cesu8 (uint8_t *destination_p, /**< destination string */
+                              const uint8_t *source_p, /**< source string */
                              prop_length_t length) /**< length of destination string */
 {
  const uint8_t *destination_end_p = destination_p + length;
@@ -1735,14 +1920,22 @@ lexer_convert_ident_to_cesu8 (const uint8_t *source_p, /**< source string */
  {
    if (*source_p == LIT_CHAR_BACKSLASH)
    {
-      destination_p += lit_char_to_utf8_bytes (destination_p,
-                                               lexer_unchecked_hex_to_character (source_p + 2, 4));
-      source_p += 6;
+      source_p += 2;
+      destination_p += lit_code_point_to_cesu8_bytes (destination_p,
+                                                      lexer_unchecked_hex_to_character (&source_p));
      continue;
    }

-    JERRY_ASSERT (IS_UTF8_INTERMEDIATE_OCTET (*source_p)
-                  || lit_char_is_identifier_part (source_p));
+#if ENABLED (JERRY_ES2015)
+    if (*source_p >= LEXER_UTF8_4BYTE_START)
+    {
+      lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);
+
+      destination_p += 6;
+      source_p += 4;
+      continue;
+    }
+#endif /* ENABLED (JERRY_ES2015) */

    *destination_p++ = *source_p++;
  }
@@ -1783,7 +1976,7 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */

    if (literal_p->type == LEXER_IDENT_LITERAL)
    {
-      lexer_convert_ident_to_cesu8 (source_p, destination_start_p, literal_p->length);
+      lexer_convert_ident_to_cesu8 (destination_start_p, source_p, literal_p->length);
    }
    else
    {
@@ -1835,7 +2028,7 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */

          if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_3)
          {
-            uint32_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
+            lit_code_point_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);

            source_p++;
            JERRY_ASSERT (source_p < context_p->source_end_p);
@@ -1854,7 +2047,7 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
              }
            }

-            destination_p += lit_char_to_utf8_bytes (destination_p, (uint16_t) octal_number);
+            destination_p += lit_code_point_to_cesu8_bytes (destination_p, octal_number);
            continue;
          }

@@ -1878,13 +2071,9 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */

          if (*source_p == LIT_CHAR_LOWERCASE_X || *source_p == LIT_CHAR_LOWERCASE_U)
          {
-            int hex_part_length = (*source_p == LIT_CHAR_LOWERCASE_X) ? 2 : 4;
-            JERRY_ASSERT (source_p + 1 + hex_part_length <= context_p->source_end_p);
-
-            destination_p += lit_char_to_utf8_bytes (destination_p,
-                                                     lexer_unchecked_hex_to_character (source_p + 1,
-                                                                                       hex_part_length));
-            source_p += hex_part_length + 1;
+            source_p++;
+            destination_p += lit_code_point_to_cesu8_bytes (destination_p,
+                                                            lexer_unchecked_hex_to_character (&source_p));
            continue;
          }

@@ -1946,18 +2135,9 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
          /* Processing 4 byte unicode sequence (even if it is
           * after a backslash). Always converted to two 3 byte
           * long sequence. */
+          lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);

-          uint32_t character = ((((uint32_t) source_p[0]) & 0x7) << 18);
-          character |= ((((uint32_t) source_p[1]) & LIT_UTF8_LAST_6_BITS_MASK) << 12);
-          character |= ((((uint32_t) source_p[2]) & LIT_UTF8_LAST_6_BITS_MASK) << 6);
-          character |= (((uint32_t) source_p[3]) & LIT_UTF8_LAST_6_BITS_MASK);
-
-          JERRY_ASSERT (character >= 0x10000);
-          character -= 0x10000;
-          destination_p += lit_char_to_utf8_bytes (destination_p,
-                                                   (ecma_char_t) (0xd800 | (character >> 10)));
-          destination_p += lit_char_to_utf8_bytes (destination_p,
-                                                   (ecma_char_t) (0xdc00 | (character & LIT_UTF16_LAST_10_BITS_MASK)));
+          destination_p += 6;
          source_p += 4;
          continue;
        }
@@ -2376,15 +2556,14 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
    column++;
  }

-  if (source_p < source_end_p
-      && lit_char_is_identifier_part (source_p))
+  context_p->source_p = source_p;
+  context_p->column = column;
+
+  if (source_p < source_end_p && lexer_parse_identifier (context_p, LEXER_PARSE_CHECK_PART_AND_RETURN))
  {
    parser_raise_error (context_p, PARSER_ERR_UNKNOWN_REGEXP_FLAG);
  }

-  context_p->source_p = source_p;
-  context_p->column = column;
-
  length = (lit_utf8_size_t) (regex_end_p - regex_start_p);
  if (length > PARSER_MAXIMUM_STRING_LENGTH)
  {
@@ -2473,10 +2652,9 @@ lexer_expect_identifier (parser_context_t *context_p, /**< context */
  context_p->token.column = context_p->column;

  if (context_p->source_p < context_p->source_end_p
-      && (lit_char_is_identifier_start (context_p->source_p) || context_p->source_p[0] == LIT_CHAR_BACKSLASH))
+      && lexer_parse_identifier (context_p, (literal_type != LEXER_STRING_LITERAL ? LEXER_PARSE_CHECK_KEYWORDS
+                                                                                  : LEXER_PARSE_NO_OPTS)))
  {
-    lexer_parse_identifier (context_p, literal_type != LEXER_STRING_LITERAL);
-
    if (context_p->token.type == LEXER_LITERAL)
    {
      JERRY_ASSERT (context_p->token.lit_location.type == LEXER_IDENT_LITERAL);
@@ -2548,10 +2726,8 @@ lexer_expect_object_literal_id (parser_context_t *context_p, /**< context */
  context_p->token.column = context_p->column;
  bool create_literal_object = false;

-  if (lit_char_is_identifier_start (context_p->source_p) || context_p->source_p[0] == LIT_CHAR_BACKSLASH)
+  if (lexer_parse_identifier (context_p, LEXER_PARSE_NO_OPTS))
  {
-    lexer_parse_identifier (context_p, false);
-
    if (!(ident_opts & (LEXER_OBJ_IDENT_ONLY_IDENTIFIERS | LEXER_OBJ_IDENT_OBJECT_PATTERN))
        && context_p->token.lit_location.length == 3)
    {
@@ -2687,10 +2863,8 @@ lexer_scan_identifier (parser_context_t *context_p, /**< context */
  context_p->token.column = context_p->column;

  if (context_p->source_p < context_p->source_end_p
-      && (lit_char_is_identifier_start (context_p->source_p) || context_p->source_p[0] == LIT_CHAR_BACKSLASH))
+      && lexer_parse_identifier (context_p, LEXER_PARSE_NO_OPTS))
  {
-    lexer_parse_identifier (context_p, false);
-
    if ((ident_opts & LEXER_SCAN_IDENT_PROPERTY)
        && context_p->token.lit_location.length == 3)
    {
@@ -2726,75 +2900,135 @@ lexer_scan_identifier (parser_context_t *context_p, /**< context */
 * Compares two identifiers.
 *
 * Note:
- *   Escape sequences are allowed, size must be the same.
+ *   Escape sequences are allowed in the left identifier, but not in the right
 *
 * @return true if the two identifiers are the same
 */
-bool
-lexer_compare_identifiers (const uint8_t *left_p, /**< left identifier */
-                           const uint8_t *right_p, /**< right identifier */
-                           size_t size) /**< byte size of the two identifiers */
+static bool
+lexer_compare_identifier_to_chars (const uint8_t *left_p, /**< left identifier */
+                                   const uint8_t *right_p, /**< right identifier string */
+                                   size_t size) /**< byte size of the two identifiers */
 {
-  uint8_t utf8_buf[3];
-  size_t utf8_len, offset;
+  uint8_t utf8_buf[6];

  do
  {
-    /* Backslash cannot be part of a multibyte UTF-8 character. */
-    if (*left_p != LIT_CHAR_BACKSLASH && *right_p != LIT_CHAR_BACKSLASH)
+    if (*left_p == *right_p)
    {
-      if (*left_p++ != *right_p++)
-      {
-        return false;
-      }
+      left_p++;
+      right_p++;
      size--;
      continue;
    }

-    if (*left_p == LIT_CHAR_BACKSLASH && *right_p == LIT_CHAR_BACKSLASH)
+    size_t escape_size;
+
+    if (*left_p == LIT_CHAR_BACKSLASH)
    {
-      uint16_t left_chr = lexer_unchecked_hex_to_character (left_p + 2, 4);
+      left_p += 2;
+      lit_code_point_t code_point = lexer_unchecked_hex_to_character (&left_p);

-      if (left_chr != lexer_unchecked_hex_to_character (right_p + 2, 4))
-      {
-        return false;
-      }
-
-      left_p += 6;
-      right_p += 6;
-      size -= lit_char_get_utf8_length (left_chr);
-      continue;
+      escape_size = lit_code_point_to_cesu8_bytes (utf8_buf, code_point);
+    }
+    else if (*left_p >= LEXER_UTF8_4BYTE_START)
+    {
+      lit_four_byte_utf8_char_to_cesu8 (utf8_buf, left_p);
+      escape_size = 3 * 2;
+      left_p += 4;
+    }
+    else
+    {
+      return false;
    }

-    /* One character is encoded as unicode sequence. */
-    if (*right_p == LIT_CHAR_BACKSLASH)
-    {
-      /* The pointers can be swapped. */
-      const uint8_t *swap_p = left_p;
-      left_p = right_p;
-      right_p = swap_p;
-    }
-
-    utf8_len = lit_char_to_utf8_bytes (utf8_buf, lexer_unchecked_hex_to_character (left_p + 2, 4));
-    JERRY_ASSERT (utf8_len > 0);
-    size -= utf8_len;
-    offset = 0;
+    size -= escape_size;

+    uint8_t *utf8_p = utf8_buf;
    do
    {
-      if (utf8_buf[offset] != *right_p++)
+      if (*right_p++ != *utf8_p++)
      {
        return false;
      }
-      offset++;
    }
-    while (offset < utf8_len);
-
-    left_p += 6;
+    while (--escape_size > 0);
  }
  while (size > 0);

  return true;
+} /* lexer_compare_identifier_to_chars */
+
+/**
+ * Compares an identifier literal to a plain byte string.
+ *
+ * Note:
+ *   Escape sequences are allowed in the left identifier, but not in the right string
+ *
+ * @return true if the identifier is equal to the string, false otherwise
+ */
+bool
+lexer_compare_identifier_to_string (const lexer_lit_location_t *left_p, /**< left literal */
+                                    const uint8_t *right_p, /**< right identifier string (no escapes) */
+                                    size_t size) /**< byte size of the right identifier */
+{
+  if (left_p->length != size)
+  {
+    return false; /* different lengths can never match */
+  }
+
+  if (!left_p->has_escape)
+  {
+    return memcmp (left_p->char_p, right_p, size) == 0; /* no escapes: compare the raw bytes */
+  }
+
+  return lexer_compare_identifier_to_chars (left_p->char_p, right_p, size);
+} /* lexer_compare_identifier_to_string */
+
+/**
+ * Compares two identifier literals.
+ *
+ * Note:
+ *   Escape sequences are allowed in both identifiers
+ *
+ * @return true if the two identifiers are the same, false otherwise
+ */
+bool
+lexer_compare_identifiers (parser_context_t *context_p, /**< context (used to allocate a temporary buffer) */
+                           const lexer_lit_location_t *left_p, /**< left literal */
+                           const lexer_lit_location_t *right_p) /**< right literal */
+{
+  prop_length_t length = left_p->length;
+
+  if (length != right_p->length)
+  {
+    return false;
+  }
+
+  if (!left_p->has_escape) /* the helper accepts escapes only in its first argument */
+  {
+    return lexer_compare_identifier_to_chars (right_p->char_p, left_p->char_p, length);
+  }
+
+  if (!right_p->has_escape) /* same as above with the roles swapped */
+  {
+    return lexer_compare_identifier_to_chars (left_p->char_p, right_p->char_p, length);
+  }
+
+  uint8_t buf_p[64]; /* both have escapes: the left one is converted first, short ones on the stack */
+
+  if (length <= 64) /* must not exceed the size of buf_p */
+  {
+    lexer_convert_ident_to_cesu8 (buf_p, left_p->char_p, length);
+    return lexer_compare_identifier_to_chars (right_p->char_p, buf_p, length);
+  }
+
+  uint8_t *dynamic_buf_p = parser_malloc (context_p, length); /* too long for the stack buffer */
+
+  lexer_convert_ident_to_cesu8 (dynamic_buf_p, left_p->char_p, length);
+  bool result = lexer_compare_identifier_to_chars (right_p->char_p, dynamic_buf_p, length);
+  parser_free (dynamic_buf_p, length); /* release the temporary buffer before returning */
+
+  return result;
 } /* lexer_compare_identifiers */

 /**
@@ -2818,7 +3052,7 @@ lexer_current_is_literal (parser_context_t *context_p, /**< context */

  if (left_ident_p->length != right_ident_p->length)
  {
-    return 0;
+    return false;
  }

  if (!left_ident_p->has_escape && !right_ident_p->has_escape)
@@ -2826,7 +3060,7 @@ lexer_current_is_literal (parser_context_t *context_p, /**< context */
    return memcmp (left_ident_p->char_p, right_ident_p->char_p, left_ident_p->length) == 0;
  }

-  return lexer_compare_identifiers (left_ident_p->char_p, right_ident_p->char_p, left_ident_p->length);
+  return lexer_compare_identifiers (context_p, left_ident_p, right_ident_p);
 } /* lexer_current_is_literal */

 #if ENABLED (JERRY_ES2015)
@@ -637,8 +637,7 @@ bool lexer_check_yield_no_arg (parser_context_t *context_p);
 void lexer_parse_string (parser_context_t *context_p);
 void lexer_expect_identifier (parser_context_t *context_p, uint8_t literal_type);
 void lexer_scan_identifier (parser_context_t *context_p, uint32_t ident_opts);
-ecma_char_t lexer_hex_to_character (parser_context_t *context_p, const uint8_t *source_p, int length);
-void lexer_convert_ident_to_cesu8 (const uint8_t *source_p, uint8_t *destination_p, prop_length_t length);
+void lexer_convert_ident_to_cesu8 (uint8_t *destination_p, const uint8_t *source_p, prop_length_t length);
 void lexer_expect_object_literal_id (parser_context_t *context_p, uint32_t ident_opts);
 void lexer_construct_literal_object (parser_context_t *context_p, const lexer_lit_location_t *literal_p,
                                     uint8_t literal_type);
@@ -646,7 +645,9 @@ bool lexer_construct_number_object (parser_context_t *context_p, bool is_expr, b
 void lexer_convert_push_number_to_push_literal (parser_context_t *context_p);
 uint16_t lexer_construct_function_object (parser_context_t *context_p, uint32_t extra_status_flags);
 void lexer_construct_regexp_object (parser_context_t *context_p, bool parse_only);
-bool lexer_compare_identifiers (const uint8_t *left_p, const uint8_t *right_p, size_t size);
+bool lexer_compare_identifier_to_string (const lexer_lit_location_t *left_p, const uint8_t *right_p, size_t size);
+bool lexer_compare_identifiers (parser_context_t *context_p, const lexer_lit_location_t *left_p,
+                                const lexer_lit_location_t *right_p);
 bool lexer_current_is_literal (parser_context_t *context_p, const lexer_lit_location_t *right_ident_p);
 #if ENABLED (JERRY_ES2015)
 bool lexer_token_is_identifier (parser_context_t *context_p, const char *identifier_p,
@@ -434,8 +434,7 @@ JERRY_STATIC_ASSERT (PARSER_MAXIMUM_IDENT_LENGTH <= UINT8_MAX,
 static inline bool JERRY_ATTR_ALWAYS_INLINE
 scanner_literal_is_arguments (lexer_lit_location_t *literal_p) /**< literal */
 {
-  return (literal_p->length == 9
-          && lexer_compare_identifiers (literal_p->char_p, (const uint8_t *) "arguments", 9));
+  return lexer_compare_identifier_to_string (literal_p, (const uint8_t *) "arguments", 9);
 } /* scanner_literal_is_arguments */

 /**
@@ -986,7 +985,7 @@ scanner_add_custom_literal (parser_context_t *context_p, /**< context */
            return literal_p;
          }
        }
-        else if (lexer_compare_identifiers (literal_p->char_p, char_p, length))
+        else if (lexer_compare_identifier_to_string (literal_p, char_p, length))
        {
          /* The non-escaped version is preferred. */
          literal_p->char_p = char_p;
@@ -1000,8 +999,7 @@ scanner_add_custom_literal (parser_context_t *context_p, /**< context */
  {
    while ((literal_p = (lexer_lit_location_t *) parser_list_iterator_next (&literal_iterator)) != NULL)
    {
-      if (literal_p->length == length
-          && lexer_compare_identifiers (literal_p->char_p, char_p, length))
+      if (lexer_compare_identifiers (context_p, literal_p, literal_location_p))
      {
        return literal_p;
      }
@@ -1065,10 +1063,11 @@ scanner_append_argument (parser_context_t *context_p, /**< context */
  scanner_literal_pool_t *literal_pool_p = scanner_context_p->active_literal_pool_p;
  parser_list_iterator_t literal_iterator;
  parser_list_iterator_init (&literal_pool_p->literal_pool, &literal_iterator);
+  lexer_lit_location_t *literal_location_p = &context_p->token.lit_location;
  lexer_lit_location_t *literal_p;

-  const uint8_t *char_p = context_p->token.lit_location.char_p;
-  prop_length_t length = context_p->token.lit_location.length;
+  const uint8_t *char_p = literal_location_p->char_p;
+  prop_length_t length = literal_location_p->length;

  if (JERRY_LIKELY (!context_p->token.lit_location.has_escape))
  {
@@ -1084,7 +1083,7 @@ scanner_append_argument (parser_context_t *context_p, /**< context */
            break;
          }
        }
-        else if (lexer_compare_identifiers (literal_p->char_p, char_p, length))
+        else if (lexer_compare_identifier_to_string (literal_p, char_p, length))
        {
          literal_p->length = 0;
          break;
@@ -1096,8 +1095,7 @@ scanner_append_argument (parser_context_t *context_p, /**< context */
  {
    while ((literal_p = (lexer_lit_location_t *) parser_list_iterator_next (&literal_iterator)) != NULL)
    {
-      if (literal_p->length == length
-          && lexer_compare_identifiers (literal_p->char_p, char_p, length))
+      if (lexer_compare_identifiers (context_p, literal_p, literal_location_p))
      {
        literal_p->length = 0;
        break;
@@ -1118,8 +1116,7 @@ void
 scanner_detect_eval_call (parser_context_t *context_p, /**< context */
                          scanner_context_t *scanner_context_p) /**< scanner context */
 {
-  if (context_p->token.lit_location.length == 4
-      && lexer_compare_identifiers (context_p->token.lit_location.char_p, (const uint8_t *) "eval", 4)
+  if (lexer_compare_identifier_to_string (&context_p->token.lit_location, (const uint8_t *) "eval", 4)
      && lexer_check_next_character (context_p, LIT_CHAR_LEFT_PAREN))
  {
    scanner_context_p->active_literal_pool_p->status_flags |= SCANNER_LITERAL_POOL_NO_REG;
@@ -1147,7 +1144,7 @@ scanner_scope_find_let_declaration (parser_context_t *context_p, /**< context */
  {
    uint8_t *destination_p = (uint8_t *) scanner_malloc (context_p, literal_p->length);

-    lexer_convert_ident_to_cesu8 (literal_p->char_p, destination_p, literal_p->length);
+    lexer_convert_ident_to_cesu8 (destination_p, literal_p->char_p, literal_p->length);

    name_p = ecma_new_ecma_string_from_utf8 (destination_p, literal_p->length);
    scanner_free (destination_p, literal_p->length);
@@ -1231,7 +1228,7 @@ scanner_detect_invalid_var (parser_context_t *context_p, /**< context */
              return;
            }
          }
-          else if (lexer_compare_identifiers (literal_p->char_p, char_p, length))
+          else if (lexer_compare_identifier_to_string (literal_p, char_p, length))
          {
            scanner_raise_redeclaration_error (context_p);
            return;
@@ -1246,8 +1243,7 @@ scanner_detect_invalid_var (parser_context_t *context_p, /**< context */
        if (literal_p->type & SCANNER_LITERAL_IS_LOCAL
            && !(literal_p->type & SCANNER_LITERAL_IS_ARG)
            && (literal_p->type & SCANNER_LITERAL_IS_LOCAL) != SCANNER_LITERAL_IS_LOCAL
-            && literal_p->length == length
-            && lexer_compare_identifiers (literal_p->char_p, char_p, length))
+            && lexer_compare_identifiers (context_p, literal_p, var_literal_p))
        {
          scanner_raise_redeclaration_error (context_p);
          return;
@@ -376,8 +376,7 @@ scanner_handle_bracket (parser_context_t *context_p, /**< context */
        arrow_source_p = NULL;
 #endif /* ENABLED (JERRY_ES2015) */

-        if (context_p->token.lit_location.length == 4
-            && lexer_compare_identifiers (context_p->token.lit_location.char_p, (const uint8_t *) "eval", 4))
+        if (lexer_compare_identifier_to_string (&context_p->token.lit_location, (const uint8_t *) "eval", 4))
        {
          scanner_context_p->active_literal_pool_p->status_flags |= SCANNER_LITERAL_POOL_NO_REG;
        }
@@ -272,7 +272,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
  const bool is_char_class = (re_ctx_p->current_token.type == RE_TOK_START_CHAR_CLASS
                              || re_ctx_p->current_token.type == RE_TOK_START_INV_CHAR_CLASS);

-  const ecma_char_t prev_char = lit_utf8_peek_prev (parser_ctx_p->input_curr_p);
+  const ecma_char_t prev_char = lit_cesu8_peek_prev (parser_ctx_p->input_curr_p);
  if (prev_char != LIT_CHAR_LEFT_SQUARE && prev_char != LIT_CHAR_CIRCUMFLEX)
  {
    lit_utf8_decr (&parser_ctx_p->input_curr_p);
@@ -286,7 +286,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
      return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string"));
    }

-    lit_code_point_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
+    lit_code_point_t ch = lit_cesu8_read_next (&parser_ctx_p->input_curr_p);

    if (ch == LIT_CHAR_RIGHT_SQUARE)
    {
@@ -318,7 +318,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
        return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\'"));
      }

-      ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
+      ch = lit_cesu8_read_next (&parser_ctx_p->input_curr_p);

      if (ch == LIT_CHAR_LOWERCASE_B)
      {
@@ -376,7 +376,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
        parser_ctx_p->input_curr_p += 2;
        if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p
            && is_range == false
-            && lit_utf8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
+            && lit_cesu8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
        {
          start = code_unit;
          continue;
@@ -396,7 +396,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
        parser_ctx_p->input_curr_p += 4;
        if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p
            && is_range == false
-            && lit_utf8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
+            && lit_cesu8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
        {
          start = code_unit;
          continue;
@@ -481,7 +481,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
        && lit_is_code_point_utf16_high_surrogate (ch)
        && parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p)
    {
-      const ecma_char_t next_ch = lit_utf8_peek_next (parser_ctx_p->input_curr_p);
+      const ecma_char_t next_ch = lit_cesu8_peek_next (parser_ctx_p->input_curr_p);
      if (lit_is_code_point_utf16_low_surrogate (next_ch))
      {
        ch = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) ch, next_ch);
@@ -315,7 +315,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
    return ret_value;
  }

-  ecma_char_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
+  ecma_char_t ch = lit_cesu8_read_next (&parser_ctx_p->input_curr_p);

  switch (ch)
  {
@@ -348,7 +348,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
      }

      out_token_p->type = RE_TOK_CHAR;
-      ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
+      ch = lit_cesu8_read_next (&parser_ctx_p->input_curr_p);

      if (ch == LIT_CHAR_LOWERCASE_B)
      {