Implement tagged template literals (#3456)

Missing features: snapshot support JerryScript-DCO-1.0-Signed-off-by: Robert Fancsik frobert@inf.u-szeged.hu
2019-12-19 19:10:45 +01:00
parent 7bfbc701d8
commit 9596a7e1d6
28 changed files with 1229 additions and 314 deletions
@@ -874,8 +874,15 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */
 * Parse string.
 */
 void
-lexer_parse_string (parser_context_t *context_p) /**< context */
+lexer_parse_string (parser_context_t *context_p, /**< context */
+                    lexer_string_options_t opts) /**< options */
 {
+#if ENABLED (JERRY_ES2015)
+  const size_t raw_length_inc = (opts & LEXER_STRING_RAW) ? 1 : 0;
+#else /* ENABLED (JERRY_ES2015) */
+  JERRY_UNUSED (opts);
+#endif /* ENABLED (JERRY_ES2015) */
+
  uint8_t str_end_character = context_p->source_p[0];
  const uint8_t *source_p = context_p->source_p + 1;
  const uint8_t *string_start_p = source_p;
@@ -918,6 +925,10 @@ lexer_parse_string (parser_context_t *context_p) /**< context */
        continue;
      }

+#if ENABLED (JERRY_ES2015)
+      length += raw_length_inc;
+#endif /* ENABLED (JERRY_ES2015) */
+
      has_escape = true;

      /* Newline is ignored. */
@@ -931,6 +942,9 @@ lexer_parse_string (parser_context_t *context_p) /**< context */
        }

        line++;
+#if ENABLED (JERRY_ES2015)
+        length += raw_length_inc;
+#endif /* ENABLED (JERRY_ES2015) */
        column = 1;
        continue;
      }
@@ -938,6 +952,9 @@ lexer_parse_string (parser_context_t *context_p) /**< context */
      {
        source_p++;
        line++;
+#if ENABLED (JERRY_ES2015)
+        length += raw_length_inc;
+#endif /* ENABLED (JERRY_ES2015) */
        column = 1;
        continue;
      }
@@ -949,6 +966,13 @@ lexer_parse_string (parser_context_t *context_p) /**< context */
        continue;
      }

+#if ENABLED (JERRY_ES2015)
+      if (opts & LEXER_STRING_RAW)
+      {
+        continue;
+      }
+#endif /* ENABLED (JERRY_ES2015) */
+
      if (*source_p == LIT_CHAR_0
          && source_p + 1 < source_end_p
          && (*(source_p + 1) < LIT_CHAR_0 || *(source_p + 1) > LIT_CHAR_9))
@@ -1086,16 +1110,17 @@ lexer_parse_string (parser_context_t *context_p) /**< context */
 #if ENABLED (JERRY_ES2015)
    else if (str_end_character == LIT_CHAR_GRAVE_ACCENT)
    {
-      /* Newline (without backslash) is part of the string. */
+      /* Newline (without backslash) is part of the string.
+         Note: ECMAScript v6, 11.8.6.1 <CR> or <CR><LF> are both normalized to <LF> */
      if (*source_p == LIT_CHAR_CR)
      {
+        has_escape = true;
        source_p++;
        length++;
        if (source_p < source_end_p
            && *source_p == LIT_CHAR_LF)
        {
          source_p++;
-          length++;
        }
        line++;
        column = 1;
@@ -1658,7 +1683,7 @@ lexer_next_token (parser_context_t *context_p) /**< context */
    case LIT_CHAR_GRAVE_ACCENT:
 #endif /* ENABLED (JERRY_ES2015) */
    {
-      lexer_parse_string (context_p);
+      lexer_parse_string (context_p, LEXER_STRING_NO_OPTS);
      return;
    }

@@ -1931,11 +1956,6 @@ lexer_process_char_literal (parser_context_t *context_p, /**< context */
  context_p->literal_count++;
 } /* lexer_process_char_literal */

-/**
- * Maximum local buffer size for identifiers which contains escape sequences.
- */
-#define LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE 48
-
 /**
 * Convert an ident with escapes to a utf8 string.
 */
@@ -1974,6 +1994,238 @@ lexer_convert_ident_to_cesu8 (uint8_t *destination_p, /**< destination string */
  while (destination_p < destination_end_p);
 } /* lexer_convert_ident_to_cesu8 */

+/**
+ * Convert literal to character sequence
+ */
+const uint8_t *
+lexer_convert_literal_to_chars (parser_context_t *context_p, /**< context */
+                                const lexer_lit_location_t *literal_p, /**< literal location */
+                                uint8_t *local_byte_array_p, /**< local byte array to store chars */
+                                lexer_string_options_t opts) /**< options */
+{
+  JERRY_ASSERT (context_p->u.allocated_buffer_p == NULL);
+
+  if (!literal_p->has_escape)
+  {
+    return literal_p->char_p;
+  }
+
+  uint8_t *destination_start_p;
+  if (literal_p->length > LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE)
+  {
+    context_p->u.allocated_buffer_p = (uint8_t *) parser_malloc_local (context_p, literal_p->length);
+    context_p->allocated_buffer_size = literal_p->length;
+    destination_start_p = context_p->u.allocated_buffer_p;
+  }
+  else
+  {
+    destination_start_p = local_byte_array_p;
+  }
+
+  if (literal_p->type == LEXER_IDENT_LITERAL)
+  {
+    lexer_convert_ident_to_cesu8 (destination_start_p, literal_p->char_p, literal_p->length);
+    return destination_start_p;
+  }
+
+  const uint8_t *source_p = literal_p->char_p;
+  uint8_t *destination_p = destination_start_p;
+
+  uint8_t str_end_character = source_p[-1];
+
+#if ENABLED (JERRY_ES2015)
+  if (str_end_character == LIT_CHAR_RIGHT_BRACE)
+  {
+    str_end_character = LIT_CHAR_GRAVE_ACCENT;
+  }
+
+  bool is_raw = (opts & LEXER_STRING_RAW) != 0;
+#else /* !ENABLED (JERRY_ES2015) */
+  JERRY_UNUSED (opts);
+  bool is_raw = false;
+#endif /* ENABLED (JERRY_ES2015) */
+
+  while (true)
+  {
+    if (*source_p == str_end_character)
+    {
+      break;
+    }
+
+    if (*source_p == LIT_CHAR_BACKSLASH && !is_raw)
+    {
+      uint8_t conv_character;
+
+      source_p++;
+      JERRY_ASSERT (source_p < context_p->source_end_p);
+
+      /* Newline is ignored. */
+      if (*source_p == LIT_CHAR_CR)
+      {
+        source_p++;
+        JERRY_ASSERT (source_p < context_p->source_end_p);
+
+        if (*source_p == LIT_CHAR_LF)
+        {
+          source_p++;
+        }
+        continue;
+      }
+      else if (*source_p == LIT_CHAR_LF)
+      {
+        source_p++;
+        continue;
+      }
+      else if (*source_p == LEXER_NEWLINE_LS_PS_BYTE_1 && LEXER_NEWLINE_LS_PS_BYTE_23 (source_p))
+      {
+        source_p += 3;
+        continue;
+      }
+
+      if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_3)
+      {
+        lit_code_point_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
+
+        source_p++;
+        JERRY_ASSERT (source_p < context_p->source_end_p);
+
+        if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
+        {
+          octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
+          source_p++;
+          JERRY_ASSERT (source_p < context_p->source_end_p);
+
+          if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
+          {
+            octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
+            source_p++;
+            JERRY_ASSERT (source_p < context_p->source_end_p);
+          }
+        }
+
+        destination_p += lit_code_point_to_cesu8_bytes (destination_p, octal_number);
+        continue;
+      }
+
+      if (*source_p >= LIT_CHAR_4 && *source_p <= LIT_CHAR_7)
+      {
+        uint32_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
+
+        source_p++;
+        JERRY_ASSERT (source_p < context_p->source_end_p);
+
+        if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
+        {
+          octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
+          source_p++;
+          JERRY_ASSERT (source_p < context_p->source_end_p);
+        }
+
+        *destination_p++ = (uint8_t) octal_number;
+        continue;
+      }
+
+      if (*source_p == LIT_CHAR_LOWERCASE_X || *source_p == LIT_CHAR_LOWERCASE_U)
+      {
+        source_p++;
+        destination_p += lit_code_point_to_cesu8_bytes (destination_p,
+                                                        lexer_unchecked_hex_to_character (&source_p));
+        continue;
+      }
+
+      conv_character = *source_p;
+      switch (*source_p)
+      {
+        case LIT_CHAR_LOWERCASE_B:
+        {
+          conv_character = 0x08;
+          break;
+        }
+        case LIT_CHAR_LOWERCASE_T:
+        {
+          conv_character = 0x09;
+          break;
+        }
+        case LIT_CHAR_LOWERCASE_N:
+        {
+          conv_character = 0x0a;
+          break;
+        }
+        case LIT_CHAR_LOWERCASE_V:
+        {
+          conv_character = 0x0b;
+          break;
+        }
+        case LIT_CHAR_LOWERCASE_F:
+        {
+          conv_character = 0x0c;
+          break;
+        }
+        case LIT_CHAR_LOWERCASE_R:
+        {
+          conv_character = 0x0d;
+          break;
+        }
+      }
+
+      if (conv_character != *source_p)
+      {
+        *destination_p++ = conv_character;
+        source_p++;
+        continue;
+      }
+    }
+#if ENABLED (JERRY_ES2015)
+    else if (str_end_character == LIT_CHAR_GRAVE_ACCENT)
+    {
+      if (source_p[0] == LIT_CHAR_DOLLAR_SIGN
+          && source_p[1] == LIT_CHAR_LEFT_BRACE)
+      {
+        source_p++;
+        JERRY_ASSERT (source_p < context_p->source_end_p);
+        break;
+      }
+      if (*source_p == LIT_CHAR_CR)
+      {
+        *destination_p++ = LIT_CHAR_LF;
+        source_p++;
+        if (*source_p != str_end_character
+            && *source_p == LIT_CHAR_LF)
+        {
+          source_p++;
+        }
+        continue;
+      }
+    }
+#endif /* ENABLED (JERRY_ES2015) */
+
+    if (*source_p >= LEXER_UTF8_4BYTE_START)
+    {
+      /* Processing 4 byte unicode sequence (even if it is
+        * after a backslash). Always converted to two 3 byte
+        * long sequence. */
+      lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);
+
+      destination_p += 6;
+      source_p += 4;
+      continue;
+    }
+
+    *destination_p++ = *source_p++;
+
+    /* There is no need to check the source_end_p
+      * since the string is terminated by a quotation mark. */
+    while (IS_UTF8_INTERMEDIATE_OCTET (*source_p))
+    {
+      *destination_p++ = *source_p++;
+    }
+  }
+
+  JERRY_ASSERT (destination_p == destination_start_p + literal_p->length);
+
+  return destination_start_p;
+} /* lexer_convert_literal_to_chars */
+
 /**
 * Construct a literal object from an identifier.
 */
@@ -1982,213 +2234,11 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
                                const lexer_lit_location_t *literal_p, /**< literal location */
                                uint8_t literal_type) /**< final literal type */
 {
-  uint8_t *destination_start_p;
-  const uint8_t *source_p;
  uint8_t local_byte_array[LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE];
-
-  JERRY_ASSERT (literal_p->type == LEXER_IDENT_LITERAL
-                || literal_p->type == LEXER_STRING_LITERAL);
-  JERRY_ASSERT (context_p->u.allocated_buffer_p == NULL);
-
-  destination_start_p = local_byte_array;
-  source_p = literal_p->char_p;
-
-  if (literal_p->has_escape)
-  {
-    uint8_t *destination_p;
-
-    if (literal_p->length > LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE)
-    {
-      destination_start_p = (uint8_t *) parser_malloc_local (context_p, literal_p->length);
-      context_p->u.allocated_buffer_p = destination_start_p;
-      context_p->allocated_buffer_size = literal_p->length;
-    }
-
-    destination_p = destination_start_p;
-
-    if (literal_p->type == LEXER_IDENT_LITERAL)
-    {
-      lexer_convert_ident_to_cesu8 (destination_start_p, source_p, literal_p->length);
-    }
-    else
-    {
-      uint8_t str_end_character = source_p[-1];
-
-#if ENABLED (JERRY_ES2015)
-      if (str_end_character == LIT_CHAR_RIGHT_BRACE)
-      {
-        str_end_character = LIT_CHAR_GRAVE_ACCENT;
-      }
-#endif /* ENABLED (JERRY_ES2015) */
-
-      while (true)
-      {
-        if (*source_p == str_end_character)
-        {
-          break;
-        }
-
-        if (*source_p == LIT_CHAR_BACKSLASH)
-        {
-          uint8_t conv_character;
-
-          source_p++;
-          JERRY_ASSERT (source_p < context_p->source_end_p);
-
-          /* Newline is ignored. */
-          if (*source_p == LIT_CHAR_CR)
-          {
-            source_p++;
-            JERRY_ASSERT (source_p < context_p->source_end_p);
-
-            if (*source_p == LIT_CHAR_LF)
-            {
-              source_p++;
-            }
-            continue;
-          }
-          else if (*source_p == LIT_CHAR_LF)
-          {
-            source_p++;
-            continue;
-          }
-          else if (*source_p == LEXER_NEWLINE_LS_PS_BYTE_1 && LEXER_NEWLINE_LS_PS_BYTE_23 (source_p))
-          {
-            source_p += 3;
-            continue;
-          }
-
-          if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_3)
-          {
-            lit_code_point_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
-
-            source_p++;
-            JERRY_ASSERT (source_p < context_p->source_end_p);
-
-            if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
-            {
-              octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
-              source_p++;
-              JERRY_ASSERT (source_p < context_p->source_end_p);
-
-              if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
-              {
-                octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
-                source_p++;
-                JERRY_ASSERT (source_p < context_p->source_end_p);
-              }
-            }
-
-            destination_p += lit_code_point_to_cesu8_bytes (destination_p, octal_number);
-            continue;
-          }
-
-          if (*source_p >= LIT_CHAR_4 && *source_p <= LIT_CHAR_7)
-          {
-            uint32_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
-
-            source_p++;
-            JERRY_ASSERT (source_p < context_p->source_end_p);
-
-            if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
-            {
-              octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
-              source_p++;
-              JERRY_ASSERT (source_p < context_p->source_end_p);
-            }
-
-            *destination_p++ = (uint8_t) octal_number;
-            continue;
-          }
-
-          if (*source_p == LIT_CHAR_LOWERCASE_X || *source_p == LIT_CHAR_LOWERCASE_U)
-          {
-            source_p++;
-            destination_p += lit_code_point_to_cesu8_bytes (destination_p,
-                                                            lexer_unchecked_hex_to_character (&source_p));
-            continue;
-          }
-
-          conv_character = *source_p;
-          switch (*source_p)
-          {
-            case LIT_CHAR_LOWERCASE_B:
-            {
-              conv_character = 0x08;
-              break;
-            }
-            case LIT_CHAR_LOWERCASE_T:
-            {
-              conv_character = 0x09;
-              break;
-            }
-            case LIT_CHAR_LOWERCASE_N:
-            {
-              conv_character = 0x0a;
-              break;
-            }
-            case LIT_CHAR_LOWERCASE_V:
-            {
-              conv_character = 0x0b;
-              break;
-            }
-            case LIT_CHAR_LOWERCASE_F:
-            {
-              conv_character = 0x0c;
-              break;
-            }
-            case LIT_CHAR_LOWERCASE_R:
-            {
-              conv_character = 0x0d;
-              break;
-            }
-          }
-
-          if (conv_character != *source_p)
-          {
-            *destination_p++ = conv_character;
-            source_p++;
-            continue;
-          }
-        }
-#if ENABLED (JERRY_ES2015)
-        else if (str_end_character == LIT_CHAR_GRAVE_ACCENT
-                 && source_p[0] == LIT_CHAR_DOLLAR_SIGN
-                 && source_p[1] == LIT_CHAR_LEFT_BRACE)
-        {
-          source_p++;
-          JERRY_ASSERT (source_p < context_p->source_end_p);
-          break;
-        }
-#endif /* ENABLED (JERRY_ES2015) */
-
-        if (*source_p >= LEXER_UTF8_4BYTE_START)
-        {
-          /* Processing 4 byte unicode sequence (even if it is
-           * after a backslash). Always converted to two 3 byte
-           * long sequence. */
-          lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);
-
-          destination_p += 6;
-          source_p += 4;
-          continue;
-        }
-
-        *destination_p++ = *source_p++;
-
-        /* There is no need to check the source_end_p
-         * since the string is terminated by a quotation mark. */
-        while (IS_UTF8_INTERMEDIATE_OCTET (*source_p))
-        {
-          *destination_p++ = *source_p++;
-        }
-      }
-
-      JERRY_ASSERT (destination_p == destination_start_p + literal_p->length);
-    }
-
-    source_p = destination_start_p;
-  }
+  const uint8_t *source_p = lexer_convert_literal_to_chars (context_p,
+                                                            literal_p,
+                                                            local_byte_array,
+                                                            LEXER_STRING_NO_OPTS);

  lexer_process_char_literal (context_p,
                              source_p,
@@ -2196,20 +2246,10 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
                              literal_type,
                              literal_p->has_escape);

-  if (destination_start_p != local_byte_array)
-  {
-    JERRY_ASSERT (context_p->u.allocated_buffer_p == destination_start_p);
-
-    context_p->u.allocated_buffer_p = NULL;
-    parser_free_local (destination_start_p,
-                       context_p->allocated_buffer_size);
-  }
-
+  parser_free_allocated_buffer (context_p);
  JERRY_ASSERT (context_p->u.allocated_buffer_p == NULL);
 } /* lexer_construct_literal_object */

-#undef LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE
-
 /**
 * Construct a number object.
 *
@@ -2784,7 +2824,7 @@ lexer_expect_object_literal_id (parser_context_t *context_p, /**< context */
      case LIT_CHAR_DOUBLE_QUOTE:
      case LIT_CHAR_SINGLE_QUOTE:
      {
-        lexer_parse_string (context_p);
+        lexer_parse_string (context_p, LEXER_STRING_NO_OPTS);
        create_literal_object = true;
        break;
      }