Implement tagged template literals (#3456)

Missing features: snapshot support

JerryScript-DCO-1.0-Signed-off-by: Robert Fancsik frobert@inf.u-szeged.hu
This commit is contained in:
Robert Fancsik
2019-12-19 19:10:45 +01:00
committed by GitHub
parent 7bfbc701d8
commit 9596a7e1d6
28 changed files with 1229 additions and 314 deletions
+267 -227
View File
@@ -874,8 +874,15 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */
* Parse string.
*/
void
lexer_parse_string (parser_context_t *context_p) /**< context */
lexer_parse_string (parser_context_t *context_p, /**< context */
lexer_string_options_t opts) /**< options */
{
#if ENABLED (JERRY_ES2015)
const size_t raw_length_inc = (opts & LEXER_STRING_RAW) ? 1 : 0;
#else /* ENABLED (JERRY_ES2015) */
JERRY_UNUSED (opts);
#endif /* ENABLED (JERRY_ES2015) */
uint8_t str_end_character = context_p->source_p[0];
const uint8_t *source_p = context_p->source_p + 1;
const uint8_t *string_start_p = source_p;
@@ -918,6 +925,10 @@ lexer_parse_string (parser_context_t *context_p) /**< context */
continue;
}
#if ENABLED (JERRY_ES2015)
length += raw_length_inc;
#endif /* ENABLED (JERRY_ES2015) */
has_escape = true;
/* Newline is ignored. */
@@ -931,6 +942,9 @@ lexer_parse_string (parser_context_t *context_p) /**< context */
}
line++;
#if ENABLED (JERRY_ES2015)
length += raw_length_inc;
#endif /* ENABLED (JERRY_ES2015) */
column = 1;
continue;
}
@@ -938,6 +952,9 @@ lexer_parse_string (parser_context_t *context_p) /**< context */
{
source_p++;
line++;
#if ENABLED (JERRY_ES2015)
length += raw_length_inc;
#endif /* ENABLED (JERRY_ES2015) */
column = 1;
continue;
}
@@ -949,6 +966,13 @@ lexer_parse_string (parser_context_t *context_p) /**< context */
continue;
}
#if ENABLED (JERRY_ES2015)
if (opts & LEXER_STRING_RAW)
{
continue;
}
#endif /* ENABLED (JERRY_ES2015) */
if (*source_p == LIT_CHAR_0
&& source_p + 1 < source_end_p
&& (*(source_p + 1) < LIT_CHAR_0 || *(source_p + 1) > LIT_CHAR_9))
@@ -1086,16 +1110,17 @@ lexer_parse_string (parser_context_t *context_p) /**< context */
#if ENABLED (JERRY_ES2015)
else if (str_end_character == LIT_CHAR_GRAVE_ACCENT)
{
/* Newline (without backslash) is part of the string. */
/* Newline (without backslash) is part of the string.
Note: ECMAScript v6, 11.8.6.1 <CR> or <CR><LF> are both normalized to <LF> */
if (*source_p == LIT_CHAR_CR)
{
has_escape = true;
source_p++;
length++;
if (source_p < source_end_p
&& *source_p == LIT_CHAR_LF)
{
source_p++;
length++;
}
line++;
column = 1;
@@ -1658,7 +1683,7 @@ lexer_next_token (parser_context_t *context_p) /**< context */
case LIT_CHAR_GRAVE_ACCENT:
#endif /* ENABLED (JERRY_ES2015) */
{
lexer_parse_string (context_p);
lexer_parse_string (context_p, LEXER_STRING_NO_OPTS);
return;
}
@@ -1931,11 +1956,6 @@ lexer_process_char_literal (parser_context_t *context_p, /**< context */
context_p->literal_count++;
} /* lexer_process_char_literal */
/**
* Maximum local buffer size for identifiers which contains escape sequences.
*/
#define LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE 48
/**
* Convert an ident with escapes to a utf8 string.
*/
@@ -1974,6 +1994,238 @@ lexer_convert_ident_to_cesu8 (uint8_t *destination_p, /**< destination string */
while (destination_p < destination_end_p);
} /* lexer_convert_ident_to_cesu8 */
/**
 * Convert literal to character sequence
 *
 * Produces the byte sequence of an identifier or string literal. When the
 * literal has no escapes the source bytes are returned directly; otherwise
 * the literal is decoded into a buffer: the caller supplied local array if
 * literal_p->length fits into LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE, or a
 * buffer obtained from parser_malloc_local otherwise.
 *
 * Note:
 *      A buffer obtained from parser_malloc_local is stored in
 *      context_p->u.allocated_buffer_p (with its size stored in
 *      context_p->allocated_buffer_size) and is not released here.
 *      lexer_construct_literal_object calls parser_free_allocated_buffer
 *      after it has used the result.
 *
 * @return pointer to the character sequence: literal_p->char_p, or the
 *         start of the local / allocated buffer holding literal_p->length
 *         decoded bytes
 */
const uint8_t *
lexer_convert_literal_to_chars (parser_context_t *context_p, /**< context */
const lexer_lit_location_t *literal_p, /**< literal location */
uint8_t *local_byte_array_p, /**< local byte array to store chars */
lexer_string_options_t opts) /**< options */
{
/* A previously allocated conversion buffer must not be pending. */
JERRY_ASSERT (context_p->u.allocated_buffer_p == NULL);
/* Nothing to decode: the source bytes can be used as they are. */
if (!literal_p->has_escape)
{
return literal_p->char_p;
}
uint8_t *destination_start_p;
/* Select the destination: allocate only when the decoded literal
 * does not fit into the local byte array. */
if (literal_p->length > LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE)
{
context_p->u.allocated_buffer_p = (uint8_t *) parser_malloc_local (context_p, literal_p->length);
context_p->allocated_buffer_size = literal_p->length;
destination_start_p = context_p->u.allocated_buffer_p;
}
else
{
destination_start_p = local_byte_array_p;
}
/* Identifiers are decoded by a dedicated helper; opts is not consulted. */
if (literal_p->type == LEXER_IDENT_LITERAL)
{
lexer_convert_ident_to_cesu8 (destination_start_p, literal_p->char_p, literal_p->length);
return destination_start_p;
}
const uint8_t *source_p = literal_p->char_p;
uint8_t *destination_p = destination_start_p;
/* The byte right before the first character of the literal selects
 * the terminator character. */
uint8_t str_end_character = source_p[-1];
#if ENABLED (JERRY_ES2015)
/* A literal preceded by a right brace is treated as a template
 * literal, so it is terminated by a grave accent. */
if (str_end_character == LIT_CHAR_RIGHT_BRACE)
{
str_end_character = LIT_CHAR_GRAVE_ACCENT;
}
bool is_raw = (opts & LEXER_STRING_RAW) != 0;
#else /* !ENABLED (JERRY_ES2015) */
JERRY_UNUSED (opts);
bool is_raw = false;
#endif /* ENABLED (JERRY_ES2015) */
while (true)
{
/* The terminator is checked before any other processing. */
if (*source_p == str_end_character)
{
break;
}
/* Escape sequences are decoded only when raw mode is not requested.
 * NOTE(review): in raw mode a backslash is copied like any other byte,
 * so the byte following it is still tested against the terminator above
 * and against the dollar sign / left brace pair below - confirm that
 * lexer_parse_string computes literal_p->length the same way. */
if (*source_p == LIT_CHAR_BACKSLASH && !is_raw)
{
uint8_t conv_character;
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
/* Newline is ignored. A backslash followed by CR, CR LF, LF
 * or a three byte LS / PS sequence produces no output. */
if (*source_p == LIT_CHAR_CR)
{
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
if (*source_p == LIT_CHAR_LF)
{
source_p++;
}
continue;
}
else if (*source_p == LIT_CHAR_LF)
{
source_p++;
continue;
}
else if (*source_p == LEXER_NEWLINE_LS_PS_BYTE_1 && LEXER_NEWLINE_LS_PS_BYTE_23 (source_p))
{
source_p += 3;
continue;
}
/* Octal escape starting with 0-3: up to three octal digits are read
 * (maximum value 0377) and the value is emitted through
 * lit_code_point_to_cesu8_bytes. */
if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_3)
{
lit_code_point_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
{
octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
{
octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
}
}
destination_p += lit_code_point_to_cesu8_bytes (destination_p, octal_number);
continue;
}
/* Octal escape starting with 4-7: at most two octal digits are read
 * (maximum value 077), so the value is stored as a single byte. */
if (*source_p >= LIT_CHAR_4 && *source_p <= LIT_CHAR_7)
{
uint32_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
{
octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
}
*destination_p++ = (uint8_t) octal_number;
continue;
}
/* Hexadecimal and unicode escapes: the helper receives the address of
 * source_p, presumably to step over the digits it consumes - verify
 * against lexer_unchecked_hex_to_character. */
if (*source_p == LIT_CHAR_LOWERCASE_X || *source_p == LIT_CHAR_LOWERCASE_U)
{
source_p++;
destination_p += lit_code_point_to_cesu8_bytes (destination_p,
lexer_unchecked_hex_to_character (&source_p));
continue;
}
/* Single character escapes: conv_character differs from the source
 * byte only when one of the cases below matches. */
conv_character = *source_p;
switch (*source_p)
{
case LIT_CHAR_LOWERCASE_B:
{
conv_character = 0x08;
break;
}
case LIT_CHAR_LOWERCASE_T:
{
conv_character = 0x09;
break;
}
case LIT_CHAR_LOWERCASE_N:
{
conv_character = 0x0a;
break;
}
case LIT_CHAR_LOWERCASE_V:
{
conv_character = 0x0b;
break;
}
case LIT_CHAR_LOWERCASE_F:
{
conv_character = 0x0c;
break;
}
case LIT_CHAR_LOWERCASE_R:
{
conv_character = 0x0d;
break;
}
}
if (conv_character != *source_p)
{
*destination_p++ = conv_character;
source_p++;
continue;
}
/* Any other escaped character falls through: the backslash has been
 * skipped and the character itself is copied by the code below. */
}
#if ENABLED (JERRY_ES2015)
else if (str_end_character == LIT_CHAR_GRAVE_ACCENT)
{
/* A dollar sign followed by a left brace ends the current chunk
 * of a template literal. */
if (source_p[0] == LIT_CHAR_DOLLAR_SIGN
&& source_p[1] == LIT_CHAR_LEFT_BRACE)
{
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
break;
}
/* Inside template literals both CR and CR LF are stored as a single LF. */
if (*source_p == LIT_CHAR_CR)
{
*destination_p++ = LIT_CHAR_LF;
source_p++;
/* NOTE(review): the first comparison appears redundant, a byte equal
 * to LIT_CHAR_LF cannot be the grave accent terminator of this branch. */
if (*source_p != str_end_character
&& *source_p == LIT_CHAR_LF)
{
source_p++;
}
continue;
}
}
#endif /* ENABLED (JERRY_ES2015) */
if (*source_p >= LEXER_UTF8_4BYTE_START)
{
/* Processing 4 byte unicode sequence (even if it is
* after a backslash). Always converted to two 3 byte
* long sequence. */
lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);
destination_p += 6;
source_p += 4;
continue;
}
/* Copy the leading byte of the character, then its continuation bytes. */
*destination_p++ = *source_p++;
/* There is no need to check the source_end_p
* since the string is terminated by a quotation mark. */
while (IS_UTF8_INTERMEDIATE_OCTET (*source_p))
{
*destination_p++ = *source_p++;
}
}
/* The number of bytes written must match the length stored in the literal
 * location, which was also used as the size of the destination buffer. */
JERRY_ASSERT (destination_p == destination_start_p + literal_p->length);
return destination_start_p;
} /* lexer_convert_literal_to_chars */
/**
* Construct a literal object from an identifier.
*/
@@ -1982,213 +2234,11 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
const lexer_lit_location_t *literal_p, /**< literal location */
uint8_t literal_type) /**< final literal type */
{
uint8_t *destination_start_p;
const uint8_t *source_p;
uint8_t local_byte_array[LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE];
JERRY_ASSERT (literal_p->type == LEXER_IDENT_LITERAL
|| literal_p->type == LEXER_STRING_LITERAL);
JERRY_ASSERT (context_p->u.allocated_buffer_p == NULL);
destination_start_p = local_byte_array;
source_p = literal_p->char_p;
if (literal_p->has_escape)
{
uint8_t *destination_p;
if (literal_p->length > LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE)
{
destination_start_p = (uint8_t *) parser_malloc_local (context_p, literal_p->length);
context_p->u.allocated_buffer_p = destination_start_p;
context_p->allocated_buffer_size = literal_p->length;
}
destination_p = destination_start_p;
if (literal_p->type == LEXER_IDENT_LITERAL)
{
lexer_convert_ident_to_cesu8 (destination_start_p, source_p, literal_p->length);
}
else
{
uint8_t str_end_character = source_p[-1];
#if ENABLED (JERRY_ES2015)
if (str_end_character == LIT_CHAR_RIGHT_BRACE)
{
str_end_character = LIT_CHAR_GRAVE_ACCENT;
}
#endif /* ENABLED (JERRY_ES2015) */
while (true)
{
if (*source_p == str_end_character)
{
break;
}
if (*source_p == LIT_CHAR_BACKSLASH)
{
uint8_t conv_character;
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
/* Newline is ignored. */
if (*source_p == LIT_CHAR_CR)
{
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
if (*source_p == LIT_CHAR_LF)
{
source_p++;
}
continue;
}
else if (*source_p == LIT_CHAR_LF)
{
source_p++;
continue;
}
else if (*source_p == LEXER_NEWLINE_LS_PS_BYTE_1 && LEXER_NEWLINE_LS_PS_BYTE_23 (source_p))
{
source_p += 3;
continue;
}
if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_3)
{
lit_code_point_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
{
octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
{
octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
}
}
destination_p += lit_code_point_to_cesu8_bytes (destination_p, octal_number);
continue;
}
if (*source_p >= LIT_CHAR_4 && *source_p <= LIT_CHAR_7)
{
uint32_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
{
octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
}
*destination_p++ = (uint8_t) octal_number;
continue;
}
if (*source_p == LIT_CHAR_LOWERCASE_X || *source_p == LIT_CHAR_LOWERCASE_U)
{
source_p++;
destination_p += lit_code_point_to_cesu8_bytes (destination_p,
lexer_unchecked_hex_to_character (&source_p));
continue;
}
conv_character = *source_p;
switch (*source_p)
{
case LIT_CHAR_LOWERCASE_B:
{
conv_character = 0x08;
break;
}
case LIT_CHAR_LOWERCASE_T:
{
conv_character = 0x09;
break;
}
case LIT_CHAR_LOWERCASE_N:
{
conv_character = 0x0a;
break;
}
case LIT_CHAR_LOWERCASE_V:
{
conv_character = 0x0b;
break;
}
case LIT_CHAR_LOWERCASE_F:
{
conv_character = 0x0c;
break;
}
case LIT_CHAR_LOWERCASE_R:
{
conv_character = 0x0d;
break;
}
}
if (conv_character != *source_p)
{
*destination_p++ = conv_character;
source_p++;
continue;
}
}
#if ENABLED (JERRY_ES2015)
else if (str_end_character == LIT_CHAR_GRAVE_ACCENT
&& source_p[0] == LIT_CHAR_DOLLAR_SIGN
&& source_p[1] == LIT_CHAR_LEFT_BRACE)
{
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
break;
}
#endif /* ENABLED (JERRY_ES2015) */
if (*source_p >= LEXER_UTF8_4BYTE_START)
{
/* Processing 4 byte unicode sequence (even if it is
* after a backslash). Always converted to two 3 byte
* long sequence. */
lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);
destination_p += 6;
source_p += 4;
continue;
}
*destination_p++ = *source_p++;
/* There is no need to check the source_end_p
* since the string is terminated by a quotation mark. */
while (IS_UTF8_INTERMEDIATE_OCTET (*source_p))
{
*destination_p++ = *source_p++;
}
}
JERRY_ASSERT (destination_p == destination_start_p + literal_p->length);
}
source_p = destination_start_p;
}
const uint8_t *source_p = lexer_convert_literal_to_chars (context_p,
literal_p,
local_byte_array,
LEXER_STRING_NO_OPTS);
lexer_process_char_literal (context_p,
source_p,
@@ -2196,20 +2246,10 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
literal_type,
literal_p->has_escape);
if (destination_start_p != local_byte_array)
{
JERRY_ASSERT (context_p->u.allocated_buffer_p == destination_start_p);
context_p->u.allocated_buffer_p = NULL;
parser_free_local (destination_start_p,
context_p->allocated_buffer_size);
}
parser_free_allocated_buffer (context_p);
JERRY_ASSERT (context_p->u.allocated_buffer_p == NULL);
} /* lexer_construct_literal_object */
#undef LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE
/**
* Construct a number object.
*
@@ -2784,7 +2824,7 @@ lexer_expect_object_literal_id (parser_context_t *context_p, /**< context */
case LIT_CHAR_DOUBLE_QUOTE:
case LIT_CHAR_SINGLE_QUOTE:
{
lexer_parse_string (context_p);
lexer_parse_string (context_p, LEXER_STRING_NO_OPTS);
create_literal_object = true;
break;
}