Add core unicode functionality.
Add utf-8 processing routines. Change ecma_char_t from char/uint16_t to uint16_t. Apply all utf-8 processing routines. Change char to jerry_api_char_t in API functions' declarations. JerryScript-DCO-1.0-Signed-off-by: Andrey Shitov a.shitov@samsung.com
This commit is contained in:
@@ -15,14 +15,10 @@
|
||||
*/
|
||||
|
||||
#include "ecma-helpers.h"
|
||||
#include "ecma-exceptions.h"
|
||||
#include "jrt-libc-includes.h"
|
||||
#include "jsp-mm.h"
|
||||
#include "lexer.h"
|
||||
#include "mem-allocator.h"
|
||||
#include "opcodes.h"
|
||||
#include "parser.h"
|
||||
#include "stack.h"
|
||||
#include "lit-magic-strings.h"
|
||||
#include "syntax-errors.h"
|
||||
|
||||
static token saved_token, prev_token, sent_token, empty_token;
|
||||
@@ -31,9 +27,9 @@ static bool allow_dump_lines = false, strict_mode;
|
||||
static size_t buffer_size = 0;
|
||||
|
||||
/* Represents the contents of a script. */
|
||||
static const char *buffer_start = NULL;
|
||||
static const char *buffer = NULL;
|
||||
static const char *token_start;
|
||||
static const jerry_api_char_t *buffer_start = NULL;
|
||||
static const jerry_api_char_t *buffer = NULL;
|
||||
static const jerry_api_char_t *token_start;
|
||||
|
||||
#define LA(I) (get_char (I))
|
||||
|
||||
@@ -56,7 +52,7 @@ current_locus (void)
|
||||
}
|
||||
}
|
||||
|
||||
static char
|
||||
static ecma_char_t
|
||||
get_char (size_t i)
|
||||
{
|
||||
if ((buffer + i) >= (buffer_start + buffer_size))
|
||||
@@ -69,7 +65,7 @@ get_char (size_t i)
|
||||
static void
|
||||
dump_current_line (void)
|
||||
{
|
||||
const char *i;
|
||||
const lit_utf8_byte_t *i;
|
||||
|
||||
if (!allow_dump_lines)
|
||||
{
|
||||
@@ -78,6 +74,7 @@ dump_current_line (void)
|
||||
|
||||
printf ("// ");
|
||||
|
||||
FIXME ("Unicode: properly process non-ascii characters.");
|
||||
for (i = buffer; *i != '\n' && *i != 0; i++)
|
||||
{
|
||||
putchar (*i);
|
||||
@@ -122,18 +119,18 @@ create_token (token_type type, /**< type of token */
|
||||
*/
|
||||
static token
|
||||
convert_string_to_token (token_type tt, /**< token type */
|
||||
const ecma_char_t *str_p, /**< characters buffer */
|
||||
ecma_length_t length) /**< string's length */
|
||||
const lit_utf8_byte_t *str_p, /**< characters buffer */
|
||||
lit_utf8_size_t length) /**< string's length */
|
||||
{
|
||||
JERRY_ASSERT (str_p != NULL);
|
||||
|
||||
literal_t lit = lit_find_literal_by_charset (str_p, length);
|
||||
literal_t lit = lit_find_literal_by_utf8_string (str_p, length);
|
||||
if (lit != NULL)
|
||||
{
|
||||
return create_token_from_lit (tt, lit);
|
||||
}
|
||||
|
||||
lit = lit_create_literal_from_charset (str_p, length);
|
||||
lit = lit_create_literal_from_utf8_string (str_p, length);
|
||||
JERRY_ASSERT (lit->get_type () == LIT_STR_T
|
||||
|| lit->get_type () == LIT_MAGIC_STR_T
|
||||
|| lit->get_type () == LIT_MAGIC_STR_EX_T);
|
||||
@@ -150,8 +147,8 @@ convert_string_to_token (token_type tt, /**< token type */
|
||||
* else - return empty_token.
|
||||
*/
|
||||
static token
|
||||
decode_keyword (const ecma_char_t *str_p, /**< characters buffer */
|
||||
size_t length) /**< string's length */
|
||||
decode_keyword (const lit_utf8_byte_t *str_p, /**< characters buffer */
|
||||
lit_utf8_size_t str_size) /**< string's length */
|
||||
{
|
||||
typedef struct
|
||||
{
|
||||
@@ -211,8 +208,10 @@ decode_keyword (const ecma_char_t *str_p, /**< characters buffer */
|
||||
|
||||
for (uint32_t i = 0; i < sizeof (keywords) / sizeof (kw_descr_t); i++)
|
||||
{
|
||||
if (strlen (keywords[i].keyword_p) == length
|
||||
&& !strncmp (keywords[i].keyword_p, (const char *) str_p, length))
|
||||
if (lit_compare_utf8_strings (str_p,
|
||||
str_size,
|
||||
(lit_utf8_byte_t *) keywords[i].keyword_p,
|
||||
(lit_utf8_size_t) strlen (keywords[i].keyword_p)))
|
||||
{
|
||||
kw = keywords[i].keyword_id;
|
||||
break;
|
||||
@@ -233,7 +232,7 @@ decode_keyword (const ecma_char_t *str_p, /**< characters buffer */
|
||||
case KW_STATIC:
|
||||
case KW_YIELD:
|
||||
{
|
||||
return convert_string_to_token (TOK_NAME, str_p, (ecma_length_t) length);
|
||||
return convert_string_to_token (TOK_NAME, str_p, (ecma_length_t) str_size);
|
||||
}
|
||||
|
||||
default:
|
||||
@@ -249,22 +248,15 @@ decode_keyword (const ecma_char_t *str_p, /**< characters buffer */
|
||||
}
|
||||
else
|
||||
{
|
||||
const ecma_char_t *false_p = lit_get_magic_string_zt (LIT_MAGIC_STRING_FALSE);
|
||||
const ecma_char_t *true_p = lit_get_magic_string_zt (LIT_MAGIC_STRING_TRUE);
|
||||
const ecma_char_t *null_p = lit_get_magic_string_zt (LIT_MAGIC_STRING_NULL);
|
||||
|
||||
if (strlen ((const char*) false_p) == length
|
||||
&& !strncmp ((const char*) str_p, (const char*) false_p, length))
|
||||
if (lit_compare_utf8_string_and_magic_string (str_p, str_size, LIT_MAGIC_STRING_FALSE))
|
||||
{
|
||||
return create_token (TOK_BOOL, false);
|
||||
}
|
||||
else if (strlen ((const char*) true_p) == length
|
||||
&& !strncmp ((const char*) str_p, (const char*) true_p, length))
|
||||
else if (lit_compare_utf8_string_and_magic_string (str_p, str_size, LIT_MAGIC_STRING_TRUE))
|
||||
{
|
||||
return create_token (TOK_BOOL, true);
|
||||
}
|
||||
else if (strlen ((const char*) null_p) == length
|
||||
&& !strncmp ((const char*) str_p, (const char*) null_p, length))
|
||||
else if (lit_compare_utf8_string_and_magic_string (str_p, str_size, LIT_MAGIC_STRING_NULL))
|
||||
{
|
||||
return create_token (TOK_NULL, 0);
|
||||
}
|
||||
@@ -432,8 +424,8 @@ convert_single_escape_character (ecma_char_t c, /**< character to decode */
|
||||
*/
|
||||
static token
|
||||
convert_string_to_token_transform_escape_seq (token_type tok_type, /**< type of token to produce */
|
||||
const char *source_str_p, /**< string to convert,
|
||||
* located in source buffer */
|
||||
const jerry_api_char_t *source_str_p, /**< string to convert,
|
||||
* located in source buffer */
|
||||
size_t source_str_size) /**< size of the string */
|
||||
{
|
||||
token ret;
|
||||
@@ -441,7 +433,7 @@ convert_string_to_token_transform_escape_seq (token_type tok_type, /**< type of
|
||||
if (source_str_size == 0)
|
||||
{
|
||||
return convert_string_to_token (tok_type,
|
||||
lit_get_magic_string_zt (LIT_MAGIC_STRING__EMPTY),
|
||||
lit_get_magic_string_utf8 (LIT_MAGIC_STRING__EMPTY),
|
||||
0);
|
||||
}
|
||||
else
|
||||
@@ -449,10 +441,10 @@ convert_string_to_token_transform_escape_seq (token_type tok_type, /**< type of
|
||||
JERRY_ASSERT (source_str_p != NULL);
|
||||
}
|
||||
|
||||
ecma_char_t *str_buf_p = (ecma_char_t*) jsp_mm_alloc (source_str_size * sizeof (ecma_char_t));
|
||||
lit_utf8_byte_t *str_buf_p = (lit_utf8_byte_t*) jsp_mm_alloc (source_str_size);
|
||||
|
||||
const char *source_str_iter_p = source_str_p;
|
||||
ecma_char_t *str_buf_iter_p = str_buf_p;
|
||||
const lit_utf8_byte_t *source_str_iter_p = source_str_p;
|
||||
lit_utf8_byte_t *str_buf_iter_p = str_buf_p;
|
||||
|
||||
bool is_correct_sequence = true;
|
||||
bool every_char_islower = true;
|
||||
@@ -464,7 +456,7 @@ convert_string_to_token_transform_escape_seq (token_type tok_type, /**< type of
|
||||
|
||||
if (*source_str_iter_p != '\\')
|
||||
{
|
||||
converted_char = (ecma_char_t) *source_str_iter_p++;
|
||||
converted_char = (lit_utf8_byte_t) *source_str_iter_p++;
|
||||
|
||||
JERRY_ASSERT (str_buf_iter_p <= str_buf_p + source_str_size);
|
||||
JERRY_ASSERT (source_str_iter_p <= source_str_p + source_str_size);
|
||||
@@ -473,7 +465,7 @@ convert_string_to_token_transform_escape_seq (token_type tok_type, /**< type of
|
||||
{
|
||||
source_str_iter_p++;
|
||||
|
||||
const ecma_char_t escape_character = (ecma_char_t) *source_str_iter_p++;
|
||||
const lit_utf8_byte_t escape_character = (lit_utf8_byte_t) *source_str_iter_p++;
|
||||
JERRY_ASSERT (source_str_iter_p <= source_str_p + source_str_size);
|
||||
|
||||
if (isdigit (escape_character))
|
||||
@@ -505,9 +497,9 @@ convert_string_to_token_transform_escape_seq (token_type tok_type, /**< type of
|
||||
|
||||
for (uint32_t i = 0; i < hex_chars_num; i++)
|
||||
{
|
||||
const char nc = *source_str_iter_p++;
|
||||
const lit_utf8_byte_t byte = (lit_utf8_byte_t) *source_str_iter_p++;
|
||||
|
||||
if (!isxdigit (nc))
|
||||
if (!isxdigit (byte))
|
||||
{
|
||||
chars_are_hex = false;
|
||||
break;
|
||||
@@ -520,7 +512,7 @@ convert_string_to_token_transform_escape_seq (token_type tok_type, /**< type of
|
||||
JERRY_ASSERT ((char_code & 0xF000u) == 0);
|
||||
|
||||
char_code = (uint16_t) (char_code << 4u);
|
||||
char_code = (uint16_t) (char_code + ecma_char_hex_to_int ((ecma_char_t) nc));
|
||||
char_code = (uint16_t) (char_code + ecma_char_hex_to_int (byte));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -544,10 +536,10 @@ convert_string_to_token_transform_escape_seq (token_type tok_type, /**< type of
|
||||
{
|
||||
if (source_str_iter_p + 1 <= source_str_p + source_str_size)
|
||||
{
|
||||
char nc = *source_str_iter_p;
|
||||
lit_utf8_byte_t byte = *source_str_iter_p;
|
||||
|
||||
if (escape_character == '\x0D'
|
||||
&& nc == '\x0A')
|
||||
&& byte == '\x0A')
|
||||
{
|
||||
source_str_iter_p++;
|
||||
}
|
||||
@@ -561,7 +553,8 @@ convert_string_to_token_transform_escape_seq (token_type tok_type, /**< type of
|
||||
}
|
||||
}
|
||||
|
||||
*str_buf_iter_p++ = converted_char;
|
||||
TODO ("Support surrogate paris.")
|
||||
str_buf_iter_p += lit_code_unit_to_utf8 (converted_char, str_buf_iter_p);
|
||||
JERRY_ASSERT (str_buf_iter_p <= str_buf_p + source_str_size);
|
||||
|
||||
if (!islower (converted_char))
|
||||
@@ -580,7 +573,7 @@ convert_string_to_token_transform_escape_seq (token_type tok_type, /**< type of
|
||||
|
||||
if (is_correct_sequence)
|
||||
{
|
||||
ecma_length_t length = (ecma_length_t) (str_buf_iter_p - str_buf_p);
|
||||
lit_utf8_size_t length = (lit_utf8_size_t) (str_buf_iter_p - str_buf_p);
|
||||
ret = empty_token;
|
||||
|
||||
if (tok_type == TOK_NAME)
|
||||
@@ -683,7 +676,7 @@ parse_name (void)
|
||||
static token
|
||||
parse_number (void)
|
||||
{
|
||||
char c = LA (0);
|
||||
ecma_char_t c = LA (0);
|
||||
bool is_hex = false;
|
||||
bool is_fp = false;
|
||||
bool is_exp = false;
|
||||
@@ -736,11 +729,11 @@ parse_number (void)
|
||||
{
|
||||
if (!is_overflow)
|
||||
{
|
||||
res = (res << 4) + ecma_char_hex_to_int ((ecma_char_t) token_start[i]);
|
||||
res = (res << 4) + ecma_char_hex_to_int (token_start[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
fp_res = fp_res * 16 + (ecma_number_t) ecma_char_hex_to_int ((ecma_char_t) token_start[i]);
|
||||
fp_res = fp_res * 16 + (ecma_number_t) ecma_char_hex_to_int (token_start[i]);
|
||||
}
|
||||
|
||||
if (res > 255)
|
||||
@@ -830,15 +823,11 @@ parse_number (void)
|
||||
consume_char ();
|
||||
}
|
||||
|
||||
tok_length = (size_t) (buffer - token_start);;
|
||||
tok_length = (size_t) (buffer - token_start);
|
||||
if (is_fp || is_exp)
|
||||
{
|
||||
ecma_char_t *temp = (ecma_char_t*) jsp_mm_alloc ((size_t) (tok_length + 1) * sizeof (ecma_char_t));
|
||||
strncpy ((char *) temp, token_start, (size_t) (tok_length));
|
||||
temp[tok_length] = '\0';
|
||||
ecma_number_t res = ecma_zt_string_to_number (temp);
|
||||
ecma_number_t res = ecma_utf8_string_to_number (token_start, (jerry_api_size_t) tok_length);
|
||||
JERRY_ASSERT (!ecma_number_is_nan (res));
|
||||
jsp_mm_free (temp);
|
||||
known_token = convert_seen_num_to_token (res);
|
||||
token_start = NULL;
|
||||
return known_token;
|
||||
@@ -854,11 +843,11 @@ parse_number (void)
|
||||
{
|
||||
if (!is_overflow)
|
||||
{
|
||||
res = res * 8 + ecma_char_hex_to_int ((ecma_char_t) token_start[i]);
|
||||
res = res * 8 + ecma_char_hex_to_int (token_start[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
fp_res = fp_res * 8 + (ecma_number_t) ecma_char_hex_to_int ((ecma_char_t) token_start[i]);
|
||||
fp_res = fp_res * 8 + (ecma_number_t) ecma_char_hex_to_int (token_start[i]);
|
||||
}
|
||||
if (res > 255)
|
||||
{
|
||||
@@ -874,11 +863,11 @@ parse_number (void)
|
||||
{
|
||||
if (!is_overflow)
|
||||
{
|
||||
res = res * 10 + ecma_char_hex_to_int ((ecma_char_t) token_start[i]);
|
||||
res = res * 10 + ecma_char_hex_to_int (token_start[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
fp_res = fp_res * 10 + (ecma_number_t) ecma_char_hex_to_int ((ecma_char_t) token_start[i]);
|
||||
fp_res = fp_res * 10 + (ecma_number_t) ecma_char_hex_to_int (token_start[i]);
|
||||
}
|
||||
if (res > 255)
|
||||
{
|
||||
@@ -1029,7 +1018,7 @@ parse_regexp (void)
|
||||
}
|
||||
|
||||
result = convert_string_to_token (TOK_REGEXP,
|
||||
(const ecma_char_t*) token_start,
|
||||
(const lit_utf8_byte_t *) token_start,
|
||||
static_cast<ecma_length_t> (buffer - token_start));
|
||||
|
||||
token_start = NULL;
|
||||
@@ -1039,7 +1028,7 @@ parse_regexp (void)
|
||||
static void
|
||||
grobble_whitespaces (void)
|
||||
{
|
||||
char c = LA (0);
|
||||
ecma_char_t c = LA (0);
|
||||
|
||||
while ((isspace (c) && c != '\n'))
|
||||
{
|
||||
@@ -1049,7 +1038,7 @@ grobble_whitespaces (void)
|
||||
}
|
||||
|
||||
static void
|
||||
lexer_set_source (const char * source)
|
||||
lexer_set_source (const jerry_api_char_t * source)
|
||||
{
|
||||
buffer_start = source;
|
||||
buffer = buffer_start;
|
||||
@@ -1058,7 +1047,7 @@ lexer_set_source (const char * source)
|
||||
static bool
|
||||
replace_comment_by_newline (void)
|
||||
{
|
||||
char c = LA (0);
|
||||
ecma_char_t c = LA (0);
|
||||
bool multiline;
|
||||
bool was_newlines = false;
|
||||
|
||||
@@ -1106,7 +1095,7 @@ replace_comment_by_newline (void)
|
||||
static token
|
||||
lexer_next_token_private (void)
|
||||
{
|
||||
char c = LA (0);
|
||||
ecma_char_t c = LA (0);
|
||||
|
||||
JERRY_ASSERT (token_start == NULL);
|
||||
|
||||
@@ -1295,6 +1284,7 @@ lexer_next_token (void)
|
||||
|
||||
prev_token = sent_token;
|
||||
sent_token = lexer_next_token_private ();
|
||||
|
||||
if (sent_token.type == TOK_NEWLINE)
|
||||
{
|
||||
dump_current_line ();
|
||||
@@ -1331,7 +1321,7 @@ void
|
||||
lexer_locus_to_line_and_column (size_t locus, size_t *line, size_t *column)
|
||||
{
|
||||
JERRY_ASSERT (locus <= buffer_size);
|
||||
const char *buf;
|
||||
const jerry_api_char_t *buf;
|
||||
size_t l = 0, c = 0;
|
||||
for (buf = buffer_start; (size_t) (buf - buffer_start) < locus; buf++)
|
||||
{
|
||||
@@ -1358,7 +1348,7 @@ void
|
||||
lexer_dump_line (size_t line)
|
||||
{
|
||||
size_t l = 0;
|
||||
for (const char *buf = buffer_start; *buf != '\0'; buf++)
|
||||
for (const lit_utf8_byte_t *buf = buffer_start; *buf != '\0'; buf++)
|
||||
{
|
||||
if (l == line)
|
||||
{
|
||||
@@ -1538,11 +1528,11 @@ lexer_are_tokens_with_same_identifier (token id1, /**< identifier token (TOK_NAM
|
||||
} /* lexer_are_tokens_with_same_identifier */
|
||||
|
||||
/**
|
||||
* Intitialize lexer
|
||||
* Initialize lexer to start parsing of a new source
|
||||
*/
|
||||
void
|
||||
lexer_init (const char *source, /**< script source */
|
||||
size_t source_size /**< script source size in bytes */,
|
||||
lexer_init (const jerry_api_char_t *source, /**< script source */
|
||||
size_t source_size, /**< script source size in bytes */
|
||||
bool show_opcodes) /**< flag indicating if to dump opcodes */
|
||||
{
|
||||
empty_token.type = TOK_EMPTY;
|
||||
|
||||
@@ -170,10 +170,7 @@ typedef struct
|
||||
*/
|
||||
#define TOKEN_EMPTY_INITIALIZER {0, TOK_EMPTY, 0}
|
||||
|
||||
void lexer_init (const char *, size_t, bool);
|
||||
void lexer_init_source (const char *, size_t);
|
||||
|
||||
void lexer_free (void);
|
||||
void lexer_init (const jerry_api_char_t *, size_t, bool);
|
||||
|
||||
token lexer_next_token (void);
|
||||
void lexer_save_token (token);
|
||||
|
||||
@@ -230,27 +230,27 @@ name_to_native_call_id (operand obj)
|
||||
{
|
||||
return OPCODE_NATIVE_CALL__COUNT;
|
||||
}
|
||||
if (lit_literal_equal_type_zt (lit_get_literal_by_cp (obj.data.lit_id), (const ecma_char_t *) "LEDToggle"))
|
||||
if (lit_literal_equal_type_cstr (lit_get_literal_by_cp (obj.data.lit_id), "LEDToggle"))
|
||||
{
|
||||
return OPCODE_NATIVE_CALL_LED_TOGGLE;
|
||||
}
|
||||
else if (lit_literal_equal_type_zt (lit_get_literal_by_cp (obj.data.lit_id), (const ecma_char_t *) "LEDOn"))
|
||||
else if (lit_literal_equal_type_cstr (lit_get_literal_by_cp (obj.data.lit_id), "LEDOn"))
|
||||
{
|
||||
return OPCODE_NATIVE_CALL_LED_ON;
|
||||
}
|
||||
else if (lit_literal_equal_type_zt (lit_get_literal_by_cp (obj.data.lit_id), (const ecma_char_t *) "LEDOff"))
|
||||
else if (lit_literal_equal_type_cstr (lit_get_literal_by_cp (obj.data.lit_id), "LEDOff"))
|
||||
{
|
||||
return OPCODE_NATIVE_CALL_LED_OFF;
|
||||
}
|
||||
else if (lit_literal_equal_type_zt (lit_get_literal_by_cp (obj.data.lit_id), (const ecma_char_t *) "LEDOnce"))
|
||||
else if (lit_literal_equal_type_cstr (lit_get_literal_by_cp (obj.data.lit_id), "LEDOnce"))
|
||||
{
|
||||
return OPCODE_NATIVE_CALL_LED_ONCE;
|
||||
}
|
||||
else if (lit_literal_equal_type_zt (lit_get_literal_by_cp (obj.data.lit_id), (const ecma_char_t *) "wait"))
|
||||
else if (lit_literal_equal_type_cstr (lit_get_literal_by_cp (obj.data.lit_id), "wait"))
|
||||
{
|
||||
return OPCODE_NATIVE_CALL_WAIT;
|
||||
}
|
||||
else if (lit_literal_equal_type_zt (lit_get_literal_by_cp (obj.data.lit_id), (const ecma_char_t *) "print"))
|
||||
else if (lit_literal_equal_type_cstr (lit_get_literal_by_cp (obj.data.lit_id), "print"))
|
||||
{
|
||||
return OPCODE_NATIVE_CALL_PRINT;
|
||||
}
|
||||
@@ -803,10 +803,8 @@ dumper_is_eval_literal (operand obj) /**< byte-code operand */
|
||||
/*
|
||||
* FIXME: Switch to corresponding magic string
|
||||
*/
|
||||
const ecma_char_t *eval_string_p = (const ecma_char_t *) "eval";
|
||||
bool is_eval_lit = (obj.type == OPERAND_LITERAL
|
||||
&& lit_literal_equal_type_zt (lit_get_literal_by_cp (obj.data.lit_id),
|
||||
eval_string_p));
|
||||
&& lit_literal_equal_type_cstr (lit_get_literal_by_cp (obj.data.lit_id), "eval"));
|
||||
|
||||
return is_eval_lit;
|
||||
} /* dumper_is_eval_literal */
|
||||
|
||||
@@ -309,7 +309,8 @@ parse_property_name (void)
|
||||
case TOK_KEYWORD:
|
||||
{
|
||||
const char *s = lexer_keyword_to_string ((keyword) token_data ());
|
||||
literal_t lit = lit_find_or_create_literal_from_charset ((const ecma_char_t *) s, (ecma_length_t) strlen (s));
|
||||
literal_t lit = lit_find_or_create_literal_from_utf8_string ((const lit_utf8_byte_t *) s,
|
||||
(lit_utf8_size_t)strlen (s));
|
||||
return literal_operand (lit_cpointer_t::compress (lit));
|
||||
}
|
||||
default:
|
||||
@@ -345,11 +346,11 @@ parse_property_assignment (void)
|
||||
{
|
||||
bool is_setter;
|
||||
|
||||
if (lit_literal_equal_type_zt (lit_get_literal_by_cp (token_data_as_lit_cp ()), (const ecma_char_t *) "get"))
|
||||
if (lit_literal_equal_type_cstr (lit_get_literal_by_cp (token_data_as_lit_cp ()), "get"))
|
||||
{
|
||||
is_setter = false;
|
||||
}
|
||||
else if (lit_literal_equal_type_zt (lit_get_literal_by_cp (token_data_as_lit_cp ()), (const ecma_char_t *) "set"))
|
||||
else if (lit_literal_equal_type_cstr (lit_get_literal_by_cp (token_data_as_lit_cp ()), "set"))
|
||||
{
|
||||
is_setter = true;
|
||||
}
|
||||
@@ -874,7 +875,8 @@ parse_member_expression (operand *this_arg, operand *prop_gl)
|
||||
else if (token_is (TOK_KEYWORD))
|
||||
{
|
||||
const char *s = lexer_keyword_to_string ((keyword) token_data ());
|
||||
literal_t lit = lit_find_or_create_literal_from_charset ((const ecma_char_t *) s, (ecma_length_t) strlen (s));
|
||||
literal_t lit = lit_find_or_create_literal_from_utf8_string ((lit_utf8_byte_t *) s,
|
||||
(lit_utf8_size_t) strlen (s));
|
||||
if (lit == NULL)
|
||||
{
|
||||
EMIT_ERROR ("Expected identifier");
|
||||
@@ -2848,8 +2850,8 @@ preparse_scope (bool is_global)
|
||||
bool is_ref_eval_identifier = false;
|
||||
bool is_use_strict = false;
|
||||
|
||||
if (token_is (TOK_STRING) && lit_literal_equal_zt (lit_get_literal_by_cp (token_data_as_lit_cp ()),
|
||||
(const ecma_char_t *) "use strict"))
|
||||
if (token_is (TOK_STRING) && lit_literal_equal_type_cstr (lit_get_literal_by_cp (token_data_as_lit_cp ()),
|
||||
"use strict"))
|
||||
{
|
||||
scopes_tree_set_strict_mode (STACK_TOP (scopes), true);
|
||||
is_use_strict = true;
|
||||
@@ -2866,13 +2868,11 @@ preparse_scope (bool is_global)
|
||||
{
|
||||
if (token_is (TOK_NAME))
|
||||
{
|
||||
if (lit_literal_equal_type_zt (lit_get_literal_by_cp (token_data_as_lit_cp ()),
|
||||
(const ecma_char_t *) "arguments"))
|
||||
if (lit_literal_equal_type_cstr (lit_get_literal_by_cp (token_data_as_lit_cp ()), "arguments"))
|
||||
{
|
||||
is_ref_arguments_identifier = true;
|
||||
}
|
||||
else if (lit_literal_equal_type_zt (lit_get_literal_by_cp (token_data_as_lit_cp ()),
|
||||
(const ecma_char_t *) "eval"))
|
||||
else if (lit_literal_equal_type_cstr (lit_get_literal_by_cp (token_data_as_lit_cp ()), "eval"))
|
||||
{
|
||||
is_ref_eval_identifier = true;
|
||||
}
|
||||
@@ -3032,7 +3032,7 @@ parse_source_element_list (bool is_global) /**< flag indicating if we are parsin
|
||||
* false - otherwise.
|
||||
*/
|
||||
static bool
|
||||
parser_parse_program (const char *source_p, /**< source code buffer */
|
||||
parser_parse_program (const jerry_api_char_t *source_p, /**< source code buffer */
|
||||
size_t source_size, /**< source code size in bytes */
|
||||
bool in_function, /**< flag indicating if we are parsing body of a function */
|
||||
bool in_eval, /**< flag indicating if we are parsing body of eval code */
|
||||
@@ -3137,7 +3137,7 @@ parser_parse_program (const char *source_p, /**< source code buffer */
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
parser_parse_script (const char *source, /**< source script */
|
||||
parser_parse_script (const jerry_api_char_t *source, /**< source script */
|
||||
size_t source_size, /**< source script size in bytes */
|
||||
const opcode_t **opcodes_p) /**< out: generated byte-code array
|
||||
* (in case there were no syntax errors) */
|
||||
@@ -3152,7 +3152,7 @@ parser_parse_script (const char *source, /**< source script */
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
parser_parse_eval (const char *source, /**< string passed to eval() */
|
||||
parser_parse_eval (const jerry_api_char_t *source, /**< string passed to eval() */
|
||||
size_t source_size, /**< string size in bytes */
|
||||
bool is_strict, /**< flag, indicating whether eval is called
|
||||
* from strict code in direct mode */
|
||||
@@ -3173,7 +3173,9 @@ parser_parse_eval (const char *source, /**< string passed to eval() */
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
parser_parse_new_function (const char **params, /**< array of arguments of new Function (p1, p2, ..., pn, body) call */
|
||||
parser_parse_new_function (const jerry_api_char_t **params, /**< array of arguments of new Function (p1, p2, ..., pn,
|
||||
* body) call */
|
||||
const size_t *params_size, /**< sizes of arguments strings */
|
||||
size_t params_count, /**< total number of arguments passed to new Function (...) */
|
||||
const opcode_t **out_opcodes_p) /**< out: generated byte-code array
|
||||
* (in case there were no syntax errors) */
|
||||
@@ -3183,10 +3185,10 @@ parser_parse_new_function (const char **params, /**< array of arguments of new F
|
||||
for (size_t i = 0; i < params_count - 1; ++i)
|
||||
{
|
||||
FIXME ("check parameter's name for syntax errors");
|
||||
lit_find_or_create_literal_from_charset ((ecma_char_t *) params[i], (ecma_length_t) strlen (params[i]));
|
||||
lit_find_or_create_literal_from_utf8_string ((lit_utf8_byte_t *) params[i], (lit_utf8_size_t) params_size[i]);
|
||||
}
|
||||
return parser_parse_program (params[params_count - 1],
|
||||
strlen (params[params_count - 1]),
|
||||
params_size[params_count - 1],
|
||||
true,
|
||||
false,
|
||||
false,
|
||||
|
||||
@@ -19,8 +19,8 @@
|
||||
#include "jrt.h"
|
||||
|
||||
void parser_set_show_opcodes (bool);
|
||||
bool parser_parse_script (const char *, size_t, const opcode_t **);
|
||||
bool parser_parse_eval (const char *, size_t, bool, const opcode_t **);
|
||||
bool parser_parse_new_function (const char **, size_t, const opcode_t **);
|
||||
bool parser_parse_script (const jerry_api_char_t *, size_t, const opcode_t **);
|
||||
bool parser_parse_eval (const jerry_api_char_t *, size_t, bool, const opcode_t **);
|
||||
bool parser_parse_new_function (const jerry_api_char_t **, const size_t *, size_t, const opcode_t **);
|
||||
|
||||
#endif /* PARSER_H */
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#include "parser.h"
|
||||
#include "jrt-libc-includes.h"
|
||||
#include "ecma-helpers.h"
|
||||
#include "lit-magic-strings.h"
|
||||
|
||||
/**
|
||||
* SyntaxError longjmp label, used to finish parse upon a SyntaxError is raised
|
||||
@@ -177,10 +178,12 @@ emit_error_on_eval_and_arguments (operand op, locus loc __attr_unused___)
|
||||
{
|
||||
if (op.type == OPERAND_LITERAL)
|
||||
{
|
||||
if (lit_literal_equal_type_zt (lit_get_literal_by_cp (op.data.lit_id),
|
||||
lit_get_magic_string_zt (LIT_MAGIC_STRING_ARGUMENTS))
|
||||
|| lit_literal_equal_type_zt (lit_get_literal_by_cp (op.data.lit_id),
|
||||
lit_get_magic_string_zt (LIT_MAGIC_STRING_EVAL)))
|
||||
if (lit_literal_equal_type_utf8 (lit_get_literal_by_cp (op.data.lit_id),
|
||||
lit_get_magic_string_utf8 (LIT_MAGIC_STRING_ARGUMENTS),
|
||||
lit_get_magic_string_size (LIT_MAGIC_STRING_ARGUMENTS))
|
||||
|| lit_literal_equal_type_utf8 (lit_get_literal_by_cp (op.data.lit_id),
|
||||
lit_get_magic_string_utf8 (LIT_MAGIC_STRING_EVAL),
|
||||
lit_get_magic_string_size (LIT_MAGIC_STRING_EVAL)))
|
||||
{
|
||||
PARSE_ERROR ("'eval' and 'arguments' are not allowed here in strict mode", loc);
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "jrt-libc-includes.h"
|
||||
#include "mem-heap.h"
|
||||
#include "re-compiler.h"
|
||||
#include "re-parser.h"
|
||||
|
||||
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
|
||||
|
||||
@@ -382,7 +383,7 @@ parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
|
||||
|
||||
if (re_ctx_p->recursion_depth >= RE_COMPILE_RECURSION_LIMIT)
|
||||
{
|
||||
ret_value = ecma_raise_range_error ((const ecma_char_t *) "RegExp compiler recursion limit is exceeded.");
|
||||
ret_value = ecma_raise_range_error ("RegExp compiler recursion limit is exceeded.");
|
||||
return ret_value;
|
||||
}
|
||||
re_ctx_p->recursion_depth++;
|
||||
@@ -575,7 +576,7 @@ parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
|
||||
|
||||
if (expect_eof)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Unexpected end of paren.");
|
||||
ret_value = ecma_raise_syntax_error ("Unexpected end of paren.");
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -589,7 +590,7 @@ parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
|
||||
{
|
||||
if (!expect_eof)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Unexpected end of pattern.");
|
||||
ret_value = ecma_raise_syntax_error ("Unexpected end of pattern.");
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -601,7 +602,7 @@ parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
|
||||
}
|
||||
default:
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Unexpected RegExp token.");
|
||||
ret_value = ecma_raise_syntax_error ("Unexpected RegExp token.");
|
||||
return ret_value;
|
||||
}
|
||||
}
|
||||
@@ -619,8 +620,8 @@ parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
|
||||
*/
|
||||
ecma_completion_value_t
|
||||
re_compile_bytecode (ecma_property_t *bytecode_p, /**< bytecode */
|
||||
ecma_string_t *pattern_str_p, /**< pattern */
|
||||
uint8_t flags) /**< flags */
|
||||
ecma_string_t *pattern_str_p, /**< pattern */
|
||||
uint8_t flags) /**< flags */
|
||||
{
|
||||
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
|
||||
re_compiler_ctx_t re_ctx;
|
||||
@@ -636,10 +637,12 @@ re_compile_bytecode (ecma_property_t *bytecode_p, /**< bytecode */
|
||||
|
||||
re_ctx.bytecode_ctx_p = &bc_ctx;
|
||||
|
||||
ecma_length_t pattern_str_len = ecma_string_get_length (pattern_str_p);
|
||||
MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_len + 1, ecma_char_t);
|
||||
ssize_t zt_str_size = (ssize_t) (sizeof (ecma_char_t) * (pattern_str_len + 1));
|
||||
ecma_string_to_zt_string (pattern_str_p, pattern_start_p, zt_str_size);
|
||||
lit_utf8_size_t pattern_str_size = ecma_string_get_size (pattern_str_p);
|
||||
MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_size + 1, lit_utf8_byte_t);
|
||||
|
||||
ecma_string_to_utf8_string (pattern_str_p, pattern_start_p, (ssize_t) pattern_str_size);
|
||||
FIXME ("Update regexp compiler so that zero symbol is not needed.");
|
||||
pattern_start_p[pattern_str_size] = LIT_BYTE_NULL;
|
||||
|
||||
re_parser_ctx_t parser_ctx;
|
||||
parser_ctx.pattern_start_p = pattern_start_p;
|
||||
@@ -656,7 +659,7 @@ re_compile_bytecode (ecma_property_t *bytecode_p, /**< bytecode */
|
||||
/* 2. Check for invalid backreference */
|
||||
if (re_ctx.highest_backref >= re_ctx.num_of_captures)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Invalid backreference.\n");
|
||||
ret_value = ecma_raise_syntax_error ("Invalid backreference.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -25,13 +25,13 @@
|
||||
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
|
||||
|
||||
/* FIXME: change it when Unicode support is implemented */
|
||||
#define RE_LOOKUP(str_p, lookup) (ecma_zt_string_length (str_p) > lookup ? str_p[lookup] : '\0')
|
||||
#define RE_LOOKUP(str_p, lookup) (lit_zt_utf8_string_size (str_p) > (lookup) ? str_p[lookup] : '\0')
|
||||
|
||||
/* FIXME: change it when Unicode support is implemented */
|
||||
#define RE_ADVANCE(str_p, advance) do { str_p += advance; } while (0)
|
||||
|
||||
static ecma_char_t
|
||||
get_ecma_char (ecma_char_t** char_p)
|
||||
get_ecma_char (lit_utf8_byte_t **char_p)
|
||||
{
|
||||
/* FIXME: change to string iterator with unicode support, when it would be implemented */
|
||||
ecma_char_t ch = **char_p;
|
||||
@@ -46,7 +46,7 @@ get_ecma_char (ecma_char_t** char_p)
|
||||
* Returned value must be freed with ecma_free_completion_value
|
||||
*/
|
||||
static ecma_completion_value_t
|
||||
parse_re_iterator (ecma_char_t *pattern_p, /**< RegExp pattern */
|
||||
parse_re_iterator (lit_utf8_byte_t *pattern_p, /**< RegExp pattern */
|
||||
re_token_t *re_token_p, /**< output token */
|
||||
uint32_t lookup, /**< size of lookup */
|
||||
uint32_t *advance_p) /**< output length of current advance */
|
||||
@@ -120,7 +120,7 @@ parse_re_iterator (ecma_char_t *pattern_p, /**< RegExp pattern */
|
||||
{
|
||||
if (digits >= ECMA_NUMBER_MAX_DIGITS)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: too many digits.");
|
||||
ret_value = ecma_raise_syntax_error ("RegExp quantifier error: too many digits.");
|
||||
return ret_value;
|
||||
}
|
||||
digits++;
|
||||
@@ -130,14 +130,14 @@ parse_re_iterator (ecma_char_t *pattern_p, /**< RegExp pattern */
|
||||
{
|
||||
if (qmax != RE_ITERATOR_INFINITE)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: double comma.");
|
||||
ret_value = ecma_raise_syntax_error ("RegExp quantifier error: double comma.");
|
||||
return ret_value;
|
||||
}
|
||||
if ((RE_LOOKUP (pattern_p, lookup + *advance_p + 1)) == '}')
|
||||
{
|
||||
if (digits == 0)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: missing digits.");
|
||||
ret_value = ecma_raise_syntax_error ("RegExp quantifier error: missing digits.");
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
@@ -154,7 +154,7 @@ parse_re_iterator (ecma_char_t *pattern_p, /**< RegExp pattern */
|
||||
{
|
||||
if (digits == 0)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: missing digits.");
|
||||
ret_value = ecma_raise_syntax_error ("RegExp quantifier error: missing digits.");
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
@@ -174,7 +174,7 @@ parse_re_iterator (ecma_char_t *pattern_p, /**< RegExp pattern */
|
||||
}
|
||||
else
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: unknown char.");
|
||||
ret_value = ecma_raise_syntax_error ("RegExp quantifier error: unknown char.");
|
||||
return ret_value;
|
||||
}
|
||||
}
|
||||
@@ -206,7 +206,7 @@ parse_re_iterator (ecma_char_t *pattern_p, /**< RegExp pattern */
|
||||
|
||||
if (re_token_p->qmin > re_token_p->qmax)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: qmin > qmax.");
|
||||
ret_value = ecma_raise_syntax_error ("RegExp quantifier error: qmin > qmax.");
|
||||
}
|
||||
|
||||
return ret_value;
|
||||
@@ -218,13 +218,13 @@ parse_re_iterator (ecma_char_t *pattern_p, /**< RegExp pattern */
|
||||
static void
|
||||
re_count_num_of_groups (re_parser_ctx_t *parser_ctx_p) /**< RegExp parser context */
|
||||
{
|
||||
ecma_char_t *pattern_p = parser_ctx_p->pattern_start_p;
|
||||
lit_utf8_byte_t *pattern_p = parser_ctx_p->pattern_start_p;
|
||||
ecma_char_t ch1;
|
||||
int char_class_in = 0;
|
||||
parser_ctx_p->num_of_groups = 0;
|
||||
|
||||
ch1 = get_ecma_char (&pattern_p);
|
||||
while (ch1 != '\0')
|
||||
while (ch1 != ECMA_CHAR_NULL)
|
||||
{
|
||||
ecma_char_t ch0 = ch1;
|
||||
ch1 = get_ecma_char (&pattern_p);
|
||||
@@ -275,7 +275,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
|
||||
re_token_t *out_token_p) /**< output token */
|
||||
{
|
||||
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
|
||||
ecma_char_t **pattern_p = &(parser_ctx_p->current_char_p);
|
||||
lit_utf8_byte_t **pattern_p = &(parser_ctx_p->current_char_p);
|
||||
|
||||
out_token_p->qmax = out_token_p->qmin = 1;
|
||||
ecma_char_t start = RE_CHAR_UNDEF;
|
||||
@@ -338,7 +338,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
|
||||
}
|
||||
else
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid regexp control escape");
|
||||
ret_value = ecma_raise_syntax_error ("invalid regexp control escape");
|
||||
return ret_value;
|
||||
}
|
||||
}
|
||||
@@ -433,7 +433,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
|
||||
{
|
||||
if (is_range)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid character class range");
|
||||
ret_value = ecma_raise_syntax_error ("invalid character class range");
|
||||
return ret_value;
|
||||
}
|
||||
else
|
||||
@@ -451,7 +451,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
|
||||
{
|
||||
if (start > ch)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid character class range");
|
||||
ret_value = ecma_raise_syntax_error ("invalid character class range");
|
||||
return ret_value;
|
||||
}
|
||||
else
|
||||
@@ -500,8 +500,8 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
|
||||
{
|
||||
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
|
||||
uint32_t advance = 0;
|
||||
ecma_char_t ch0 = *(parser_ctx_p->current_char_p);
|
||||
|
||||
ecma_char_t ch0 = *(parser_ctx_p->current_char_p);
|
||||
switch (ch0)
|
||||
{
|
||||
case '|':
|
||||
@@ -580,7 +580,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
|
||||
}
|
||||
else
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid regexp control escape");
|
||||
ret_value = ecma_raise_syntax_error ("invalid regexp control escape");
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -640,7 +640,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
|
||||
{
|
||||
if (isdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 2)))
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp escape pattern error.");
|
||||
ret_value = ecma_raise_syntax_error ("RegExp escape pattern error.");
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -664,13 +664,13 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
|
||||
{
|
||||
if (index >= RE_MAX_RE_DECESC_DIGITS)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ((const ecma_char_t *)
|
||||
"RegExp escape pattern error: decimal escape too long.");
|
||||
ret_value = ecma_raise_syntax_error ("RegExp escape pattern error: decimal escape too long.");
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
advance++;
|
||||
ecma_char_t digit = RE_LOOKUP (parser_ctx_p->current_char_p, advance);
|
||||
ecma_char_t digit = RE_LOOKUP (parser_ctx_p->current_char_p,
|
||||
advance);
|
||||
if (!isdigit (digit))
|
||||
{
|
||||
break;
|
||||
|
||||
@@ -71,8 +71,8 @@ typedef struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ecma_char_t *pattern_start_p;
|
||||
ecma_char_t *current_char_p;
|
||||
lit_utf8_byte_t *pattern_start_p;
|
||||
lit_utf8_byte_t *current_char_p;
|
||||
int num_of_groups;
|
||||
uint32_t num_of_classes;
|
||||
} re_parser_ctx_t;
|
||||
|
||||
Reference in New Issue
Block a user