Enable regular expressions.
- add regular expressions support to JS parser and interpreter; - add tests for regular expressions. JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com
This commit is contained in:
@@ -15,9 +15,14 @@
|
||||
*/
|
||||
|
||||
#include "ecma-helpers.h"
|
||||
#include "ecma-exceptions.h"
|
||||
#include "jrt-libc-includes.h"
|
||||
#include "jsp-mm.h"
|
||||
#include "lexer.h"
|
||||
#include "mem-allocator.h"
|
||||
#include "opcodes.h"
|
||||
#include "parser.h"
|
||||
#include "stack.h"
|
||||
#include "syntax-errors.h"
|
||||
|
||||
static token saved_token, prev_token, sent_token, empty_token;
|
||||
@@ -961,6 +966,76 @@ parse_string (void)
|
||||
return ret;
|
||||
} /* parse_string */
|
||||
|
||||
/**
|
||||
* Parse string literal (ECMA-262 v5, 7.8.5)
|
||||
*/
|
||||
static token
|
||||
parse_regexp (void)
|
||||
{
|
||||
token result;
|
||||
bool is_char_class = false;
|
||||
|
||||
/* Eat up '/' */
|
||||
JERRY_ASSERT ((ecma_char_t) LA (0) == '/');
|
||||
consume_char ();
|
||||
new_token ();
|
||||
|
||||
while (true)
|
||||
{
|
||||
ecma_char_t c = (ecma_char_t) LA (0);
|
||||
|
||||
if (c == '\0')
|
||||
{
|
||||
PARSE_ERROR ("Unclosed string", token_start - buffer_start);
|
||||
}
|
||||
else if (c == '\n')
|
||||
{
|
||||
PARSE_ERROR ("RegExp literal shall not contain newline character", token_start - buffer_start);
|
||||
}
|
||||
else if (c == '\\')
|
||||
{
|
||||
consume_char ();
|
||||
}
|
||||
else if (c == '[')
|
||||
{
|
||||
is_char_class = true;
|
||||
}
|
||||
else if (c == ']')
|
||||
{
|
||||
is_char_class = false;
|
||||
}
|
||||
else if (c == '/' && !is_char_class)
|
||||
{
|
||||
/* Eat up '/' */
|
||||
consume_char ();
|
||||
break;
|
||||
}
|
||||
|
||||
consume_char ();
|
||||
}
|
||||
|
||||
/* Try to parse RegExp flags */
|
||||
while (true)
|
||||
{
|
||||
ecma_char_t c = (ecma_char_t) LA (0);
|
||||
|
||||
if (c == '\0'
|
||||
|| !ecma_char_is_word_char (c)
|
||||
|| ecma_char_is_line_terminator (c))
|
||||
{
|
||||
break;
|
||||
}
|
||||
consume_char ();
|
||||
}
|
||||
|
||||
result = convert_string_to_token (TOK_REGEXP,
|
||||
(const ecma_char_t*) token_start,
|
||||
static_cast<ecma_length_t> (buffer - token_start));
|
||||
|
||||
token_start = NULL;
|
||||
return result;
|
||||
} /* parse_regexp */
|
||||
|
||||
static void
|
||||
grobble_whitespaces (void)
|
||||
{
|
||||
@@ -1084,10 +1159,27 @@ lexer_next_token_private (void)
|
||||
}
|
||||
}
|
||||
|
||||
if (c == '/' && LA (1) == '/')
|
||||
|
||||
if (c == '/')
|
||||
{
|
||||
replace_comment_by_newline ();
|
||||
return lexer_next_token_private ();
|
||||
if (LA (1) == '/')
|
||||
{
|
||||
replace_comment_by_newline ();
|
||||
return lexer_next_token_private ();
|
||||
}
|
||||
else if (!(sent_token.type == TOK_NAME
|
||||
|| sent_token.type == TOK_NULL
|
||||
|| sent_token.type == TOK_BOOL
|
||||
|| sent_token.type == TOK_CLOSE_BRACE
|
||||
|| sent_token.type == TOK_CLOSE_SQUARE
|
||||
|| sent_token.type == TOK_CLOSE_PAREN
|
||||
|| sent_token.type == TOK_SMALL_INT
|
||||
|| sent_token.type == TOK_NUMBER
|
||||
|| sent_token.type == TOK_STRING
|
||||
|| sent_token.type == TOK_REGEXP))
|
||||
{
|
||||
return parse_regexp ();
|
||||
}
|
||||
}
|
||||
|
||||
switch (c)
|
||||
@@ -1203,7 +1295,6 @@ lexer_next_token (void)
|
||||
|
||||
prev_token = sent_token;
|
||||
sent_token = lexer_next_token_private ();
|
||||
|
||||
if (sent_token.type == TOK_NEWLINE)
|
||||
{
|
||||
dump_current_line ();
|
||||
|
||||
@@ -99,7 +99,7 @@ typedef enum __attr_packed___
|
||||
TOK_OPEN_PAREN, // (
|
||||
TOK_CLOSE_PAREN, //)
|
||||
TOK_OPEN_SQUARE, // [
|
||||
TOK_CLOSE_SQUARE, // [
|
||||
TOK_CLOSE_SQUARE, // ]
|
||||
|
||||
TOK_DOT, // .
|
||||
TOK_SEMICOLON, // ;
|
||||
@@ -152,6 +152,7 @@ typedef enum __attr_packed___
|
||||
TOK_DIV, // /
|
||||
TOK_DIV_EQ, // /=
|
||||
TOK_EMPTY,
|
||||
TOK_REGEXP, // RegularExpressionLiteral (/.../gim)
|
||||
} token_type;
|
||||
|
||||
typedef size_t locus;
|
||||
@@ -170,6 +171,9 @@ typedef struct
|
||||
#define TOKEN_EMPTY_INITIALIZER {0, TOK_EMPTY, 0}
|
||||
|
||||
void lexer_init (const char *, size_t, bool);
|
||||
void lexer_init_source (const char *, size_t);
|
||||
|
||||
void lexer_free (void);
|
||||
|
||||
token lexer_next_token (void);
|
||||
void lexer_save_token (token);
|
||||
|
||||
@@ -843,6 +843,34 @@ dump_number_assignment_res (lit_cpointer_t lit_id)
|
||||
return op;
|
||||
}
|
||||
|
||||
void
|
||||
dump_regexp_assignment (operand op, lit_cpointer_t lit_id)
|
||||
{
|
||||
switch (op.type)
|
||||
{
|
||||
case OPERAND_LITERAL:
|
||||
{
|
||||
const opcode_t opcode = getop_assignment (LITERAL_TO_REWRITE, OPCODE_ARG_TYPE_REGEXP, LITERAL_TO_REWRITE);
|
||||
serializer_dump_op_meta (create_op_meta_101 (opcode, op.data.lit_id, lit_id));
|
||||
break;
|
||||
}
|
||||
case OPERAND_TMP:
|
||||
{
|
||||
const opcode_t opcode = getop_assignment (op.data.uid, OPCODE_ARG_TYPE_REGEXP, LITERAL_TO_REWRITE);
|
||||
serializer_dump_op_meta (create_op_meta_001 (opcode, lit_id));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Dump an assignment of a regular expression literal to a new operand
 * obtained from tmp_operand ().
 *
 * @param lit_id literal of the regular expression
 * @return the operand the literal was assigned to
 */
operand
dump_regexp_assignment_res (lit_cpointer_t lit_id)
{
  operand result_op = tmp_operand ();

  dump_regexp_assignment (result_op, lit_id);

  return result_op;
}
|
||||
|
||||
void
|
||||
dump_smallint_assignment (operand op, idx_t uid)
|
||||
{
|
||||
|
||||
@@ -69,6 +69,8 @@ void dump_string_assignment (operand, lit_cpointer_t);
|
||||
operand dump_string_assignment_res (lit_cpointer_t);
|
||||
void dump_number_assignment (operand, lit_cpointer_t);
|
||||
operand dump_number_assignment_res (lit_cpointer_t);
|
||||
void dump_regexp_assignment (operand, lit_cpointer_t);
|
||||
operand dump_regexp_assignment_res (lit_cpointer_t);
|
||||
void dump_smallint_assignment (operand, idx_t);
|
||||
operand dump_smallint_assignment_res (idx_t);
|
||||
void dump_undefined_assignment (operand);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
/* Copyright 2014-2015 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2015 University of Szeged.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@@ -22,6 +23,7 @@
|
||||
#include "opcodes-dumper.h"
|
||||
#include "opcodes-native-call.h"
|
||||
#include "parser.h"
|
||||
#include "re-parser.h"
|
||||
#include "scopes-tree.h"
|
||||
#include "serializer.h"
|
||||
#include "stack.h"
|
||||
@@ -745,6 +747,7 @@ parse_object_literal (void)
|
||||
| 'false'
|
||||
| number_literal
|
||||
| string_literal
|
||||
| regexp_literal
|
||||
; */
|
||||
static operand
|
||||
parse_literal (void)
|
||||
@@ -753,6 +756,7 @@ parse_literal (void)
|
||||
{
|
||||
case TOK_NUMBER: return dump_number_assignment_res (token_data_as_lit_cp ());
|
||||
case TOK_STRING: return dump_string_assignment_res (token_data_as_lit_cp ());
|
||||
case TOK_REGEXP: return dump_regexp_assignment_res (token_data_as_lit_cp ());
|
||||
case TOK_NULL: return dump_null_assignment_res ();
|
||||
case TOK_BOOL: return dump_boolean_assignment_res ((bool) token_data ());
|
||||
case TOK_SMALL_INT: return dump_smallint_assignment_res ((idx_t) token_data ());
|
||||
@@ -786,6 +790,7 @@ parse_primary_expression (void)
|
||||
case TOK_BOOL:
|
||||
case TOK_SMALL_INT:
|
||||
case TOK_NUMBER:
|
||||
case TOK_REGEXP:
|
||||
case TOK_STRING: return parse_literal ();
|
||||
case TOK_NAME: return literal_operand (token_data_as_lit_cp ());
|
||||
case TOK_OPEN_SQUARE: return parse_array_literal ();
|
||||
|
||||
@@ -291,6 +291,7 @@ generate_opcode (scopes_tree tree, opcode_counter_t opc_index, lit_id_hash_table
|
||||
}
|
||||
case OPCODE_ARG_TYPE_NUMBER:
|
||||
case OPCODE_ARG_TYPE_NUMBER_NEGATE:
|
||||
case OPCODE_ARG_TYPE_REGEXP:
|
||||
case OPCODE_ARG_TYPE_STRING:
|
||||
case OPCODE_ARG_TYPE_VARIABLE:
|
||||
{
|
||||
@@ -430,6 +431,7 @@ count_new_literals_in_opcode (scopes_tree tree, opcode_counter_t opc_index)
|
||||
}
|
||||
case OPCODE_ARG_TYPE_NUMBER:
|
||||
case OPCODE_ARG_TYPE_NUMBER_NEGATE:
|
||||
case OPCODE_ARG_TYPE_REGEXP:
|
||||
case OPCODE_ARG_TYPE_STRING:
|
||||
case OPCODE_ARG_TYPE_VARIABLE:
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user