Enable regular expressions.

- add regular expressions support to JS parser and interpreter;
- add tests for regular expressions.

JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com
JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com
This commit is contained in:
László Langó
2015-06-26 00:03:20 +03:00
parent f992f5d92e
commit e027b4d65d
19 changed files with 1087 additions and 6 deletions
+95 -4
View File
@@ -15,9 +15,14 @@
*/
#include "ecma-helpers.h"
#include "ecma-exceptions.h"
#include "jrt-libc-includes.h"
#include "jsp-mm.h"
#include "lexer.h"
#include "mem-allocator.h"
#include "opcodes.h"
#include "parser.h"
#include "stack.h"
#include "syntax-errors.h"
static token saved_token, prev_token, sent_token, empty_token;
@@ -961,6 +966,76 @@ parse_string (void)
return ret;
} /* parse_string */
/**
 * Parse regular expression literal (ECMA-262 v5, 7.8.5)
 *
 * Expects the lookahead character to be the opening '/'. Scans the body up to
 * the first '/' that is neither escaped by a backslash nor located inside
 * a character class ('[' ... ']'), then scans the trailing flag characters.
 *
 * Raises a parse error if the input ends or a line terminator is met before
 * the closing '/', including directly after a backslash.
 *
 * @return token of TOK_REGEXP type built from the characters between
 *         token_start and the end of the flags
 */
static token
parse_regexp (void)
{
  token result;
  bool is_char_class = false;

  /* Eat up '/' */
  JERRY_ASSERT ((ecma_char_t) LA (0) == '/');
  consume_char ();
  new_token ();

  while (true)
  {
    ecma_char_t c = (ecma_char_t) LA (0);

    if (c == '\0')
    {
      PARSE_ERROR ("Unclosed regular expression literal", token_start - buffer_start);
    }
    else if (ecma_char_is_line_terminator (c))
    {
      PARSE_ERROR ("RegExp literal shall not contain newline character", token_start - buffer_start);
    }
    else if (c == '\\')
    {
      /* Skip the backslash itself; the escaped character is consumed at the end of the iteration. */
      consume_char ();

      /*
       * The escaped character has to be validated as well: blindly skipping it would step over
       * the terminating '\0' of the source or accept a line break inside of the literal.
       */
      ecma_char_t escaped = (ecma_char_t) LA (0);

      if (escaped == '\0')
      {
        PARSE_ERROR ("Unclosed regular expression literal", token_start - buffer_start);
      }
      else if (ecma_char_is_line_terminator (escaped))
      {
        PARSE_ERROR ("RegExp literal shall not contain newline character", token_start - buffer_start);
      }
    }
    else if (c == '[')
    {
      is_char_class = true;
    }
    else if (c == ']')
    {
      is_char_class = false;
    }
    else if (c == '/' && !is_char_class)
    {
      /* Eat up '/' */
      consume_char ();
      break;
    }

    consume_char ();
  }

  /* Try to parse RegExp flags */
  while (true)
  {
    ecma_char_t c = (ecma_char_t) LA (0);

    if (c == '\0'
        || !ecma_char_is_word_char (c)
        || ecma_char_is_line_terminator (c))
    {
      break;
    }

    consume_char ();
  }

  result = convert_string_to_token (TOK_REGEXP,
                                    (const ecma_char_t*) token_start,
                                    static_cast<ecma_length_t> (buffer - token_start));

  token_start = NULL;
  return result;
} /* parse_regexp */
static void
grobble_whitespaces (void)
{
@@ -1084,10 +1159,27 @@ lexer_next_token_private (void)
}
}
if (c == '/' && LA (1) == '/')
if (c == '/')
{
replace_comment_by_newline ();
return lexer_next_token_private ();
if (LA (1) == '/')
{
replace_comment_by_newline ();
return lexer_next_token_private ();
}
else if (!(sent_token.type == TOK_NAME
|| sent_token.type == TOK_NULL
|| sent_token.type == TOK_BOOL
|| sent_token.type == TOK_CLOSE_BRACE
|| sent_token.type == TOK_CLOSE_SQUARE
|| sent_token.type == TOK_CLOSE_PAREN
|| sent_token.type == TOK_SMALL_INT
|| sent_token.type == TOK_NUMBER
|| sent_token.type == TOK_STRING
|| sent_token.type == TOK_REGEXP))
{
return parse_regexp ();
}
}
switch (c)
@@ -1203,7 +1295,6 @@ lexer_next_token (void)
prev_token = sent_token;
sent_token = lexer_next_token_private ();
if (sent_token.type == TOK_NEWLINE)
{
dump_current_line ();
+5 -1
View File
@@ -99,7 +99,7 @@ typedef enum __attr_packed___
TOK_OPEN_PAREN, // (
TOK_CLOSE_PAREN, //)
TOK_OPEN_SQUARE, // [
TOK_CLOSE_SQUARE, // [
TOK_CLOSE_SQUARE, // ]
TOK_DOT, // .
TOK_SEMICOLON, // ;
@@ -152,6 +152,7 @@ typedef enum __attr_packed___
TOK_DIV, // /
TOK_DIV_EQ, // /=
TOK_EMPTY,
TOK_REGEXP, // RegularExpressionLiteral (/.../gim)
} token_type;
typedef size_t locus;
@@ -170,6 +171,9 @@ typedef struct
#define TOKEN_EMPTY_INITIALIZER {0, TOK_EMPTY, 0}
void lexer_init (const char *, size_t, bool);
void lexer_init_source (const char *, size_t);
void lexer_free (void);
token lexer_next_token (void);
void lexer_save_token (token);
+28
View File
@@ -843,6 +843,34 @@ dump_number_assignment_res (lit_cpointer_t lit_id)
return op;
}
/**
 * Dump an assignment opcode whose right-hand side is a regexp literal.
 *
 * The destination is selected by the operand type: a literal operand is
 * dumped together with its own literal id, a temporary operand is addressed
 * through its uid. Operands of any other type produce no output.
 */
void
dump_regexp_assignment (operand op, lit_cpointer_t lit_id)
{
  if (op.type == OPERAND_LITERAL)
  {
    /* Both the destination and the value slots are filled with LITERAL_TO_REWRITE here. */
    const opcode_t assignment_opcode = getop_assignment (LITERAL_TO_REWRITE,
                                                         OPCODE_ARG_TYPE_REGEXP,
                                                         LITERAL_TO_REWRITE);

    serializer_dump_op_meta (create_op_meta_101 (assignment_opcode, op.data.lit_id, lit_id));
  }
  else if (op.type == OPERAND_TMP)
  {
    /* Only the value slot is filled with LITERAL_TO_REWRITE; the destination is the operand's uid. */
    const opcode_t assignment_opcode = getop_assignment (op.data.uid,
                                                         OPCODE_ARG_TYPE_REGEXP,
                                                         LITERAL_TO_REWRITE);

    serializer_dump_op_meta (create_op_meta_001 (assignment_opcode, lit_id));
  }
}
/**
 * Dump an assignment of a regexp literal to an operand obtained from tmp_operand ().
 *
 * @return the operand that was used as the assignment's destination
 */
operand
dump_regexp_assignment_res (lit_cpointer_t lit_id)
{
  operand destination = tmp_operand ();

  dump_regexp_assignment (destination, lit_id);

  return destination;
}
void
dump_smallint_assignment (operand op, idx_t uid)
{
+2
View File
@@ -69,6 +69,8 @@ void dump_string_assignment (operand, lit_cpointer_t);
operand dump_string_assignment_res (lit_cpointer_t);
void dump_number_assignment (operand, lit_cpointer_t);
operand dump_number_assignment_res (lit_cpointer_t);
void dump_regexp_assignment (operand, lit_cpointer_t);
operand dump_regexp_assignment_res (lit_cpointer_t);
void dump_smallint_assignment (operand, idx_t);
operand dump_smallint_assignment_res (idx_t);
void dump_undefined_assignment (operand);
+5
View File
@@ -1,4 +1,5 @@
/* Copyright 2014-2015 Samsung Electronics Co., Ltd.
* Copyright 2015 University of Szeged.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -22,6 +23,7 @@
#include "opcodes-dumper.h"
#include "opcodes-native-call.h"
#include "parser.h"
#include "re-parser.h"
#include "scopes-tree.h"
#include "serializer.h"
#include "stack.h"
@@ -745,6 +747,7 @@ parse_object_literal (void)
| 'false'
| number_literal
| string_literal
| regexp_literal
; */
static operand
parse_literal (void)
@@ -753,6 +756,7 @@ parse_literal (void)
{
case TOK_NUMBER: return dump_number_assignment_res (token_data_as_lit_cp ());
case TOK_STRING: return dump_string_assignment_res (token_data_as_lit_cp ());
case TOK_REGEXP: return dump_regexp_assignment_res (token_data_as_lit_cp ());
case TOK_NULL: return dump_null_assignment_res ();
case TOK_BOOL: return dump_boolean_assignment_res ((bool) token_data ());
case TOK_SMALL_INT: return dump_smallint_assignment_res ((idx_t) token_data ());
@@ -786,6 +790,7 @@ parse_primary_expression (void)
case TOK_BOOL:
case TOK_SMALL_INT:
case TOK_NUMBER:
case TOK_REGEXP:
case TOK_STRING: return parse_literal ();
case TOK_NAME: return literal_operand (token_data_as_lit_cp ());
case TOK_OPEN_SQUARE: return parse_array_literal ();
+2
View File
@@ -291,6 +291,7 @@ generate_opcode (scopes_tree tree, opcode_counter_t opc_index, lit_id_hash_table
}
case OPCODE_ARG_TYPE_NUMBER:
case OPCODE_ARG_TYPE_NUMBER_NEGATE:
case OPCODE_ARG_TYPE_REGEXP:
case OPCODE_ARG_TYPE_STRING:
case OPCODE_ARG_TYPE_VARIABLE:
{
@@ -430,6 +431,7 @@ count_new_literals_in_opcode (scopes_tree tree, opcode_counter_t opc_index)
}
case OPCODE_ARG_TYPE_NUMBER:
case OPCODE_ARG_TYPE_NUMBER_NEGATE:
case OPCODE_ARG_TYPE_REGEXP:
case OPCODE_ARG_TYPE_STRING:
case OPCODE_ARG_TYPE_VARIABLE:
{