add parser

This commit is contained in:
e.gavrin
2014-07-01 17:57:11 +04:00
parent 9a63527b91
commit 2e64056390
10 changed files with 5422 additions and 0 deletions
+3
View File
@@ -11,3 +11,6 @@
*.lai
*.la
*.a
# Random Trash
*~
+62
View File
@@ -0,0 +1,62 @@
# Copyright 2014 Samsung Electronics Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Name of the executable produced by `make all'.
TARGET ?= jerry

# Directory for object files. Only OBJS and the `clean' rule refer to it;
# the `all' rule compiles straight from the sources.
OBJ_DIR = obj

# Every C file directly under ./src, sorted.
SOURCES = \
  $(sort \
  $(wildcard ./src/*.c))

INCLUDES = -I src

# Object names derived from SOURCES (currently unused by any rule).
OBJS = $(sort \
  $(patsubst %.c,./$(OBJ_DIR)/%.o,$(notdir $(SOURCES))))

# Toolchain. Set CROSS_COMPILE to a prefix such as arm-none-eabi- to
# cross-compile.
#CROSS_COMPILE ?= arm-none-eabi-
CC = $(CROSS_COMPILE)gcc
LD = $(CROSS_COMPILE)ld
OBJDUMP = $(CROSS_COMPILE)objdump
OBJCOPY = $(CROSS_COMPILE)objcopy
SIZE = $(CROSS_COMPILE)size

CFLAGS ?= -Wall -std=c99 -Wextra -Wpedantic -fdiagnostics-color=always
#CFLAGS += -Werror
#CFLAGS += -Wformat-security -Wformat-nonliteral -Winit-self
#CFLAGS += -Wconversion -Wsign-conversion -Wlogical-op
#CFLAGS += -Wstrict-prototypes -Wmissing-prototypes
#CFLAGS += -Winline -Wstack-protector
#CFLAGS += -mlittle-endian -mcpu=cortex-m4 -march=armv7e-m -mthumb
#CFLAGS += -mfpu=fpv4-sp-d16 -mfloat-abi=hard
#CFLAGS += -ffunction-sections -fdata-sections
#DEBUG_OPTIONS = -fsanitize=address -g3 -O0
#RELEASE_OPTIONS = -Os

HEADERS = error.h lexer.h pretty-printer.h parser.h
#OBJS = lexer.o pretty-printer.o parser.o main.o

# -DDEBUG enables the lexer's debug log (see lexer.c).
DEFINES = -DDEBUG

# None of these targets names a real file; without .PHONY a file or
# directory called `all', `clean' or `test' would make the rule a no-op.
.PHONY: all clean test

# Compile and link all sources in a single compiler invocation.
all:
	$(CC) $(INCLUDES) $(CFLAGS) $(DEBUG_OPTIONS) $(DEFINES) $(SOURCES) \
	-o $(TARGET)

clean:
	rm -f $(OBJ_DIR)/*.o *.o $(TARGET)

test:
	./tools/jerry_test.sh
+46
View File
@@ -0,0 +1,46 @@
/* Copyright 2014 Samsung Electronics Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ERROR_H
#define ERROR_H
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
/* Implemented in lexer.c: prints the lexer's current input buffer to stdout.
   Declared here so that fatal () can call it without including lexer.h. */
void lexer_dump_buffer_state ();
/* Marks a code path that must never execute. Expands to assert (0), so it
   aborts when assertions are enabled and does nothing under NDEBUG. */
#define unreachable() assert(0)
/* Report an unrecoverable error and terminate the program.
   Prints "FATAL: <code>" to stdout, dumps the lexer buffer, and then stops:
   via the failed assertion when assertions are enabled, otherwise via
   exit (CODE). CODE is one of the ERR_* values below. */
static inline void
fatal (int code)
{
printf ("FATAL: %d\n", code);
lexer_dump_buffer_state ();
/* With assertions enabled the program aborts here, so exit () below is
   only reached in NDEBUG builds. */
unreachable ();
exit (code);
}
/* Error codes passed to fatal (). */
/* Input file could not be opened or read. */
#define ERR_IO (-1)
/* A single token fills the whole lexer buffer. */
#define ERR_BUFFER_SIZE (-2)
/* More than one input file, or conflicting dump options, on the command line. */
#define ERR_SEVERAL_FILES (-3)
/* No input file on the command line. */
#define ERR_NO_FILES (-4)
/* The lexer met a character that cannot start any token. */
#define ERR_NON_CHAR (-5)
/* Unterminated string literal or multi-line comment. */
#define ERR_UNCLOSED (-6)
/* Malformed numeric literal. */
#define ERR_INT_LITERAL (-7)
/* Malformed string literal (raw newline or unsupported escape sequence). */
#define ERR_STRING (-8)
/* Presumably raised by the parser; not used in the code visible here. */
#define ERR_PARSER (-9)
#endif /* ERROR_H */
+751
View File
@@ -0,0 +1,751 @@
/* Copyright 2014 Samsung Electronics Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "error.h"
#include "lexer.h"
static token saved_token;
#ifdef DEBUG
FILE *lexer_debug_log;
#endif
/* Classify the NUL-terminated identifier TOK.
   Returns the matching KW_* constant for a keyword, KW_RESERVED for a
   Future Reserved Word, and KW_NONE for anything else.  TOK must not be
   NULL.  */
static keyword
decode_keyword (const char *tok)
{
  /* Keywords that have a dedicated KW_* constant.  */
  static const struct
  {
    const char *spelling;
    keyword kw;
  }
  keywords[] =
  {
    { "break", KW_BREAK },
    { "case", KW_CASE },
    { "catch", KW_CATCH },
    { "continue", KW_CONTINUE },
    { "debugger", KW_DEBUGGER },
    { "default", KW_DEFAULT },
    { "delete", KW_DELETE },
    { "do", KW_DO },
    { "else", KW_ELSE },
    { "finally", KW_FINALLY },
    { "for", KW_FOR },
    { "function", KW_FUNCTION },
    { "if", KW_IF },
    { "in", KW_IN },
    { "instanceof", KW_INSTANCEOF },
    { "new", KW_NEW },
    { "return", KW_RETURN },
    { "switch", KW_SWITCH },
    { "this", KW_THIS },
    { "throw", KW_THROW },
    { "try", KW_TRY },
    { "typeof", KW_TYPEOF },
    { "var", KW_VAR },
    { "void", KW_VOID },
    { "while", KW_WHILE },
    { "with", KW_WITH }
  };

  /* Future Reserved Words; all of them map to KW_RESERVED.  */
  static const char *const reserved[] =
  {
    "class", "const", "enum", "export", "extends", "import", "super",
    "implements", "interface", "let", "package", "private", "protected",
    "public", "static", "yield"
  };

  assert (tok);

  for (size_t k = 0; k < sizeof keywords / sizeof keywords[0]; k++)
    if (strcmp (keywords[k].spelling, tok) == 0)
      return keywords[k].kw;

  for (size_t k = 0; k < sizeof reserved / sizeof reserved[0]; k++)
    if (strcmp (reserved[k], tok) == 0)
      return KW_RESERVED;

  return KW_NONE;
}
/* Input stream the lexer reads from; set by lexer_set_file ().  */
static FILE *file;

/* The lexer sees the input through a window of BUFFER_SIZE bytes:
   BUFFER_START is the start of the allocation, BUFFER is the current read
   position inside it, and TOKEN_START is the first character of the token
   being collected (NULL when no token is in progress).  */
static char *buffer = NULL;
static char *buffer_start = NULL;
static char *token_start = NULL;

#define BUFFER_SIZE 1024

/* Read up to ROOM bytes from FILE into DEST and zero-fill whatever could
   not be read, so that end of input always shows up as '\0' characters.
   Calls fatal (ERR_IO) on a read error.  */
static void
fill_buffer (char *dest, size_t room)
{
  const size_t got = fread (dest, 1, room, file);

  if (got < room)
    {
      /* fread () cannot return a negative value; a short count means either
         end of file or an error, and only ferror () tells them apart.  */
      if (ferror (file))
        fatal (ERR_IO);
      memset (dest + got, '\0', room - got);
    }
}

/* Return the character I positions ahead of the current read position
   without consuming it, refilling the window from FILE when fewer than
   I + 1 characters remain.  Past the end of input the result is '\0'.
   A token in progress is preserved across a refill; if it alone fills
   the window, fatal (ERR_BUFFER_SIZE) is called.  */
static char
get_char (int i)
{
  assert (file);
  assert (i >= 0);

  if (buffer == NULL)
    {
      buffer = (char *) malloc (BUFFER_SIZE);
      /* There is no dedicated out-of-memory code; ERR_BUFFER_SIZE is the
         closest one available.  */
      if (buffer == NULL)
        fatal (ERR_BUFFER_SIZE);
      buffer_start = buffer;
      fill_buffer (buffer, BUFFER_SIZE);
    }

  /* Number of unread characters between BUFFER and the end of the window.  */
  const int tail_size = BUFFER_SIZE - (int) (buffer - buffer_start);
  assert (tail_size >= 0);

  if (tail_size <= i)
    {
      /* We are almost at the end of the buffer.  */
      if (token_start)
        {
          const int token_size = (int) (buffer - token_start);

          /* Whole buffer contains single token.  */
          if (token_start == buffer_start)
            fatal (ERR_BUFFER_SIZE);

          /* Move parsed token and tail of buffer to head.  */
          memmove (buffer_start, token_start, tail_size + token_size);

          /* Adjust pointers.  */
          token_start = buffer_start;
          buffer = buffer_start + token_size;

          /* Read more characters from input file.  */
          fill_buffer (buffer + tail_size,
                       BUFFER_SIZE - tail_size - token_size);
        }
      else
        {
          memmove (buffer_start, buffer, tail_size);
          buffer = buffer_start;
          fill_buffer (buffer + tail_size, BUFFER_SIZE - tail_size);
        }
    }

  return *(buffer + i);
}

/* Look ahead: the character I positions past the current one.  */
#define LA(I) (get_char (I))
/* Remember the current read position as the first character of a new
   token.  The buffer must already have been loaded.  */
static inline void
new_token (void)
{
  assert (buffer != NULL);

  token_start = buffer;
}
/* Advance the read position by one character.  The buffer must already
   have been loaded.  */
static inline void
consume_char (void)
{
  assert (buffer != NULL);

  buffer += 1;
}
static inline const char *
current_token ()
{
assert (buffer);
assert (token_start);
int length = buffer - token_start;
char *res = (char *) malloc (length + 1);
strncpy (res, token_start, length);
res[length] = '\0';
token_start = NULL;
return res;
}
/* Helpers for lexing punctuators.  Each one consumes characters and
   RETURNS FROM THE ENCLOSING FUNCTION with a data-less token, so they may
   only be used inside a function returning `token'.  All arguments are
   parenthesized so that expressions can be passed safely.  */

/* Consume NUM characters and return a token of type TOK.  */
#define RETURN_PUNC_EX(TOK, NUM) \
  do \
    { \
      buffer += (NUM); \
      return (token) { .type = (TOK), .data.none = NULL }; \
    } \
  while (0)

/* Single-character punctuator.  */
#define RETURN_PUNC(TOK) RETURN_PUNC_EX (TOK, 1)

/* The first NUM characters are already matched.  If the next one is CHAR,
   return THEN_TOK (NUM + 1 characters), otherwise ELSE_TOK (NUM
   characters).  */
#define IF_LA_N_IS(CHAR, THEN_TOK, ELSE_TOK, NUM) \
  do \
    { \
      if (LA (NUM) == (CHAR)) \
        RETURN_PUNC_EX (THEN_TOK, (NUM) + 1); \
      else \
        RETURN_PUNC_EX (ELSE_TOK, NUM); \
    } \
  while (0)

/* Same as IF_LA_N_IS with one character already matched.  */
#define IF_LA_IS(CHAR, THEN_TOK, ELSE_TOK) \
  IF_LA_N_IS (CHAR, THEN_TOK, ELSE_TOK, 1)

/* One character is matched.  A following CHAR1 gives THEN1_TOK, a following
   CHAR2 gives THEN2_TOK, anything else gives the one-character ELSE_TOK.  */
#define IF_LA_IS_OR(CHAR1, THEN1_TOK, CHAR2, THEN2_TOK, ELSE_TOK) \
  do \
    { \
      if (LA (1) == (CHAR1)) \
        RETURN_PUNC_EX (THEN1_TOK, 2); \
      else if (LA (1) == (CHAR2)) \
        RETURN_PUNC_EX (THEN2_TOK, 2); \
      else \
        RETURN_PUNC (ELSE_TOK); \
    } \
  while (0)
/* Lex an identifier-like word starting at the current character, which
   must be a letter, '$' or '_'.
   Returns TOK_KEYWORD for keywords and Future Reserved Words, TOK_NULL for
   `null', TOK_BOOL for `true' / `false', and TOK_NAME otherwise.  For
   TOK_NAME the token owns a heap-allocated copy of the spelling.  */
static token
parse_name (void)
{
  char c = LA (0);
  /* Keywords and literals consist of lower-case letters only, so the
     lookup below is skipped as soon as any other character is seen.
     <ctype.h> functions require an unsigned char value; passing a negative
     plain char is undefined behaviour.  */
  bool every_char_islower = islower ((unsigned char) c) != 0;
  const char *tok = NULL;

  assert (isalpha ((unsigned char) c) || c == '$' || c == '_');

  new_token ();
  consume_char ();
  while (true)
    {
      c = LA (0);
      if (!isalnum ((unsigned char) c) && c != '$' && c != '_')
        break;
      if (!islower ((unsigned char) c))
        every_char_islower = false;
      consume_char ();
    }

  tok = current_token ();
  if (every_char_islower)
    {
      const keyword kw = decode_keyword (tok);

      if (kw != KW_NONE)
        {
          free ((char *) tok);
          return (token) { .type = TOK_KEYWORD, .data.kw = kw };
        }
      if (!strcmp ("null", tok))
        {
          free ((char *) tok);
          return (token) { .type = TOK_NULL, .data.none = NULL };
        }
      if (!strcmp ("true", tok))
        {
          free ((char *) tok);
          return (token) { .type = TOK_BOOL, .data.is_true = true };
        }
      if (!strcmp ("false", tok))
        {
          free ((char *) tok);
          return (token) { .type = TOK_BOOL, .data.is_true = false };
        }
    }

  return (token) { .type = TOK_NAME, .data.name = tok };
}
/* Return true if C is a hexadecimal digit (0-9, a-f, A-F).  */
static bool
is_hex_digit (char c)
{
  /* <ctype.h> functions require an unsigned char value; a negative plain
     char would be undefined behaviour.  */
  return isxdigit ((unsigned char) c) != 0;
}
/* Return the numeric value (0..15) of the hexadecimal digit HEX.
   HEX must satisfy is_hex_digit; any other character trips an assertion
   and, in builds without assertions, yields 0.  */
static int
hex_to_int (char hex)
{
  /* The decimal digits are guaranteed to be contiguous in every C
     character set; the letters are not, so they are listed explicitly.  */
  if (hex >= '0' && hex <= '9')
    return hex - '0';

  switch (hex)
    {
    case 'a':
    case 'A':
      return 0xA;
    case 'b':
    case 'B':
      return 0xB;
    case 'c':
    case 'C':
      return 0xC;
    case 'd':
    case 'D':
      return 0xD;
    case 'e':
    case 'E':
      return 0xE;
    case 'f':
    case 'F':
      return 0xF;
    default:
      break;
    }

  assert (0 && "hex_to_int: not a hexadecimal digit");
  /* Never fall off the end of a non-void function when NDEBUG is set.  */
  return 0;
}
/* Lex a numeric literal starting at the current character, which must be a
   digit or a '.' followed by a digit.
   Returns TOK_INT for hexadecimal ("0x...") and plain decimal literals and
   TOK_FLOAT for literals with a fraction or an exponent.  Malformed
   literals end in fatal (ERR_INT_LITERAL).
   strtol is not used for the integer forms because ECMAScript has no octal
   literals.  Values that do not fit in an int wrap around.  */
static token
parse_number (void)
{
  char c = LA (0);
  bool is_hex = false;
  bool is_fp = false;
  bool is_exp = false;
  const char *tok = NULL;
  int tok_length = 0;
  /* Accumulate in unsigned arithmetic: it wraps on overflow, whereas signed
     overflow would be undefined behaviour.  */
  unsigned int res = 0;

  /* <ctype.h> functions require an unsigned char value, hence the casts
     throughout this function.  */
  assert (isdigit ((unsigned char) c) || c == '.');

  if (c == '0' && (LA (1) == 'x' || LA (1) == 'X'))
    is_hex = true;

  if (c == '.')
    {
      assert (!isalpha ((unsigned char) LA (1)));
      is_fp = true;
    }

  if (is_hex)
    {
      /* Eat up '0x'.  */
      consume_char ();
      consume_char ();
      new_token ();
      while (true)
        {
          c = LA (0);
          if (!is_hex_digit (c))
            break;
          consume_char ();
        }
      if (isalpha ((unsigned char) c) || c == '_' || c == '$')
        fatal (ERR_INT_LITERAL);

      tok_length = (int) (buffer - token_start);
      /* A hexadecimal literal needs at least one digit after "0x".  */
      if (tok_length == 0)
        fatal (ERR_INT_LITERAL);

      tok = current_token ();
      for (int i = 0; i < tok_length; i++)
        res = (res << 4) + (unsigned int) hex_to_int (tok[i]);
      free ((char *) tok);
      return (token) { .type = TOK_INT, .data.num = (int) res };
    }

  assert (!is_hex && !is_exp);

  new_token ();

  /* Eat up '.'.  */
  if (is_fp)
    consume_char ();

  while (true)
    {
      c = LA (0);
      /* At most one '.' and one exponent marker are allowed.  */
      if (is_fp && c == '.')
        fatal (ERR_INT_LITERAL);
      if (is_exp && (c == 'e' || c == 'E'))
        fatal (ERR_INT_LITERAL);

      if (c == '.')
        {
          if (isalpha ((unsigned char) LA (1)) || LA (1) == '_'
              || LA (1) == '$')
            fatal (ERR_INT_LITERAL);
          is_fp = true;
          consume_char ();
          continue;
        }

      if (c == 'e' || c == 'E')
        {
          /* With a sign present this consumes the 'e', so that LA (1)
             below is the character after the sign in both cases.  */
          if (LA (1) == '-' || LA (1) == '+')
            consume_char ();
          if (!isdigit ((unsigned char) LA (1)))
            fatal (ERR_INT_LITERAL);
          is_exp = true;
          consume_char ();
          continue;
        }

      if (isalpha ((unsigned char) c) || c == '_' || c == '$')
        fatal (ERR_INT_LITERAL);

      if (!isdigit ((unsigned char) c))
        break;

      consume_char ();
    }

  if (is_fp || is_exp)
    {
      tok = current_token ();
      const float fp_value = strtof (tok, NULL);
      free ((char *) tok);
      return (token) { .type = TOK_FLOAT, .data.fp_num = fp_value };
    }

  tok_length = (int) (buffer - token_start);
  tok = current_token ();
  for (int i = 0; i < tok_length; i++)
    res = res * 10u + (unsigned int) hex_to_int (tok[i]);
  free ((char *) tok);

  return (token) { .type = TOK_INT, .data.num = (int) res };
}
/* Translate the character that follows a backslash in a string literal
   into the character it stands for.  The letters b, f, n, r, t and v map to
   the corresponding control characters; every other character (quotes and
   the backslash included) stands for itself.  */
static char
escape_char (char c)
{
  /* letters[k] is the escape letter, controls[k] the character it denotes.  */
  static const char letters[] = "bfnrtv";
  static const char controls[] = "\b\f\n\r\t\v";

  for (int k = 0; letters[k] != '\0'; k++)
    if (letters[k] == c)
      return controls[k];

  return c;
}
static token
parse_string ()
{
char c = LA (0);
bool is_double_quoted;
char *tok = NULL;
char *index = NULL;
int length;
assert (c == '\'' || c == '"');
is_double_quoted = (c == '"');
// Eat up '"'
consume_char ();
new_token ();
while (true)
{
c = LA (0);
if (c == '\0')
fatal (ERR_UNCLOSED);
if (c == '\n')
fatal (ERR_STRING);
if (c == '\\')
{
/* Only single escape character is allowed. */
if (LA (1) == 'x' || LA (1) == 'u' || isdigit (LA (1)))
fatal (ERR_STRING);
if ((LA (1) == '\'' && !is_double_quoted)
|| (LA (1) == '"' && is_double_quoted)
|| LA (1) == '\n')
{
consume_char ();
consume_char ();
continue;
}
}
else if ((c == '\'' && !is_double_quoted)
|| (c == '"' && is_double_quoted))
break;
consume_char ();
}
length = buffer - token_start;
tok = (char *) malloc (length);
index = tok;
for (char *i = token_start; i < buffer; i++)
{
if (*i == '\\')
{
if (*(i+1) == '\n')
{
i++;
continue;
}
*index = escape_char (*(i+1));
index++;
i++;
continue;
}
*index = *i;
index++;
}
memset (index, '\0', length - (index - tok));
token_start = NULL;
// Eat up '"'
consume_char ();
return (token) { .type = TOK_STRING, .data.str = tok };
}
/* Skip white space up to, but not including, the next newline.  Newlines
   are left in place because the lexer reports them as TOK_NEWLINE.  */
static void
grobble_whitespaces (void)
{
  char c = LA (0);

  /* '\0' marks the end of input and must not be skipped: get_char keeps
     returning it there, so consuming it would never terminate.  The cast
     is needed because <ctype.h> functions require an unsigned char value.  */
  while (c != '\n' && isspace ((unsigned char) c))
    {
      consume_char ();
      c = LA (0);
    }
}
/* Select EX_FILE, which must be a non-NULL stream open for reading, as the
   input of the lexer.  In DEBUG builds this also opens the debug log
   "lexer.log" on first use.  */
void
lexer_set_file (FILE *ex_file)
{
  assert (ex_file);

  file = ex_file;

#ifdef DEBUG
  /* lexer_debug_log only exists in DEBUG builds.  Open it once, and fall
     back to stderr when the log file cannot be created so that later
     fprintf () calls never receive a NULL stream.  */
  if (lexer_debug_log == NULL)
    lexer_debug_log = fopen ("lexer.log", "w");
  if (lexer_debug_log == NULL)
    lexer_debug_log = stderr;
#endif
}
/* Skip the comment starting at the current position, which must be "//"
   or the opening of a multi-line comment.
   A single-line comment is skipped up to, but not including, the newline
   or end of input that terminates it, and the result is false.  A
   multi-line comment is skipped together with its closing delimiter; the
   result is true if the comment contained at least one newline.  An
   unterminated multi-line comment ends in fatal (ERR_UNCLOSED).  */
static bool
replace_comment_by_newline (void)
{
  bool saw_newline = false;

  assert (LA (0) == '/');
  assert (LA (1) == '/' || LA (1) == '*');

  const bool is_block = (LA (1) == '*');

  /* Skip the two-character comment opener.  */
  consume_char ();
  consume_char ();

  if (!is_block)
    {
      for (char ch = LA (0); ch != '\n' && ch != '\0'; ch = LA (0))
        consume_char ();
      return false;
    }

  for (;;)
    {
      const char ch = LA (0);

      if (ch == '*' && LA (1) == '/')
        {
          consume_char ();
          consume_char ();
          return saw_newline;
        }

      if (ch == '\n')
        saw_newline = true;
      else if (ch == '\0')
        fatal (ERR_UNCLOSED);

      consume_char ();
    }
}
/* Return the next token of the input.
   A token stored by lexer_save_token is returned first.  Otherwise white
   space and comments are skipped; newlines, and multi-line comments that
   contain a newline, are reported as TOK_NEWLINE.  The end of input is
   reported as TOK_EOF.  A character that cannot start any token ends in
   fatal (ERR_NON_CHAR).
   In DEBUG builds this function is named lexer_next_token_private and is
   wrapped by a logging lexer_next_token.  */
token
#ifdef DEBUG
lexer_next_token_private (void)
#else
lexer_next_token (void)
#endif
{
  char c = LA (0);

  if (saved_token.type != TOK_EOF)
    {
      token res = saved_token;
      saved_token.type = TOK_EOF;
      return res;
    }

  assert (token_start == NULL);

  /* <ctype.h> functions require an unsigned char value, hence the casts.  */
  if (isalpha ((unsigned char) c) || c == '$' || c == '_')
    return parse_name ();

  if (isdigit ((unsigned char) c)
      || (c == '.' && isdigit ((unsigned char) LA (1))))
    return parse_number ();

  if (c == '\n')
    {
      consume_char ();
      return (token) { .type = TOK_NEWLINE, .data.none = NULL };
    }

  if (c == '\0')
    return (token) { .type = TOK_EOF, .data.none = NULL };

  if (c == '\'' || c == '"')
    return parse_string ();

  if (isspace ((unsigned char) c))
    {
      grobble_whitespaces ();
      return lexer_next_token ();
    }

  if (c == '/' && LA (1) == '*')
    {
      if (replace_comment_by_newline ())
        return (token) { .type = TOK_NEWLINE, .data.none = NULL };
      else
        return lexer_next_token ();
    }

  if (c == '/' && LA (1) == '/')
    {
      replace_comment_by_newline ();
      return lexer_next_token ();
    }

  /* Punctuators.  Every RETURN_PUNC* / IF_LA_* macro returns from this
     function, so no case falls through into the next one.  */
  switch (c)
    {
    case '{': RETURN_PUNC (TOK_OPEN_BRACE);
    case '}': RETURN_PUNC (TOK_CLOSE_BRACE);
    case '(': RETURN_PUNC (TOK_OPEN_PAREN);
    case ')': RETURN_PUNC (TOK_CLOSE_PAREN);
    case '[': RETURN_PUNC (TOK_OPEN_SQUARE);
    case ']': RETURN_PUNC (TOK_CLOSE_SQUARE);
    case '.': RETURN_PUNC (TOK_DOT);
    case ';': RETURN_PUNC (TOK_SEMICOLON);
    case ',': RETURN_PUNC (TOK_COMMA);
    case '~': RETURN_PUNC (TOK_COMPL);
    case ':': RETURN_PUNC (TOK_COLON);
    case '?': RETURN_PUNC (TOK_QUERY);

    case '*': IF_LA_IS ('=', TOK_MULT_EQ, TOK_MULT);
    case '/': IF_LA_IS ('=', TOK_DIV_EQ, TOK_DIV);
    case '^': IF_LA_IS ('=', TOK_XOR_EQ, TOK_XOR);
    case '%': IF_LA_IS ('=', TOK_MOD_EQ, TOK_MOD);

    case '+': IF_LA_IS_OR ('+', TOK_DOUBLE_PLUS, '=', TOK_PLUS_EQ, TOK_PLUS);
    case '-': IF_LA_IS_OR ('-', TOK_DOUBLE_MINUS, '=', TOK_MINUS_EQ, TOK_MINUS);
    case '&': IF_LA_IS_OR ('&', TOK_DOUBLE_AND, '=', TOK_AND_EQ, TOK_AND);
    case '|': IF_LA_IS_OR ('|', TOK_DOUBLE_OR, '=', TOK_OR_EQ, TOK_OR);

    case '<':
      switch (LA (1))
        {
        case '<': IF_LA_N_IS ('=', TOK_LSHIFT_EQ, TOK_LSHIFT, 2);
        case '=': RETURN_PUNC_EX (TOK_LESS_EQ, 2);
        default: RETURN_PUNC (TOK_LESS);
        }

    case '>':
      switch (LA (1))
        {
        case '>':
          switch (LA (2))
            {
            case '>': IF_LA_N_IS ('=', TOK_RSHIFT_EX_EQ, TOK_RSHIFT_EX, 3);
            case '=': RETURN_PUNC_EX (TOK_RSHIFT_EQ, 3);
            default: RETURN_PUNC_EX (TOK_RSHIFT, 2);
            }
        case '=': RETURN_PUNC_EX (TOK_GREATER_EQ, 2);
        default: RETURN_PUNC (TOK_GREATER);
        }

    case '=':
      if (LA (1) == '=')
        IF_LA_N_IS ('=', TOK_TRIPLE_EQ, TOK_DOUBLE_EQ, 2);
      else
        RETURN_PUNC (TOK_EQ);

    case '!':
      if (LA (1) == '=')
        IF_LA_N_IS ('=', TOK_NOT_DOUBLE_EQ, TOK_NOT_EQ, 2);
      else
        RETURN_PUNC (TOK_NOT);

    default:
      /* An unexpected character in the input is an error in the user's
         source, not an internal invariant violation: report it through
         fatal () below.  */
      break;
    }

  fatal (ERR_NON_CHAR);

  /* fatal () does not return; this only keeps every path of a non-void
     function ending in a return statement.  */
  return (token) { .type = TOK_EOF, .data.none = NULL };
}
#ifdef DEBUG
/* Number of TOK_CLOSE_BRACE tokens handed out so far; also printed by
   lexer_save_token.  */
static int i = 0;

/* DEBUG wrapper around lexer_next_token_private: counts closing braces and
   writes one trace line to the debug log when the counter stands at 300.
   The token itself is returned unchanged.  */
token
lexer_next_token ()
{
  const token tok = lexer_next_token_private ();

  if (tok.type == TOK_CLOSE_BRACE)
    {
      if (i == 300)
        fprintf (lexer_debug_log,
                 "lexer_next_token(%d): type=0x%x, data=%p\n",
                 i, tok.type, tok.data.none);
      i += 1;
    }

  return tok;
}
#endif
/* Push TOK back so that the next call to lexer_next_token returns it
   instead of reading new input.  There is a single slot: a second call
   before the token is fetched overwrites the first.  A token of type
   TOK_EOF is indistinguishable from an empty slot. */
void
lexer_save_token (token tok)
{
#ifdef DEBUG
/* Trace pushed-back closing braces to the debug log. */
if (tok.type == TOK_CLOSE_BRACE)
fprintf (lexer_debug_log, "lexer_save_token(%d): type=0x%x, data=%p\n", i, tok.type, tok.data.none);
#endif
saved_token = tok;
}
void
lexer_dump_buffer_state ()
{
printf ("%s\n", buffer);
}
+158
View File
@@ -0,0 +1,158 @@
/* Copyright 2014 Samsung Electronics Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LEXER_H
#define LEXER_H
#include <stdbool.h>
#include <stdio.h>
/* Keywords recognised by the lexer.  A TOK_KEYWORD token carries one of
   these values in its `kw' field.  All Future Reserved Words share the
   single value KW_RESERVED. */
typedef enum
{
/* Not a keyword. */
KW_NONE = 0,
/* Future reserved keyword. */
KW_RESERVED,
/* One constant per keyword, named after its spelling. */
KW_BREAK,
KW_CASE,
KW_CATCH,
KW_CONTINUE,
KW_DEBUGGER,
KW_DEFAULT,
KW_DELETE,
KW_DO,
KW_ELSE,
KW_FINALLY,
KW_FOR,
KW_FUNCTION,
KW_IF,
KW_IN,
KW_INSTANCEOF,
KW_NEW,
KW_RETURN,
KW_SWITCH,
KW_THIS,
KW_THROW,
KW_TRY,
KW_TYPEOF,
KW_VAR,
KW_VOID,
KW_WHILE,
KW_WITH
}
keyword;
/* Types of tokens produced by the lexer.  TOK_EOF is 0; the lexer also
   uses it to mark its saved-token slot as empty. */
typedef enum
{
TOK_EOF = 0x0, // End of file
TOK_NAME = 0x1, // Identifier
TOK_KEYWORD = 0x2, // Keyword
TOK_INT = 0x3, // Integer literal (decimal or hexadecimal)
TOK_FLOAT = 0x4, // Literal with a fraction or an exponent
TOK_NULL = 0x5, // null
TOK_BOOL = 0x6, // true or false
TOK_NEWLINE = 0x7, // Newline, or a multi-line comment containing one
TOK_STRING = 0x8, // String literal
/* Punctuators. */
TOK_OPEN_BRACE = 0x9, // {
TOK_CLOSE_BRACE = 0xa, // }
TOK_OPEN_PAREN = 0xb, // (
TOK_CLOSE_PAREN = 0xc, // )
TOK_OPEN_SQUARE, // [
TOK_CLOSE_SQUARE, // ]
TOK_DOT, // .
TOK_SEMICOLON, // ;
TOK_COMMA, // ,
TOK_LESS, // <
TOK_GREATER, // >
TOK_LESS_EQ, // <=
TOK_GREATER_EQ, // >=
TOK_DOUBLE_EQ, // ==
TOK_NOT_EQ, // !=
TOK_TRIPLE_EQ, // ===
TOK_NOT_DOUBLE_EQ, // !==
TOK_PLUS, // +
TOK_MINUS, // -
TOK_MULT, // *
TOK_MOD, // %
TOK_DOUBLE_PLUS, // ++
TOK_DOUBLE_MINUS, // --
TOK_LSHIFT, // <<
TOK_RSHIFT, // >>
TOK_RSHIFT_EX, // >>>
TOK_AND, // &
TOK_OR, // |
TOK_XOR, // ^
TOK_NOT, // !
TOK_COMPL, // ~
TOK_DOUBLE_AND, // &&
TOK_DOUBLE_OR, // ||
TOK_QUERY, // ?
TOK_COLON, // :
TOK_EQ, // =
TOK_PLUS_EQ, // +=
TOK_MINUS_EQ, // -=
TOK_MULT_EQ, // *=
TOK_MOD_EQ, // %=
TOK_LSHIFT_EQ, // <<=
TOK_RSHIFT_EQ, // >>=
TOK_RSHIFT_EX_EQ, // >>>=
TOK_AND_EQ, // &=
TOK_OR_EQ, // |=
TOK_XOR_EQ, // ^=
TOK_DIV, // /
TOK_DIV_EQ // /=
}
token_type;
/* A single token: its type plus the payload that belongs to that type.
   Only the union member matching `type' is meaningful. */
typedef struct
{
token_type type;
union
{
/* Tokens without a payload (punctuators, TOK_NULL, TOK_NEWLINE, TOK_EOF);
   the lexer sets it to NULL. */
void *none;
/* TOK_KEYWORD: which keyword. */
keyword kw;
/* TOK_NAME: heap-allocated, NUL-terminated identifier. */
const char *name;
/* TOK_BOOL: true for `true', false for `false'. */
bool is_true;
/* TOK_INT: value of the literal. */
int num;
/* TOK_FLOAT: value of the literal. */
float fp_num;
/* TOK_STRING: heap-allocated, NUL-terminated contents with escapes decoded. */
const char *str;
}
data;
}
token;
/* Select the stream the lexer reads from; must be called before the first
   lexer_next_token. */
void lexer_set_file (FILE *);
/* Return the next token; a token stored by lexer_save_token comes first. */
token lexer_next_token ();
/* Push one token back so that the next lexer_next_token returns it. */
void lexer_save_token (token);
#endif
+87
View File
@@ -0,0 +1,87 @@
/* Copyright 2014 Samsung Electronics Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "error.h"
#include "lexer.h"
#include "parser.h"
#include "pretty-printer.h"
/* Entry point.
   Usage: jerry (-t | -a) FILE
     -t  dump the tokens of FILE through the pretty-printer
     -a  parse FILE statement by statement and dump each statement
   Exactly one input file must be given and the two options are mutually
   exclusive; violations end in fatal ().  Without either option the file
   is opened and closed again with nothing printed.  */
int
main (int argc, char **argv)
{
  bool dump_tokens = false;
  bool dump_ast = false;
  const char *file_name = NULL;
  FILE *file = NULL;

  for (int i = 1; i < argc; i++)
    {
      if (!strcmp ("-t", argv[i]))
        dump_tokens = true;
      else if (!strcmp ("-a", argv[i]))
        dump_ast = true;
      else if (file_name == NULL)
        file_name = argv[i];
      else
        fatal (ERR_SEVERAL_FILES);
    }

  if (file_name == NULL)
    fatal (ERR_NO_FILES);

  if (dump_tokens && dump_ast)
    fatal (ERR_SEVERAL_FILES);

  file = fopen (file_name, "r");
  if (file == NULL)
    fatal (ERR_IO);

  if (dump_tokens)
    {
      token tok;

      lexer_set_file (file);
      tok = lexer_next_token ();
      pp_reset ();
      while (tok.type != TOK_EOF)
        {
          pp_token (tok);
          tok = lexer_next_token ();
        }
    }

  if (dump_ast)
    {
      statement *st;

      lexer_set_file (file);
      parser_init ();
      st = parser_parse_statement ();
      assert (st);
      while (st->type != STMT_EOF)
        {
          pp_statement (st);
          st = parser_parse_statement ();
          assert (st);
        }
      pp_finish ();
    }

  /* All input has been consumed; release the stream.  */
  fclose (file);

  return 0;
}
+2074
View File
File diff suppressed because it is too large Load Diff
+738
View File
@@ -0,0 +1,738 @@
/* Copyright 2014 Samsung Electronics Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef PARSER_H
#define PARSER_H
#include <stdio.h>
#include <stdbool.h>
struct source_element_list;
struct statement_list;
struct statement;
struct assignment_expression;
struct member_expression;
/** Singly linked list of the formal parameters of a function. */
typedef struct formal_parameter_list
{
/** Identifier of a parameter. Cannot be NULL. */
const char *name;
/** Next parameter, or NULL at the end of the list. */
struct formal_parameter_list *next;
}
formal_parameter_list;
/** @function_declaration represents both the declaration and the expression form of a function.
It holds only the header; the parser returns the body afterwards as a block of statements. */
typedef struct
{
/** Identifier: name of a function. Can be NULL for anonymous functions. */
const char *name;
/** List of parameters of a function. Can be NULL. */
formal_parameter_list *params;
}
function_declaration;
/** A function expression has the same shape as a function declaration. */
typedef function_declaration function_expression;
/** Singly linked list of assignment expressions. The same shape is used for an expression,
an array literal and a list of arguments (see the typedefs below). */
typedef struct expression_list
{
/** Single assignment expression. Cannot be NULL for an expression or a list of arguments,
but can be NULL for an array literal. */
struct assignment_expression *assign_expr;
/** Next expression. Can be NULL. */
struct expression_list *next;
}
expression_list;
typedef expression_list expression;
typedef expression_list array_literal;
typedef expression_list argument_list;
/** Types of literals: null, bool, decimal and string.
The decimal type is represented by LIT_INT; its value is stored in an int. */
typedef enum
{
LIT_NULL,
LIT_BOOL,
LIT_INT,
LIT_STR
}
literal_type;
/** A literal: its type together with its value. Only the union member
matching the type is meaningful. */
typedef struct
{
/** Type of a literal. */
literal_type type;
/** Value of a literal. */
union
{
/** Used by null literal, always NULL. */
void *data;
/** String literal value. */
const char *str;
/** Number value. */
int num;
/** Boolean value. */
bool is_true;
}
data;
}
literal;
/** Type of PropertyName. Can be an identifier, a string literal or an integer. */
typedef enum
{
PN_NAME,
PN_STRING,
PN_NUM
}
property_name_type;
/** Name of a property. Only the union member matching the type is meaningful. */
typedef struct
{
/** Type of property name. */
property_name_type type;
/** Value of property name. */
union
{
/** Identifier. */
const char *name;
/** Value of string literal. */
const char *str;
/** Numeric value. */
int num;
}
data;
}
property_name;
/** A single property of an object literal: its name and its value. */
typedef struct
{
/** Name of property. */
property_name *name;
/** Value of property. */
struct assignment_expression *assign_expr;
}
property_name_and_value;
/** Singly linked list of properties. Represents ObjectLiteral. */
typedef struct property_name_and_value_list
{
/** Current property. */
property_name_and_value *nav;
/** Next property. */
struct property_name_and_value_list *next;
}
property_name_and_value_list;
typedef property_name_and_value_list object_literal;
/** Type of PrimaryExpression. Can be ThisLiteral, Identifier, Literal, ArrayLiteral,
ObjectLiteral or a (parenthesized) expression. */
typedef enum
{
PE_THIS,
PE_NAME,
PE_LITERAL,
PE_ARRAY,
PE_OBJECT,
PE_EXPR
}
primary_expression_type;
/** PrimaryExpression. Only the union member matching the type is meaningful. */
typedef struct
{
/** Type of PrimaryExpression. */
primary_expression_type type;
/** Value of PrimaryExpression. */
union
{
/** Used for ThisLiteral. Always NULL. */
void *none;
/** Identifier. */
const char *name;
/** Literal. */
literal *lit;
/** ArrayLiteral. */
array_literal *array_lit;
/** ObjectLiteral. */
object_literal *object_lit;
/** Expression. */
expression *expr;
}
data;
}
primary_expression;
/** Type of suffix of MemberExpression. Can be either index-like ([]) or property-like (.). */
typedef enum
{
MES_INDEX,
MES_PROPERTY
}
member_expression_suffix_type;
/** Suffix of MemberExpression. Only the union member matching the type is meaningful. */
typedef struct
{
/** Type of suffix. */
member_expression_suffix_type type;
/** Value of suffix. */
union
{
/** Used by index-like suffix. */
expression *index_expr;
/** Used by property-like suffix. */
const char *name;
}
data;
}
member_expression_suffix;
/** Singly linked list of MemberExpression's suffixes. */
typedef struct member_expression_suffix_list
{
/** Current suffix. */
member_expression_suffix *suffix;
/** Next suffix. */
struct member_expression_suffix_list *next;
}
member_expression_suffix_list;
/** Represents the "MemberExpression Arguments" grammar production. */
typedef struct
{
/** MemberExpression. */
struct member_expression *member_expr;
/** Arguments. */
argument_list *args;
}
member_expression_with_arguments;
/** Types of MemberExpression. Can be PrimaryExpression,
FunctionExpression or MemberExpression Arguments. */
typedef enum
{
ME_PRIMARY,
ME_FUNCTION,
ME_ARGS
}
member_expression_type;
/** Represents MemberExpression: a base selected by the type, followed by a list of suffixes. */
typedef struct member_expression
{
/** Type of MemberExpression. */
member_expression_type type;
/** Value of MemberExpression. Only the member matching the type is meaningful. */
union
{
/** PrimaryExpression. */
primary_expression *primary_expr;
/** FunctionExpression. */
function_expression *function_expr;
/** MemberExpression Arguments. */
member_expression_with_arguments *args;
}
data;
/** Suffixes applied to the base; presumably NULL when there are none - confirm in parser.c. */
member_expression_suffix_list *suffix_list;
}
member_expression;
/** Types of NewExpression. Can be either MemberExpression or NewExpression. */
typedef enum
{
NE_MEMBER,
NE_NEW
}
new_expression_type;
/** Represents NewExpression. Only the union member matching the type is meaningful. */
typedef struct new_expression
{
/** Type of NewExpression. */
new_expression_type type;
/** Value of NewExpression. */
union
{
/** MemberExpression. */
member_expression *member_expr;
/** NewExpression. */
struct new_expression *new_expr;
}
data;
}
new_expression;
/** Types of CallExpression's suffix. Can be Arguments, index-like access ([]) or
property-like access (.). */
typedef enum
{
CAS_ARGS,
CAS_INDEX,
CAS_PROPERTY
}
call_expression_suffix_type;
/** Suffix of CallExpression. Only the union member matching the type is meaningful. */
typedef struct
{
/** Type of suffix. */
call_expression_suffix_type type;
/** Value of suffix. */
union
{
/** Arguments. */
argument_list *args;
/** Index-like access expression. */
expression *index_expr;
/** Identifier of property. */
const char *name;
}
data;
}
call_expression_suffix;
/** Singly linked list of CallExpression's suffixes. */
typedef struct call_expression_suffix_list
{
/** Current suffix. */
call_expression_suffix *suffix;
/** Next suffix. */
struct call_expression_suffix_list *next;
}
call_expression_suffix_list;
/** CallExpression: a callee, its arguments and any further suffixes. */
typedef struct
{
/** Callee. Cannot be NULL. */
member_expression *member_expr;
/** List of arguments. Can be NULL. */
argument_list *args;
/** Suffixes of CallExpression. Can be NULL. */
call_expression_suffix_list *suffix_list;
}
call_expression;
/** Types of LeftHandSideExpression. Can be either CallExpression or NewExpression. */
typedef enum
{
LHSE_CALL,
LHSE_NEW
}
left_hand_side_expression_type;
/** LeftHandSideExpression. Only the union member matching the type is meaningful. */
typedef struct
{
/** Type of LeftHandSideExpression. */
left_hand_side_expression_type type;
/** Value of LeftHandSideExpression. */
union
{
/** Value of CallExpression. */
call_expression *call_expr;
/** Value of NewExpression. */
new_expression *new_expr;
}
data;
}
left_hand_side_expression;
/** Type of PostfixExpression. Unlike ECMA, it can also carry no postfix operator at all
(PE_NONE), in addition to increment and decrement. */
typedef enum
{
PE_NONE,
PE_INCREMENT,
PE_DECREMENT
}
postfix_expression_type;
/** PostfixExpression: a LeftHandSideExpression with an optional postfix operator. */
typedef struct
{
/** Type of PostfixExpression. */
postfix_expression_type type;
/** LeftHandSideExpression. */
left_hand_side_expression *expr;
}
postfix_expression;
/** Types of UnaryExpression. Can be PostfixExpression, delete UnaryExpression,
void UnaryExpression, typeof UnaryExpression, ++ UnaryExpression, -- UnaryExpression,
+ UnaryExpression, - UnaryExpression, ~ UnaryExpression, ! UnaryExpression. */
typedef enum
{
UE_POSTFIX,
UE_DELETE,
UE_VOID,
UE_TYPEOF,
UE_INCREMENT,
UE_DECREMENT,
UE_PLUS,
UE_MINUS,
UE_COMPL,
UE_NOT
}
unary_expression_type;
/** UnaryExpression: either a PostfixExpression or a prefix operator applied to
another UnaryExpression. */
typedef struct unary_expression
{
/** Type of UnaryExpression. */
unary_expression_type type;
/** Data of UnaryExpression. */
union
{
/** PostfixExpression. Exists only when the type is UE_POSTFIX. */
postfix_expression *postfix_expr;
/** UnaryExpression after an operator. Exists otherwise. */
struct unary_expression *unary_expr;
}
data;
}
unary_expression;
/** Type of MultiplicativeExpression. In addition to the ECMA operators, ME_NONE is used
when there is only one operand. */
typedef enum
{
ME_NONE,
ME_MULT,
ME_DIV,
ME_MOD
}
multiplicative_expression_type;
/** Singly linked list of MultiplicativeExpressions. It can contain 1..n operands. */
typedef struct multiplicative_expression_list
{
/** Type of current MultiplicativeExpression. */
multiplicative_expression_type type;
/** Current operand. */
unary_expression *unary_expr;
/** Next operand. */
struct multiplicative_expression_list *next;
}
multiplicative_expression_list;
/** Type of AdditiveExpression. Presumably AE_NONE is used when there is only one
operand, as ME_NONE is for MultiplicativeExpression - confirm in parser.c. */
typedef enum
{
AE_NONE,
AE_PLUS,
AE_MINUS
}
additive_expression_type;
/** Singly linked list of AdditiveExpression operands. */
typedef struct additive_expression_list
{
/** Type of current AdditiveExpression. */
additive_expression_type type;
/** Current operand. */
multiplicative_expression_list *mult_expr;
/** Next operand. */
struct additive_expression_list *next;
}
additive_expression_list;
/** Type of ShiftExpression. The names mirror the lexer's TOK_LSHIFT, TOK_RSHIFT and
TOK_RSHIFT_EX tokens; presumably SE_NONE is used when there is only one operand -
confirm in parser.c. */
typedef enum
{
SE_NONE,
SE_LSHIFT,
SE_RSHIFT,
SE_RSHIFT_EX
}
shift_expression_type;
/** Singly linked list of ShiftExpression operands. */
typedef struct shift_expression_list
{
/** Type of current ShiftExpression. */
shift_expression_type type;
/** Current operand. */
additive_expression_list *add_expr;
/** Next operand. */
struct shift_expression_list *next;
}
shift_expression_list;
/** Type of RelationalExpression: the comparison operators plus `instanceof' and `in'.
Presumably RE_NONE is used when there is only one operand - confirm in parser.c. */
typedef enum
{
RE_NONE,
RE_LESS,
RE_GREATER,
RE_LESS_EQ,
RE_GREATER_EQ,
RE_INSTANCEOF,
RE_IN
}
relational_expression_type;
/** Singly linked list of RelationalExpression operands. */
typedef struct relational_expression_list
{
/** Type of current RelationalExpression. */
relational_expression_type type;
/** Current operand. */
shift_expression_list *shift_expr;
/** Next operand. */
struct relational_expression_list *next;
}
relational_expression_list;
/** Kind of an EqualityExpression.
    NOTE(review): EE_NONE presumably marks a single-operand expression;
    EE_DOUBLE_EQ / EE_NOT_EQ / EE_TRIPLE_EQ presumably stand for '==', '!='
    and '==='. EE_NOT_DOUBLE_EQ presumably stands for '!==' (the name would
    read more naturally as "not triple eq") - confirm against the parser. */
typedef enum
{
  EE_NONE          = 0,
  EE_DOUBLE_EQ     = 1,
  EE_NOT_EQ        = 2,
  EE_TRIPLE_EQ     = 3,
  EE_NOT_DOUBLE_EQ = 4
} equality_expression_type;
/** Singly linked list node of an EqualityExpression: a kind tag, one
    RelationalExpression operand and a link to the next node. */
typedef struct equality_expression_list
{
  /** Kind of the current EqualityExpression. */
  equality_expression_type type;
  /** Operand stored in this node. */
  relational_expression_list *rel_expr;
  /** Next node of the list. */
  struct equality_expression_list *next;
} equality_expression_list;
/** Singly linked list node of a BitwiseANDExpression. There is no kind tag:
    each node only carries an EqualityExpression operand and a link. */
typedef struct bitwise_and_expression_list
{
  /** Operand stored in this node. */
  equality_expression_list *eq_expr;
  /** Next node of the list. */
  struct bitwise_and_expression_list *next;
} bitwise_and_expression_list;
/** Singly linked list node of a BitwiseXORExpression. There is no kind tag:
    each node only carries a BitwiseANDExpression operand and a link. */
typedef struct bitwise_xor_expression_list
{
  /** Operand stored in this node. */
  bitwise_and_expression_list *and_expr;
  /** Next node of the list. */
  struct bitwise_xor_expression_list *next;
} bitwise_xor_expression_list;
/** Singly linked list node of a BitwiseORExpression. There is no kind tag:
    each node only carries a BitwiseXORExpression operand and a link. */
typedef struct bitwise_or_expression_list
{
  /** Operand stored in this node. */
  bitwise_xor_expression_list *xor_expr;
  /** Next node of the list. */
  struct bitwise_or_expression_list *next;
} bitwise_or_expression_list;
/** Singly linked list node of a LogicalANDExpression. There is no kind tag:
    each node only carries a BitwiseORExpression operand and a link. */
typedef struct logical_and_expression_list
{
  /** Operand stored in this node (a bitwise OR list, despite the short
      member name). */
  bitwise_or_expression_list *or_expr;
  /** Next node of the list. */
  struct logical_and_expression_list *next;
} logical_and_expression_list;
/** Singly linked list node of a LogicalORExpression. There is no kind tag:
    each node only carries a LogicalANDExpression operand and a link. */
typedef struct logical_or_expression_list
{
  /** Operand stored in this node (a logical AND list). */
  logical_and_expression_list *and_expr;
  /** Next node of the list. */
  struct logical_or_expression_list *next;
} logical_or_expression_list;
/** ConditionalExpression node: a LogicalORExpression plus two
    AssignmentExpression branches.
    NOTE(review): then_expr / else_expr are presumably the two arms of
    '? :' and presumably unset when the operator is absent - confirm
    against the parser. */
typedef struct
{
  /** LogicalORExpression part. */
  logical_or_expression_list *or_expr;
  /** First branch. */
  struct assignment_expression *then_expr;
  /** Second branch. */
  struct assignment_expression *else_expr;
} conditional_expression;
/** Kind of an AssignmentExpression.
    NOTE(review): AE_COND presumably marks a plain ConditionalExpression and
    the *_EQ names the assignment operators ('=', '*=', '/=', '%=', '+=',
    '-=', '<<=', '>>=', '>>>=', '&=', '|=', '^=') - confirm against the
    parser. The AE_ prefix is shared with additive_expression_type. */
typedef enum
{
  AE_COND         = 0,
  AE_EQ           = 1,
  AE_MULT_EQ      = 2,
  AE_DIV_EQ       = 3,
  AE_MOD_EQ       = 4,
  AE_PLUS_EQ      = 5,
  AE_MINUS_EQ     = 6,
  AE_LSHIFT_EQ    = 7,
  AE_RSHIFT_EQ    = 8,
  AE_RSHIFT_EX_EQ = 9,
  AE_AND_EQ       = 10,
  AE_OR_EQ        = 11,
  AE_XOR_EQ       = 12
} assignment_expression_type;
/** Pair of a LeftHandSideExpression and an AssignmentExpression.
    NOTE(review): presumably the target and the assigned value of an
    assignment - confirm against the parser. */
typedef struct
{
  /** LeftHandSideExpression part. */
  left_hand_side_expression *left_hand_expr;
  /** AssignmentExpression part. */
  struct assignment_expression *assign_expr;
} left_hand_and_assignment_expression;
/** AssignmentExpression node: a kind tag plus either a
    ConditionalExpression or a (left-hand side, assignment) pair.
    NOTE(review): cond_expr is presumably the active member when type is
    AE_COND and s otherwise - confirm against the parser. */
typedef struct assignment_expression
{
  /** Kind of this AssignmentExpression. */
  assignment_expression_type type;
  /** Payload of the node. */
  union
  {
    /** ConditionalExpression payload. */
    conditional_expression *cond_expr;
    /** Left-hand side together with the nested AssignmentExpression. */
    left_hand_and_assignment_expression s;
  } data;
} assignment_expression;
/* Statements. */
/** Single variable declaration: a name and an AssignmentExpression.
    NOTE(review): ass_expr is presumably the initialiser; how a declaration
    without one is represented is not visible here - confirm. */
typedef struct
{
  /** Name of the declared variable. */
  const char *name;
  /** AssignmentExpression attached to the declaration. */
  assignment_expression *ass_expr;
} variable_declaration;
/** Singly linked list node of variable declarations. */
typedef struct variable_declaration_list
{
  /** Declaration stored in this node. */
  variable_declaration *var_decl;
  /** Next node of the list. */
  struct variable_declaration_list *next;
} variable_declaration_list;
/** Initialiser part of a for statement: either an expression or a list of
    variable declarations.
    NOTE(review): is_decl presumably selects decl_list when true and expr
    when false - confirm against the parser. */
typedef struct
{
  /** Flag telling which member of data is in use. */
  bool is_decl;
  /** Payload of the initialiser. */
  union
  {
    /** Expression form of the initialiser. */
    expression *expr;
    /** Declaration-list form of the initialiser. */
    variable_declaration_list *decl_list;
  } data;
} for_statement_initialiser_part;
/** Header of a for statement: an initialiser part and two expressions.
    NOTE(review): limit and incr are presumably the loop condition and the
    update expression - confirm against the parser. */
typedef struct
{
  /** Initialiser part. */
  for_statement_initialiser_part *init;
  /** Expression named "limit". */
  expression *limit;
  /** Expression named "incr". */
  expression *incr;
} for_statement;
/** Initialiser part of a for-in statement: either a LeftHandSideExpression
    or a single variable declaration.
    NOTE(review): is_decl presumably selects decl when true and
    left_hand_expr when false - confirm against the parser. */
typedef struct
{
  /** Flag telling which member of data is in use. */
  bool is_decl;
  /** Payload of the initialiser. */
  union
  {
    /** LeftHandSideExpression form of the initialiser. */
    left_hand_side_expression *left_hand_expr;
    /** Declaration form of the initialiser. */
    variable_declaration *decl;
  } data;
} for_in_statement_initializer_part;
/** Header of a for-in statement: an initialiser part and an expression.
    NOTE(review): list_expr is presumably the expression being iterated
    over - confirm against the parser. */
typedef struct
{
  /** Initialiser part. */
  for_in_statement_initializer_part *init;
  /** Expression named "list_expr". */
  expression *list_expr;
} for_in_statement;
/** Either a for statement or a for-in statement.
    NOTE(review): is_for_in presumably selects for_in_stmt when true and
    for_stmt when false - confirm against the parser. */
typedef struct
{
  /** Flag telling which member of data is in use. */
  bool is_for_in;
  /** Payload of the node. */
  union
  {
    /** Plain for statement. */
    for_statement *for_stmt;
    /** for-in statement. */
    for_in_statement *for_in_stmt;
  } data;
} for_or_for_in_statement;
/** Kind of a statement record (see the type member of statement).
    Besides whole statements the list also contains markers such as
    STMT_BLOCK_START / STMT_BLOCK_END, STMT_SUBEXPRESSION_END and STMT_EOF. */
typedef enum
{
  STMT_BLOCK_START       = 0,
  STMT_BLOCK_END         = 1,
  STMT_VARIABLE          = 2,
  STMT_EMPTY             = 3,
  STMT_IF                = 4,
  STMT_ELSE              = 5,
  STMT_ELSE_IF           = 6,
  STMT_DO                = 7,
  STMT_WHILE             = 8,
  STMT_FOR_OR_FOR_IN     = 9,
  STMT_CONTINUE          = 10,
  STMT_BREAK             = 11,
  STMT_RETURN            = 12,
  STMT_WITH              = 13,
  STMT_LABELLED          = 14,
  STMT_SWITCH            = 15,
  STMT_CASE              = 16,
  STMT_THROW             = 17,
  STMT_TRY               = 18,
  STMT_CATCH             = 19,
  STMT_FINALLY           = 20,
  STMT_EXPRESSION        = 21,
  STMT_SUBEXPRESSION_END = 22,
  STMT_FUNCTION          = 23,
  STMT_EOF               = 24
} statement_type;
/** Statement record: a kind tag plus a payload union.
    NOTE(review): which union member is active for which statement_type is
    not visible in this header - confirm against the parser before reading
    a member. */
typedef struct statement
{
  /** Kind of this statement. */
  statement_type type;
  /** Payload of the statement. */
  union
  {
    /** Placeholder member; presumably used by kinds without a payload. */
    void *none;
    /** List of variable declarations. */
    variable_declaration_list *var_stmt;
    /** Expression payload. */
    expression *expr;
    /** for / for-in payload. */
    for_or_for_in_statement *for_stmt;
    /** String payload; presumably a label or identifier name. */
    const char *name;
    /** Function declaration payload. */
    function_declaration *fun_decl;
  } data;
} statement;
/** Parser entry point named "init"; presumably must be called before
    parser_parse_statement - confirm against parser.c.
    Declared with an explicit (void) parameter list so that the compiler
    checks calls; an empty "()" declares a function with unspecified
    arguments and trips -Wstrict-prototypes. */
void parser_init (void);
/** Returns a pointer to a statement record; presumably the next statement
    parsed from the input - confirm against parser.c. Ownership of the
    returned record is not visible from this header.
    Declared with an explicit (void) parameter list so that the compiler
    checks calls; an empty "()" declares a function with unspecified
    arguments and trips -Wstrict-prototypes. */
statement *parser_parse_statement (void);
#endif
+1475
View File
File diff suppressed because it is too large Load Diff
+28
View File
@@ -0,0 +1,28 @@
/* Copyright 2014 Samsung Electronics Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef PRETTY_PRINTER_H
#define PRETTY_PRINTER_H
#include "lexer.h"
#include "parser.h"
/** Pretty-printer entry point named "reset"; presumably restores the
    printer's initial state - confirm against the implementation.
    Declared with an explicit (void) parameter list so that the compiler
    checks calls; an empty "()" leaves the arguments unspecified. */
void pp_reset (void);
/** Pretty-printer entry point named "finish"; presumably flushes or
    terminates the output - confirm against the implementation.
    Declared with an explicit (void) parameter list so that the compiler
    checks calls; an empty "()" leaves the arguments unspecified. */
void pp_finish (void);
void pp_token (token);
void pp_keyword (keyword);
/** Takes a pointer to a statement record (as declared in parser.h);
    presumably prints it - confirm against the implementation. */
void pp_statement (statement *stmt);
#endif