Simplify serializer/deserializer. Reduce memory usage in lexer. Create HashTable data structure. Finish preparations for introducing new strings addressation.
This commit is contained in:
+122
-288
@@ -17,31 +17,43 @@
|
||||
#include "jerry-libc.h"
|
||||
#include "lexer.h"
|
||||
#include "parser.h"
|
||||
#include "stack.h"
|
||||
#include "opcodes.h"
|
||||
|
||||
static token saved_token;
|
||||
static token empty_token =
|
||||
{
|
||||
.type =
|
||||
TOK_EMPTY,
|
||||
.type = TOK_EMPTY,
|
||||
.uid = 0
|
||||
};
|
||||
|
||||
static bool allow_dump_lines = false;
|
||||
static size_t buffer_size = 0;
|
||||
|
||||
typedef struct
|
||||
/* Represents the contents of a script. */
|
||||
static const char *buffer_start = NULL;
|
||||
static const char *buffer = NULL;
|
||||
static const char *token_start;
|
||||
|
||||
#define LA(I) (get_char (I))
|
||||
|
||||
enum
|
||||
{
|
||||
ecma_number_t num;
|
||||
token tok;
|
||||
}
|
||||
num_and_token;
|
||||
strings_global_size
|
||||
};
|
||||
STATIC_STACK (strings, uint8_t, lp_string)
|
||||
|
||||
#define MAX_NUMS 25
|
||||
enum
|
||||
{
|
||||
numbers_global_size
|
||||
};
|
||||
STATIC_STACK (numbers, uint8_t, ecma_number_t)
|
||||
|
||||
static uint8_t seen_names_count = 0;
|
||||
|
||||
static num_and_token seen_nums[MAX_NUMS];
|
||||
static uint8_t seen_nums_count = 0;
|
||||
enum
|
||||
{
|
||||
num_ids_global_size
|
||||
};
|
||||
STATIC_STACK (num_ids, uint8_t, idx_t)
|
||||
|
||||
static bool
|
||||
is_empty (token tok)
|
||||
@@ -49,11 +61,6 @@ is_empty (token tok)
|
||||
return tok.type == TOK_EMPTY;
|
||||
}
|
||||
|
||||
/* Represents the contents of a script. */
|
||||
static const char *buffer_start = NULL;
|
||||
static const char *buffer = NULL;
|
||||
static const char *token_start;
|
||||
|
||||
static char
|
||||
get_char (size_t i)
|
||||
{
|
||||
@@ -64,44 +71,6 @@ get_char (size_t i)
|
||||
return *(buffer + i);
|
||||
}
|
||||
|
||||
#define LA(I) (get_char (I))
|
||||
|
||||
/* Continuous array of NULL-terminated strings. */
|
||||
static char *strings_cache = NULL;
|
||||
static size_t strings_cache_size = 0;
|
||||
|
||||
static void
|
||||
increase_strings_cache (void)
|
||||
{
|
||||
char *new_cache;
|
||||
size_t new_cache_size;
|
||||
|
||||
// if strings_cache_size == 0, allocator recommends minimum size that is more than 0
|
||||
new_cache_size = mem_heap_recommend_allocation_size (strings_cache_size * 2);
|
||||
new_cache = (char *) mem_heap_alloc_block (new_cache_size, MEM_HEAP_ALLOC_SHORT_TERM);
|
||||
|
||||
if (!new_cache)
|
||||
{
|
||||
// Allocator alligns recommended memory size
|
||||
new_cache_size = mem_heap_recommend_allocation_size (strings_cache_size + 1);
|
||||
new_cache = (char *) mem_heap_alloc_block (new_cache_size, MEM_HEAP_ALLOC_SHORT_TERM);
|
||||
|
||||
if (!new_cache)
|
||||
{
|
||||
parser_fatal (ERR_MEMORY);
|
||||
}
|
||||
}
|
||||
|
||||
if (strings_cache)
|
||||
{
|
||||
__memcpy (new_cache, strings_cache, strings_cache_size);
|
||||
mem_heap_free_block ((uint8_t *) strings_cache);
|
||||
}
|
||||
|
||||
strings_cache = new_cache;
|
||||
strings_cache_size = new_cache_size;
|
||||
}
|
||||
|
||||
#ifdef __TARGET_HOST_x64
|
||||
static void
|
||||
dump_current_line (void)
|
||||
@@ -137,6 +106,20 @@ current_token_equals_to (const char *str)
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
current_token_equals_to_lp (lp_string str)
|
||||
{
|
||||
if (str.length != (ecma_length_t) (buffer - token_start))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (!__strncmp ((const char *) str.str, token_start, str.length))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* If TOKEN represents a keyword, return decoded keyword,
|
||||
if TOKEN represents a Future Reserved Word, return KW_RESERVED,
|
||||
otherwise return KW_NONE. */
|
||||
@@ -507,171 +490,121 @@ decode_keyword (void)
|
||||
}
|
||||
|
||||
static token
|
||||
convert_seen_name_to_token (token_type tt, const char *string)
|
||||
convert_current_token_to_token (token_type tt)
|
||||
{
|
||||
uint8_t i;
|
||||
char *current_string = strings_cache;
|
||||
JERRY_ASSERT (strings_cache);
|
||||
token ret_val = empty_token;
|
||||
JERRY_ASSERT (token_start);
|
||||
|
||||
for (i = 0; i < seen_names_count; i++)
|
||||
for (uint8_t i = 0; i < STACK_SIZE (strings); i++)
|
||||
{
|
||||
if ((string == NULL && current_token_equals_to (current_string))
|
||||
|| (string != NULL && !__strcmp (current_string, string)))
|
||||
if (current_token_equals_to_lp (strings.data[i]))
|
||||
{
|
||||
ret_val = (token)
|
||||
return (token)
|
||||
{
|
||||
.type = tt,
|
||||
.uid = i
|
||||
};
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
current_string += __strlen (current_string) + 1;
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
static token
|
||||
add_token_to_seen_names (token_type tt, const char *string)
|
||||
{
|
||||
size_t i;
|
||||
char *current_string = strings_cache;
|
||||
size_t required_size;
|
||||
size_t len = (string == NULL ? (size_t) (buffer - token_start) : __strlen (string));
|
||||
token ret_val = empty_token;
|
||||
|
||||
// Go to unused memory of cache
|
||||
for (i = 0; i < seen_names_count; i++)
|
||||
const lp_string str = (lp_string)
|
||||
{
|
||||
current_string += __strlen (current_string) + 1;
|
||||
}
|
||||
|
||||
required_size = (size_t) (current_string - strings_cache) + len + 1;
|
||||
if (required_size > strings_cache_size)
|
||||
{
|
||||
size_t offset = (size_t) (current_string - strings_cache);
|
||||
increase_strings_cache ();
|
||||
|
||||
// Now our pointer are invalid, adjust it
|
||||
current_string = strings_cache + offset;
|
||||
}
|
||||
|
||||
if (string == NULL)
|
||||
{
|
||||
// Copy current token with terminating NULL
|
||||
__strncpy (current_string, token_start, (size_t) (buffer - token_start));
|
||||
current_string += buffer - token_start;
|
||||
*current_string = '\0';
|
||||
}
|
||||
else
|
||||
{
|
||||
__memcpy (current_string, string, __strlen (string) + 1);
|
||||
}
|
||||
|
||||
ret_val = (token)
|
||||
{
|
||||
.type = tt,
|
||||
.uid = seen_names_count++
|
||||
.length = (uint8_t) (buffer - token_start),
|
||||
.str = (const ecma_char_t *) token_start
|
||||
};
|
||||
|
||||
return ret_val;
|
||||
STACK_PUSH (strings, str);
|
||||
|
||||
return (token)
|
||||
{
|
||||
.type = tt,
|
||||
.uid = (idx_t) (STACK_SIZE (strings) - 1)
|
||||
};
|
||||
}
|
||||
|
||||
static token
|
||||
convert_seen_num_to_token (ecma_number_t num)
|
||||
{
|
||||
size_t i;
|
||||
uint8_t num_id;
|
||||
|
||||
for (i = 0; i < seen_nums_count; i++)
|
||||
JERRY_ASSERT (STACK_SIZE (num_ids) == STACK_SIZE (numbers));
|
||||
for (uint8_t i = 0; i < STACK_SIZE (numbers); i++)
|
||||
{
|
||||
// token must be exactly the same as seen
|
||||
if (seen_nums[i].num == num)
|
||||
if (numbers.data[i] == num)
|
||||
{
|
||||
return seen_nums[i].tok;
|
||||
return (token)
|
||||
{
|
||||
.type = TOK_NUMBER,
|
||||
.uid = num_ids.data[i]
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return empty_token;
|
||||
num_id = STACK_SIZE (num_ids);
|
||||
STACK_PUSH (num_ids, num_id);
|
||||
STACK_PUSH (numbers, num);
|
||||
|
||||
return (token)
|
||||
{
|
||||
.type = TOK_NUMBER,
|
||||
.uid = num_id
|
||||
};
|
||||
}
|
||||
|
||||
static void
|
||||
add_num_to_seen_tokens (num_and_token nat)
|
||||
const lp_string *
|
||||
lexer_get_strings (void)
|
||||
{
|
||||
JERRY_ASSERT (seen_nums_count < MAX_NUMS);
|
||||
|
||||
seen_nums[seen_nums_count++] = nat;
|
||||
return STACK_RAW_DATA (strings);
|
||||
}
|
||||
|
||||
uint8_t
|
||||
lexer_get_strings (const char **strings)
|
||||
lexer_get_strings_count (void)
|
||||
{
|
||||
if (strings)
|
||||
{
|
||||
char *current_string = strings_cache;
|
||||
int i;
|
||||
for (i = 0; i < seen_names_count; i++)
|
||||
{
|
||||
strings[i] = current_string;
|
||||
current_string += __strlen (current_string) + 1;
|
||||
}
|
||||
}
|
||||
|
||||
return seen_names_count;
|
||||
return STACK_SIZE (strings);
|
||||
}
|
||||
|
||||
uint8_t
|
||||
lexer_get_reserved_ids_count (void)
|
||||
{
|
||||
return (uint8_t) (seen_names_count + seen_nums_count);
|
||||
return (uint8_t) (STACK_SIZE (strings) + STACK_SIZE (numbers));
|
||||
}
|
||||
|
||||
const char *
|
||||
lp_string
|
||||
lexer_get_string_by_id (uint8_t id)
|
||||
{
|
||||
int i;
|
||||
char *current_string = strings_cache;
|
||||
JERRY_ASSERT (id < seen_names_count);
|
||||
JERRY_ASSERT (id < STACK_SIZE (strings));
|
||||
return STACK_ELEMENT (strings, id);
|
||||
}
|
||||
|
||||
for (i = 0 ; i < id; i++)
|
||||
{
|
||||
current_string += __strlen (current_string) + 1;
|
||||
}
|
||||
|
||||
return current_string;
|
||||
const ecma_number_t *
|
||||
lexer_get_nums (void)
|
||||
{
|
||||
return STACK_RAW_DATA (numbers);
|
||||
}
|
||||
|
||||
uint8_t
|
||||
lexer_get_nums (ecma_number_t *nums)
|
||||
lexer_get_nums_count (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!nums)
|
||||
{
|
||||
return seen_nums_count;
|
||||
}
|
||||
|
||||
for (i = 0; i < seen_nums_count; i++)
|
||||
{
|
||||
nums[i] = seen_nums[i].num;
|
||||
}
|
||||
|
||||
return seen_nums_count;
|
||||
return STACK_SIZE (numbers);
|
||||
}
|
||||
|
||||
void
|
||||
lexer_adjust_num_ids (void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < seen_nums_count; i++)
|
||||
JERRY_ASSERT (STACK_SIZE (numbers) == STACK_SIZE (num_ids));
|
||||
for (uint8_t i = 0; i < STACK_SIZE (numbers); i++)
|
||||
{
|
||||
seen_nums[i].tok.uid = (uint8_t) (seen_nums[i].tok.uid + seen_names_count);
|
||||
STACK_ELEMENT (num_ids, i) = (uint8_t) (STACK_ELEMENT (num_ids, i) + STACK_SIZE (strings));
|
||||
}
|
||||
}
|
||||
|
||||
ecma_number_t
|
||||
lexer_get_num_by_id (uint8_t id)
|
||||
{
|
||||
JERRY_ASSERT (id >= lexer_get_strings_count () && id < lexer_get_reserved_ids_count ());
|
||||
JERRY_ASSERT (STACK_ELEMENT (num_ids, id - lexer_get_strings_count ()) == id);
|
||||
return STACK_ELEMENT (numbers, id - lexer_get_strings_count ());
|
||||
}
|
||||
|
||||
static void
|
||||
new_token (void)
|
||||
{
|
||||
@@ -773,20 +706,14 @@ parse_name (void)
|
||||
}
|
||||
}
|
||||
|
||||
known_token = convert_seen_name_to_token (TOK_NAME, NULL);
|
||||
if (!is_empty (known_token))
|
||||
{
|
||||
goto end;
|
||||
}
|
||||
|
||||
known_token = add_token_to_seen_names (TOK_NAME, NULL);
|
||||
known_token = convert_current_token_to_token (TOK_NAME);
|
||||
|
||||
end:
|
||||
token_start = NULL;
|
||||
return known_token;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
static uint32_t
|
||||
hex_to_int (char hex)
|
||||
{
|
||||
switch (hex)
|
||||
@@ -827,7 +754,7 @@ parse_number (void)
|
||||
bool is_fp = false;
|
||||
bool is_exp = false;
|
||||
size_t tok_length = 0, i;
|
||||
int32_t res = 0;
|
||||
uint32_t res = 0;
|
||||
token known_token;
|
||||
|
||||
JERRY_ASSERT (__isdigit (c) || c == '.');
|
||||
@@ -868,10 +795,17 @@ parse_number (void)
|
||||
}
|
||||
|
||||
tok_length = (size_t) (buffer - token_start);
|
||||
// OK, I know that integer overflow can occur here
|
||||
|
||||
for (i = 0; i < tok_length; i++)
|
||||
{
|
||||
#ifndef JERRY_NDEBUG
|
||||
uint32_t old_res = res;
|
||||
#endif
|
||||
res = (res << 4) + hex_to_int (token_start[i]);
|
||||
FIXME (Replace with conversion to ecma_number_t)
|
||||
#ifndef JERRY_NDEBUG
|
||||
JERRY_ASSERT (old_res <= res);
|
||||
#endif
|
||||
}
|
||||
|
||||
token_start = NULL;
|
||||
@@ -886,23 +820,8 @@ parse_number (void)
|
||||
}
|
||||
|
||||
known_token = convert_seen_num_to_token ((ecma_number_t) res);
|
||||
if (!is_empty (known_token))
|
||||
{
|
||||
return known_token;
|
||||
}
|
||||
JERRY_ASSERT (!is_empty (known_token));
|
||||
|
||||
known_token = (token)
|
||||
{
|
||||
.type = TOK_NUMBER,
|
||||
.uid = seen_nums_count
|
||||
};
|
||||
add_num_to_seen_tokens (
|
||||
(num_and_token)
|
||||
{
|
||||
.num = (ecma_number_t) res,
|
||||
.tok = known_token
|
||||
}
|
||||
);
|
||||
return known_token;
|
||||
}
|
||||
|
||||
@@ -973,23 +892,7 @@ parse_number (void)
|
||||
token_start = NULL;
|
||||
|
||||
known_token = convert_seen_num_to_token (res);
|
||||
if (!is_empty (known_token))
|
||||
{
|
||||
return known_token;
|
||||
}
|
||||
|
||||
known_token = (token)
|
||||
{
|
||||
.type = TOK_NUMBER,
|
||||
.uid = seen_nums_count
|
||||
};
|
||||
add_num_to_seen_tokens (
|
||||
(num_and_token)
|
||||
{
|
||||
.num = res,
|
||||
.tok = known_token
|
||||
}
|
||||
);
|
||||
return known_token;
|
||||
}
|
||||
|
||||
@@ -1011,54 +914,15 @@ parse_number (void)
|
||||
}
|
||||
|
||||
known_token = convert_seen_num_to_token ((ecma_number_t) res);
|
||||
if (!is_empty (known_token))
|
||||
{
|
||||
return known_token;
|
||||
}
|
||||
|
||||
known_token = (token)
|
||||
{
|
||||
.type = TOK_NUMBER,
|
||||
.uid = seen_nums_count
|
||||
};
|
||||
add_num_to_seen_tokens (
|
||||
(num_and_token)
|
||||
{
|
||||
.num = (ecma_number_t) res,
|
||||
.tok = known_token
|
||||
}
|
||||
);
|
||||
return known_token;
|
||||
}
|
||||
|
||||
static char
|
||||
escape_char (char c)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case 'b': return '\b';
|
||||
case 'f': return '\f';
|
||||
case 'n': return '\n';
|
||||
case 'r': return '\r';
|
||||
case 't': return '\t';
|
||||
case 'v': return '\v';
|
||||
case '\'':
|
||||
case '"':
|
||||
case '\\':
|
||||
default: return c;
|
||||
}
|
||||
}
|
||||
|
||||
static token
|
||||
parse_string (void)
|
||||
{
|
||||
char c = LA (0);
|
||||
bool is_double_quoted;
|
||||
char *tok = NULL;
|
||||
char *index = NULL;
|
||||
const char *i;
|
||||
size_t length;
|
||||
token known_token = empty_token;
|
||||
token result;
|
||||
|
||||
JERRY_ASSERT (c == '\'' || c == '"');
|
||||
|
||||
@@ -1104,46 +968,13 @@ parse_string (void)
|
||||
consume_char ();
|
||||
}
|
||||
|
||||
length = (size_t) (buffer - token_start);
|
||||
tok = (char *) mem_heap_alloc_block (length + 1, MEM_HEAP_ALLOC_SHORT_TERM);
|
||||
__memset (tok, '\0', length + 1);
|
||||
index = tok;
|
||||
|
||||
// Copy current token to TOK and replace escape sequences by there meanings
|
||||
for (i = token_start; i < buffer; i++)
|
||||
{
|
||||
if (*i == '\\')
|
||||
{
|
||||
if (*(i+1) == '\n')
|
||||
{
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
*index = escape_char (*(i+1));
|
||||
index++;
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
*index = *i;
|
||||
index++;
|
||||
}
|
||||
|
||||
// Eat up '"'
|
||||
result = convert_current_token_to_token (TOK_STRING);
|
||||
|
||||
consume_char ();
|
||||
|
||||
known_token = convert_seen_name_to_token (TOK_STRING, tok);
|
||||
if (!is_empty (known_token))
|
||||
{
|
||||
goto end;
|
||||
}
|
||||
|
||||
known_token = add_token_to_seen_names (TOK_STRING, tok);
|
||||
|
||||
end:
|
||||
mem_heap_free_block ((uint8_t *) tok);
|
||||
token_start = NULL;
|
||||
return known_token;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1421,7 +1252,10 @@ lexer_init (const char *source, size_t source_size, bool show_opcodes)
|
||||
allow_dump_lines = show_opcodes;
|
||||
buffer_size = source_size;
|
||||
lexer_set_source (source);
|
||||
increase_strings_cache ();
|
||||
|
||||
STACK_INIT (lp_string, strings);
|
||||
STACK_INIT (ecma_number_t, numbers);
|
||||
STACK_INIT (idx_t, num_ids);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1439,7 +1273,7 @@ lexer_run_first_pass (void)
|
||||
void
|
||||
lexer_free (void)
|
||||
{
|
||||
mem_heap_free_block ((uint8_t *) strings_cache);
|
||||
strings_cache = NULL;
|
||||
strings_cache_size = 0;
|
||||
STACK_FREE (strings);
|
||||
STACK_FREE (numbers);
|
||||
STACK_FREE (num_ids);
|
||||
}
|
||||
|
||||
+14
-3
@@ -18,6 +18,7 @@
|
||||
|
||||
#include "globals.h"
|
||||
#include "ecma-globals.h"
|
||||
#include "lp-string.h"
|
||||
|
||||
/* Keywords. */
|
||||
typedef uint8_t keyword;
|
||||
@@ -145,14 +146,24 @@ token;
|
||||
|
||||
void lexer_init (const char *, size_t, bool);
|
||||
void lexer_free (void);
|
||||
|
||||
void lexer_run_first_pass (void);
|
||||
|
||||
token lexer_next_token (void);
|
||||
void lexer_save_token (token);
|
||||
|
||||
void lexer_dump_buffer_state (void);
|
||||
uint8_t lexer_get_strings (const char **);
|
||||
|
||||
uint8_t lexer_get_reserved_ids_count (void);
|
||||
const char *lexer_get_string_by_id (uint8_t);
|
||||
uint8_t lexer_get_nums (ecma_number_t *);
|
||||
|
||||
const lp_string *lexer_get_strings (void);
|
||||
uint8_t lexer_get_strings_count (void);
|
||||
lp_string lexer_get_string_by_id (uint8_t);
|
||||
|
||||
const ecma_number_t *lexer_get_nums (void);
|
||||
ecma_number_t lexer_get_num_by_id (uint8_t);
|
||||
uint8_t lexer_get_nums_count (void);
|
||||
|
||||
void lexer_adjust_num_ids (void);
|
||||
|
||||
#endif
|
||||
|
||||
+699
-549
File diff suppressed because it is too large
Load Diff
@@ -18,7 +18,7 @@
|
||||
|
||||
#include "globals.h"
|
||||
|
||||
void parser_init (void);
|
||||
void parser_init (const char *, size_t, bool);
|
||||
void parser_parse_program (void);
|
||||
void parser_free (void);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user