Simplify serializer/deserializer. Reduce memory usage in lexer. Create HashTable data structure. Finish preparations for introducing the new string addressing scheme.

2014-09-23 16:39:30 +04:00
parent f237a8d5e1
commit 8a5b956e38
27 changed files with 1152 additions and 1159 deletions
@@ -17,31 +17,43 @@
 #include "jerry-libc.h"
 #include "lexer.h"
 #include "parser.h"
+#include "stack.h"
+#include "opcodes.h"

 static token saved_token;
 static token empty_token =
 {
-  .type =
-  TOK_EMPTY,
+  .type = TOK_EMPTY,
  .uid = 0
 };

 static bool allow_dump_lines = false;
 static size_t buffer_size = 0;

-typedef struct
+/* Represents the contents of a script.  */
+static const char *buffer_start = NULL;
+static const char *buffer = NULL;
+static const char *token_start;
+
+#define LA(I)       (get_char (I))
+
+enum
 {
-  ecma_number_t num;
-  token tok;
-}
-num_and_token;
+  strings_global_size
+};
+STATIC_STACK (strings, uint8_t, lp_string)

-#define MAX_NUMS 25
+enum
+{
+  numbers_global_size
+};
+STATIC_STACK (numbers, uint8_t, ecma_number_t)

-static uint8_t seen_names_count = 0;
-
-static num_and_token seen_nums[MAX_NUMS];
-static uint8_t seen_nums_count = 0;
+enum
+{
+  num_ids_global_size
+};
+STATIC_STACK (num_ids, uint8_t, idx_t)

 static bool
 is_empty (token tok)
@@ -49,11 +61,6 @@ is_empty (token tok)
  return tok.type == TOK_EMPTY;
 }

-/* Represents the contents of a script.  */
-static const char *buffer_start = NULL;
-static const char *buffer = NULL;
-static const char *token_start;
-
 static char
 get_char (size_t i)
 {
@@ -64,44 +71,6 @@ get_char (size_t i)
  return *(buffer + i);
 }

-#define LA(I)       (get_char (I))
-
-/* Continuous array of NULL-terminated strings.  */
-static char *strings_cache = NULL;
-static size_t strings_cache_size = 0;
-
-static void
-increase_strings_cache (void)
-{
-  char *new_cache;
-  size_t new_cache_size;
-
-  // if strings_cache_size == 0, allocator recommends minimum size that is more than 0
-  new_cache_size = mem_heap_recommend_allocation_size (strings_cache_size * 2);
-  new_cache = (char *) mem_heap_alloc_block (new_cache_size, MEM_HEAP_ALLOC_SHORT_TERM);
-
-  if (!new_cache)
-  {
-    // Allocator alligns recommended memory size
-    new_cache_size = mem_heap_recommend_allocation_size (strings_cache_size + 1);
-    new_cache = (char *) mem_heap_alloc_block (new_cache_size, MEM_HEAP_ALLOC_SHORT_TERM);
-
-    if (!new_cache)
-    {
-      parser_fatal (ERR_MEMORY);
-    }
-  }
-
-  if (strings_cache)
-  {
-    __memcpy (new_cache, strings_cache, strings_cache_size);
-    mem_heap_free_block ((uint8_t *) strings_cache);
-  }
-
-  strings_cache = new_cache;
-  strings_cache_size = new_cache_size;
-}
-
 #ifdef __TARGET_HOST_x64
 static void
 dump_current_line (void)
@@ -137,6 +106,20 @@ current_token_equals_to (const char *str)
  return false;
 }

+/* Check whether the token currently being scanned, i.e. the characters
+   from token_start up to (but not including) buffer, matches the
+   length-prefixed string STR.  Return true on a match, false otherwise.  */
+static bool
+current_token_equals_to_lp (lp_string str)
+{
+  /* Compare the lengths at full width: narrowing the pointer difference
+     first could make an over-long token look equal to a short string.  */
+  const size_t token_length = (size_t) (buffer - token_start);
+
+  if ((size_t) str.length != token_length)
+  {
+    return false;
+  }
+
+  return __strncmp ((const char *) str.str, token_start, token_length) == 0;
+}
+
 /* If TOKEN represents a keyword, return decoded keyword,
   if TOKEN represents a Future Reserved Word, return KW_RESERVED,
   otherwise return KW_NONE.  */
@@ -507,171 +490,121 @@ decode_keyword (void)
 }

 static token
-convert_seen_name_to_token (token_type tt, const char *string)
+convert_current_token_to_token (token_type tt)
 {
-  uint8_t i;
-  char *current_string = strings_cache;
-  JERRY_ASSERT (strings_cache);
-  token ret_val = empty_token;
+  JERRY_ASSERT (token_start);

-  for (i = 0; i < seen_names_count; i++)
+  for (uint8_t i = 0; i < STACK_SIZE (strings); i++)
  {
-    if ((string == NULL && current_token_equals_to (current_string))
-        || (string != NULL && !__strcmp (current_string, string)))
+    if (current_token_equals_to_lp (strings.data[i]))
    {
-      ret_val = (token)
+      return (token)
      {
        .type = tt,
        .uid = i
      };
-
-      break;
    }
-
-    current_string += __strlen (current_string) + 1;
  }

-  return ret_val;
-}
-
-static token
-add_token_to_seen_names (token_type tt, const char *string)
-{
-  size_t i;
-  char *current_string = strings_cache;
-  size_t required_size;
-  size_t len = (string == NULL ? (size_t) (buffer - token_start) : __strlen (string));
-  token ret_val = empty_token;
-
-  // Go to unused memory of cache
-  for (i = 0; i < seen_names_count; i++)
+  const lp_string str = (lp_string)
  {
-    current_string += __strlen (current_string) + 1;
-  }
-
-  required_size = (size_t) (current_string - strings_cache) + len + 1;
-  if (required_size > strings_cache_size)
-  {
-    size_t offset = (size_t) (current_string - strings_cache);
-    increase_strings_cache ();
-
-    // Now our pointer are invalid, adjust it
-    current_string = strings_cache + offset;
-  }
-
-  if (string == NULL)
-  {
-    // Copy current token with terminating NULL
-    __strncpy (current_string, token_start, (size_t) (buffer - token_start));
-    current_string += buffer - token_start;
-    *current_string = '\0';
-  }
-  else
-  {
-    __memcpy (current_string, string, __strlen (string) + 1);
-  }
-
-  ret_val = (token)
-  {
-    .type = tt,
-    .uid = seen_names_count++
+    .length = (uint8_t) (buffer - token_start),
+    .str = (const ecma_char_t *) token_start
  };

-  return ret_val;
+  STACK_PUSH (strings, str);
+
+  return (token)
+  {
+    .type = tt,
+    .uid = (idx_t) (STACK_SIZE (strings) - 1)
+  };
 }

 static token
 convert_seen_num_to_token (ecma_number_t num)
 {
-  size_t i;
+  uint8_t num_id;

-  for (i = 0; i < seen_nums_count; i++)
+  JERRY_ASSERT (STACK_SIZE (num_ids) == STACK_SIZE (numbers));
+  for (uint8_t i = 0; i < STACK_SIZE (numbers); i++)
  {
-    // token must be exactly the same as seen
-    if (seen_nums[i].num == num)
+    if (numbers.data[i] == num)
    {
-      return seen_nums[i].tok;
+      return (token)
+      {
+        .type = TOK_NUMBER,
+        .uid = num_ids.data[i]
+      };
    }
  }

-  return empty_token;
+  num_id = STACK_SIZE (num_ids);
+  STACK_PUSH (num_ids, num_id);
+  STACK_PUSH (numbers, num);
+
+  return (token)
+  {
+    .type = TOK_NUMBER,
+    .uid = num_id
+  };
 }

-static void
-add_num_to_seen_tokens (num_and_token nat)
+const lp_string *
+lexer_get_strings (void)
 {
-  JERRY_ASSERT (seen_nums_count < MAX_NUMS);
-
-  seen_nums[seen_nums_count++] = nat;
+  return STACK_RAW_DATA (strings);
 }

 uint8_t
-lexer_get_strings (const char **strings)
+lexer_get_strings_count (void)
 {
-  if (strings)
-  {
-    char *current_string = strings_cache;
-    int i;
-    for (i = 0; i < seen_names_count; i++)
-    {
-      strings[i] = current_string;
-      current_string += __strlen (current_string) + 1;
-    }
-  }
-
-  return seen_names_count;
+  return STACK_SIZE (strings);
 }

 uint8_t
 lexer_get_reserved_ids_count (void)
 {
-  return (uint8_t) (seen_names_count + seen_nums_count);
+  return (uint8_t) (STACK_SIZE (strings) + STACK_SIZE (numbers));
 }

-const char *
+lp_string
 lexer_get_string_by_id (uint8_t id)
 {
-  int i;
-  char *current_string = strings_cache;
-  JERRY_ASSERT (id < seen_names_count);
+  JERRY_ASSERT (id < STACK_SIZE (strings));
+  return STACK_ELEMENT (strings, id);
+}

-  for (i = 0 ; i < id; i++)
-  {
-    current_string += __strlen (current_string) + 1;
-  }
-
-  return current_string;
+const ecma_number_t *
+lexer_get_nums (void)
+{
+  return STACK_RAW_DATA (numbers);
 }

 uint8_t
-lexer_get_nums (ecma_number_t *nums)
+lexer_get_nums_count (void)
 {
-  int i;
-
-  if (!nums)
-  {
-    return seen_nums_count;
-  }
-
-  for (i = 0; i < seen_nums_count; i++)
-  {
-    nums[i] = seen_nums[i].num;
-  }
-
-  return seen_nums_count;
+  return STACK_SIZE (numbers);
 }

 void
 lexer_adjust_num_ids (void)
 {
-  size_t i;
-
-  for (i = 0; i < seen_nums_count; i++)
+  JERRY_ASSERT (STACK_SIZE (numbers) == STACK_SIZE (num_ids));
+  for (uint8_t i = 0; i < STACK_SIZE (numbers); i++)
  {
-    seen_nums[i].tok.uid = (uint8_t) (seen_nums[i].tok.uid + seen_names_count);
+    STACK_ELEMENT (num_ids, i) = (uint8_t) (STACK_ELEMENT (num_ids, i) + STACK_SIZE (strings));
  }
 }

+/* Return the numeric literal stored for the reserved id ID.
+   ID is asserted to lie in the range
+   [lexer_get_strings_count (), lexer_get_reserved_ids_count ()); the
+   slot in the numbers stack is ID minus the strings count.  */
+ecma_number_t
+lexer_get_num_by_id (uint8_t id)
+{
+  const uint8_t strings_count = lexer_get_strings_count ();
+
+  JERRY_ASSERT (id >= strings_count && id < lexer_get_reserved_ids_count ());
+
+  /* Kept as int, the type the original subtraction expression had.  */
+  const int slot = id - strings_count;
+
+  JERRY_ASSERT (STACK_ELEMENT (num_ids, slot) == id);
+  return STACK_ELEMENT (numbers, slot);
+}
+
 static void
 new_token (void)
 {
@@ -773,20 +706,14 @@ parse_name (void)
    }
  }

-  known_token = convert_seen_name_to_token (TOK_NAME, NULL);
-  if (!is_empty (known_token))
-  {
-    goto end;
-  }
-
-  known_token = add_token_to_seen_names (TOK_NAME, NULL);
+  known_token = convert_current_token_to_token (TOK_NAME);

 end:
  token_start = NULL;
  return known_token;
 }

-static int32_t
+static uint32_t
 hex_to_int (char hex)
 {
  switch (hex)
@@ -827,7 +754,7 @@ parse_number (void)
  bool is_fp = false;
  bool is_exp = false;
  size_t tok_length = 0, i;
-  int32_t res = 0;
+  uint32_t res = 0;
  token known_token;

  JERRY_ASSERT (__isdigit (c) || c == '.');
@@ -868,10 +795,17 @@ parse_number (void)
    }

    tok_length = (size_t) (buffer - token_start);
-    // OK, I know that integer overflow can occur here
+
    for (i = 0; i < tok_length; i++)
    {
+#ifndef JERRY_NDEBUG
+      uint32_t old_res = res;
+#endif
      res = (res << 4) + hex_to_int (token_start[i]);
+      FIXME (Replace with conversion to ecma_number_t)
+#ifndef JERRY_NDEBUG
+      JERRY_ASSERT (old_res <= res);
+#endif
    }

    token_start = NULL;
@@ -886,23 +820,8 @@ parse_number (void)
    }

    known_token = convert_seen_num_to_token ((ecma_number_t) res);
-    if (!is_empty (known_token))
-    {
-      return known_token;
-    }
+    JERRY_ASSERT (!is_empty (known_token));

-    known_token = (token)
-    {
-      .type = TOK_NUMBER,
-      .uid = seen_nums_count
-    };
-    add_num_to_seen_tokens (
-      (num_and_token)
-      {
-        .num = (ecma_number_t) res,
-        .tok = known_token
-      }
-);
    return known_token;
  }

@@ -973,23 +892,7 @@ parse_number (void)
    token_start = NULL;

    known_token = convert_seen_num_to_token (res);
-    if (!is_empty (known_token))
-    {
-      return known_token;
-    }

-    known_token = (token)
-    {
-      .type = TOK_NUMBER,
-      .uid = seen_nums_count
-    };
-    add_num_to_seen_tokens (
-      (num_and_token)
-      {
-        .num = res,
-        .tok = known_token
-      }
-);
    return known_token;
  }

@@ -1011,54 +914,15 @@ parse_number (void)
  }

  known_token = convert_seen_num_to_token ((ecma_number_t) res);
-  if (!is_empty (known_token))
-  {
-    return known_token;
-  }
-
-  known_token = (token)
-  {
-    .type = TOK_NUMBER,
-    .uid = seen_nums_count
-  };
-  add_num_to_seen_tokens (
-    (num_and_token)
-    {
-      .num = (ecma_number_t) res,
-      .tok = known_token
-    }
-);
  return known_token;
 }

-static char
-escape_char (char c)
-{
-  switch (c)
-  {
-    case 'b': return '\b';
-    case 'f': return '\f';
-    case 'n': return '\n';
-    case 'r': return '\r';
-    case 't': return '\t';
-    case 'v': return '\v';
-    case '\'':
-    case '"':
-    case '\\':
-    default: return c;
-  }
-}
-
 static token
 parse_string (void)
 {
  char c = LA (0);
  bool is_double_quoted;
-  char *tok = NULL;
-  char *index = NULL;
-  const char *i;
-  size_t length;
-  token known_token = empty_token;
+  token result;

  JERRY_ASSERT (c == '\'' || c == '"');

@@ -1104,46 +968,13 @@ parse_string (void)
    consume_char ();
  }

-  length = (size_t) (buffer - token_start);
-  tok = (char *) mem_heap_alloc_block (length + 1, MEM_HEAP_ALLOC_SHORT_TERM);
-  __memset (tok, '\0', length + 1);
-  index = tok;
-
-  // Copy current token to TOK and replace escape sequences by there meanings
-  for (i = token_start; i < buffer; i++)
-  {
-    if (*i == '\\')
-    {
-      if (*(i+1) == '\n')
-      {
-        i++;
-        continue;
-      }
-      *index = escape_char (*(i+1));
-      index++;
-      i++;
-      continue;
-    }
-
-    *index = *i;
-    index++;
-  }
-
  // Eat up '"'
+  result = convert_current_token_to_token (TOK_STRING);
+
  consume_char ();
-
-  known_token = convert_seen_name_to_token (TOK_STRING, tok);
-  if (!is_empty (known_token))
-  {
-    goto end;
-  }
-
-  known_token = add_token_to_seen_names (TOK_STRING, tok);
-
-end:
-  mem_heap_free_block ((uint8_t *) tok);
  token_start = NULL;
-  return known_token;
+
+  return result;
 }

 static void
@@ -1421,7 +1252,10 @@ lexer_init (const char *source, size_t source_size, bool show_opcodes)
  allow_dump_lines = show_opcodes;
  buffer_size = source_size;
  lexer_set_source (source);
-  increase_strings_cache ();
+
+  STACK_INIT (lp_string, strings);
+  STACK_INIT (ecma_number_t, numbers);
+  STACK_INIT (idx_t, num_ids);
 }

 void
@@ -1439,7 +1273,7 @@ lexer_run_first_pass (void)
 void
 lexer_free (void)
 {
-  mem_heap_free_block ((uint8_t *) strings_cache);
-  strings_cache = NULL;
-  strings_cache_size = 0;
+  STACK_FREE (strings);
+  STACK_FREE (numbers);
+  STACK_FREE (num_ids);
 }
@@ -18,6 +18,7 @@

 #include "globals.h"
 #include "ecma-globals.h"
+#include "lp-string.h"

 /* Keywords.  */
 typedef uint8_t keyword;
@@ -145,14 +146,24 @@ token;

 void lexer_init (const char *, size_t, bool);
 void lexer_free (void);
+
 void lexer_run_first_pass (void);
+
 token lexer_next_token (void);
 void lexer_save_token (token);
+
 void lexer_dump_buffer_state (void);
-uint8_t lexer_get_strings (const char **);
+
 uint8_t lexer_get_reserved_ids_count (void);
-const char *lexer_get_string_by_id (uint8_t);
-uint8_t lexer_get_nums (ecma_number_t *);
+
+const lp_string *lexer_get_strings (void);
+uint8_t lexer_get_strings_count (void);
+lp_string lexer_get_string_by_id (uint8_t);
+
+const ecma_number_t *lexer_get_nums (void);
+ecma_number_t lexer_get_num_by_id (uint8_t);
+uint8_t lexer_get_nums_count (void);
+
 void lexer_adjust_num_ids (void);

 #endif
@@ -18,7 +18,7 @@

 #include "globals.h"

-void parser_init (void);
+void parser_init (const char *, size_t, bool);
 void parser_parse_program (void);
 void parser_free (void);