Add parser and compiler of regular expressions.

JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com
2015-06-25 23:51:34 +03:00
parent 1f9add4735
commit 4ffcb4d464
5 changed files with 1898 additions and 0 deletions
@@ -102,6 +102,7 @@ project (JerryCore CXX C ASM)
     ${CMAKE_SOURCE_DIR}/jerry-core/ecma/operations
     ${CMAKE_SOURCE_DIR}/jerry-core/parser/js
     ${CMAKE_SOURCE_DIR}/jerry-core/parser/js/collections
+     ${CMAKE_SOURCE_DIR}/jerry-core/parser/regexp
     ${CMAKE_SOURCE_DIR}/jerry-core/jrt)

 # Third-party
@@ -120,6 +121,7 @@ project (JerryCore CXX C ASM)
  file(GLOB SOURCE_CORE_ECMA_OPERATIONS       ecma/operations/*.cpp)
  file(GLOB SOURCE_CORE_PARSER_JS             parser/js/*.cpp)
  file(GLOB SOURCE_CORE_PARSER_JS_COLLECTIONS parser/js/collections/*.cpp)
+  file(GLOB SOURCE_CORE_PARSER_REGEXP         parser/regexp/*.cpp)
  file(GLOB SOURCE_CORE_JRT                   jrt/*.cpp)

  set(SOURCE_CORE
@@ -134,6 +136,7 @@ project (JerryCore CXX C ASM)
      ${SOURCE_CORE_ECMA_OPERATIONS}
      ${SOURCE_CORE_PARSER_JS}
      ${SOURCE_CORE_PARSER_JS_COLLECTIONS}
+      ${SOURCE_CORE_PARSER_REGEXP}
      ${SOURCE_CORE_JRT})

 # Per-option configuration
@@ -0,0 +1,888 @@
+/* Copyright 2015 Samsung Electronics Co., Ltd.
+ * Copyright 2015 University of Szeged.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ecma-exceptions.h"
+#include "ecma-helpers.h"
+#include "ecma-try-catch-macro.h"
+#include "jrt-libc-includes.h"
+#include "mem-heap.h"
+#include "re-compiler.h"
+
+#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
+
+/**
+ * FIXME:
+ *       Add comments to macro definitions in the component
+ */
+
+/**
+ * Amount by which the RegExp bytecode container grows on every reallocation
+ */
+#define REGEXP_BYTECODE_BLOCK_SIZE 256UL
+
+/**
+ * Length of the bytecode written so far (distance between the start of the container and the write position)
+ */
+#define BYTECODE_LEN(bc_ctx_p) ((uint32_t) ((bc_ctx_p)->current_p - (bc_ctx_p)->block_start_p))
+
+/**
+ * RegExp bytecode dumper; the definition in this file is guarded by JERRY_ENABLE_LOG
+ */
+void
+regexp_dump_bytecode (re_bytecode_ctx_t *bc_ctx);
+
+/**
+ * FIXME:
+ *       Add missing 're' prefixes to the component's external and internal interfaces
+ */
+
+/**
+ * Grow the bytecode container by REGEXP_BYTECODE_BLOCK_SIZE
+ *
+ * A new block is allocated, the bytecode written so far is copied into it
+ * and the previous block (if there was one) is released.
+ *
+ * @return the write position inside the newly allocated block
+ */
+static re_bytecode_t*
+realloc_regexp_bytecode_block (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
+{
+  /* Either all of the pointers are NULL (first allocation), or all of them are set.
+   * Requiring them to be NULL unconditionally would fail on every later reallocation. */
+  JERRY_ASSERT ((!bc_ctx_p->current_p && !bc_ctx_p->block_end_p && !bc_ctx_p->block_start_p)
+                || (bc_ctx_p->current_p && bc_ctx_p->block_end_p && bc_ctx_p->block_start_p));
+
+  JERRY_ASSERT (bc_ctx_p->block_end_p - bc_ctx_p->block_start_p >= 0);
+  size_t old_size = static_cast<size_t> (bc_ctx_p->block_end_p - bc_ctx_p->block_start_p);
+
+  size_t new_block_size = old_size + REGEXP_BYTECODE_BLOCK_SIZE;
+  JERRY_ASSERT (bc_ctx_p->current_p - bc_ctx_p->block_start_p >= 0);
+  size_t current_ptr_offset = static_cast<size_t> (bc_ctx_p->current_p - bc_ctx_p->block_start_p);
+
+  re_bytecode_t *new_block_start_p = (re_bytecode_t *) mem_heap_alloc_block (new_block_size,
+                                                                             MEM_HEAP_ALLOC_SHORT_TERM);
+  if (bc_ctx_p->current_p)
+  {
+    /* Only the bytes that were actually written have to be preserved. */
+    memcpy (new_block_start_p, bc_ctx_p->block_start_p, current_ptr_offset);
+    mem_heap_free_block (bc_ctx_p->block_start_p);
+  }
+  bc_ctx_p->block_start_p = new_block_start_p;
+  bc_ctx_p->block_end_p = new_block_start_p + new_block_size;
+  bc_ctx_p->current_p = new_block_start_p + current_ptr_offset;
+
+  return bc_ctx_p->current_p;
+} /* realloc_regexp_bytecode_block */
+
+/**
+ * Append raw bytecode to the end of the bytecode container
+ *
+ * The container is grown first when the data does not fit into the remaining space.
+ */
+static void
+bytecode_list_append (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
+                      re_bytecode_t *bytecode_p, /**< input bytecode */
+                      size_t length) /**< length of input */
+{
+  /* A single growth step has to be enough to hold the new data. */
+  JERRY_ASSERT (length <= REGEXP_BYTECODE_BLOCK_SIZE);
+
+  re_bytecode_t *write_pos_p = bc_ctx_p->current_p;
+
+  if (write_pos_p + length > bc_ctx_p->block_end_p)
+  {
+    /* The reallocation moves the container, so the write position has to be refreshed. */
+    write_pos_p = realloc_regexp_bytecode_block (bc_ctx_p);
+  }
+
+  memcpy (write_pos_p, bytecode_p, length);
+  bc_ctx_p->current_p = write_pos_p + length;
+} /* bytecode_list_append */
+
+/**
+ * Insert raw bytecode into the bytecode container
+ *
+ * The bytecode located at and after the given offset is shifted towards the end
+ * of the container by 'length' and the input is copied into the gap.
+ */
+static void
+bytecode_list_insert (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
+                      size_t offset, /**< distance from the start of the container */
+                      re_bytecode_t *bytecode_p, /**< input bytecode */
+                      size_t length) /**< length of input */
+{
+  /* A single growth step has to be enough to hold the new data. */
+  JERRY_ASSERT (length <= REGEXP_BYTECODE_BLOCK_SIZE);
+  /* The tail length below is computed with unsigned arithmetic, so the offset must not point past the end. */
+  JERRY_ASSERT (offset <= BYTECODE_LEN (bc_ctx_p));
+
+  if (bc_ctx_p->current_p + length > bc_ctx_p->block_end_p)
+  {
+    realloc_regexp_bytecode_block (bc_ctx_p);
+  }
+
+  re_bytecode_t *src_p = bc_ctx_p->block_start_p + offset;
+  size_t tail_length = (size_t) BYTECODE_LEN (bc_ctx_p) - offset;
+
+  if (tail_length > 0)
+  {
+    /* Source and destination ranges may overlap, hence memmove instead of memcpy.
+     * This also avoids allocating a temporary heap block just for shifting the tail. */
+    memmove (src_p + length, src_p, tail_length);
+  }
+  memcpy (src_p, bytecode_p, length);
+
+  bc_ctx_p->current_p += length;
+} /* bytecode_list_insert */
+
+/**
+ * Append a single RegExp opcode to the end of the bytecode
+ */
+static void
+append_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
+               re_opcode_t opcode) /**< input opcode */
+{
+  re_bytecode_t *opcode_bytes_p = (re_bytecode_t*) &opcode;
+
+  bytecode_list_append (bc_ctx_p, opcode_bytes_p, sizeof (re_bytecode_t));
+} /* append_opcode */
+
+/**
+ * Append a 32 bit parameter of a RegExp opcode to the end of the bytecode
+ */
+static void
+append_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
+            uint32_t value) /**< input value */
+{
+  re_bytecode_t *value_bytes_p = (re_bytecode_t*) &value;
+
+  bytecode_list_append (bc_ctx_p, value_bytes_p, sizeof (uint32_t));
+} /* append_u32 */
+
+/**
+ * Append a jump offset parameter of a RegExp opcode
+ *
+ * The stored value is the given one increased by the size of the parameter itself.
+ */
+static void
+append_jump_offset (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
+                    uint32_t value) /**< input value */
+{
+  const uint32_t stored_offset = value + (uint32_t) (sizeof (uint32_t));
+
+  append_u32 (bc_ctx_p, stored_offset);
+} /* append_jump_offset */
+
+/**
+ * Insert a single RegExp opcode at the given offset of the bytecode
+ */
+static void
+insert_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
+               uint32_t offset, /**< distance from the start of the container */
+               re_opcode_t opcode) /**< input opcode */
+{
+  re_bytecode_t *opcode_bytes_p = (re_bytecode_t*) &opcode;
+
+  bytecode_list_insert (bc_ctx_p, offset, opcode_bytes_p, sizeof (re_bytecode_t));
+} /* insert_opcode */
+
+/**
+ * Insert a 32 bit parameter of a RegExp opcode at the given offset of the bytecode
+ */
+static void
+insert_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
+            uint32_t offset, /**< distance from the start of the container */
+            uint32_t value) /**< input value */
+{
+  re_bytecode_t *value_bytes_p = (re_bytecode_t*) &value;
+
+  bytecode_list_insert (bc_ctx_p, offset, value_bytes_p, sizeof (uint32_t));
+} /* insert_u32 */
+
+/**
+ * Read a RegExp opcode and step the bytecode pointer over it
+ *
+ * @return the opcode found at the position the pointer referred to
+ */
+re_opcode_t
+re_get_opcode (re_bytecode_t **bc_p) /**< pointer to bytecode start */
+{
+  re_bytecode_t *opcode_p = *bc_p;
+
+  *bc_p = opcode_p + sizeof (re_bytecode_t);
+
+  return (re_opcode_t) *opcode_p;
+} /* re_get_opcode */
+
+/**
+ * Read a 32 bit parameter of a RegExp opcode and step the bytecode pointer over it
+ *
+ * @return the value found at the position the pointer referred to
+ */
+uint32_t
+re_get_value (re_bytecode_t **bc_p) /**< pointer to bytecode start */
+{
+  uint32_t value;
+
+  /* Parameters are written into the bytecode with memcpy right after opcodes, so they are not
+   * guaranteed to be aligned. Copying the bytes out avoids a misaligned load through a casted pointer. */
+  memcpy (&value, *bc_p, sizeof (uint32_t));
+  (*bc_p) += sizeof (uint32_t);
+
+  return value;
+} /* re_get_value */
+
+/**
+ * Callback of the character class parser
+ *
+ * Appends one range (start and end value) to the bytecode and counts it
+ * in the parser context.
+ */
+static void
+append_char_class (void* re_ctx_p, /**< RegExp compiler context */
+                   uint32_t start, /**< character class range from */
+                   uint32_t end) /**< character class range to */
+{
+  /* FIXME: Handle ignore case flag and add unicode support. */
+  re_compiler_ctx_t *compiler_ctx_p = (re_compiler_ctx_t*) re_ctx_p;
+  re_bytecode_ctx_t *bc_ctx_p = compiler_ctx_p->bytecode_ctx_p;
+
+  append_u32 (bc_ctx_p, start);
+  append_u32 (bc_ctx_p, end);
+
+  compiler_ctx_p->parser_ctx_p->num_of_classes++;
+} /* append_char_class */
+
+/**
+ * Wrap the most recently compiled atom into a simple iterator
+ *
+ * The atom is closed with RE_OP_MATCH, then the iterator opcode followed by
+ * qmin, qmax and the length of the atom code is placed in front of it.
+ */
+static void
+insert_simple_iterator (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
+                        uint32_t new_atom_start_offset) /**< atom start offset */
+{
+  re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p;
+  const uint32_t qmin = re_ctx_p->current_token.qmin;
+  const uint32_t qmax = re_ctx_p->current_token.qmax;
+
+  JERRY_ASSERT (qmin <= qmax);
+
+  /* FIXME: optimize bytecode length. Store 0 rather than INF */
+
+  append_opcode (bc_ctx_p, RE_OP_MATCH);   /* complete 'sub atom' */
+  const uint32_t atom_code_length = (uint32_t) (BYTECODE_LEN (bc_ctx_p) - new_atom_start_offset);
+
+  /* Everything is inserted at the same offset, so the items are emitted in reverse order. */
+  insert_u32 (bc_ctx_p, new_atom_start_offset, atom_code_length);
+  insert_u32 (bc_ctx_p, new_atom_start_offset, qmax);
+  insert_u32 (bc_ctx_p, new_atom_start_offset, qmin);
+  insert_opcode (bc_ctx_p,
+                 new_atom_start_offset,
+                 re_ctx_p->current_token.greedy ? RE_OP_GREEDY_ITERATOR
+                                                : RE_OP_NON_GREEDY_ITERATOR);
+} /* insert_simple_iterator */
+
+/**
+ * Get the type of a group start
+ *
+ * The opcode depends on whether the group is capturable, whether the minimum
+ * repetition count of the current token is zero and, for zero groups, on greediness.
+ *
+ * @return one of the RE_OP_*_GROUP_START opcodes
+ */
+static re_opcode_t
+get_start_opcode_type (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
+                       bool is_capturable) /**< is capturable group */
+{
+  if (re_ctx_p->current_token.qmin != 0)
+  {
+    /* Greediness does not affect the start opcode of groups that have to match at least once. */
+    return is_capturable ? RE_OP_CAPTURE_GROUP_START
+                         : RE_OP_NON_CAPTURE_GROUP_START;
+  }
+
+  if (re_ctx_p->current_token.greedy)
+  {
+    return is_capturable ? RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START
+                         : RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START;
+  }
+
+  return is_capturable ? RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START
+                       : RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START;
+} /* get_start_opcode_type */
+
+/**
+ * Get the type of a group end
+ *
+ * The opcode depends on whether the group is capturable and on the greediness
+ * of the current token.
+ *
+ * @return one of the RE_OP_*_GROUP_END opcodes
+ */
+static re_opcode_t
+get_end_opcode_type (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
+                     bool is_capturable) /**< is capturable group */
+{
+  if (re_ctx_p->current_token.greedy)
+  {
+    return is_capturable ? RE_OP_CAPTURE_GREEDY_GROUP_END
+                         : RE_OP_NON_CAPTURE_GREEDY_GROUP_END;
+  }
+
+  return is_capturable ? RE_OP_CAPTURE_NON_GREEDY_GROUP_END
+                       : RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END;
+} /* get_end_opcode_type */
+
+/**
+ * Enclose the bytecode starting at the given offset into a group
+ *
+ * A group start opcode and the group index are placed in front of the code,
+ * a group end opcode with the index, qmin, qmax and a jump offset is appended after it.
+ * Start opcodes other than the plain (non-zero) group starts get an additional
+ * 32 bit value right after the group head.
+ */
+static void
+insert_into_group (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
+                   uint32_t group_start_offset, /**< offset of group start */
+                   uint32_t idx, /**< index of group */
+                   bool is_capturable) /**< is capturable group */
+{
+  re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p;
+  const re_opcode_t start_opcode = get_start_opcode_type (re_ctx_p, is_capturable);
+  const re_opcode_t end_opcode = get_end_opcode_type (re_ctx_p, is_capturable);
+
+  const uint32_t qmin = re_ctx_p->current_token.qmin;
+  const uint32_t qmax = re_ctx_p->current_token.qmax;
+  JERRY_ASSERT (qmin <= qmax);
+
+  /* Group head: start opcode followed by the index (inserted in reverse order at the same offset). */
+  const uint32_t length_before_head = BYTECODE_LEN (bc_ctx_p);
+  insert_u32 (bc_ctx_p, group_start_offset, idx);
+  insert_opcode (bc_ctx_p, group_start_offset, start_opcode);
+  const uint32_t start_head_offset_len = BYTECODE_LEN (bc_ctx_p) - length_before_head;
+
+  /* Group tail: end opcode, index and the quantifier limits. */
+  append_opcode (bc_ctx_p, end_opcode);
+  append_u32 (bc_ctx_p, idx);
+  append_u32 (bc_ctx_p, qmin);
+  append_u32 (bc_ctx_p, qmax);
+
+  /* Offsets are measured from the first byte after the group head. */
+  const uint32_t after_head_offset = group_start_offset + start_head_offset_len;
+  append_jump_offset (bc_ctx_p, BYTECODE_LEN (bc_ctx_p) - after_head_offset);
+
+  const bool is_plain_group_start = (start_opcode == RE_OP_CAPTURE_GROUP_START
+                                     || start_opcode == RE_OP_NON_CAPTURE_GROUP_START);
+
+  if (!is_plain_group_start)
+  {
+    insert_u32 (bc_ctx_p, after_head_offset, BYTECODE_LEN (bc_ctx_p) - after_head_offset);
+  }
+} /* insert_into_group */
+
+/**
+ * Enclose the given bytecode into a group and insert a jump value
+ *
+ * The jump value (length of the code from the group start offset to the end
+ * of the bytecode) is inserted first, then the code is wrapped into a group.
+ */
+static void
+insert_into_group_with_jump (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
+                             uint32_t group_start_offset, /**< offset of group start */
+                             uint32_t idx, /**< index of group */
+                             bool is_capturable) /**< is capturable group */
+{
+  re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p;
+  const uint32_t jump_value = BYTECODE_LEN (bc_ctx_p) - group_start_offset;
+
+  insert_u32 (bc_ctx_p, group_start_offset, jump_value);
+  insert_into_group (re_ctx_p, group_start_offset, idx, is_capturable);
+} /* insert_into_group_with_jump */
+
+/**
+ * Parse alternatives
+ *
+ * Reads tokens from the parser one by one and emits the corresponding bytecode
+ * until a group end or the end of the pattern is reached. Groups and lookaheads
+ * are compiled by calling this function recursively; the recursion depth is
+ * limited by RE_COMPILE_RECURSION_LIMIT.
+ *
+ * The length of every alternative is inserted as a 32 bit value in front of
+ * the alternative once it is complete.
+ *
+ * NOTE(review): the error return paths leave re_ctx_p->recursion_depth
+ *               incremented; only the successful group end / EOF paths decrement it.
+ *
+ * @return completion value
+ *         Returned value must be freed with ecma_free_completion_value
+ */
+static ecma_completion_value_t
+parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
+                   bool expect_eof) /**< expect end of file */
+{
+  uint32_t idx;
+  re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p;
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+
+  /* Start offset of the alternative that is currently being compiled. */
+  uint32_t alterantive_offset = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p);
+
+  if (re_ctx_p->recursion_depth >= RE_COMPILE_RECURSION_LIMIT)
+  {
+    ret_value = ecma_raise_range_error ((const ecma_char_t *) "RegExp compiler recursion limit is exceeded.");
+    return ret_value;
+  }
+  re_ctx_p->recursion_depth++;
+
+  while (true)
+  {
+    /* Fetch the next token into re_ctx_p->current_token. */
+    ECMA_TRY_CATCH (empty,
+                    re_parse_next_token (re_ctx_p->parser_ctx_p,
+                                         &(re_ctx_p->current_token)),
+                    ret_value);
+    ECMA_FINALIZE (empty);
+    /* A non-empty value at this point is treated as an error and stops the compilation. */
+    if (!ecma_is_completion_value_empty (ret_value))
+    {
+      return ret_value; /* error */
+    }
+    /* The bytecode of the atom compiled in this iteration starts here. */
+    uint32_t new_atom_start_offset = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p);
+
+    switch (re_ctx_p->current_token.type)
+    {
+      case RE_TOK_START_CAPTURE_GROUP:
+      {
+        idx = re_ctx_p->num_of_captures++;
+        JERRY_DDLOG ("Compile a capture group start (idx: %d)\n", idx);
+
+        /* Compile the content of the group, then wrap it into group start / end opcodes. */
+        ret_value = parse_alternative (re_ctx_p, false);
+        if (ecma_is_completion_value_empty (ret_value))
+        {
+          insert_into_group (re_ctx_p, new_atom_start_offset, idx, true);
+        }
+        else
+        {
+          return ret_value; /* error */
+        }
+        break;
+      }
+      case RE_TOK_START_NON_CAPTURE_GROUP:
+      {
+        idx = re_ctx_p->num_of_non_captures++;
+        JERRY_DDLOG ("Compile a non-capture group start (idx: %d)\n", idx);
+
+        /* Same as the capture group, but indexed by the non-capture counter. */
+        ret_value = parse_alternative (re_ctx_p, false);
+        if (ecma_is_completion_value_empty (ret_value))
+        {
+          insert_into_group (re_ctx_p, new_atom_start_offset, idx, false);
+        }
+        else
+        {
+          return ret_value; /* error */
+        }
+        break;
+      }
+      case RE_TOK_CHAR:
+      {
+        JERRY_DDLOG ("Compile character token: %c, qmin: %d, qmax: %d\n",
+                     re_ctx_p->current_token.value, re_ctx_p->current_token.qmin, re_ctx_p->current_token.qmax);
+
+        append_opcode (bc_ctx_p, RE_OP_CHAR);
+        append_u32 (bc_ctx_p, re_ctx_p->current_token.value);
+
+        /* An iterator is only needed when the atom is not matched exactly once. */
+        if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1))
+        {
+          insert_simple_iterator (re_ctx_p, new_atom_start_offset);
+        }
+        break;
+      }
+      case RE_TOK_PERIOD:
+      {
+        JERRY_DDLOG ("Compile a period\n");
+        append_opcode (bc_ctx_p, RE_OP_PERIOD);
+
+        /* An iterator is only needed when the atom is not matched exactly once. */
+        if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1))
+        {
+          insert_simple_iterator (re_ctx_p, new_atom_start_offset);
+        }
+        break;
+      }
+      case RE_TOK_ALTERNATIVE:
+      {
+        JERRY_DDLOG ("Compile an alternative\n");
+        /* Close the current alternative by storing its length in front of it, then start a new one. */
+        insert_u32 (bc_ctx_p, alterantive_offset, BYTECODE_LEN (bc_ctx_p) - alterantive_offset);
+        append_opcode (bc_ctx_p, RE_OP_ALTERNATIVE);
+        alterantive_offset = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p);
+        break;
+      }
+      case RE_TOK_ASSERT_START:
+      {
+        JERRY_DDLOG ("Compile a start assertion\n");
+        append_opcode (bc_ctx_p, RE_OP_ASSERT_START);
+        break;
+      }
+      case RE_TOK_ASSERT_END:
+      {
+        JERRY_DDLOG ("Compile an end assertion\n");
+        append_opcode (bc_ctx_p, RE_OP_ASSERT_END);
+        break;
+      }
+      case RE_TOK_ASSERT_WORD_BOUNDARY:
+      {
+        JERRY_DDLOG ("Compile a word boundary assertion\n");
+        append_opcode (bc_ctx_p, RE_OP_ASSERT_WORD_BOUNDARY);
+        break;
+      }
+      case RE_TOK_ASSERT_NOT_WORD_BOUNDARY:
+      {
+        JERRY_DDLOG ("Compile a not word boundary assertion\n");
+        append_opcode (bc_ctx_p, RE_OP_ASSERT_NOT_WORD_BOUNDARY);
+        break;
+      }
+      case RE_TOK_ASSERT_START_POS_LOOKAHEAD:
+      {
+        JERRY_DDLOG ("Compile a positive lookahead assertion\n");
+        idx = re_ctx_p->num_of_non_captures++;
+        append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_POS);
+
+        /* The lookahead body is compiled recursively, terminated by RE_OP_MATCH
+         * and wrapped (together with the lookahead opcode) into a non-capture group. */
+        ret_value = parse_alternative (re_ctx_p, false);
+        if (ecma_is_completion_value_empty (ret_value))
+        {
+          append_opcode (bc_ctx_p, RE_OP_MATCH);
+
+          insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false);
+        }
+        else
+        {
+          return ret_value; /* error */
+        }
+        break;
+      }
+      case RE_TOK_ASSERT_START_NEG_LOOKAHEAD:
+      {
+        JERRY_DDLOG ("Compile a negative lookahead assertion\n");
+        idx = re_ctx_p->num_of_non_captures++;
+        append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_NEG);
+
+        /* Handled the same way as the positive lookahead, only the opcode differs. */
+        ret_value = parse_alternative (re_ctx_p, false);
+        if (ecma_is_completion_value_empty (ret_value))
+        {
+          append_opcode (bc_ctx_p, RE_OP_MATCH);
+
+          insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false);
+        }
+        else
+        {
+          return ret_value; /* error */
+        }
+        break;
+      }
+      case RE_TOK_BACKREFERENCE:
+      {
+        uint32_t backref = (uint32_t) re_ctx_p->current_token.value;
+        idx = re_ctx_p->num_of_non_captures++;
+        /* Remember the highest referenced index; re_compile_bytecode compares it
+         * with the number of captures after the whole pattern is parsed. */
+        if (backref > re_ctx_p->highest_backref)
+        {
+          re_ctx_p->highest_backref = backref;
+        }
+        JERRY_DDLOG ("Compile a backreference: %d\n", backref);
+        append_opcode (bc_ctx_p, RE_OP_BACKREFERENCE);
+        append_u32 (bc_ctx_p, backref);
+
+        insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false);
+        break;
+      }
+      case RE_TOK_START_CHAR_CLASS:
+      case RE_TOK_START_INV_CHAR_CLASS:
+      {
+        JERRY_DDLOG ("Compile a character class\n");
+        append_opcode (bc_ctx_p,
+                       re_ctx_p->current_token.type == RE_TOK_START_CHAR_CLASS
+                                                    ? RE_OP_CHAR_CLASS
+                                                    : RE_OP_INV_CHAR_CLASS);
+        /* The number of ranges is inserted here after the ranges themselves are emitted. */
+        uint32_t offset = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p);
+
+        /* The ranges are appended to the bytecode through the append_char_class callback. */
+        ECMA_TRY_CATCH (empty,
+                        re_parse_char_class (re_ctx_p->parser_ctx_p,
+                                             append_char_class,
+                                             re_ctx_p,
+                                             &(re_ctx_p->current_token)),
+                        ret_value);
+        insert_u32 (bc_ctx_p, offset, re_ctx_p->parser_ctx_p->num_of_classes);
+
+        if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1))
+        {
+          insert_simple_iterator (re_ctx_p, new_atom_start_offset);
+        }
+        ECMA_FINALIZE (empty);
+        /* NOTE(review): a failure of re_parse_char_class is not returned here; it appears to be
+         *               picked up by the ret_value check at the top of the next iteration. */
+        break;
+      }
+      case RE_TOK_END_GROUP:
+      {
+        JERRY_DDLOG ("Compile a group end\n");
+
+        /* A closing paren is only valid inside a group, i.e. in a nested call. */
+        if (expect_eof)
+        {
+          ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Unexpected end of paren.");
+        }
+        else
+        {
+          /* Close the last alternative of the group. */
+          insert_u32 (bc_ctx_p, alterantive_offset, BYTECODE_LEN (bc_ctx_p) - alterantive_offset);
+          re_ctx_p->recursion_depth--;
+        }
+
+        return ret_value;
+      }
+      case RE_TOK_EOF:
+      {
+        /* The pattern may only end at the top level; inside a group it means an unterminated group. */
+        if (!expect_eof)
+        {
+          ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Unexpected end of pattern.");
+        }
+        else
+        {
+          /* Close the last alternative of the pattern. */
+          insert_u32 (bc_ctx_p, alterantive_offset, BYTECODE_LEN (bc_ctx_p) - alterantive_offset);
+          re_ctx_p->recursion_depth--;
+        }
+
+        return ret_value;
+      }
+      default:
+      {
+        ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Unexpected RegExp token.");
+        return ret_value;
+      }
+    }
+  }
+
+  JERRY_UNREACHABLE ();
+  return ret_value;
+} /* parse_alternative */
+
+/**
+ * Compilation of RegExp bytecode
+ *
+ * The pattern is converted to a zero terminated string, parsed and compiled into bytecode.
+ * On success the bytecode is terminated with RE_OP_SAVE_AND_MATCH and RE_OP_EOF, and a header
+ * (flags, number of captures * 2, number of non-captures) is inserted at its beginning.
+ * The bytecode block is stored into the internal property in every case.
+ *
+ * @return completion value
+ *         Returned value must be freed with ecma_free_completion_value
+ */
+ecma_completion_value_t
+re_compile_bytecode (ecma_property_t *bytecode_p, /**< bytecode */
+                     ecma_string_t *pattern_str_p, /**< pattern */
+                     uint8_t flags) /**< flags */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+  re_compiler_ctx_t re_ctx;
+  re_ctx.flags = flags;
+  re_ctx.highest_backref = 0;
+  re_ctx.num_of_non_captures = 0;
+  re_ctx.recursion_depth = 0;
+
+  /* The container is allocated lazily by the first append. */
+  re_bytecode_ctx_t bc_ctx;
+  bc_ctx.block_start_p = NULL;
+  bc_ctx.block_end_p = NULL;
+  bc_ctx.current_p = NULL;
+
+  re_ctx.bytecode_ctx_p = &bc_ctx;
+
+  int32_t pattern_str_len = ecma_string_get_length (pattern_str_p);
+  MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_len + 1, ecma_char_t);
+  ssize_t zt_str_size = (ssize_t) sizeof (ecma_char_t) * (pattern_str_len + 1);
+  ecma_string_to_zt_string (pattern_str_p, pattern_start_p, zt_str_size);
+
+  /* NOTE(review): parser_ctx.num_of_classes is not initialized here although append_char_class
+   *               increments it; presumably re_parse_char_class resets it - confirm in the parser. */
+  re_parser_ctx_t parser_ctx;
+  parser_ctx.pattern_start_p = pattern_start_p;
+  parser_ctx.current_char_p = pattern_start_p;
+  parser_ctx.num_of_groups = -1;
+  re_ctx.parser_ctx_p = &parser_ctx;
+
+  /* 1. Parse RegExp pattern */
+  re_ctx.num_of_captures = 1;
+  append_opcode (&bc_ctx, RE_OP_SAVE_AT_START);
+
+  ECMA_TRY_CATCH (empty, parse_alternative (&re_ctx, true), ret_value);
+
+  /* 2. Check for invalid backreference */
+  if (re_ctx.highest_backref >= re_ctx.num_of_captures)
+  {
+    ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Invalid backreference.\n");
+  }
+  else
+  {
+    append_opcode (&bc_ctx, RE_OP_SAVE_AND_MATCH);
+    append_opcode (&bc_ctx, RE_OP_EOF);
+
+    /* 3. Insert extra informations for bytecode header
+     *    (all inserted at offset 0, so the final order is: flags, captures * 2, non-captures) */
+    insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.num_of_non_captures);
+    insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.num_of_captures * 2);
+    insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.flags);
+  }
+  ECMA_FINALIZE (empty);
+
+  /* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opcode, so it cannot be NULL. */
+  JERRY_ASSERT (bc_ctx.block_start_p != NULL);
+  ECMA_SET_POINTER (bytecode_p->u.internal_property.value, bc_ctx.block_start_p);
+
+  MEM_FINALIZE_LOCAL_ARRAY (pattern_start_p);
+
+#ifdef JERRY_ENABLE_LOG
+  /* The header and the terminating RE_OP_EOF are emitted on success only. Dumping a partially
+   * compiled bytecode would make the dumper run past the end of the written data. */
+  if (ecma_is_completion_value_empty (ret_value))
+  {
+    regexp_dump_bytecode (&bc_ctx);
+  }
+#endif
+
+  return ret_value;
+} /* re_compile_bytecode */
+
+#ifdef JERRY_ENABLE_LOG
+/**
+ * Dump the given number of consecutive 32 bit parameters
+ *
+ * The values are separated by a space, the last one is followed by ", ".
+ */
+static void
+regexp_dump_values (re_bytecode_t **bytecode_p, /**< in-out: current position in the bytecode */
+                    uint32_t count) /**< number of values to dump (at least one) */
+{
+  JERRY_ASSERT (count > 0);
+
+  for (uint32_t i = 1; i < count; i++)
+  {
+    JERRY_DLOG ("%d ", re_get_value (bytecode_p));
+  }
+  JERRY_DLOG ("%d, ", re_get_value (bytecode_p));
+} /* regexp_dump_values */
+
+/**
+ * RegExp bytecode dumper
+ *
+ * Prints the three header values, then every opcode with its parameters
+ * until the terminating RE_OP_EOF (zero) opcode is reached.
+ */
+void
+regexp_dump_bytecode (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
+{
+  re_bytecode_t *bytecode_p = bc_ctx_p->block_start_p;
+
+  /* Header: three 32 bit values stored in front of the first opcode. */
+  JERRY_DLOG ("%d ", re_get_value (&bytecode_p));
+  JERRY_DLOG ("%d ", re_get_value (&bytecode_p));
+  JERRY_DLOG ("%d | ", re_get_value (&bytecode_p));
+
+  re_opcode_t op;
+  while ((op = re_get_opcode (&bytecode_p)))
+  {
+    switch (op)
+    {
+      case RE_OP_MATCH:
+      {
+        JERRY_DLOG ("MATCH, ");
+        break;
+      }
+      case RE_OP_CHAR:
+      {
+        JERRY_DLOG ("CHAR ");
+        JERRY_DLOG ("%c, ", (char) re_get_value (&bytecode_p));
+        break;
+      }
+      case RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
+      {
+        JERRY_DLOG ("N");
+        /* FALLTHRU */
+      }
+      case RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START:
+      {
+        JERRY_DLOG ("GZ_START ");
+        regexp_dump_values (&bytecode_p, 3);
+        break;
+      }
+      case RE_OP_CAPTURE_GROUP_START:
+      {
+        JERRY_DLOG ("START ");
+        regexp_dump_values (&bytecode_p, 2);
+        break;
+      }
+      case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
+      {
+        JERRY_DLOG ("N");
+        /* FALLTHRU */
+      }
+      case RE_OP_CAPTURE_GREEDY_GROUP_END:
+      {
+        JERRY_DLOG ("G_END ");
+        regexp_dump_values (&bytecode_p, 4);
+        break;
+      }
+      case RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
+      {
+        JERRY_DLOG ("N");
+        /* FALLTHRU */
+      }
+      case RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START:
+      {
+        JERRY_DLOG ("GZ_NC_START ");
+        regexp_dump_values (&bytecode_p, 3);
+        break;
+      }
+      case RE_OP_NON_CAPTURE_GROUP_START:
+      {
+        JERRY_DLOG ("NC_START ");
+        regexp_dump_values (&bytecode_p, 2);
+        break;
+      }
+      case RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END:
+      {
+        JERRY_DLOG ("N");
+        /* FALLTHRU */
+      }
+      case RE_OP_NON_CAPTURE_GREEDY_GROUP_END:
+      {
+        JERRY_DLOG ("G_NC_END ");
+        regexp_dump_values (&bytecode_p, 4);
+        break;
+      }
+      case RE_OP_SAVE_AT_START:
+      {
+        JERRY_DLOG ("RE_START ");
+        regexp_dump_values (&bytecode_p, 1);
+        break;
+      }
+      case RE_OP_SAVE_AND_MATCH:
+      {
+        JERRY_DLOG ("RE_END, ");
+        break;
+      }
+      case RE_OP_GREEDY_ITERATOR:
+      {
+        JERRY_DLOG ("GREEDY_ITERATOR ");
+        regexp_dump_values (&bytecode_p, 3);
+        break;
+      }
+      case RE_OP_NON_GREEDY_ITERATOR:
+      {
+        /* Uses the same separators as the greedy iterator. */
+        JERRY_DLOG ("NON_GREEDY_ITERATOR ");
+        regexp_dump_values (&bytecode_p, 3);
+        break;
+      }
+      case RE_OP_PERIOD:
+      {
+        JERRY_DLOG ("PERIOD ");
+        break;
+      }
+      case RE_OP_ALTERNATIVE:
+      {
+        JERRY_DLOG ("ALTERNATIVE ");
+        regexp_dump_values (&bytecode_p, 1);
+        break;
+      }
+      case RE_OP_ASSERT_START:
+      {
+        JERRY_DLOG ("ASSERT_START ");
+        break;
+      }
+      case RE_OP_ASSERT_END:
+      {
+        JERRY_DLOG ("ASSERT_END ");
+        break;
+      }
+      case RE_OP_ASSERT_WORD_BOUNDARY:
+      {
+        JERRY_DLOG ("ASSERT_WORD_BOUNDARY ");
+        break;
+      }
+      case RE_OP_ASSERT_NOT_WORD_BOUNDARY:
+      {
+        JERRY_DLOG ("ASSERT_NOT_WORD_BOUNDARY ");
+        break;
+      }
+      case RE_OP_LOOKAHEAD_POS:
+      {
+        JERRY_DLOG ("LOOKAHEAD_POS ");
+        regexp_dump_values (&bytecode_p, 1);
+        break;
+      }
+      case RE_OP_LOOKAHEAD_NEG:
+      {
+        JERRY_DLOG ("LOOKAHEAD_NEG ");
+        regexp_dump_values (&bytecode_p, 1);
+        break;
+      }
+      case RE_OP_BACKREFERENCE:
+      {
+        JERRY_DLOG ("BACKREFERENCE ");
+        regexp_dump_values (&bytecode_p, 1);
+        break;
+      }
+      case RE_OP_INV_CHAR_CLASS:
+      {
+        JERRY_DLOG ("INV_");
+        /* FALLTHRU */
+      }
+      case RE_OP_CHAR_CLASS:
+      {
+        JERRY_DLOG ("CHAR_CLASS ");
+        /* The number of ranges is followed by that many (from, to) pairs. */
+        uint32_t num_of_class = re_get_value (&bytecode_p);
+        JERRY_DLOG ("%d", num_of_class);
+        while (num_of_class)
+        {
+          JERRY_DLOG (" %d", re_get_value (&bytecode_p));
+          JERRY_DLOG ("-%d", re_get_value (&bytecode_p));
+          num_of_class--;
+        }
+        JERRY_DLOG (", ");
+        break;
+      }
+      default:
+      {
+        JERRY_DLOG ("UNKNOWN(%d), ", (uint32_t) op);
+        break;
+      }
+    }
+  }
+  JERRY_DLOG ("EOF\n");
+} /* regexp_dump_bytecode */
+#endif /* JERRY_ENABLE_LOG */
+
+#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
@@ -0,0 +1,108 @@
+/* Copyright 2015 Samsung Electronics Co., Ltd.
+ * Copyright 2015 University of Szeged.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RE_COMPILER_H
+#define RE_COMPILER_H
+
+#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
+
+#include "ecma-globals.h"
+#include "re-parser.h"
+
+/* RegExp opcodes
+ * Group opcode order is important, because RE_IS_CAPTURE_GROUP is based on it.
+ * Change it carefully. Capture opcodes should come first.
+ *
+ * Note: RE_IS_CAPTURE_GROUP only tests (x) < RE_OP_NON_CAPTURE_GROUP_START, so
+ *       RE_OP_EOF (0) satisfies it as well.
+ */
+#define RE_OP_EOF                                           0
+
+/* Capturing group opcodes: every value is below RE_OP_NON_CAPTURE_GROUP_START */
+#define RE_OP_CAPTURE_GROUP_START                           1
+#define RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START               2
+#define RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START           3
+#define RE_OP_CAPTURE_GREEDY_GROUP_END                      4
+#define RE_OP_CAPTURE_NON_GREEDY_GROUP_END                  5
+/* Non-capturing group opcodes: same layout as the capturing ones above */
+#define RE_OP_NON_CAPTURE_GROUP_START                       6
+#define RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START           7
+#define RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START       8
+#define RE_OP_NON_CAPTURE_GREEDY_GROUP_END                  9
+#define RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END              10
+
+/* Remaining (non-group) opcodes */
+#define RE_OP_MATCH                                         11
+#define RE_OP_CHAR                                          12
+#define RE_OP_SAVE_AT_START                                 13
+#define RE_OP_SAVE_AND_MATCH                                14
+#define RE_OP_PERIOD                                        15
+#define RE_OP_ALTERNATIVE                                   16
+#define RE_OP_GREEDY_ITERATOR                               17
+#define RE_OP_NON_GREEDY_ITERATOR                           18
+#define RE_OP_ASSERT_START                                  19
+#define RE_OP_ASSERT_END                                    20
+#define RE_OP_ASSERT_WORD_BOUNDARY                          21
+#define RE_OP_ASSERT_NOT_WORD_BOUNDARY                      22
+#define RE_OP_LOOKAHEAD_POS                                 23
+#define RE_OP_LOOKAHEAD_NEG                                 24
+#define RE_OP_BACKREFERENCE                                 25
+#define RE_OP_CHAR_CLASS                                    26
+#define RE_OP_INV_CHAR_CLASS                                27
+
+/* NOTE(review): presumably the upper bound for re_compiler_ctx_t.recursion_depth;
+ *               the check itself is not in this header - confirm in the compiler. */
+#define RE_COMPILE_RECURSION_LIMIT  100
+
+/* Evaluates to 1 if opcode x is numerically below RE_OP_NON_CAPTURE_GROUP_START, 0 otherwise.
+ * The test is purely numeric, so it relies on the ordering of the RE_OP_* values. */
+#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 1 : 0)
+
+typedef uint8_t re_opcode_t; /* type of RegExp opcodes */
+typedef uint8_t re_bytecode_t; /* type of standard bytecode elements (ex.: opcode parameters) */
+
+/**
+ * Context of RegExp bytecode container
+ *
+ * FIXME:
+ *       Add comments with description of the structure members
+ *
+ * NOTE(review): the member notes below are inferred from the member names only;
+ *               the code using them is outside this header - confirm before relying on them.
+ */
+typedef struct
+{
+  re_bytecode_t *block_start_p; /* presumably the first byte of the bytecode block - TODO confirm */
+  re_bytecode_t *block_end_p; /* presumably the end of the bytecode block - TODO confirm */
+  re_bytecode_t *current_p; /* presumably the current position inside the block - TODO confirm */
+} re_bytecode_ctx_t;
+
+/**
+ * Context of RegExp compiler
+ *
+ * FIXME:
+ *       Add comments with description of the structure members
+ *
+ * NOTE(review): the member notes below are inferred from names and types only;
+ *               the compiler implementation is outside this header - confirm before relying on them.
+ */
+typedef struct
+{
+  uint8_t flags; /* presumably the RegExp flags handed to re_compile_bytecode - TODO confirm */
+  uint32_t recursion_depth; /* presumably compared against RE_COMPILE_RECURSION_LIMIT - TODO confirm */
+  uint32_t num_of_captures; /* presumably the number of capturing groups seen so far - TODO confirm */
+  uint32_t num_of_non_captures; /* presumably the number of non-capturing groups seen so far - TODO confirm */
+  uint32_t highest_backref; /* presumably the largest backreference index used - TODO confirm */
+  re_bytecode_ctx_t *bytecode_ctx_p; /* bytecode container context */
+  re_token_t current_token; /* token storage (see re_token_t in re-parser.h) */
+  re_parser_ctx_t *parser_ctx_p; /* parser context (see re_parser_ctx_t in re-parser.h) */
+} re_compiler_ctx_t;
+
+/* Compile a RegExp pattern string with the given flags into bytecode.
+ * NOTE(review): presumably the result is stored through bytecode_p and the completion
+ *               value reports syntax errors - confirm in the implementation. */
+ecma_completion_value_t
+re_compile_bytecode (ecma_property_t *bytecode_p, ecma_string_t *pattern_str_p, uint8_t flags);
+
+/* Read an opcode from the bytecode position referenced by bc_p.
+ * NOTE(review): takes a pointer to the position, so it presumably advances it - confirm. */
+re_opcode_t
+re_get_opcode (re_bytecode_t **bc_p);
+
+/* Read an opcode parameter value from the bytecode position referenced by bc_p.
+ * NOTE(review): the bytecode dumper calls this repeatedly to read consecutive operands,
+ *               so it presumably advances *bc_p past the value - confirm. */
+uint32_t
+re_get_value (re_bytecode_t **bc_p);
+
+#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
+#endif /* RE_COMPILER_H */
@@ -0,0 +1,808 @@
+/* Copyright 2015 Samsung Electronics Co., Ltd.
+ * Copyright 2015 University of Szeged.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ecma-exceptions.h"
+#include "ecma-globals.h"
+#include "ecma-helpers.h"
+#include "ecma-try-catch-macro.h"
+#include "jrt-libc-includes.h"
+#include "re-parser.h"
+#include "syntax-errors.h"
+
+#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
+
+/**
+ * Peek at the character at index 'lookup' of the zero-terminated string 'str_p'
+ * without advancing; evaluates to '\0' if the index is not below the string length.
+ *
+ * Both arguments are parenthesized in the expansion, so expressions such as
+ * RE_LOOKUP (*pattern_p, 1) index the dereferenced pointer, not pattern_p itself.
+ *
+ * Note: the arguments are evaluated more than once and the string length is
+ *       recomputed on every use; do not pass expressions with side effects.
+ *
+ * FIXME: change it, when unicode support would be implemented
+ */
+#define RE_LOOKUP(str_p, lookup)  (ecma_zt_string_length (str_p) > (lookup) ? (str_p)[(lookup)] : '\0')
+
+/**
+ * Move the character pointer 'str_p' forward by 'advance' characters.
+ * 'str_p' must be a modifiable lvalue; both arguments are parenthesized
+ * so that compound expressions expand as intended.
+ *
+ * FIXME: change it, when unicode support would be implemented
+ */
+#define RE_ADVANCE(str_p, advance) do { (str_p) += (advance); } while (0)
+
+/**
+ * Read the character at the current position and step the position forward by one
+ *
+ * @return the character that was at *char_p before the step
+ */
+static ecma_char_t
+get_ecma_char (ecma_char_t** char_p) /**< in-out: current position in the pattern */
+{
+  /* FIXME: change to string iterator with unicode support, when it would be implemented */
+  ecma_char_t *cursor_p = *char_p;
+  const ecma_char_t current = *cursor_p;
+
+  RE_ADVANCE (cursor_p, 1);
+  *char_p = cursor_p;
+
+  return current;
+} /* get_ecma_char */
+
+/**
+ * Parse RegExp iterators
+ *
+ * Inspects the character at index 'lookup' of the pattern and, if it starts a
+ * quantifier ('?', '*', '+' or '{...}', each optionally followed by '?' for the
+ * non-greedy form), stores its bounds in re_token_p->qmin / qmax / greedy.
+ * If there is no quantifier the token gets qmin = qmax = 1 and greedy = true,
+ * and *advance_p is left untouched.
+ *
+ * Note: in the '{' case *advance_p is incremented starting from its incoming
+ *       value, so the caller has to initialize it to 0.
+ *
+ * @return completion value
+ *         Returned value must be freed with ecma_free_completion_value
+ */
+static ecma_completion_value_t
+parse_re_iterator (ecma_char_t *pattern_p, /**< RegExp pattern */
+                   re_token_t *re_token_p, /**< output token */
+                   uint32_t lookup, /**< index in the pattern where the iterator may start */
+                   uint32_t *advance_p) /**< output length of current advance */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+
+  /* ch0 selects the quantifier kind, ch1 is checked for the non-greedy '?' suffix */
+  ecma_char_t ch0 = RE_LOOKUP (pattern_p, lookup);
+  ecma_char_t ch1 = RE_LOOKUP (pattern_p, lookup + 1);
+
+  switch (ch0)
+  {
+    case '?':
+    {
+      re_token_p->qmin = 0;
+      re_token_p->qmax = 1;
+      if (ch1 == '?')
+      {
+        *advance_p = 2;
+        re_token_p->greedy = false;
+      }
+      else
+      {
+        *advance_p = 1;
+        re_token_p->greedy = true;
+      }
+      break;
+    }
+    case '*':
+    {
+      re_token_p->qmin = 0;
+      re_token_p->qmax = RE_ITERATOR_INFINITE;
+      if (ch1 == '?')
+      {
+        *advance_p = 2;
+        re_token_p->greedy = false;
+      }
+      else
+      {
+        *advance_p = 1;
+        re_token_p->greedy = true;
+      }
+      break;
+    }
+    case '+':
+    {
+      re_token_p->qmin = 1;
+      re_token_p->qmax = RE_ITERATOR_INFINITE;
+      if (ch1 == '?')
+      {
+        *advance_p = 2;
+        re_token_p->greedy = false;
+      }
+      else
+      {
+        *advance_p = 1;
+        re_token_p->greedy = true;
+      }
+      break;
+    }
+    case '{':
+    {
+      /*
+       * 'qmin' accumulates the number currently being read. 'qmax' doubles as the
+       * "comma already seen" marker: it stays RE_ITERATOR_INFINITE until a ',' is
+       * found, at which point the first number is parked in it and 'qmin' is reused
+       * for the second number. The two are swapped back when '}' is reached.
+       */
+      uint32_t qmin = 0;
+      uint32_t qmax = RE_ITERATOR_INFINITE;
+      uint32_t digits = 0;
+      while (true)
+      {
+        (*advance_p)++;
+        ch1 = RE_LOOKUP (pattern_p, lookup + *advance_p);
+
+        if (isdigit (ch1))
+        {
+          if (digits >= ECMA_NUMBER_MAX_DIGITS)
+          {
+            ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: too many digits.");
+            return ret_value;
+          }
+          digits++;
+          qmin = qmin * 10 + ecma_char_hex_to_int (ch1);
+        }
+        else if (ch1 == ',')
+        {
+          if (qmax != RE_ITERATOR_INFINITE)
+          {
+            ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: double comma.");
+            return ret_value;
+          }
+          /* "{n,}" form: lower bound only */
+          if ((RE_LOOKUP (pattern_p, lookup + *advance_p + 1)) == '}')
+          {
+            if (digits == 0)
+            {
+              ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: missing digits.");
+              return ret_value;
+            }
+
+            re_token_p->qmin = qmin;
+            re_token_p->qmax = RE_ITERATOR_INFINITE;
+            /* consume both the ',' and the '}' */
+            *advance_p += 2;
+            break;
+          }
+          /* park the first number in qmax and start reading the second one */
+          qmax = qmin;
+          qmin = 0;
+          digits = 0;
+        }
+        else if (ch1 == '}')
+        {
+          if (digits == 0)
+          {
+            ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: missing digits.");
+            return ret_value;
+          }
+
+          if (qmax != RE_ITERATOR_INFINITE)
+          {
+            /* "{n,m}" form: qmax holds the first number, qmin the second */
+            re_token_p->qmin = qmax;
+            re_token_p->qmax = qmin;
+          }
+          else
+          {
+            /* "{n}" form: exact repetition count */
+            re_token_p->qmin = qmin;
+            re_token_p->qmax = qmin;
+          }
+
+          *advance_p += 1;
+          break;
+        }
+        else
+        {
+          /* also reached when the pattern ends before '}', since RE_LOOKUP then yields '\0' */
+          ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: unknown char.");
+          return ret_value;
+        }
+      }
+
+      /* optional non-greedy suffix after the closing '}' */
+      if ((RE_LOOKUP (pattern_p, lookup + *advance_p)) == '?')
+      {
+        re_token_p->greedy = false;
+        *advance_p += 1;
+      }
+      else
+      {
+        re_token_p->greedy = true;
+      }
+      break;
+
+      /* the two statements below follow an unconditional break and are never executed */
+      JERRY_UNREACHABLE ();
+      break;
+    }
+    default:
+    {
+      /* no quantifier: match exactly once, *advance_p is not modified */
+      re_token_p->qmin = 1;
+      re_token_p->qmax = 1;
+      re_token_p->greedy = true;
+      break;
+    }
+  }
+
+  JERRY_ASSERT (ecma_is_completion_value_empty (ret_value));
+
+  /* e.g. "{5,3}" */
+  if (re_token_p->qmin > re_token_p->qmax)
+  {
+    ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: qmin > qmax.");
+  }
+
+  return ret_value;
+} /* parse_re_iterator */
+
+/**
+ * Count the number of capturing groups in pattern
+ *
+ * Scans the whole pattern from parser_ctx_p->pattern_start_p and stores the
+ * result in parser_ctx_p->num_of_groups. A '(' is counted when it is not
+ * followed by '?', is not escaped with a backslash and is not inside a
+ * character class ('[' ... ']').
+ */
+static void
+re_count_num_of_groups (re_parser_ctx_t *parser_ctx_p) /**< RegExp parser context */
+{
+  ecma_char_t *pattern_p = parser_ctx_p->pattern_start_p;
+  ecma_char_t ch1;
+  bool in_char_class = false;
+  parser_ctx_p->num_of_groups = 0;
+
+  ch1 = get_ecma_char (&pattern_p);
+  while (ch1 != '\0')
+  {
+    /* ch0 is the character being classified, ch1 is the one right after it */
+    ecma_char_t ch0 = ch1;
+    ch1 = get_ecma_char (&pattern_p);
+    switch (ch0)
+    {
+      case '\\':
+      {
+        /* Skip the escaped character. If the pattern ends with a lone backslash,
+         * ch1 is already the terminator and nothing more may be read. */
+        if (ch1 != '\0')
+        {
+          ch1 = get_ecma_char (&pattern_p);
+        }
+        break;
+      }
+      case '[':
+      {
+        /* character classes do not nest: a '[' inside a class is a literal */
+        in_char_class = true;
+        break;
+      }
+      case ']':
+      {
+        /* leaves the class if one is open; a stray ']' changes nothing */
+        in_char_class = false;
+        break;
+      }
+      case '(':
+      {
+        if (ch1 != '?' && !in_char_class)
+        {
+          parser_ctx_p->num_of_groups++;
+        }
+        break;
+      }
+      default:
+      {
+        break;
+      }
+    }
+  }
+} /* re_count_num_of_groups */
+
+/**
+ * Read the input pattern and parse the range of character class
+ *
+ * Reading starts at parser_ctx_p->current_char_p and continues up to and
+ * including the closing ']'. Every single character or range found is reported
+ * through the append_char_class callback. After the class, an optional iterator
+ * is parsed into out_token_p (qmin / qmax / greedy).
+ *
+ * A SyntaxError is raised if the pattern ends before the closing ']', for an
+ * invalid control escape and for an invalid range.
+ *
+ * @return completion value
+ *         Returned value must be freed with ecma_free_completion_value
+ */
+ecma_completion_value_t
+re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context */
+                     re_char_class_callback append_char_class, /**< callback function,
+                                                                *   which adds the char-ranges
+                                                                *   to the bytecode */
+                     void* re_ctx_p, /**< regexp compiler context */
+                     re_token_t *out_token_p) /**< output token */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+  ecma_char_t **pattern_p = &(parser_ctx_p->current_char_p);
+
+  out_token_p->qmax = out_token_p->qmin = 1;
+  /* 'start' holds a pending character that is either a single member or the lower end of a range */
+  ecma_char_t start = RE_CHAR_UNDEF;
+  bool is_range = false;
+  parser_ctx_p->num_of_classes = 0;
+
+  do
+  {
+    ecma_char_t ch = get_ecma_char (pattern_p);
+
+    if (ch == '\0')
+    {
+      /* unterminated class: do not scan past the end of the pattern */
+      ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid character class, end of string");
+      return ret_value;
+    }
+
+    if (ch == ']')
+    {
+      if (start != RE_CHAR_UNDEF)
+      {
+        append_char_class (re_ctx_p, start, start);
+      }
+      break;
+    }
+    else if (ch == '-')
+    {
+      /* '-' forms a range only between two members; otherwise it is a literal '-' */
+      if (start != RE_CHAR_UNDEF && !is_range && RE_LOOKUP (*pattern_p, 0) != ']')
+      {
+        is_range = true;
+        continue;
+      }
+    }
+    else if (ch == '\\')
+    {
+      ch = get_ecma_char (pattern_p);
+
+      if (ch == '\0')
+      {
+        /* backslash is the last character of the pattern */
+        ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid character class, end of string");
+        return ret_value;
+      }
+
+      if (ch == 'b')
+      {
+        ch = RE_CONTROL_CHAR_BEL;
+      }
+      else if (ch == 'f')
+      {
+        ch = RE_CONTROL_CHAR_FF;
+      }
+      else if (ch == 'n')
+      {
+        ch = RE_CONTROL_CHAR_EOL;
+      }
+      else if (ch == 't')
+      {
+        ch = RE_CONTROL_CHAR_TAB;
+      }
+      else if (ch == 'r')
+      {
+        ch = RE_CONTROL_CHAR_CR;
+      }
+      else if (ch == 'v')
+      {
+        ch = RE_CONTROL_CHAR_VT;
+      }
+      else if (ch == 'c')
+      {
+        ch = get_ecma_char (pattern_p);
+        if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'))
+        {
+          ch = (ch % 32);
+        }
+        else
+        {
+          ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid regexp control escape");
+          return ret_value;
+        }
+      }
+      else if (ch == 'x')
+      {
+        /* FIXME: get unicode char from hex-digits */
+        /* ch = ...; */
+      }
+      else if (ch == 'u')
+      {
+        /* FIXME: get unicode char from digits */
+        /* ch = ...; */
+      }
+      else if (ch == 'd')
+      {
+        /* append digits from '0' to '9'. */
+        append_char_class (re_ctx_p, 0x0030UL, 0x0039UL);
+        ch = RE_CHAR_UNDEF;
+      }
+      else if (ch == 'D')
+      {
+        append_char_class (re_ctx_p, 0x0000UL, 0x002FUL);
+        append_char_class (re_ctx_p, 0x003AUL, 0xFFFFUL);
+        ch = RE_CHAR_UNDEF;
+      }
+      else if (ch == 's')
+      {
+        append_char_class (re_ctx_p, 0x0009UL, 0x000DUL);
+        append_char_class (re_ctx_p, 0x0020UL, 0x0020UL);
+        append_char_class (re_ctx_p, 0x00A0UL, 0x00A0UL);
+        append_char_class (re_ctx_p, 0x1680UL, 0x1680UL);
+        append_char_class (re_ctx_p, 0x180EUL, 0x180EUL);
+        append_char_class (re_ctx_p, 0x2000UL, 0x200AUL);
+        append_char_class (re_ctx_p, 0x2028UL, 0x2029UL);
+        append_char_class (re_ctx_p, 0x202FUL, 0x202FUL);
+        append_char_class (re_ctx_p, 0x205FUL, 0x205FUL);
+        append_char_class (re_ctx_p, 0x3000UL, 0x3000UL);
+        append_char_class (re_ctx_p, 0xFEFFUL, 0xFEFFUL);
+        ch = RE_CHAR_UNDEF;
+      }
+      else if (ch == 'S')
+      {
+        append_char_class (re_ctx_p, 0x0000UL, 0x0008UL);
+        append_char_class (re_ctx_p, 0x000EUL, 0x001FUL);
+        append_char_class (re_ctx_p, 0x0021UL, 0x009FUL);
+        append_char_class (re_ctx_p, 0x00A1UL, 0x167FUL);
+        append_char_class (re_ctx_p, 0x1681UL, 0x180DUL);
+        append_char_class (re_ctx_p, 0x180FUL, 0x1FFFUL);
+        append_char_class (re_ctx_p, 0x200BUL, 0x2027UL);
+        append_char_class (re_ctx_p, 0x202AUL, 0x202EUL);
+        append_char_class (re_ctx_p, 0x2030UL, 0x205EUL);
+        append_char_class (re_ctx_p, 0x2060UL, 0x2FFFUL);
+        append_char_class (re_ctx_p, 0x3001UL, 0xFEFEUL);
+        append_char_class (re_ctx_p, 0xFF00UL, 0xFFFFUL);
+        ch = RE_CHAR_UNDEF;
+      }
+      else if (ch == 'w')
+      {
+        append_char_class (re_ctx_p, 0x0030UL, 0x0039UL);
+        append_char_class (re_ctx_p, 0x0041UL, 0x005AUL);
+        append_char_class (re_ctx_p, 0x005FUL, 0x005FUL);
+        append_char_class (re_ctx_p, 0x0061UL, 0x007AUL);
+        ch = RE_CHAR_UNDEF;
+      }
+      else if (ch == 'W')
+      {
+        append_char_class (re_ctx_p, 0x0000UL, 0x002FUL);
+        append_char_class (re_ctx_p, 0x003AUL, 0x0040UL);
+        append_char_class (re_ctx_p, 0x005BUL, 0x005EUL);
+        append_char_class (re_ctx_p, 0x0060UL, 0x0060UL);
+        append_char_class (re_ctx_p, 0x007BUL, 0xFFFFUL);
+        ch = RE_CHAR_UNDEF;
+      }
+      else if (isdigit (ch))
+      {
+        /* FIXME: octal support; until then the digit itself is used as a literal member */
+      }
+      /* FIXME: depends on the unicode support
+      else if (!jerry_unicode_identifier (ch))
+      {
+        JERRY_ERROR_MSG ("RegExp escape pattern error. (Char class)");
+      }
+      */
+    }
+
+    if (ch == RE_CHAR_UNDEF)
+    {
+      /* a class escape (\d, \s, ...) was appended above; it cannot be the end of a range */
+      if (start != RE_CHAR_UNDEF)
+      {
+        if (is_range)
+        {
+          ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid character class range");
+          return ret_value;
+        }
+        else
+        {
+          append_char_class (re_ctx_p, start, start);
+          start = RE_CHAR_UNDEF;
+        }
+      }
+    }
+    else
+    {
+      if (start != RE_CHAR_UNDEF)
+      {
+        if (is_range)
+        {
+          if (start > ch)
+          {
+            ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid character class range");
+            return ret_value;
+          }
+          else
+          {
+            append_char_class (re_ctx_p, start, ch);
+            start = RE_CHAR_UNDEF;
+            is_range = false;
+          }
+        }
+        else
+        {
+          /* flush the pending single member and keep the new one pending */
+          append_char_class (re_ctx_p, start, start);
+          start = ch;
+        }
+      }
+      else
+      {
+        start = ch;
+      }
+    }
+  }
+  while (true);
+
+  /* an iterator may directly follow the closing ']' */
+  uint32_t advance = 0;
+  ECMA_TRY_CATCH (empty,
+                  parse_re_iterator (parser_ctx_p->current_char_p,
+                                     out_token_p,
+                                     0,
+                                     &advance),
+                  ret_value);
+  RE_ADVANCE (parser_ctx_p->current_char_p, advance);
+  ECMA_FINALIZE (empty);
+
+  return ret_value;
+} /* re_parse_char_class */
+
+/**
+ * Read the input pattern and parse the next token for the RegExp compiler
+ *
+ * The token starting at parser_ctx_p->current_char_p is stored in out_token_p.
+ * For tokens that may carry an iterator ('.', escapes, ')' and plain characters)
+ * the iterator is parsed as well. On success current_char_p is moved past the
+ * consumed characters; on error it is left unchanged.
+ *
+ * A SyntaxError is raised for:
+ *   - a backslash at the very end of the pattern,
+ *   - an invalid control escape or decimal escape,
+ *   - "(?" that is not followed by '=', '!' or ':',
+ *   - an iterator or closing bracket (']', '}', '?', '*', '+', '{') at a position
+ *     where a new token has to start,
+ *   - any error reported by the iterator parser.
+ *
+ * @return completion value
+ *         Returned value must be freed with ecma_free_completion_value
+ */
+ecma_completion_value_t
+re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context */
+                     re_token_t *out_token_p) /**< output token */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+  uint32_t advance = 0;
+  ecma_char_t ch0 = *(parser_ctx_p->current_char_p);
+
+  switch (ch0)
+  {
+    case '|':
+    {
+      advance = 1;
+      out_token_p->type = RE_TOK_ALTERNATIVE;
+      break;
+    }
+    case '^':
+    {
+      advance = 1;
+      out_token_p->type = RE_TOK_ASSERT_START;
+      break;
+    }
+    case '$':
+    {
+      advance = 1;
+      out_token_p->type = RE_TOK_ASSERT_END;
+      break;
+    }
+    case '.':
+    {
+      ECMA_TRY_CATCH (empty,
+                      parse_re_iterator (parser_ctx_p->current_char_p,
+                                         out_token_p,
+                                         1,
+                                         &advance),
+                      ret_value);
+      advance += 1;
+      out_token_p->type = RE_TOK_PERIOD;
+      ECMA_FINALIZE (empty);
+      break;
+    }
+    case '\\':
+    {
+      advance = 2;
+      out_token_p->type = RE_TOK_CHAR;
+      ecma_char_t ch1 = RE_LOOKUP (parser_ctx_p->current_char_p, 1);
+
+      if (ch1 == '\0')
+      {
+        /* lone backslash at the end: consuming two characters would step over the terminator */
+        ret_value = ecma_raise_syntax_error ((const ecma_char_t *)
+                                             "RegExp escape pattern error: \\ at end of pattern.");
+        break;
+      }
+
+      if (ch1 == 'b')
+      {
+        out_token_p->type = RE_TOK_ASSERT_WORD_BOUNDARY;
+      }
+      else if (ch1 == 'B')
+      {
+        out_token_p->type = RE_TOK_ASSERT_NOT_WORD_BOUNDARY;
+      }
+      else if (ch1 == 'f')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_FF;
+      }
+      else if (ch1 == 'n')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_EOL;
+      }
+      else if (ch1 == 't')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_TAB;
+      }
+      else if (ch1 == 'r')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_CR;
+      }
+      else if (ch1 == 'v')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_VT;
+      }
+      else if (ch1 == 'c')
+      {
+        ecma_char_t ch2 = RE_LOOKUP (parser_ctx_p->current_char_p, 2);
+        if ((ch2 >= 'A' && ch2 <= 'Z') || (ch2 >= 'a' && ch2 <= 'z'))
+        {
+          advance = 3;
+          out_token_p->type = RE_TOK_CHAR;
+          out_token_p->value = (ch2 % 32);
+        }
+        else
+        {
+          ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid regexp control escape");
+          break;
+        }
+      }
+      else if (ch1 == 'x'
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 2))
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 3)))
+      {
+        /* "\xHH": backslash, 'x' and two hex digits */
+        advance = 4;
+        out_token_p->type = RE_TOK_CHAR;
+        /* FIXME: get unicode char from hex-digits */
+        /* result.value = ...; */
+        /* NOTE: out_token_p->value is not assigned in this branch yet */
+      }
+      else if (ch1 == 'u'
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 2))
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 3))
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 4))
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 5)))
+      {
+        /* "\uHHHH": backslash, 'u' and four hex digits */
+        advance = 6;
+        out_token_p->type = RE_TOK_CHAR;
+        /* FIXME: get unicode char from digits */
+        /* result.value = ...; */
+        /* NOTE: out_token_p->value is not assigned in this branch yet */
+      }
+      else if (ch1 == 'd')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_DIGIT;
+      }
+      else if (ch1 == 'D')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_NOT_DIGIT;
+      }
+      else if (ch1 == 's')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_WHITE;
+      }
+      else if (ch1 == 'S')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_NOT_WHITE;
+      }
+      else if (ch1 == 'w')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_WORD_CHAR;
+      }
+      else if (ch1 == 'W')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_NOT_WORD_CHAR;
+      }
+      else if (isdigit (ch1))
+      {
+        if (ch1 == '0')
+        {
+          if (isdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 2)))
+          {
+            ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp escape pattern error.");
+            break;
+          }
+
+          advance = 2;
+          out_token_p->value = RE_CONTROL_CHAR_NUL;
+        }
+        else
+        {
+          /* the groups of the pattern are counted lazily, on the first decimal escape */
+          if (parser_ctx_p->num_of_groups == -1)
+          {
+            re_count_num_of_groups (parser_ctx_p);
+          }
+
+          if (parser_ctx_p->num_of_groups)
+          {
+            uint32_t number = 0;
+            int index = 0;
+            advance = 0;
+
+            /* read the decimal number after the backslash; 'advance' ends on the first non-digit */
+            do
+            {
+              if (index >= RE_MAX_RE_DECESC_DIGITS)
+              {
+                ret_value = ecma_raise_syntax_error ((const ecma_char_t *)
+                                                     "RegExp escape pattern error: decimal escape too long.");
+                return ret_value;
+              }
+
+              advance++;
+              ecma_char_t digit = RE_LOOKUP (parser_ctx_p->current_char_p, advance);
+              if (!isdigit (digit))
+              {
+                break;
+              }
+              number = number * 10 + ecma_char_hex_to_int (digit);
+              index++;
+            }
+            while (true);
+
+            if ((int) number <= parser_ctx_p->num_of_groups)
+            {
+              out_token_p->type = RE_TOK_BACKREFERENCE;
+            }
+
+            out_token_p->value = number;
+          }
+          else
+          {
+            out_token_p->value = ch1;
+          }
+        }
+      }
+      else
+      {
+        /* identity escape: the escaped character stands for itself */
+        out_token_p->value = ch1;
+      }
+
+      uint32_t iter_adv = 0;
+      ECMA_TRY_CATCH (empty,
+                      parse_re_iterator (parser_ctx_p->current_char_p,
+                                         out_token_p,
+                                         advance,
+                                         &iter_adv),
+                      ret_value);
+      advance += iter_adv;
+      ECMA_FINALIZE (empty);
+      break;
+    }
+    case '(':
+    {
+      if (RE_LOOKUP (parser_ctx_p->current_char_p, 1) == '?')
+      {
+        ecma_char_t ch2 = RE_LOOKUP (parser_ctx_p->current_char_p, 2);
+        if (ch2 == '=')
+        {
+          /* (?= */
+          advance = 3;
+          out_token_p->type = RE_TOK_ASSERT_START_POS_LOOKAHEAD;
+        }
+        else if (ch2 == '!')
+        {
+          /* (?! */
+          advance = 3;
+          out_token_p->type = RE_TOK_ASSERT_START_NEG_LOOKAHEAD;
+        }
+        else if (ch2 == ':')
+        {
+          /* (?: */
+          advance = 3;
+          out_token_p->type = RE_TOK_START_NON_CAPTURE_GROUP;
+        }
+        else
+        {
+          /* Without this the token would keep its previous type and nothing would be consumed. */
+          ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp group error: invalid group.");
+        }
+      }
+      else
+      {
+        /* ( */
+        advance = 1;
+        out_token_p->type = RE_TOK_START_CAPTURE_GROUP;
+      }
+      break;
+    }
+    case ')':
+    {
+      ECMA_TRY_CATCH (empty,
+                      parse_re_iterator (parser_ctx_p->current_char_p,
+                                         out_token_p,
+                                         1,
+                                         &advance),
+                      ret_value);
+      advance += 1;
+      out_token_p->type = RE_TOK_END_GROUP;
+      ECMA_FINALIZE (empty);
+      break;
+    }
+    case '[':
+    {
+      advance = 1;
+      out_token_p->type = RE_TOK_START_CHAR_CLASS;
+      if (RE_LOOKUP (parser_ctx_p->current_char_p, 1) == '^')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_START_INV_CHAR_CLASS;
+      }
+      break;
+    }
+    case ']':
+    case '}':
+    case '?':
+    case '*':
+    case '+':
+    case '{':
+    {
+      /*
+       * These characters can be reached with ordinary user input (for example
+       * a pattern starting with '*', or "a|+"), so they are reported as a
+       * SyntaxError instead of being treated as an internal invariant.
+       */
+      ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Invalid RegExp token.");
+      break;
+    }
+    case '\0':
+    {
+      advance = 0;
+      out_token_p->type = RE_TOK_EOF;
+      break;
+    }
+    default:
+    {
+      ECMA_TRY_CATCH (empty,
+                      parse_re_iterator (parser_ctx_p->current_char_p,
+                                         out_token_p,
+                                         1,
+                                         &advance),
+                      ret_value);
+      advance += 1;
+      out_token_p->type = RE_TOK_CHAR;
+      out_token_p->value = ch0;
+      ECMA_FINALIZE (empty);
+      break;
+    }
+  }
+
+  /* consume the token only if it was parsed without error */
+  if (ecma_is_completion_value_empty (ret_value))
+  {
+    RE_ADVANCE (parser_ctx_p->current_char_p, advance);
+  }
+
+  return ret_value;
+} /* re_parse_next_token */
+
+#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
@@ -0,0 +1,91 @@
+/* Copyright 2015 Samsung Electronics Co., Ltd.
+ * Copyright 2015 University of Szeged.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RE_PARSER_H
+#define RE_PARSER_H
+
+#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
+
+#include "opcodes-dumper.h"
+
+typedef uint8_t token_type_t; /* type of RegExp tokens, holds one of the RE_TOK_* values below */
+
+#define RE_TOK_EOF                            0 /* EOF */
+#define RE_TOK_BACKREFERENCE                  1 /* \[1..9], optionally followed by further decimal digits */
+#define RE_TOK_CHAR                           2 /* any character */
+#define RE_TOK_ALTERNATIVE                    3 /* | */
+#define RE_TOK_ASSERT_START                   4 /* ^ */
+#define RE_TOK_ASSERT_END                     5 /* $ */
+#define RE_TOK_PERIOD                         6 /* . */
+#define RE_TOK_START_CAPTURE_GROUP            7 /* ( */
+#define RE_TOK_START_NON_CAPTURE_GROUP        8 /* (?: */
+#define RE_TOK_END_GROUP                      9 /* ')' */
+#define RE_TOK_ASSERT_START_POS_LOOKAHEAD    10 /* (?= */
+#define RE_TOK_ASSERT_START_NEG_LOOKAHEAD    11 /* (?! */
+#define RE_TOK_ASSERT_WORD_BOUNDARY          12 /* \b */
+#define RE_TOK_ASSERT_NOT_WORD_BOUNDARY      13 /* \B */
+#define RE_TOK_DIGIT                         14 /* \d */
+#define RE_TOK_NOT_DIGIT                     15 /* \D */
+#define RE_TOK_WHITE                         16 /* \s */
+#define RE_TOK_NOT_WHITE                     17 /* \S */
+#define RE_TOK_WORD_CHAR                     18 /* \w */
+#define RE_TOK_NOT_WORD_CHAR                 19 /* \W */
+#define RE_TOK_START_CHAR_CLASS              20 /* [ ] */
+#define RE_TOK_START_INV_CHAR_CLASS          21 /* [^ ] */
+
+/* Value of re_token_t.qmax for iterators without an upper bound ('*', '+', '{n,}') */
+#define RE_ITERATOR_INFINITE ((uint32_t)-1)
+/* Maximum number of decimal digits accepted in a decimal escape (e.g. a backreference) */
+#define RE_MAX_RE_DECESC_DIGITS 9
+
+/* Marker for "no character"; it shares its value with the largest ecma_char_t value */
+/* FIXME: depends on unicode support */
+#define RE_CHAR_UNDEF ((ecma_char_t)-1)
+
+/* Character codes produced by the control escapes */
+#define RE_CONTROL_CHAR_NUL  0x0000 /* \0 */
+#define RE_CONTROL_CHAR_BEL  0x0008 /* \b (0x0008 is the backspace character, despite the BEL name) */
+#define RE_CONTROL_CHAR_TAB  0x0009 /* \t */
+#define RE_CONTROL_CHAR_EOL  0x000a /* \n */
+#define RE_CONTROL_CHAR_VT   0x000b /* \v */
+#define RE_CONTROL_CHAR_FF   0x000c /* \f */
+#define RE_CONTROL_CHAR_CR   0x000d /* \r */
+
+/* A single token of the pattern, filled in by the RegExp parser */
+typedef struct
+{
+  token_type_t type; /* kind of the token (RE_TOK_*) */
+  uint32_t value; /* character code for RE_TOK_CHAR, group number for RE_TOK_BACKREFERENCE */
+  uint32_t qmin; /* minimum number of repetitions (1 if the token has no iterator) */
+  uint32_t qmax; /* maximum number of repetitions, RE_ITERATOR_INFINITE if unbounded */
+  bool greedy; /* false if the iterator is followed by '?', true otherwise */
+} re_token_t;
+
+/* State of the RegExp parser over one zero-terminated pattern */
+typedef struct
+{
+  ecma_char_t *pattern_start_p; /* first character of the pattern; group counting scans from here */
+  ecma_char_t *current_char_p; /* next character to be parsed, moved forward after each token */
+  int num_of_groups; /* number of capturing groups; -1 means it has not been counted yet */
+  uint32_t num_of_classes; /* reset to 0 at the start of re_parse_char_class;
+                            * presumably updated by the append callback - TODO confirm */
+} re_parser_ctx_t;
+
+/* Callback invoked by re_parse_char_class for each member of a character class:
+ * re_ctx_p is passed through unchanged, 'start' and 'end' are the inclusive bounds
+ * of the range (equal for a single character). */
+typedef void (*re_char_class_callback) (void *re_ctx_p, uint32_t start, uint32_t end);
+
+/* Parse the body of a character class up to the closing ']' plus an optional iterator;
+ * ranges are reported through append_char_class, iterator bounds are stored in out_token_p. */
+ecma_completion_value_t
+re_parse_char_class (re_parser_ctx_t *parser_ctx_p,
+                     re_char_class_callback append_char_class,
+                     void *re_ctx_p, re_token_t *out_token_p);
+
+/* Parse the token at parser_ctx_p->current_char_p into out_token_p and move past it. */
+ecma_completion_value_t
+re_parse_next_token (re_parser_ctx_t *parser_ctx_p, re_token_t *out_token_p);
+
+#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
+#endif /* RE_PARSER_H */