Invalid regexp patterns should not throw syntax error during parsing (#4038)

JerryScript-DCO-1.0-Signed-off-by: Robert Fancsik frobert@inf.u-szeged.hu
This commit is contained in:
Robert Fancsik
2020-07-27 09:28:26 +02:00
committed by GitHub
parent da5b058dec
commit 11c2ae30d1
11 changed files with 175 additions and 36 deletions
+1 -1
View File
@@ -27,7 +27,7 @@ JERRY_STATIC_ASSERT ((sizeof (cbc_uint16_arguments_t) % sizeof (jmem_cpointer_t)
*/
JERRY_STATIC_ASSERT (CBC_END == 238,
number_of_cbc_opcodes_changed);
JERRY_STATIC_ASSERT (CBC_EXT_END == 135,
JERRY_STATIC_ASSERT (CBC_EXT_END == 136,
number_of_cbc_ext_opcodes_changed);
#if ENABLED (JERRY_PARSER)
+2
View File
@@ -606,6 +606,8 @@
VM_OC_LINE) \
CBC_OPCODE (CBC_EXT_THROW_REFERENCE_ERROR, CBC_NO_FLAG, 1, \
VM_OC_THROW_REFERENCE_ERROR) \
CBC_OPCODE (CBC_EXT_THROW_SYNTAX_ERROR, CBC_HAS_LITERAL_ARG, 1, \
VM_OC_THROW_SYNTAX_ERROR | VM_OC_GET_LITERAL) \
CBC_OPCODE (CBC_EXT_THROW_ASSIGN_CONST_ERROR, CBC_NO_FLAG, 0, \
VM_OC_THROW_CONST_ERROR) \
CBC_OPCODE (CBC_EXT_REQUIRE_OBJECT_COERCIBLE, CBC_NO_FLAG, 0, \
+85 -10
View File
@@ -94,6 +94,42 @@ lexer_hex_to_code_point (const uint8_t *source_p, /**< current source position *
#if ENABLED (JERRY_ESNEXT)
/**
 * Search the literal pool for a string literal whose bytes are identical to the given buffer
 *
 * Note: literals are visited in pool order and the first match is reported;
 *       out_literal_p is written only when a match is found.
 *
 * @return index of the matching literal in the pool - if such a literal exists
 *         PARSER_INVALID_LITERAL_INDEX - otherwise
 */
static uint16_t
parser_find_string_literal (parser_context_t *context_p, /**< context */
                            lexer_literal_t **out_literal_p, /**< [out] found literal */
                            uint8_t *buffer_p, /**< character buffer */
                            lit_utf8_size_t size) /**< buffer's size */
{
  JERRY_ASSERT (out_literal_p != NULL);
  JERRY_ASSERT (buffer_p != NULL);

  parser_list_iterator_t pool_iterator;
  parser_list_iterator_init (&context_p->literal_pool, &pool_iterator);

  uint16_t pool_index = 0;
  lexer_literal_t *candidate_p = (lexer_literal_t *) parser_list_iterator_next (&pool_iterator);

  while (candidate_p != NULL)
  {
    /* The byte comparison is evaluated only for string literals of the same length. */
    bool is_match = (candidate_p->type == LEXER_STRING_LITERAL
                     && candidate_p->prop.length == size
                     && memcmp (candidate_p->u.char_p, buffer_p, size) == 0);

    if (is_match)
    {
      *out_literal_p = candidate_p;
      return pool_index;
    }

    pool_index++;
    candidate_p = (lexer_literal_t *) parser_list_iterator_next (&pool_iterator);
  }

  /* Reached the end of the pool without finding a matching string literal. */
  return PARSER_INVALID_LITERAL_INDEX;
} /* parser_find_string_literal */
/**
* Parse hexadecimal character sequence enclosed in braces
*
@@ -2697,6 +2733,9 @@ lexer_construct_function_object (parser_context_t *context_p, /**< context */
/**
* Construct a regular expression object.
*
* Note: In ESNEXT the constructed literal's type can be LEXER_STRING_LITERAL, which represents
* an invalid pattern. In that case the string literal contains the message of the error to be thrown.
*/
void
lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
@@ -2708,7 +2747,6 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
const uint8_t *regex_end_p = regex_start_p;
const uint8_t *source_end_p = context_p->source_end_p;
parser_line_counter_t column = context_p->column;
lexer_literal_t *literal_p;
bool in_class = false;
uint16_t current_flags;
lit_utf8_size_t length;
@@ -2865,13 +2903,6 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
parser_raise_error (context_p, PARSER_ERR_LITERAL_LIMIT_REACHED);
}
literal_p = (lexer_literal_t *) parser_list_append (context_p, &context_p->literal_pool);
literal_p->prop.length = (prop_length_t) length;
literal_p->type = LEXER_UNUSED_LITERAL;
literal_p->status_flags = 0;
context_p->literal_count++;
/* Compile the RegExp literal and store the RegExp bytecode pointer */
ecma_string_t *pattern_str_p = NULL;
@@ -2888,13 +2919,57 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
re_compiled_code_t *re_bytecode_p = re_compile_bytecode (pattern_str_p, current_flags);
ecma_deref_ecma_string (pattern_str_p);
lexer_literal_t *literal_p = NULL;
uint8_t literal_type = LEXER_REGEXP_LITERAL;
if (JERRY_UNLIKELY (re_bytecode_p == NULL))
{
#if ENABLED (JERRY_ESNEXT)
/* In ESNEXT a failed pattern compilation is not reported here: the pending exception is
 * taken, and its message is stored in the literal pool as a string literal instead. */
ecma_value_t error = jcontext_take_exception ();
ecma_property_t *prop_p = ecma_find_named_property (ecma_get_object_from_value (error),
                                                    ecma_get_magic_string (LIT_MAGIC_STRING_MESSAGE));
/* NOTE(review): the error value is released before its message buffer is read and copied
 * below; this presumably relies on the message string staying alive until then - confirm. */
ecma_free_value (error);

/* Fallback message, used when the error object has no "message" property. */
const char default_msg[] = "Invalid regular expression";
lit_utf8_byte_t *buffer_p = (lit_utf8_byte_t *) default_msg;
/* The length must come from the array: sizeof (buffer_p) is only the size of a pointer
 * and would truncate the fallback message. The terminating zero is not stored. */
lit_utf8_size_t size = sizeof (default_msg) - 1;

if (prop_p != NULL)
{
  ecma_string_t *message_p = ecma_get_string_from_value (ECMA_PROPERTY_VALUE_PTR (prop_p)->value);
  JERRY_ASSERT (!ECMA_IS_DIRECT_STRING (message_p));
  JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (message_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING);
  buffer_p = ECMA_ASCII_STRING_GET_BUFFER (message_p);
  size = ((ecma_ascii_string_t *) message_p)->size;
}

/* Reuse an identical message literal if the pool already has one. */
uint16_t literal_index = parser_find_string_literal (context_p, &literal_p, buffer_p, size);

if (literal_index != PARSER_INVALID_LITERAL_INDEX)
{
  /* NOTE(review): this early return skips the literal and token updates that follow the
   * #endif (literal_count, token.type, token.keyword_type) - confirm callers do not
   * rely on them for a reused literal. */
  context_p->lit_object.literal_p = literal_p;
  context_p->lit_object.index = literal_index;
  return;
}

/* No matching literal: append a new one that holds a private copy of the message bytes. */
literal_p = (lexer_literal_t *) parser_list_append (context_p, &context_p->literal_pool);
literal_p->u.char_p = (uint8_t *) jmem_heap_alloc_block (size);
memcpy ((uint8_t *) literal_p->u.char_p, buffer_p, size);
literal_type = LEXER_STRING_LITERAL;
length = size;
#else /* !ENABLED (JERRY_ESNEXT) */
parser_raise_error (context_p, PARSER_ERR_INVALID_REGEXP);
#endif /* ENABLED (JERRY_ESNEXT) */
}
else
{
literal_p = (lexer_literal_t *) parser_list_append (context_p, &context_p->literal_pool);
literal_p->u.bytecode_p = (ecma_compiled_code_t *) re_bytecode_p;
}
literal_p->type = LEXER_REGEXP_LITERAL;
literal_p->u.bytecode_p = (ecma_compiled_code_t *) re_bytecode_p;
literal_p->type = literal_type;
literal_p->prop.length = (prop_length_t) length;
literal_p->status_flags = 0;
context_p->literal_count++;
context_p->token.type = LEXER_LITERAL;
context_p->token.keyword_type = LEXER_EOS;
+17 -9
View File
@@ -782,8 +782,8 @@ parser_parse_class (parser_context_t *context_p, /**< context */
{
JERRY_ASSERT (context_p->token.type == LEXER_KEYW_CLASS);
uint16_t class_ident_index = UINT16_MAX;
uint16_t class_name_index = UINT16_MAX;
uint16_t class_ident_index = PARSER_INVALID_LITERAL_INDEX;
uint16_t class_name_index = PARSER_INVALID_LITERAL_INDEX;
parser_class_literal_opts_t opts = PARSER_CLASS_LITERAL_NO_OPTS;
if (context_p->next_scanner_info_p->source_p == context_p->source_p)
@@ -830,7 +830,7 @@ parser_parse_class (parser_context_t *context_p, /**< context */
}
}
if (class_name_index != UINT16_MAX)
if (class_name_index != PARSER_INVALID_LITERAL_INDEX)
{
parser_emit_cbc_ext_literal (context_p, CBC_EXT_PUSH_NAMED_CLASS_ENV, class_name_index);
}
@@ -864,7 +864,7 @@ parser_parse_class (parser_context_t *context_p, /**< context */
/* ClassDeclaration is parsed. Continue with class body. */
parser_parse_class_literal (context_p, opts);
if (class_name_index != UINT16_MAX)
if (class_name_index != PARSER_INVALID_LITERAL_INDEX)
{
parser_emit_cbc_ext_literal (context_p, CBC_EXT_FINALIZE_NAMED_CLASS, class_name_index);
parser_emit_cbc_ext_literal (context_p, CBC_EXT_SET_FUNCTION_NAME, class_name_index);
@@ -1976,6 +1976,14 @@ parser_parse_unary_expression (parser_context_t *context_p, /**< context */
uint16_t literal_index = (uint16_t) (context_p->literal_count - 1);
#if ENABLED (JERRY_ESNEXT)
/* A regexp literal whose type is LEXER_STRING_LITERAL marks an invalid pattern: instead of
 * pushing the literal, emit an opcode that throws a SyntaxError with that string as message.
 * NOTE(review): literal_index is computed above as literal_count - 1; when the lexer reuses
 * an already existing message literal, context_p->lit_object.index may be a different
 * index - confirm which one should be emitted here. */
if (JERRY_UNLIKELY (context_p->lit_object.literal_p->type == LEXER_STRING_LITERAL))
{
parser_emit_cbc_ext_literal (context_p, CBC_EXT_THROW_SYNTAX_ERROR, literal_index);
break;
}
#endif /* ENABLED (JERRY_ESNEXT) */
if (context_p->last_cbc_opcode == CBC_PUSH_LITERAL)
{
context_p->last_cbc_opcode = CBC_PUSH_TWO_LITERALS;
@@ -2892,7 +2900,7 @@ parser_process_binary_opcodes (parser_context_t *context_p, /**< context */
opcode = (cbc_opcode_t) context_p->stack_top_uint8;
parser_stack_pop_uint8 (context_p);
uint16_t index = UINT16_MAX;
uint16_t index = PARSER_INVALID_LITERAL_INDEX;
if (cbc_flags[opcode] & CBC_HAS_LITERAL_ARG)
{
@@ -2923,7 +2931,7 @@ parser_process_binary_opcodes (parser_context_t *context_p, /**< context */
}
#endif /* ENABLED (JERRY_ESNEXT) */
if (index != UINT16_MAX)
if (index != PARSER_INVALID_LITERAL_INDEX)
{
#if ENABLED (JERRY_ESNEXT)
if (!group_expr_assingment)
@@ -3039,7 +3047,7 @@ typedef struct
/**
* Literal index should not be emitted while processing rhs target value
*/
#define PARSER_PATTERN_RHS_NO_LIT UINT16_MAX
#define PARSER_PATTERN_RHS_NO_LIT PARSER_INVALID_LITERAL_INDEX
/**
* Process the target of an initializer pattern.
@@ -3161,7 +3169,7 @@ parser_pattern_form_assignment (parser_context_t *context_p, /**< context */
{
JERRY_UNUSED (ident_line_counter);
uint16_t name_index = UINT16_MAX;
uint16_t name_index = PARSER_INVALID_LITERAL_INDEX;
if ((flags & PARSER_PATTERN_BINDING)
|| (context_p->last_cbc_opcode == CBC_PUSH_LITERAL
@@ -3191,7 +3199,7 @@ parser_pattern_form_assignment (parser_context_t *context_p, /**< context */
parser_parse_expression (context_p, PARSE_EXPR_NO_COMMA);
if (name_index != UINT16_MAX)
if (name_index != PARSER_INVALID_LITERAL_INDEX)
{
uint16_t function_literal_index = parser_check_anonymous_function_declaration (context_p);
+6 -1
View File
@@ -433,10 +433,15 @@ typedef struct
*/
#define PARSER_REGISTER_START 0x8000
/**
* Invalid literal index
*/
#define PARSER_INVALID_LITERAL_INDEX UINT16_MAX
/**
* Lastly emitted opcode is not a function literal
*/
#define PARSER_NOT_FUNCTION_LITERAL UINT16_MAX
#define PARSER_NOT_FUNCTION_LITERAL PARSER_INVALID_LITERAL_INDEX
/**
* Lastly emitted opcode is not a named function literal
+3 -3
View File
@@ -393,7 +393,7 @@ parser_emit_cbc_push_number (parser_context_t *context_p, /**< context */
bool is_negative_number) /**< sign is negative */
{
uint16_t value = context_p->lit_object.index;
uint16_t lit_value = UINT16_MAX;
uint16_t lit_value = PARSER_INVALID_LITERAL_INDEX;
if (context_p->last_cbc_opcode != PARSER_CBC_UNAVAILABLE)
{
@@ -420,7 +420,7 @@ parser_emit_cbc_push_number (parser_context_t *context_p, /**< context */
if (value == 0)
{
if (lit_value == UINT16_MAX)
if (lit_value == PARSER_INVALID_LITERAL_INDEX)
{
context_p->last_cbc_opcode = CBC_PUSH_NUMBER_0;
return;
@@ -433,7 +433,7 @@ parser_emit_cbc_push_number (parser_context_t *context_p, /**< context */
uint16_t opcode;
if (lit_value == UINT16_MAX)
if (lit_value == PARSER_INVALID_LITERAL_INDEX)
{
opcode = (is_negative_number ? CBC_PUSH_NUMBER_NEG_BYTE
: CBC_PUSH_NUMBER_POS_BYTE);
+25 -11
View File
@@ -2924,18 +2924,27 @@ parser_parse_script (const uint8_t *arg_list_p, /**< function argument list */
return NULL;
}
#if ENABLED (JERRY_ERROR_MESSAGES)
ecma_string_t *err_str_p;
#if !ENABLED (JERRY_ESNEXT)
if (parser_error.error == PARSER_ERR_INVALID_REGEXP)
{
/* The RegExp compiler has already raised an exception. */
JERRY_ASSERT (jcontext_has_pending_exception ());
return NULL;
ecma_value_t error = jcontext_take_exception ();
ecma_property_t *prop_p = ecma_find_named_property (ecma_get_object_from_value (error),
ecma_get_magic_string (LIT_MAGIC_STRING_MESSAGE));
ecma_free_value (error);
JERRY_ASSERT (prop_p);
err_str_p = ecma_get_string_from_value (ECMA_PROPERTY_VALUE_PTR (prop_p)->value);
ecma_ref_ecma_string (err_str_p);
}
else
#endif /* !ENABLED (JERRY_ESNEXT) */
{
const lit_utf8_byte_t *err_bytes_p = (const lit_utf8_byte_t *) parser_error_to_string (parser_error.error);
lit_utf8_size_t err_bytes_size = lit_zt_utf8_string_size (err_bytes_p);
err_str_p = ecma_new_ecma_string_from_utf8 (err_bytes_p, err_bytes_size);
}
#if ENABLED (JERRY_ERROR_MESSAGES)
const lit_utf8_byte_t *err_bytes_p = (const lit_utf8_byte_t *) parser_error_to_string (parser_error.error);
lit_utf8_size_t err_bytes_size = lit_zt_utf8_string_size (err_bytes_p);
ecma_string_t *err_str_p = ecma_new_ecma_string_from_utf8 (err_bytes_p, err_bytes_size);
ecma_value_t err_str_val = ecma_make_string_value (err_str_p);
ecma_value_t line_str_val = ecma_make_uint32_value (parser_error.line);
ecma_value_t col_str_val = ecma_make_uint32_value (parser_error.column);
@@ -2949,9 +2958,14 @@ parser_parse_script (const uint8_t *arg_list_p, /**< function argument list */
ecma_free_value (col_str_val);
ecma_free_value (line_str_val);
ecma_free_value (err_str_val);
ecma_deref_ecma_string (err_str_p);
#else /* !ENABLED (JERRY_ERROR_MESSAGES) */
ecma_raise_syntax_error ("");
#if !ENABLED (JERRY_ESNEXT)
if (parser_error.error != PARSER_ERR_INVALID_REGEXP)
#endif /* !ENABLED (JERRY_ESNEXT) */
{
ecma_raise_syntax_error ("");
}
#endif /* ENABLED (JERRY_ERROR_MESSAGES) */
return NULL;
+8
View File
@@ -1640,6 +1640,14 @@ vm_loop (vm_frame_ctx_t *frame_ctx_p) /**< frame context */
result = ecma_raise_type_error (ECMA_ERR_MSG ("Constant bindings cannot be reassigned."));
goto error;
}
/* Throw a SyntaxError whose message is taken from the operand of the opcode. */
case VM_OC_THROW_SYNTAX_ERROR:
{
/* left_value is read as a string; presumably it is the literal argument fetched for
 * this opcode (it is declared with VM_OC_GET_LITERAL) - confirm. */
ecma_string_t *msg_p = ecma_get_string_from_value (left_value);
ecma_object_t *error_obj_p = ecma_new_standard_error_with_message (ECMA_ERROR_SYNTAX, msg_p);
/* Raise the new error object as the exception and continue on the common error path. */
jcontext_raise_exception (ecma_make_object_value (error_obj_p));
result = ECMA_VALUE_ERROR;
goto error;
}
case VM_OC_COPY_TO_GLOBAL:
{
uint32_t literal_index;
+2
View File
@@ -241,6 +241,7 @@ typedef enum
VM_OC_ASSIGN_LET_CONST, /**< assign values to let/const declarations */
VM_OC_INIT_BINDING, /**< create and intialize a binding */
VM_OC_THROW_CONST_ERROR, /**< throw invalid assignment to const variable error */
VM_OC_THROW_SYNTAX_ERROR, /**< throw syntax error */
VM_OC_COPY_TO_GLOBAL, /**< copy value to global lex env */
VM_OC_COPY_FROM_ARG, /**< copy value from arg lex env */
VM_OC_CLONE_CONTEXT, /**< clone lexical environment with let/const declarations */
@@ -316,6 +317,7 @@ typedef enum
VM_OC_ASSIGN_LET_CONST = VM_OC_NONE, /**< assign values to let/const declarations */
VM_OC_INIT_BINDING = VM_OC_NONE, /**< create and intialize a binding */
VM_OC_THROW_CONST_ERROR = VM_OC_NONE, /**< throw invalid assignment to const variable error */
VM_OC_THROW_SYNTAX_ERROR = VM_OC_NONE, /**< throw syntax error */
VM_OC_COPY_TO_GLOBAL = VM_OC_NONE, /**< copy value to global lex env */
VM_OC_COPY_FROM_ARG = VM_OC_NONE, /**< copy value from arg lex env */
VM_OC_CLONE_CONTEXT = VM_OC_NONE, /**< clone lexical environment with let/const declarations */
@@ -12,4 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
/?:/
try {
/?:/
assert(false);
} catch (e) {
assert(e instanceof SyntaxError);
}
@@ -0,0 +1,20 @@
// Copyright JS Foundation and other contributors, http://js.foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Evaluating source text that contains an invalid RegExp literal must throw a SyntaxError.
var evalError = null;

try {
  eval("/?:/");
} catch (thrown) {
  evalError = thrown;
}

// Fails both when eval did not throw and when it threw something other than a SyntaxError.
assert(evalError instanceof SyntaxError);