Improve RegExp compatibility with web browsers (#3339)
This modification adds support for parsing RegExps such as /A{/.
That is, if the iterator is invalid, it should be treated as a normal
character.
This behaviour is defined in Annex B.1.4 of the ES2015 standard.
This only works if `JERRY_REGEXP_STRICT_MODE` is disabled
(set to zero).
JerryScript-DCO-1.0-Signed-off-by: Peter Gal pgal.u-szeged@partner.samsung.com
This commit is contained in:
committed by
Robert Fancsik
parent
7f6f562adb
commit
aadfa13c38
@@ -630,11 +630,22 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp token."));
|
||||
}
|
||||
case LIT_CHAR_NULL:
|
||||
{
|
||||
out_token_p->type = RE_TOK_EOF;
|
||||
break;
|
||||
}
|
||||
case LIT_CHAR_LEFT_BRACE:
|
||||
{
|
||||
#if ENABLED (JERRY_REGEXP_STRICT_MODE)
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp token."));
|
||||
#else /* !ENABLED (JERRY_REGEXP_STRICT_MODE) */
|
||||
|
||||
/* Make sure that the current '{' does not start an iterator.
|
||||
*
|
||||
* E.g: /\s+{3,4}/ should fail as there is nothing to iterate.
|
||||
* However /\s+{3,4/ should be valid in web compatibility mode.
|
||||
*/
|
||||
const lit_utf8_byte_t *input_curr_p = parser_ctx_p->input_curr_p;
|
||||
|
||||
lit_utf8_decr (&parser_ctx_p->input_curr_p);
|
||||
@@ -648,9 +659,25 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
|
||||
ecma_free_value (JERRY_CONTEXT (error_value));
|
||||
|
||||
parser_ctx_p->input_curr_p = input_curr_p;
|
||||
|
||||
/* It was not an iterator, continue the parsing. */
|
||||
#endif /* ENABLED (JERRY_REGEXP_STRICT_MODE) */
|
||||
/* FALLTHRU */
|
||||
}
|
||||
default:
|
||||
{
|
||||
out_token_p->type = RE_TOK_CHAR;
|
||||
out_token_p->value = ch;
|
||||
#if ENABLED (JERRY_REGEXP_STRICT_MODE)
|
||||
ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
|
||||
#else
|
||||
/* In case of compatibility mode try the following:
|
||||
* 1. Try parsing an iterator after the character.
|
||||
* 2.a. If no error is reported: it was an iterator so return an empty value.
|
||||
* 2.b. If there was an error: it was not an iterator thus return the current position
|
||||
* to the start of the iterator parsing and set the return value to the empty value.
|
||||
* 3. The next 're_parse_next_token' call will handle the further parsing of characters.
|
||||
*/
|
||||
const lit_utf8_byte_t *input_curr_p = parser_ctx_p->input_curr_p;
|
||||
ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
|
||||
|
||||
if (!ecma_is_value_empty (ret_value))
|
||||
@@ -659,19 +686,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
|
||||
parser_ctx_p->input_curr_p = input_curr_p;
|
||||
ret_value = ECMA_VALUE_EMPTY;
|
||||
}
|
||||
#endif /* ENABLED (JERRY_REGEXP_STRICT_MODE) */
|
||||
break;
|
||||
}
|
||||
case LIT_CHAR_NULL:
|
||||
{
|
||||
out_token_p->type = RE_TOK_EOF;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
out_token_p->type = RE_TOK_CHAR;
|
||||
out_token_p->value = ch;
|
||||
ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user