Improve RegExp compatibility with web browsers (#3339)

This modification adds support for parsing RegExps like /A{/.
That is, if the iterator is invalid, it should be treated as a normal
character.

This behaviour is defined in Annex B.1.4 of the ES2015 standard.

This only works if `JERRY_REGEXP_STRICT_MODE` is disabled
(set to zero).

JerryScript-DCO-1.0-Signed-off-by: Peter Gal pgal.u-szeged@partner.samsung.com
This commit is contained in:
Péter Gál
2019-11-21 10:59:21 +01:00
committed by Robert Fancsik
parent 7f6f562adb
commit aadfa13c38
2 changed files with 222 additions and 14 deletions
+29 -14
View File
@@ -630,11 +630,22 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
{
return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp token."));
}
case LIT_CHAR_NULL:
{
out_token_p->type = RE_TOK_EOF;
break;
}
case LIT_CHAR_LEFT_BRACE:
{
#if ENABLED (JERRY_REGEXP_STRICT_MODE)
return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp token."));
#else /* !ENABLED (JERRY_REGEXP_STRICT_MODE) */
/* Make sure that the current '{' does not start an iterator.
*
* E.g: /\s+{3,4}/ should fail as there is nothing to iterate.
* However /\s+{3,4/ should be valid in web compatibility mode.
*/
const lit_utf8_byte_t *input_curr_p = parser_ctx_p->input_curr_p;
lit_utf8_decr (&parser_ctx_p->input_curr_p);
@@ -648,9 +659,25 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
ecma_free_value (JERRY_CONTEXT (error_value));
parser_ctx_p->input_curr_p = input_curr_p;
/* It was not an iterator, continue the parsing. */
#endif /* ENABLED (JERRY_REGEXP_STRICT_MODE) */
/* FALLTHRU */
}
default:
{
out_token_p->type = RE_TOK_CHAR;
out_token_p->value = ch;
#if ENABLED (JERRY_REGEXP_STRICT_MODE)
ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
#else
/* In case of compatibility mode try the following:
* 1. Try parsing an iterator after the character.
* 2.a. If no error is reported: it was an iterator so return an empty value.
* 2.b. If there was an error: it was not an iterator thus return the current position
* to the start of the iterator parsing and set the return value to the empty value.
* 3. The next 're_parse_next_token' call will handle the further parsing of characters.
*/
const lit_utf8_byte_t *input_curr_p = parser_ctx_p->input_curr_p;
ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
if (!ecma_is_value_empty (ret_value))
@@ -659,19 +686,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
parser_ctx_p->input_curr_p = input_curr_p;
ret_value = ECMA_VALUE_EMPTY;
}
#endif /* ENABLED (JERRY_REGEXP_STRICT_MODE) */
break;
}
case LIT_CHAR_NULL:
{
out_token_p->type = RE_TOK_EOF;
break;
}
default:
{
out_token_p->type = RE_TOK_CHAR;
out_token_p->value = ch;
ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
#endif
break;
}
}