Implement RegExp octal support
JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai <dbatyai.u-szeged@partner.samsung.com>
JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan <szledan.u-szeged@partner.samsung.com>
This commit is contained in:
committed by
Dániel Bátyai
parent
cf1eda0981
commit
e22a085fc1
@@ -70,6 +70,27 @@ re_parse_non_greedy_char (lit_utf8_iterator_t *iter_p) /**< RegExp pattern */
|
|||||||
return false;
|
return false;
|
||||||
} /* re_parse_non_greedy_char */
|
} /* re_parse_non_greedy_char */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse a max 3 digit long octal number from input string iterator.
|
||||||
|
*
|
||||||
|
* @return uint32_t - parsed octal number
|
||||||
|
*/
|
||||||
|
static uint32_t
|
||||||
|
re_parse_octal (lit_utf8_iterator_t *iter) /**< input string iterator */
|
||||||
|
{
|
||||||
|
uint32_t number = 0;
|
||||||
|
for (int index = 0;
|
||||||
|
index < 3
|
||||||
|
&& !lit_utf8_iterator_is_eos (iter)
|
||||||
|
&& lit_char_is_octal_digit (lit_utf8_iterator_peek_next (iter));
|
||||||
|
index++)
|
||||||
|
{
|
||||||
|
number = number * 8 + lit_char_hex_to_int (lit_utf8_iterator_read_next (iter));
|
||||||
|
}
|
||||||
|
|
||||||
|
return number;
|
||||||
|
} /* re_parse_octal */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse RegExp iterators
|
* Parse RegExp iterators
|
||||||
*
|
*
|
||||||
@@ -465,26 +486,13 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
|
|||||||
ch = RE_CHAR_UNDEF;
|
ch = RE_CHAR_UNDEF;
|
||||||
}
|
}
|
||||||
else if (ch <= LIT_UTF16_CODE_UNIT_MAX
|
else if (ch <= LIT_UTF16_CODE_UNIT_MAX
|
||||||
&& lit_char_is_decimal_digit ((ecma_char_t) ch))
|
&& lit_char_is_octal_digit ((ecma_char_t) ch)
|
||||||
|
&& ch != LIT_CHAR_0)
|
||||||
{
|
{
|
||||||
if (lit_utf8_iterator_is_eos (iter_p))
|
lit_utf8_iterator_decr (iter_p);
|
||||||
{
|
ch = re_parse_octal (iter_p);
|
||||||
return ecma_raise_syntax_error ("invalid character class, end of string after '\\<digits>'");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ch != LIT_CHAR_0
|
|
||||||
|| lit_char_is_decimal_digit (lit_utf8_iterator_peek_next (iter_p)))
|
|
||||||
{
|
|
||||||
/* FIXME: octal support */
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
/* FIXME: depends on the unicode support
|
} /* ch == LIT_CHAR_BACKSLASH */
|
||||||
else if (!jerry_unicode_identifier (ch))
|
|
||||||
{
|
|
||||||
JERRY_ERROR_MSG ("RegExp escape pattern error. (Char class)");
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ch == RE_CHAR_UNDEF)
|
if (ch == RE_CHAR_UNDEF)
|
||||||
{
|
{
|
||||||
@@ -745,12 +753,43 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
|
|||||||
{
|
{
|
||||||
out_token_p->type = RE_TOK_BACKREFERENCE;
|
out_token_p->type = RE_TOK_BACKREFERENCE;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
/* Invalid backreference, fallback to octal */
|
||||||
|
{
|
||||||
|
/* Rewind to start of number. */
|
||||||
|
while (index-- > 0)
|
||||||
|
{
|
||||||
|
lit_utf8_iterator_decr (iter_p);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try to reparse as octal. */
|
||||||
|
ecma_char_t digit = lit_utf8_iterator_peek_next (iter_p);
|
||||||
|
|
||||||
|
if (!lit_char_is_octal_digit (digit))
|
||||||
|
{
|
||||||
|
/* Not octal, keep digit character value. */
|
||||||
|
number = lit_utf8_iterator_read_next (iter_p);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
number = re_parse_octal (iter_p);
|
||||||
|
}
|
||||||
|
}
|
||||||
out_token_p->value = number;
|
out_token_p->value = number;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
/* Invalid backreference, fallback to octal if possible */
|
||||||
{
|
{
|
||||||
out_token_p->value = ch;
|
if (!lit_char_is_octal_digit (ch))
|
||||||
|
{
|
||||||
|
/* Not octal, keep character value. */
|
||||||
|
out_token_p->value = ch;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
lit_utf8_iterator_decr (iter_p);
|
||||||
|
out_token_p->value = re_parse_octal (iter_p);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -56,3 +56,21 @@ assert (r.exec ("abcdefghijkl") == "abcdefghijkl");
|
|||||||
|
|
||||||
r = /\n/;
|
r = /\n/;
|
||||||
assert (r.exec ("\n") == "\n");
|
assert (r.exec ("\n") == "\n");
|
||||||
|
|
||||||
|
assert (/[\12]+/.exec ("1\n\n\n\n\n2") == "\n\n\n\n\n");
|
||||||
|
assert (/[\1284]+/.exec ("1\n\n8\n4\n\n2") == "\n\n8\n4\n\n");
|
||||||
|
assert (/[\89]12/.exec ("1\9128123") == "912");
|
||||||
|
assert (/[\11]/.exec ("1\n\n\t\n\n2") == "\t");
|
||||||
|
assert (/[\142][\143][\144]/.exec ("abcde") == "bcd");
|
||||||
|
|
||||||
|
assert (/\12+/.exec ("1\n\n\n\n\n2") == "\n\n\n\n\n");
|
||||||
|
assert (/\11/.exec ("1\n\n\t\n\n2") == "\t");
|
||||||
|
assert (/\142\143\144/.exec ("abcde") == "bcd");
|
||||||
|
assert (/\942\143\144/.exec ("a942cde") == "942cd");
|
||||||
|
assert (/\14234/.exec ("b34") == "b34");
|
||||||
|
|
||||||
|
assert (/(\d+)\2([abc]+)\1\2/.exec("123abc123abc") == "123abc123abc,123,abc");
|
||||||
|
assert (/([abc]+)\40([d-f]+)\12\1/.exec("abc def\nabc") == "abc def\nabc,abc,def");
|
||||||
|
|
||||||
|
var expected = "8765432911,8,7,6,5,4,3,2,9,1";
|
||||||
|
assert (/(\d)(\d)(\d)(\d)(\d)(\d)(\d)(\d)\9(\d)\9/.exec("8765432911") == expected);
|
||||||
|
|||||||
Reference in New Issue
Block a user