Support unicode whitespace and line terminators in lexer, fix parse of comments.

JerryScript-DCO-1.0-Signed-off-by: Ruben Ayrapetyan r.ayrapetyan@samsung.com
This commit is contained in:
Ruben Ayrapetyan
2015-07-05 23:27:14 +03:00
parent 32094622ec
commit 23b847015c
+67 -63
View File
@@ -95,15 +95,20 @@ dump_current_line (void)
while (!lit_utf8_iterator_is_eos (&iter)) while (!lit_utf8_iterator_is_eos (&iter))
{ {
ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter); ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
if (code_unit == '\n') if (lit_char_is_line_terminator (code_unit))
{ {
if (code_unit == LIT_CHAR_CR
&& lit_utf8_iterator_peek_next (&iter) == LIT_CHAR_LF)
{
lit_utf8_iterator_incr (&iter);
}
break; break;
} }
lit_put_ecma_char (code_unit); lit_put_ecma_char (code_unit);
} }
lit_put_ecma_char ('\n'); lit_put_ecma_char (LIT_CHAR_LF);
} }
static token static token
@@ -1122,29 +1127,17 @@ lexer_parse_regexp (void)
return result; return result;
} /* lexer_parse_regexp */ } /* lexer_parse_regexp */
static void
grobble_whitespaces (void)
{
ecma_char_t c = LA (0);
while ((isspace (c) && c != '\n'))
{
consume_char ();
c = LA (0);
}
}
static bool static bool
replace_comment_by_newline (void) lexer_parse_comment (void)
{ {
ecma_char_t c = LA (0); ecma_char_t c = LA (0);
bool multiline; bool multiline;
bool was_newlines = false; bool was_newlines = false;
JERRY_ASSERT (LA (0) == '/'); JERRY_ASSERT (LA (0) == LIT_CHAR_SLASH);
JERRY_ASSERT (LA (1) == '/' || LA (1) == '*'); JERRY_ASSERT (LA (1) == LIT_CHAR_SLASH || LA (1) == LIT_CHAR_ASTERISK);
multiline = (LA (1) == '*'); multiline = (LA (1) == LIT_CHAR_ASTERISK);
consume_char (); consume_char ();
consume_char (); consume_char ();
@@ -1152,41 +1145,69 @@ replace_comment_by_newline (void)
while (true) while (true)
{ {
c = LA (0); c = LA (0);
if (!multiline && (c == '\n' || c == '\0'))
{
return false;
}
if (multiline && c == '*' && LA (1) == '/')
{
consume_char ();
consume_char ();
if (was_newlines) if (!multiline)
{
if (lit_char_is_line_terminator (c))
{ {
return true; return true;
} }
else else if (c == LIT_CHAR_NULL)
{ {
return false; return false;
} }
} }
if (multiline && c == '\n') else
{ {
was_newlines = true; if (c == LIT_CHAR_ASTERISK
} && LA (1) == LIT_CHAR_SLASH)
if (multiline && c == '\0') {
{ consume_char ();
PARSE_ERROR ("Unclosed multiline comment", lit_utf8_iterator_get_offset (&src_iter)); consume_char ();
return was_newlines;
}
else if (lit_char_is_line_terminator (c))
{
was_newlines = true;
}
else if (c == LIT_CHAR_NULL)
{
PARSE_ERROR ("Unclosed multiline comment", lit_utf8_iterator_get_offset (&src_iter));
}
} }
consume_char (); consume_char ();
} }
} } /* lexer_parse_comment */
static token static token
lexer_next_token_private (void) lexer_next_token_private (void)
{ {
ecma_char_t c = LA (0); ecma_char_t c = LA (0);
if (lit_char_is_white_space (c))
{
while (lit_char_is_white_space (c))
{
consume_char ();
c = LA (0);
}
}
if (lit_char_is_line_terminator (c))
{
while (lit_char_is_line_terminator (c))
{
consume_char ();
c = LA (0);
}
return create_token (TOK_NEWLINE, 0);
}
JERRY_ASSERT (token_start == NULL); JERRY_ASSERT (token_start == NULL);
/* ECMA-262 v5, 7.6, Identifier */ /* ECMA-262 v5, 7.6, Identifier */
@@ -1203,7 +1224,7 @@ lexer_next_token_private (void)
return lexer_parse_number (); return lexer_parse_number ();
} }
if (c == '\n') if (c == LIT_CHAR_LF)
{ {
consume_char (); consume_char ();
return create_token (TOK_NEWLINE, 0); return create_token (TOK_NEWLINE, 0);
@@ -1220,22 +1241,14 @@ lexer_next_token_private (void)
return lexer_parse_string (); return lexer_parse_string ();
} }
if (isspace (c)) /* ECMA-262 v5, 7.4, SingleLineComment or MultiLineComment */
if (c == LIT_CHAR_SLASH
&& (LA (1) == LIT_CHAR_SLASH
|| LA (1) == LIT_CHAR_ASTERISK))
{ {
grobble_whitespaces (); if (lexer_parse_comment ())
return lexer_next_token_private ();
}
if (c == '/' && LA (1) == '*')
{
if (replace_comment_by_newline ())
{ {
token ret; return create_token (TOK_NEWLINE, 0);
ret.type = TOK_NEWLINE;
ret.uid = 0;
return ret;
} }
else else
{ {
@@ -1243,15 +1256,6 @@ lexer_next_token_private (void)
} }
} }
if (c == '/')
{
if (LA (1) == '/')
{
replace_comment_by_newline ();
return lexer_next_token_private ();
}
}
if (c == LIT_CHAR_SLASH if (c == LIT_CHAR_SLASH
&& !(sent_token.type == TOK_NAME && !(sent_token.type == TOK_NAME
|| sent_token.type == TOK_NULL || sent_token.type == TOK_NULL
@@ -1421,7 +1425,7 @@ lexer_locus_to_line_and_column (size_t locus, size_t *line, size_t *column)
size_t l = 0, c = 0; size_t l = 0, c = 0;
for (buf = buffer_start; (size_t) (buf - buffer_start) < locus; buf++) for (buf = buffer_start; (size_t) (buf - buffer_start) < locus; buf++)
{ {
if (*buf == '\n') if (*buf == LIT_CHAR_LF)
{ {
c = 0; c = 0;
l++; l++;
@@ -1444,17 +1448,17 @@ void
lexer_dump_line (size_t line) lexer_dump_line (size_t line)
{ {
size_t l = 0; size_t l = 0;
for (const lit_utf8_byte_t *buf = buffer_start; *buf != '\0'; buf++) for (const lit_utf8_byte_t *buf = buffer_start; *buf != LIT_CHAR_NULL; buf++)
{ {
if (l == line) if (l == line)
{ {
for (; *buf != '\n' && *buf != '\0'; buf++) for (; *buf != LIT_CHAR_LF && *buf != LIT_CHAR_NULL; buf++)
{ {
putchar (*buf); putchar (*buf);
} }
return; return;
} }
if (*buf == '\n') if (*buf == LIT_CHAR_LF)
{ {
l++; l++;
} }