Implement \u{hex} support. (#3447)
A large rework, because surrogate pairs must be combined. Currently only the range 0x10C80..0x10CF2 is accepted as valid identifier characters from outside the basic plane. JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg <zherczeg.u-szeged@partner.samsung.com>
This commit is contained in:
committed by
Dániel Bátyai
parent
1db16c3a1c
commit
40d930d62c
@@ -21,6 +21,39 @@
|
||||
|
||||
#include "test-common.h"
|
||||
|
||||
static lit_code_point_t
|
||||
lexer_hex_to_character (const uint8_t *source_p) /**< current source position */
|
||||
{
|
||||
lit_code_point_t result = 0;
|
||||
|
||||
do
|
||||
{
|
||||
uint32_t byte = *source_p++;
|
||||
|
||||
result <<= 4;
|
||||
|
||||
if (byte >= LIT_CHAR_0 && byte <= LIT_CHAR_9)
|
||||
{
|
||||
result += byte - LIT_CHAR_0;
|
||||
}
|
||||
else
|
||||
{
|
||||
byte = LEXER_TO_ASCII_LOWERCASE (byte);
|
||||
if (byte >= LIT_CHAR_LOWERCASE_A && byte <= LIT_CHAR_LOWERCASE_F)
|
||||
{
|
||||
result += byte - (LIT_CHAR_LOWERCASE_A - 10);
|
||||
}
|
||||
else
|
||||
{
|
||||
return UINT32_MAX;
|
||||
}
|
||||
}
|
||||
}
|
||||
while (*source_p);
|
||||
|
||||
return result;
|
||||
} /* lexer_hex_to_character */
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
@@ -29,50 +62,59 @@ main (void)
|
||||
jmem_init ();
|
||||
ecma_init ();
|
||||
|
||||
const uint8_t _1_byte_long1[] = "\\u007F";
|
||||
const uint8_t _1_byte_long2[] = "\\u0000";
|
||||
const uint8_t _1_byte_long3[] = "\\u0065";
|
||||
const uint8_t _1_byte_long1[] = "007F";
|
||||
const uint8_t _1_byte_long2[] = "0000";
|
||||
const uint8_t _1_byte_long3[] = "0065";
|
||||
|
||||
const uint8_t _2_byte_long1[] = "\\u008F";
|
||||
const uint8_t _2_byte_long2[] = "\\u00FF";
|
||||
const uint8_t _2_byte_long3[] = "\\u07FF";
|
||||
const uint8_t _2_byte_long1[] = "008F";
|
||||
const uint8_t _2_byte_long2[] = "00FF";
|
||||
const uint8_t _2_byte_long3[] = "07FF";
|
||||
|
||||
const uint8_t _3_byte_long1[] = "\\u08FF";
|
||||
const uint8_t _3_byte_long2[] = "\\u0FFF";
|
||||
const uint8_t _3_byte_long3[] = "\\uFFFF";
|
||||
const uint8_t _3_byte_long1[] = "08FF";
|
||||
const uint8_t _3_byte_long2[] = "0FFF";
|
||||
const uint8_t _3_byte_long3[] = "FFFF";
|
||||
|
||||
const uint8_t _6_byte_long1[] = "10000";
|
||||
const uint8_t _6_byte_long2[] = "10FFFF";
|
||||
|
||||
size_t length;
|
||||
|
||||
/* Test 1-byte-long unicode sequences. */
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _1_byte_long1 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_1_byte_long1));
|
||||
TEST_ASSERT (length == 1);
|
||||
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _1_byte_long2 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_1_byte_long2));
|
||||
TEST_ASSERT (length == 1);
|
||||
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _1_byte_long3 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_1_byte_long3));
|
||||
TEST_ASSERT (length == 1);
|
||||
|
||||
/* Test 2-byte-long unicode sequences. */
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _2_byte_long1 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_2_byte_long1));
|
||||
TEST_ASSERT (length == 2);
|
||||
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _2_byte_long2 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_2_byte_long2));
|
||||
TEST_ASSERT (length == 2);
|
||||
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _2_byte_long3 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_2_byte_long3));
|
||||
TEST_ASSERT (length == 2);
|
||||
|
||||
/* Test 3-byte-long unicode sequences. */
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _3_byte_long1 + 2, 4));
|
||||
TEST_ASSERT (length != 2);
|
||||
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _3_byte_long2 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_3_byte_long1));
|
||||
TEST_ASSERT (length == 3);
|
||||
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _3_byte_long3 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_3_byte_long2));
|
||||
TEST_ASSERT (length == 3);
|
||||
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_3_byte_long3));
|
||||
TEST_ASSERT (length == 3);
|
||||
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_6_byte_long1));
|
||||
TEST_ASSERT (length == 6);
|
||||
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_6_byte_long2));
|
||||
TEST_ASSERT (length == 6);
|
||||
|
||||
ecma_finalize ();
|
||||
jmem_finalize ();
|
||||
|
||||
|
||||
@@ -131,7 +131,7 @@ main (void)
|
||||
|
||||
while (curr_p < end_p)
|
||||
{
|
||||
code_units[code_units_count] = lit_utf8_peek_next (curr_p);
|
||||
code_units[code_units_count] = lit_cesu8_peek_next (curr_p);
|
||||
saved_positions[code_units_count] = curr_p;
|
||||
code_units_count++;
|
||||
calculated_length++;
|
||||
@@ -147,7 +147,7 @@ main (void)
|
||||
{
|
||||
ecma_length_t index = (ecma_length_t) rand () % code_units_count;
|
||||
curr_p = saved_positions[index];
|
||||
TEST_ASSERT (lit_utf8_peek_next (curr_p) == code_units[index]);
|
||||
TEST_ASSERT (lit_cesu8_peek_next (curr_p) == code_units[index]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -156,7 +156,7 @@ main (void)
|
||||
{
|
||||
TEST_ASSERT (code_units_count > 0);
|
||||
calculated_length--;
|
||||
TEST_ASSERT (code_units[calculated_length] == lit_utf8_peek_prev (curr_p));
|
||||
TEST_ASSERT (code_units[calculated_length] == lit_cesu8_peek_prev (curr_p));
|
||||
lit_utf8_decr (&curr_p);
|
||||
}
|
||||
|
||||
@@ -164,7 +164,7 @@ main (void)
|
||||
|
||||
while (curr_p < end_p)
|
||||
{
|
||||
ecma_char_t code_unit = lit_utf8_read_next (&curr_p);
|
||||
ecma_char_t code_unit = lit_cesu8_read_next (&curr_p);
|
||||
TEST_ASSERT (code_unit == code_units[calculated_length]);
|
||||
calculated_length++;
|
||||
}
|
||||
@@ -175,7 +175,7 @@ main (void)
|
||||
{
|
||||
TEST_ASSERT (code_units_count > 0);
|
||||
calculated_length--;
|
||||
TEST_ASSERT (code_units[calculated_length] == lit_utf8_read_prev (&curr_p));
|
||||
TEST_ASSERT (code_units[calculated_length] == lit_cesu8_read_prev (&curr_p));
|
||||
}
|
||||
|
||||
TEST_ASSERT (calculated_length == 0);
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
/* Copyright JS Foundation and other contributors, http://js.foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "jerryscript.h"
|
||||
#include "test-common.h"
|
||||
|
||||
static bool
|
||||
test_syntax_error (char *script_p) /**< script */
|
||||
{
|
||||
jerry_value_t parse_result = jerry_parse (NULL,
|
||||
0,
|
||||
(const jerry_char_t *) script_p,
|
||||
strlen (script_p),
|
||||
JERRY_PARSE_NO_OPTS);
|
||||
|
||||
bool result = false;
|
||||
|
||||
if (jerry_value_is_error (parse_result))
|
||||
{
|
||||
result = true;
|
||||
TEST_ASSERT (jerry_get_error_type (parse_result) == JERRY_ERROR_SYNTAX);
|
||||
}
|
||||
|
||||
jerry_release_value (parse_result);
|
||||
return result;
|
||||
} /* test_syntax_error */
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
jerry_init (JERRY_INIT_EMPTY);
|
||||
|
||||
if (!test_syntax_error ("\\u{61}"))
|
||||
{
|
||||
TEST_ASSERT (!test_syntax_error ("\xF0\x90\xB2\x80: break \\u{10C80}"));
|
||||
/* The \u surrogate pairs are ignored. The \u{hex} form must be used. */
|
||||
TEST_ASSERT (test_syntax_error ("\xF0\x90\xB2\x80: break \\ud803\\udc80"));
|
||||
/* The utf8 code point and the cesu8 surrogate pair must match. */
|
||||
TEST_ASSERT (!test_syntax_error ("\xF0\x90\xB2\x80: break \xed\xa0\x83\xed\xb2\x80"));
|
||||
|
||||
TEST_ASSERT (!test_syntax_error ("$\xF0\x90\xB2\x80$: break $\\u{10C80}$"));
|
||||
TEST_ASSERT (test_syntax_error ("$\xF0\x90\xB2\x80$: break $\\ud803\\udc80$"));
|
||||
TEST_ASSERT (!test_syntax_error ("$\xF0\x90\xB2\x80$: break $\xed\xa0\x83\xed\xb2\x80$"));
|
||||
}
|
||||
|
||||
jerry_cleanup ();
|
||||
|
||||
return 0;
|
||||
} /* main */
|
||||
Reference in New Issue
Block a user