Support Unicode supplementary planes (#3928)

JerryScript-DCO-1.0-Signed-off-by: Robert Fancsik frobert@inf.u-szeged.hu
This commit is contained in:
Robert Fancsik
2020-07-06 14:21:13 +02:00
committed by GitHub
parent 7353b253ab
commit c1e90da0b4
16 changed files with 1105 additions and 861 deletions
+14 -4
View File
@@ -2605,6 +2605,19 @@ ecma_stringbuilder_append_raw (ecma_stringbuilder_t *builder_p, /**< string buil
memcpy (dest_p, data_p, data_size); memcpy (dest_p, data_p, data_size);
} /* ecma_stringbuilder_append_raw */ } /* ecma_stringbuilder_append_raw */
/**
 * Append the CESU-8 encoded form of a code point to a string builder
 */
void
ecma_stringbuilder_append_codepoint (ecma_stringbuilder_t *builder_p, /**< string builder */
                                     lit_code_point_t cp) /**< code point */
{
  /* Grow the builder by the CESU-8 length of the code point; the returned
   * pointer is the destination the encoded bytes are written to. */
  lit_utf8_byte_t *out_p = ecma_stringbuilder_grow (builder_p,
                                                    (lit_utf8_size_t) lit_code_point_get_cesu8_length (cp));

  lit_code_point_to_cesu8_bytes (out_p, cp);
} /* ecma_stringbuilder_append_codepoint */
/** /**
* Append an ecma_char_t to a string builder * Append an ecma_char_t to a string builder
*/ */
@@ -2612,10 +2625,7 @@ void
ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, /**< string builder */ ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, /**< string builder */
const ecma_char_t c) /**< ecma char */ const ecma_char_t c) /**< ecma char */
{ {
const lit_utf8_size_t size = (lit_utf8_size_t) lit_code_point_get_cesu8_length (c); ecma_stringbuilder_append_codepoint (builder_p, c);
lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, size);
lit_code_point_to_cesu8_bytes (dest_p, c);
} /* ecma_stringbuilder_append_char */ } /* ecma_stringbuilder_append_char */
/** /**
+1
View File
@@ -393,6 +393,7 @@ void ecma_stringbuilder_append_magic (ecma_stringbuilder_t *builder_p, const lit
void ecma_stringbuilder_append_raw (ecma_stringbuilder_t *builder_p, void ecma_stringbuilder_append_raw (ecma_stringbuilder_t *builder_p,
const lit_utf8_byte_t *data_p, const lit_utf8_byte_t *data_p,
const lit_utf8_size_t data_size); const lit_utf8_size_t data_size);
void ecma_stringbuilder_append_codepoint (ecma_stringbuilder_t *builder_p, lit_code_point_t cp);
void ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, const ecma_char_t c); void ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, const ecma_char_t c);
void ecma_stringbuilder_append_byte (ecma_stringbuilder_t *builder_p, const lit_utf8_byte_t); void ecma_stringbuilder_append_byte (ecma_stringbuilder_t *builder_p, const lit_utf8_byte_t);
ecma_string_t *ecma_stringbuilder_finalize (ecma_stringbuilder_t *builder_p); ecma_string_t *ecma_stringbuilder_finalize (ecma_stringbuilder_t *builder_p);
@@ -988,96 +988,42 @@ ecma_builtin_string_prototype_object_conversion_helper (ecma_string_t *input_str
bool lower_case) /**< convert to lower (true) bool lower_case) /**< convert to lower (true)
* or upper (false) case */ * or upper (false) case */
{ {
ecma_value_t ret_value = ECMA_VALUE_EMPTY; ecma_stringbuilder_t builder = ecma_stringbuilder_create ();
/* 3. */
ECMA_STRING_TO_UTF8_STRING (input_string_p, input_start_p, input_start_size); ECMA_STRING_TO_UTF8_STRING (input_string_p, input_start_p, input_start_size);
/* const lit_utf8_byte_t *input_curr_p = input_start_p;
* The URI encoding has two major phases: first we compute
* the length of the lower case string, then we encode it.
*/
lit_utf8_size_t output_length = 0;
const lit_utf8_byte_t *input_str_curr_p = input_start_p;
const lit_utf8_byte_t *input_str_end_p = input_start_p + input_start_size; const lit_utf8_byte_t *input_str_end_p = input_start_p + input_start_size;
while (input_str_curr_p < input_str_end_p) while (input_curr_p < input_str_end_p)
{ {
ecma_char_t character = lit_cesu8_read_next (&input_str_curr_p); lit_code_point_t cp = lit_cesu8_read_next (&input_curr_p);
ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
ecma_length_t character_length; #if ENABLED (JERRY_ESNEXT)
lit_utf8_byte_t utf8_byte_buffer[LIT_CESU8_MAX_BYTES_IN_CODE_POINT]; if (lit_is_code_point_utf16_high_surrogate (cp))
{
const ecma_char_t next_ch = lit_cesu8_peek_next (input_curr_p);
if (lit_is_code_point_utf16_low_surrogate (next_ch))
{
cp = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) cp, next_ch);
input_curr_p += LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;
}
}
#endif /* ENABLED (JERRY_ESNEXT) */
if (lower_case) if (lower_case)
{ {
character_length = lit_char_to_lower_case (character, lit_char_to_lower_case (cp, &builder);
character_buffer,
LIT_MAXIMUM_OTHER_CASE_LENGTH);
} }
else else
{ {
character_length = lit_char_to_upper_case (character, lit_char_to_upper_case (cp, &builder);
character_buffer,
LIT_MAXIMUM_OTHER_CASE_LENGTH);
}
JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
for (ecma_length_t i = 0; i < character_length; i++)
{
output_length += lit_code_unit_to_utf8 (character_buffer[i], utf8_byte_buffer);
} }
} }
/* Second phase. */
JMEM_DEFINE_LOCAL_ARRAY (output_start_p,
output_length,
lit_utf8_byte_t);
lit_utf8_byte_t *output_char_p = output_start_p;
/* Encoding the output. */
input_str_curr_p = input_start_p;
while (input_str_curr_p < input_str_end_p)
{
ecma_char_t character = lit_cesu8_read_next (&input_str_curr_p);
ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
ecma_length_t character_length;
if (lower_case)
{
character_length = lit_char_to_lower_case (character,
character_buffer,
LIT_MAXIMUM_OTHER_CASE_LENGTH);
}
else
{
character_length = lit_char_to_upper_case (character,
character_buffer,
LIT_MAXIMUM_OTHER_CASE_LENGTH);
}
JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
for (ecma_length_t i = 0; i < character_length; i++)
{
output_char_p += lit_code_unit_to_utf8 (character_buffer[i], output_char_p);
}
}
JERRY_ASSERT (output_start_p + output_length == output_char_p);
ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_length);
ret_value = ecma_make_string_value (output_string_p);
JMEM_FINALIZE_LOCAL_ARRAY (output_start_p);
ECMA_FINALIZE_UTF8_STRING (input_start_p, input_start_size); ECMA_FINALIZE_UTF8_STRING (input_start_p, input_start_size);
return ret_value; return ecma_make_string_value (ecma_stringbuilder_finalize (&builder));
} /* ecma_builtin_string_prototype_object_conversion_helper */ } /* ecma_builtin_string_prototype_object_conversion_helper */
/** /**
@@ -413,23 +413,13 @@ ecma_regexp_canonicalize_char (lit_code_point_t ch, /**< character */
return ch; return ch;
} }
#if ENABLED (JERRY_ESNEXT) lit_code_point_t cu = lit_char_to_upper_case (ch, NULL);
/* TODO: Implement case folding for code points in the upper planes. */
if (JERRY_UNLIKELY (ch > LIT_UTF16_CODE_UNIT_MAX))
{
return ch;
}
#endif /* ENABLED (JERRY_ESNEXT) */
ecma_char_t u[LIT_MAXIMUM_OTHER_CASE_LENGTH]; if (cu == LIT_MULTIPLE_CU)
const ecma_length_t size = lit_char_to_upper_case ((ecma_char_t) ch, u, LIT_MAXIMUM_OTHER_CASE_LENGTH);
if (size != 1)
{ {
return ch; return ch;
} }
const ecma_char_t cu = u[0];
if (cu <= LIT_UTF8_1_BYTE_CODE_POINT_MAX && !unicode) if (cu <= LIT_UTF8_1_BYTE_CODE_POINT_MAX && !unicode)
{ {
/* 6. */ /* 6. */
+341 -259
View File
@@ -14,12 +14,15 @@
*/ */
#include "config.h" #include "config.h"
#include "ecma-helpers.h"
#include "lit-char-helpers.h" #include "lit-char-helpers.h"
#include "lit-unicode-ranges.inc.h" #include "lit-unicode-ranges.inc.h"
#include "lit-unicode-ranges-sup.inc.h"
#include "lit-strings.h" #include "lit-strings.h"
#if ENABLED (JERRY_UNICODE_CASE_CONVERSION) #if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
#include "lit-unicode-conversions.inc.h" #include "lit-unicode-conversions.inc.h"
#include "lit-unicode-conversions-sup.inc.h"
#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */ #endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
#define NUM_OF_ELEMENTS(array) (sizeof (array) / sizeof ((array)[0])) #define NUM_OF_ELEMENTS(array) (sizeof (array) / sizeof ((array)[0]))
@@ -31,36 +34,43 @@
* @return true - if the character is in the given array * @return true - if the character is in the given array
* false - otherwise * false - otherwise
*/ */
static bool #define LIT_SEARCH_CHAR_IN_ARRAY_FN(function_name, char_type, array_type) \
search_char_in_char_array (ecma_char_t c, /**< code unit */ static bool \
const ecma_char_t *array, /**< array */ function_name (char_type c, /**< code unit */ \
int size_of_array) /**< length of the array */ const array_type *array, /**< array */ \
{ int size_of_array) /**< length of the array */\
int bottom = 0; { \
int top = size_of_array - 1; int bottom = 0; \
int top = size_of_array - 1; \
\
while (bottom <= top) \
{ \
int middle = (bottom + top) / 2; \
char_type current = array[middle]; \
\
if (current == c) \
{ \
return true; \
} \
\
if (c < current) \
{ \
top = middle - 1; \
} \
else \
{ \
bottom = middle + 1; \
} \
} \
\
return false; \
} /* __function_name */
while (bottom <= top) LIT_SEARCH_CHAR_IN_ARRAY_FN (lit_search_char_in_array, ecma_char_t, uint16_t)
{
int middle = (bottom + top) / 2;
ecma_char_t current = array[middle];
if (current == c) #if ENABLED (JERRY_ESNEXT)
{ LIT_SEARCH_CHAR_IN_ARRAY_FN (lit_search_codepoint_in_array, lit_code_point_t, uint32_t)
return true; #endif /* ENABLED (JERRY_ESNEXT) */
}
if (c < current)
{
top = middle - 1;
}
else
{
bottom = middle + 1;
}
}
return false;
} /* search_char_in_char_array */
/** /**
* Binary search algorithm that searches a character in the given intervals. * Binary search algorithm that searches a character in the given intervals.
@@ -70,37 +80,44 @@ search_char_in_char_array (ecma_char_t c, /**< code unit */
* @return true - if the character is included (inclusively) in one of the intervals in the given array * @return true - if the character is included (inclusively) in one of the intervals in the given array
* false - otherwise * false - otherwise
*/ */
static bool #define LIT_SEARCH_CHAR_IN_INTERVAL_ARRAY_FN(function_name, char_type, array_type, interval_type) \
search_char_in_interval_array (ecma_char_t c, /**< code unit */ static bool \
const ecma_char_t *array_sp, /**< array of interval starting points */ function_name (char_type c, /**< code unit */ \
const uint8_t *lengths, /**< array of interval lengths */ const array_type *array_sp, /**< array of interval starting points */ \
int size_of_array) /**< length of the array */ const interval_type *lengths, /**< array of interval lengths */ \
{ int size_of_array) /**< length of the array */ \
int bottom = 0; { \
int top = size_of_array - 1; int bottom = 0; \
int top = size_of_array - 1; \
\
while (bottom <= top) \
{ \
int middle = (bottom + top) / 2; \
char_type current_sp = array_sp[middle]; \
\
if (current_sp <= c && c <= current_sp + lengths[middle]) \
{ \
return true; \
} \
\
if (c > current_sp) \
{ \
bottom = middle + 1; \
} \
else \
{ \
top = middle - 1; \
} \
} \
\
return false; \
} /* function_name */
while (bottom <= top) LIT_SEARCH_CHAR_IN_INTERVAL_ARRAY_FN (lit_search_char_in_interval_array, ecma_char_t, uint16_t, uint8_t)
{
int middle = (bottom + top) / 2;
ecma_char_t current_sp = array_sp[middle];
if (current_sp <= c && c <= current_sp + lengths[middle]) #if ENABLED (JERRY_ESNEXT)
{ LIT_SEARCH_CHAR_IN_INTERVAL_ARRAY_FN (lit_search_codepoint_in_interval_array, lit_code_point_t, uint32_t, uint16_t)
return true; #endif /* ENABLED (JERRY_ESNEXT) */
}
if (c > current_sp)
{
bottom = middle + 1;
}
else
{
top = middle - 1;
}
}
return false;
} /* search_char_in_interval_array */
/** /**
* Check if specified character is one of the Whitespace characters including those that fall into * Check if specified character is one of the Whitespace characters including those that fall into
@@ -116,20 +133,18 @@ lit_char_is_white_space (lit_code_point_t c) /**< code point */
{ {
return (c == LIT_CHAR_SP || (c >= LIT_CHAR_TAB && c <= LIT_CHAR_CR)); return (c == LIT_CHAR_SP || (c >= LIT_CHAR_TAB && c <= LIT_CHAR_CR));
} }
else
{
if (c == LIT_CHAR_NBSP || c == LIT_CHAR_BOM || c == LIT_CHAR_LS || c == LIT_CHAR_PS) if (c == LIT_CHAR_NBSP || c == LIT_CHAR_BOM || c == LIT_CHAR_LS || c == LIT_CHAR_PS)
{ {
return true; return true;
} }
return (c <= LIT_UTF16_CODE_UNIT_MAX return (c <= LIT_UTF16_CODE_UNIT_MAX
&& ((c >= lit_unicode_separator_char_interval_sps[0] && ((c >= lit_unicode_white_space_interval_starts[0]
&& c < lit_unicode_separator_char_interval_sps[0] + lit_unicode_separator_char_interval_lengths[0]) && c < lit_unicode_white_space_interval_starts[0] + lit_unicode_white_space_interval_lengths[0])
|| search_char_in_char_array ((ecma_char_t) c, || lit_search_char_in_array ((ecma_char_t) c,
lit_unicode_separator_chars, lit_unicode_white_space_chars,
NUM_OF_ELEMENTS (lit_unicode_separator_chars)))); NUM_OF_ELEMENTS (lit_unicode_white_space_chars))));
}
} /* lit_char_is_white_space */ } /* lit_char_is_white_space */
/** /**
@@ -148,58 +163,84 @@ lit_char_is_line_terminator (ecma_char_t c) /**< code unit */
} /* lit_char_is_line_terminator */ } /* lit_char_is_line_terminator */
/** /**
* Check if specified character is a unicode letter * Check if specified character is a Unicode ID_Start
*
* Note:
* Unicode letter is a character, included into one of the following categories:
* - Uppercase letter (Lu);
* - Lowercase letter (Ll);
* - Titlecase letter (Lt);
* - Modifier letter (Lm);
* - Other letter (Lo);
* - Letter number (Nl).
* *
* See also: * See also:
* ECMA-262 v5, 7.6 * ECMA-262 v1, 11.6: UnicodeIDStart
* *
* @return true - if specified character falls into one of the listed categories, * @return true - if the codepoint has Unicode property "ID_Start"
* false - otherwise * false - otherwise
*/ */
static bool static bool
lit_char_is_unicode_letter (ecma_char_t c) /**< code unit */ lit_char_is_unicode_id_start (lit_code_point_t code_point) /**< code unit */
{ {
return (search_char_in_interval_array (c, #if ENABLED (JERRY_ESNEXT)
lit_unicode_letter_interval_sps, if (JERRY_UNLIKELY (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN))
lit_unicode_letter_interval_lengths, {
NUM_OF_ELEMENTS (lit_unicode_letter_interval_sps)) return (lit_search_codepoint_in_interval_array (code_point,
|| search_char_in_char_array (c, lit_unicode_letter_chars, NUM_OF_ELEMENTS (lit_unicode_letter_chars))); lit_unicode_id_start_interval_starts_sup,
} /* lit_char_is_unicode_letter */ lit_unicode_id_start_interval_lengths_sup,
NUM_OF_ELEMENTS (lit_unicode_id_start_interval_starts_sup))
|| lit_search_codepoint_in_array (code_point,
lit_unicode_id_start_chars_sup,
NUM_OF_ELEMENTS (lit_unicode_id_start_chars_sup)));
}
#else /* !ENABLED (JERRY_ESNEXT) */
JERRY_ASSERT (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN);
#endif /* ENABLED (JERRY_ESNEXT) */
ecma_char_t c = (ecma_char_t) code_point;
return (lit_search_char_in_interval_array (c,
lit_unicode_id_start_interval_starts,
lit_unicode_id_start_interval_lengths,
NUM_OF_ELEMENTS (lit_unicode_id_start_interval_starts))
|| lit_search_char_in_array (c, lit_unicode_id_start_chars, NUM_OF_ELEMENTS (lit_unicode_id_start_chars)));
} /* lit_char_is_unicode_id_start */
/** /**
* Check if specified character is a non-letter character and can be used as a * Check if specified character is a Unicode ID_Continue
* non-first character of an identifier.
* These characters coverd by the following unicode categories:
* - digit (Nd)
* - punctuation mark (Mn, Mc)
* - connector punctuation (Pc)
* *
* See also: * See also:
* ECMA-262 v5, 7.6 * ECMA-262 v1, 11.6: UnicodeIDContinue
* *
* @return true - if specified character falls into one of the listed categories, * @return true - if the codepoint has Unicode property "ID_Continue"
* false - otherwise * false - otherwise
*/ */
static bool static bool
lit_char_is_unicode_non_letter_ident_part (ecma_char_t c) /**< code unit */ lit_char_is_unicode_id_continue (lit_code_point_t code_point) /**< code unit */
{ {
return (search_char_in_interval_array (c, /* Each ID_Start codepoint is ID_Continue as well. */
lit_unicode_non_letter_ident_part_interval_sps, if (lit_char_is_unicode_id_start (code_point))
lit_unicode_non_letter_ident_part_interval_lengths, {
NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_interval_sps)) return true;
|| search_char_in_char_array (c, }
lit_unicode_non_letter_ident_part_chars,
NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_chars))); #if ENABLED (JERRY_ESNEXT)
} /* lit_char_is_unicode_non_letter_ident_part */ if (JERRY_UNLIKELY (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN))
{
return (lit_search_codepoint_in_interval_array (code_point,
lit_unicode_id_continue_interval_starts_sup,
lit_unicode_id_continue_interval_lengths_sup,
NUM_OF_ELEMENTS (lit_unicode_id_continue_interval_starts_sup))
|| lit_search_codepoint_in_array (code_point,
lit_unicode_id_continue_chars_sup,
NUM_OF_ELEMENTS (lit_unicode_id_continue_chars_sup)));
}
#else /* !ENABLED (JERRY_ESNEXT) */
JERRY_ASSERT (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN);
#endif /* ENABLED (JERRY_ESNEXT) */
ecma_char_t c = (ecma_char_t) code_point;
return (lit_search_char_in_interval_array (c,
lit_unicode_id_continue_interval_starts,
lit_unicode_id_continue_interval_lengths,
NUM_OF_ELEMENTS (lit_unicode_id_continue_interval_starts))
|| lit_search_char_in_array (c,
lit_unicode_id_continue_chars,
NUM_OF_ELEMENTS (lit_unicode_id_continue_chars)));
} /* lit_char_is_unicode_id_continue */
/** /**
* Checks whether the character is a valid identifier start. * Checks whether the character is a valid identifier start.
@@ -218,17 +259,7 @@ lit_code_point_is_identifier_start (lit_code_point_t code_point) /**< code point
|| code_point == LIT_CHAR_UNDERSCORE); || code_point == LIT_CHAR_UNDERSCORE);
} }
#if ENABLED (JERRY_ESNEXT) return lit_char_is_unicode_id_start (code_point);
if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)
{
/* TODO: detect these ranges correctly. */
return (code_point >= 0x10C80 && code_point <= 0x10CF2);
}
#else /* !ENABLED (JERRY_ESNEXT) */
JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN);
#endif /* ENABLED (JERRY_ESNEXT) */
return lit_char_is_unicode_letter ((ecma_char_t) code_point);
} /* lit_code_point_is_identifier_start */ } /* lit_code_point_is_identifier_start */
/** /**
@@ -249,18 +280,7 @@ lit_code_point_is_identifier_part (lit_code_point_t code_point) /**< code point
|| code_point == LIT_CHAR_UNDERSCORE); || code_point == LIT_CHAR_UNDERSCORE);
} }
#if ENABLED (JERRY_ESNEXT) return lit_char_is_unicode_id_continue (code_point);
if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)
{
/* TODO: detect these ranges correctly. */
return (code_point >= 0x10C80 && code_point <= 0x10CF2);
}
#else /* !ENABLED (JERRY_ESNEXT) */
JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN);
#endif /* ENABLED (JERRY_ESNEXT) */
return (lit_char_is_unicode_letter ((ecma_char_t) code_point)
|| lit_char_is_unicode_non_letter_ident_part ((ecma_char_t) code_point));
} /* lit_code_point_is_identifier_part */ } /* lit_code_point_is_identifier_part */
/** /**
@@ -519,16 +539,27 @@ lit_char_is_word_char (lit_code_point_t c) /**< code point */
/** /**
* Check if the specified character is in one of those tables which contain bidirectional conversions. * Check if the specified character is in one of those tables which contain bidirectional conversions.
* *
* @return the mapped character sequence of an ecma character, if it's in the table. * @return codepoint of the converted character if it is found in the tables
* 0 - otherwise. * LIT_INVALID_CP - otherwise.
*/ */
static ecma_length_t static lit_code_point_t
search_in_bidirectional_conversion_tables (ecma_char_t character, /**< code unit */ lit_search_in_bidirectional_conversion_tables (lit_code_point_t cp, /**< code point */
ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
bool is_lowercase) /**< is lowercase conversion */ bool is_lowercase) /**< is lowercase conversion */
{ {
/* 1, Check if the specified character is part of the lit_character_case_ranges table. */ /* 1, Check if the specified character is part of the lit_unicode_character_case_ranges_{sup} table. */
int number_of_case_ranges = NUM_OF_ELEMENTS (lit_character_case_ranges); int number_of_case_ranges;
#if ENABLED (JERRY_ESNEXT)
bool is_supplementary = cp > LIT_UTF16_CODE_UNIT_MAX;
if (is_supplementary)
{
number_of_case_ranges = NUM_OF_ELEMENTS (lit_unicode_character_case_ranges_sup);
}
else
#endif /* ENABLED (JERRY_ESNEXT) */
{
number_of_case_ranges = NUM_OF_ELEMENTS (lit_unicode_character_case_ranges);
}
int conv_counter = 0; int conv_counter = 0;
for (int i = 0; i < number_of_case_ranges; i++) for (int i = 0; i < number_of_case_ranges; i++)
@@ -538,54 +569,92 @@ search_in_bidirectional_conversion_tables (ecma_char_t character, /**< co
conv_counter++; conv_counter++;
} }
int range_length = lit_character_case_range_lengths[conv_counter]; size_t range_length;
ecma_char_t start_point = lit_character_case_ranges[i]; lit_code_point_t start_point;
#if ENABLED (JERRY_ESNEXT)
if (is_supplementary)
{
range_length = lit_unicode_character_case_range_lengths_sup[conv_counter];
start_point = lit_unicode_character_case_ranges_sup[i];
}
else
#endif /* ENABLED (JERRY_ESNEXT) */
{
range_length = lit_unicode_character_case_range_lengths[conv_counter];
start_point = lit_unicode_character_case_ranges[i];
}
if (start_point > character || character >= start_point + range_length) if (start_point > cp || cp >= start_point + range_length)
{ {
continue; continue;
} }
int char_dist = character - start_point; uint32_t char_dist = (uint32_t) cp - start_point;
int offset;
if (i % 2 == 0) if (i % 2 == 0)
{ {
output_buffer_p[0] = is_lowercase ? (ecma_char_t) (lit_character_case_ranges[i + 1] + char_dist) : character; if (!is_lowercase)
{
return cp;
}
offset = i + 1;
} }
else else
{ {
output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (lit_character_case_ranges[i - 1] + char_dist); if (is_lowercase)
{
return cp;
} }
return 1; offset = i - 1;
} }
#if ENABLED (JERRY_ESNEXT)
if (is_supplementary)
{
start_point = lit_unicode_character_case_ranges_sup[offset];
}
else
#endif /* ENABLED (JERRY_ESNEXT) */
{
start_point = lit_unicode_character_case_ranges[offset];
}
return (lit_code_point_t) (start_point + char_dist);
}
/* Note: After this point based on the latest unicode standard(13.0.0.6) no conversion characters are
defined for supplementary planes */
#if ENABLED (JERRY_ESNEXT)
if (is_supplementary)
{
return cp;
}
#endif /* ENABLED (JERRY_ESNEXT) */
/* 2, Check if the specified character is part of the character_pair_ranges table. */ /* 2, Check if the specified character is part of the character_pair_ranges table. */
int bottom = 0; int bottom = 0;
int top = NUM_OF_ELEMENTS (lit_character_pair_ranges) - 1; int top = NUM_OF_ELEMENTS (lit_unicode_character_pair_ranges) - 1;
while (bottom <= top) while (bottom <= top)
{ {
int middle = (bottom + top) / 2; int middle = (bottom + top) / 2;
ecma_char_t current_sp = lit_character_pair_ranges[middle]; lit_code_point_t current_sp = lit_unicode_character_pair_ranges[middle];
if (current_sp <= character && character < current_sp + lit_character_pair_range_lengths[middle]) if (current_sp <= cp && cp < current_sp + lit_unicode_character_pair_range_lengths[middle])
{ {
int char_dist = character - current_sp; uint32_t char_dist = (uint32_t) (cp - current_sp);
if ((character - current_sp) % 2 == 0) if ((cp - current_sp) % 2 == 0)
{ {
output_buffer_p[0] = is_lowercase ? (ecma_char_t) (current_sp + char_dist + 1) : character; return is_lowercase ? (lit_code_point_t) (current_sp + char_dist + 1) : cp;
}
else
{
output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (current_sp + char_dist - 1);
} }
return 1; return is_lowercase ? cp : (lit_code_point_t) (current_sp + char_dist - 1);
} }
if (character > current_sp) if (cp > current_sp)
{ {
bottom = middle + 1; bottom = middle + 1;
} }
@@ -596,39 +665,35 @@ search_in_bidirectional_conversion_tables (ecma_char_t character, /**< co
} }
/* 3, Check if the specified character is part of the character_pairs table. */ /* 3, Check if the specified character is part of the character_pairs table. */
int number_of_character_pairs = NUM_OF_ELEMENTS (lit_character_pairs); int number_of_character_pairs = NUM_OF_ELEMENTS (lit_unicode_character_pairs);
for (int i = 0; i < number_of_character_pairs; i++) for (int i = 0; i < number_of_character_pairs; i++)
{ {
if (character != lit_character_pairs[i]) if (cp != lit_unicode_character_pairs[i])
{ {
continue; continue;
} }
if (i % 2 == 0) if (i % 2 == 0)
{ {
output_buffer_p[0] = is_lowercase ? lit_character_pairs[i + 1] : character; return is_lowercase ? lit_unicode_character_pairs[i + 1] : cp;
}
else
{
output_buffer_p[0] = is_lowercase ? character : lit_character_pairs[i - 1];
} }
return 1; return is_lowercase ? cp : lit_unicode_character_pairs[i - 1];
} }
return 0; return LIT_INVALID_CP;
} /* search_in_bidirectional_conversion_tables */ } /* lit_search_in_bidirectional_conversion_tables */
/** /**
* Check if the specified character is in the given conversion table. * Check if the specified character is in the given conversion table.
* *
* @return the mapped character sequence of an ecma character, if it's in the table. * @return LIT_MULTIPLE_CU if the converted character consists of more than a single code unit
* 0 - otherwise. * converted code point - otherwise
*/ */
static ecma_length_t static lit_code_point_t
search_in_conversion_table (ecma_char_t character, /**< code unit */ lit_search_in_conversion_table (ecma_char_t character, /**< code unit */
ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */ ecma_stringbuilder_t *builder_p, /**< string builder */
const ecma_char_t *array, /**< array */ const ecma_char_t *array, /**< array */
const uint8_t *counters) /**< case_values counter */ const uint8_t *counters) /**< case_values counter */
{ {
@@ -653,28 +718,21 @@ search_in_conversion_table (ecma_char_t character, /**< code unit */
if (current == character) if (current == character)
{ {
ecma_length_t char_sequence = 1; if (builder_p != NULL)
{
ecma_stringbuilder_append_char (builder_p, array[middle + 1]);
switch (size_of_case_value) if (size_of_case_value > 1)
{ {
case 3: ecma_stringbuilder_append_char (builder_p, array[middle + 2]);
{
output_buffer_p[2] = array[middle + 3];
char_sequence++;
/* FALLTHRU */
} }
case 2: if (size_of_case_value > 2)
{ {
output_buffer_p[1] = array[middle + 2]; ecma_stringbuilder_append_char (builder_p, array[middle + 3]);
char_sequence++;
/* FALLTHRU */
}
default:
{
output_buffer_p[0] = array[middle + 1];
return char_sequence;
} }
} }
return size_of_case_value == 1 ? array[middle + 1]: LIT_MULTIPLE_CU;
} }
if (character < current) if (character < current)
@@ -688,127 +746,151 @@ search_in_conversion_table (ecma_char_t character, /**< code unit */
} }
} }
return 0; if (builder_p != NULL)
} /* search_in_conversion_table */ {
ecma_stringbuilder_append_char (builder_p, character);
}
return (lit_code_point_t) character;
} /* lit_search_in_conversion_table */
#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */ #endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
/** /**
* Returns the lowercase character sequence of an ecma character. * Append the converted lowercase codeunit sequence of a given codepoint into the stringbuilder if it is present.
* *
* Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters. * @return LIT_MULTIPLE_CU if the converted codepoint consists of more than a single code unit
* * converted code point - otherwise
* @return the length of the lowercase character sequence
* which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
*/ */
ecma_length_t lit_code_point_t
lit_char_to_lower_case (ecma_char_t character, /**< input character value */ lit_char_to_lower_case (lit_code_point_t cp, /**< code point */
ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */ ecma_stringbuilder_t *builder_p) /**< string builder */
ecma_length_t buffer_size) /**< buffer size */
{ {
JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH); if (cp >= LIT_CHAR_UPPERCASE_A && cp <= LIT_CHAR_UPPERCASE_Z)
if (character >= LIT_CHAR_UPPERCASE_A && character <= LIT_CHAR_UPPERCASE_Z)
{ {
output_buffer_p[0] = (ecma_char_t) (character + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A)); lit_utf8_byte_t lowercase_char = (lit_utf8_byte_t) (cp + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
return 1;
if (builder_p != NULL)
{
ecma_stringbuilder_append_byte (builder_p, lowercase_char);
}
return lowercase_char;
} }
#if ENABLED (JERRY_UNICODE_CASE_CONVERSION) #if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
lit_code_point_t lowercase_cp = lit_search_in_bidirectional_conversion_tables (cp, true);
ecma_length_t lowercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, true); if (lowercase_cp != LIT_INVALID_CP)
if (lowercase_sequence != 0)
{ {
return lowercase_sequence; if (builder_p != NULL)
{
ecma_stringbuilder_append_codepoint (builder_p, lowercase_cp);
} }
int num_of_lowercase_ranges = NUM_OF_ELEMENTS (lit_lower_case_ranges); return lowercase_cp;
}
JERRY_ASSERT (cp < LIT_UTF8_4_BYTE_CODE_POINT_MIN);
int num_of_lowercase_ranges = NUM_OF_ELEMENTS (lit_unicode_lower_case_ranges);
for (int i = 0, j = 0; i < num_of_lowercase_ranges; i += 2, j++) for (int i = 0, j = 0; i < num_of_lowercase_ranges; i += 2, j++)
{ {
int range_length = lit_lower_case_range_lengths[j] - 1; JERRY_ASSERT (lit_unicode_lower_case_range_lengths[j] > 0);
ecma_char_t start_point = lit_lower_case_ranges[i]; uint32_t range_length = (uint32_t) (lit_unicode_lower_case_range_lengths[j] - 1);
lit_code_point_t start_point = lit_unicode_lower_case_ranges[i];
if (start_point <= character && character <= start_point + range_length) if (start_point <= cp && cp <= start_point + range_length)
{ {
output_buffer_p[0] = (ecma_char_t) (lit_lower_case_ranges[i + 1] + (character - start_point)); lowercase_cp = lit_unicode_lower_case_ranges[i + 1] + (cp - start_point);
return 1; if (builder_p != NULL)
}
}
lowercase_sequence = search_in_conversion_table (character,
output_buffer_p,
lit_lower_case_conversions,
lit_lower_case_conversion_counters);
if (lowercase_sequence != 0)
{ {
return lowercase_sequence; ecma_stringbuilder_append_codepoint (builder_p, lowercase_cp);
} }
return lowercase_cp;
}
}
return lit_search_in_conversion_table ((ecma_char_t) cp,
builder_p,
lit_unicode_lower_case_conversions,
lit_unicode_lower_case_conversion_counters);
#else /* !ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
if (builder_p != NULL)
{
ecma_stringbuilder_append_codepoint (builder_p, cp);
}
return cp;
#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */ #endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
output_buffer_p[0] = character;
return 1;
} /* lit_char_to_lower_case */ } /* lit_char_to_lower_case */
/** /**
* Returns the uppercase character sequence of an ecma character. * Append the converted uppercase codeunit sequence of a given codepoint into the stringbuilder if it is present.
* *
* Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters. * @return LIT_MULTIPLE_CU if the converted codepoint consists of more than a single code unit
* * converted code point - otherwise
* @return the length of the uppercase character sequence
* which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
*/ */
ecma_length_t lit_code_point_t
lit_char_to_upper_case (ecma_char_t character, /**< input character value */ lit_char_to_upper_case (lit_code_point_t cp, /**< code point */
ecma_char_t *output_buffer_p, /**< buffer for the result characters */ ecma_stringbuilder_t *builder_p) /**< string builder */
ecma_length_t buffer_size) /**< buffer size */
{ {
JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH); if (cp >= LIT_CHAR_LOWERCASE_A && cp <= LIT_CHAR_LOWERCASE_Z)
if (character >= LIT_CHAR_LOWERCASE_A && character <= LIT_CHAR_LOWERCASE_Z)
{ {
output_buffer_p[0] = (ecma_char_t) (character - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A)); lit_utf8_byte_t uppercase_char = (lit_utf8_byte_t) (cp - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
return 1;
if (builder_p != NULL)
{
ecma_stringbuilder_append_byte (builder_p, uppercase_char);
}
return uppercase_char;
} }
#if ENABLED (JERRY_UNICODE_CASE_CONVERSION) #if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
lit_code_point_t uppercase_cp = lit_search_in_bidirectional_conversion_tables (cp, false);
ecma_length_t uppercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, false); if (uppercase_cp != LIT_INVALID_CP)
if (uppercase_sequence != 0)
{ {
return uppercase_sequence; if (builder_p != NULL)
{
ecma_stringbuilder_append_codepoint (builder_p, uppercase_cp);
} }
int num_of_upper_case_special_ranges = NUM_OF_ELEMENTS (lit_upper_case_special_ranges); return uppercase_cp;
}
int num_of_upper_case_special_ranges = NUM_OF_ELEMENTS (lit_unicode_upper_case_special_ranges);
for (int i = 0, j = 0; i < num_of_upper_case_special_ranges; i += 3, j++) for (int i = 0, j = 0; i < num_of_upper_case_special_ranges; i += 3, j++)
{ {
int range_length = lit_upper_case_special_range_lengths[j]; uint32_t range_length = lit_unicode_upper_case_special_range_lengths[j];
ecma_char_t start_point = lit_upper_case_special_ranges[i]; ecma_char_t start_point = lit_unicode_upper_case_special_ranges[i];
if (start_point <= character && character <= start_point + range_length) if (start_point <= cp && cp <= start_point + range_length)
{ {
output_buffer_p[0] = (ecma_char_t) (lit_upper_case_special_ranges[i + 1] + (character - start_point)); if (builder_p != NULL)
output_buffer_p[1] = (ecma_char_t) (lit_upper_case_special_ranges[i + 2]);
return 2;
}
}
uppercase_sequence = search_in_conversion_table (character,
output_buffer_p,
lit_upper_case_conversions,
lit_upper_case_conversion_counters);
if (uppercase_sequence != 0)
{ {
return uppercase_sequence; uppercase_cp = lit_unicode_upper_case_special_ranges[i + 1] + (cp - start_point);
ecma_stringbuilder_append_codepoint (builder_p, uppercase_cp);
ecma_stringbuilder_append_codepoint (builder_p, lit_unicode_upper_case_special_ranges[i + 2]);
} }
return LIT_MULTIPLE_CU;
}
}
return lit_search_in_conversion_table ((ecma_char_t) cp,
builder_p,
lit_unicode_upper_case_conversions,
lit_unicode_upper_case_conversion_counters);
#else /* !ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
if (builder_p != NULL)
{
ecma_stringbuilder_append_codepoint (builder_p, cp);
}
return cp;
#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */ #endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
output_buffer_p[0] = character;
return 1;
} /* lit_char_to_upper_case */ } /* lit_char_to_upper_case */
+12 -7
View File
@@ -18,6 +18,16 @@
#include "lit-globals.h" #include "lit-globals.h"
/**
* Invalid character code point
*/
#define LIT_INVALID_CP 0xFFFFFFFF
/**
* Result of lit_char_to_lower_case/lit_char_to_upper_case consist more than of a single code unit
*/
#define LIT_MULTIPLE_CU 0xFFFFFFFE
/* /*
* Format control characters (ECMA-262 v5, Table 1) * Format control characters (ECMA-262 v5, Table 1)
*/ */
@@ -234,12 +244,7 @@ bool lit_char_is_word_char (lit_code_point_t c);
* Utility functions for uppercasing / lowercasing * Utility functions for uppercasing / lowercasing
*/ */
/** lit_code_point_t lit_char_to_lower_case (lit_code_point_t cp, ecma_stringbuilder_t *builder_p);
* Minimum buffer size for lit_char_to_lower_case / lit_char_to_upper_case functions. lit_code_point_t lit_char_to_upper_case (lit_code_point_t cp, ecma_stringbuilder_t *builder_p);
*/
#define LIT_MAXIMUM_OTHER_CASE_LENGTH (3)
ecma_length_t lit_char_to_lower_case (ecma_char_t character, ecma_char_t *output_buffer_p, ecma_length_t buffer_size);
ecma_length_t lit_char_to_upper_case (ecma_char_t character, ecma_char_t *output_buffer_p, ecma_length_t buffer_size);
#endif /* !LIT_CHAR_HELPERS_H */ #endif /* !LIT_CHAR_HELPERS_H */
@@ -0,0 +1,30 @@
/* Copyright JS Foundation and other contributors, http://js.foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* This file is automatically generated by the gen-unicode.py script
* from UnicodeData.txt and SpecialCasing.txt files. Do not edit! */
/* Contains start points of character case ranges (these are bidirectional conversions). */
static const uint32_t lit_unicode_character_case_ranges_sup[] JERRY_ATTR_CONST_DATA =
{
0x010400, 0x010428, 0x0104b0, 0x0104d8, 0x010c80, 0x010cc0, 0x0118a0, 0x0118c0, 0x016e40, 0x016e60,
0x01e900, 0x01e922
};
/* Interval lengths of start points in `character_case_ranges` table. */
static const uint16_t lit_unicode_character_case_range_lengths_sup[] JERRY_ATTR_CONST_DATA =
{
0x000028, 0x000024, 0x000033, 0x000020, 0x000020, 0x000022
};
+14 -14
View File
@@ -14,10 +14,10 @@
*/ */
/* This file is automatically generated by the gen-unicode.py script /* This file is automatically generated by the gen-unicode.py script
* from UnicodeData-13.0.0d6.txt and SpecialCasing-13.0.0d1.txt files. Do not edit! */ * from UnicodeData.txt and SpecialCasing.txt files. Do not edit! */
/* Contains start points of character case ranges (these are bidirectional conversions). */ /* Contains start points of character case ranges (these are bidirectional conversions). */
static const uint16_t lit_character_case_ranges[] JERRY_ATTR_CONST_DATA = static const uint16_t lit_unicode_character_case_ranges[] JERRY_ATTR_CONST_DATA =
{ {
0x00c0, 0x00e0, 0x00d8, 0x00f8, 0x0189, 0x0256, 0x01b1, 0x028a, 0x0388, 0x03ad, 0x00c0, 0x00e0, 0x00d8, 0x00f8, 0x0189, 0x0256, 0x01b1, 0x028a, 0x0388, 0x03ad,
0x038e, 0x03cd, 0x0391, 0x03b1, 0x03a3, 0x03c3, 0x03fd, 0x037b, 0x0400, 0x0450, 0x038e, 0x03cd, 0x0391, 0x03b1, 0x03a3, 0x03c3, 0x03fd, 0x037b, 0x0400, 0x0450,
@@ -30,7 +30,7 @@ static const uint16_t lit_character_case_ranges[] JERRY_ATTR_CONST_DATA =
}; };
/* Interval lengths of start points in `character_case_ranges` table. */ /* Interval lengths of start points in `character_case_ranges` table. */
static const uint8_t lit_character_case_range_lengths[] JERRY_ATTR_CONST_DATA = static const uint8_t lit_unicode_character_case_range_lengths[] JERRY_ATTR_CONST_DATA =
{ {
0x0017, 0x0007, 0x0002, 0x0002, 0x0003, 0x0002, 0x0011, 0x0009, 0x0003, 0x0010, 0x0017, 0x0007, 0x0002, 0x0002, 0x0003, 0x0002, 0x0011, 0x0009, 0x0003, 0x0010,
0x0020, 0x0026, 0x0026, 0x0050, 0x0006, 0x002b, 0x0003, 0x0008, 0x0006, 0x0008, 0x0020, 0x0026, 0x0026, 0x0050, 0x0006, 0x002b, 0x0003, 0x0008, 0x0006, 0x0008,
@@ -39,7 +39,7 @@ static const uint8_t lit_character_case_range_lengths[] JERRY_ATTR_CONST_DATA =
}; };
/* Contains the start points of bidirectional conversion ranges. */ /* Contains the start points of bidirectional conversion ranges. */
static const uint16_t lit_character_pair_ranges[] JERRY_ATTR_CONST_DATA = static const uint16_t lit_unicode_character_pair_ranges[] JERRY_ATTR_CONST_DATA =
{ {
0x0100, 0x0132, 0x0139, 0x014a, 0x0179, 0x0182, 0x0187, 0x018b, 0x0191, 0x0198, 0x0100, 0x0132, 0x0139, 0x014a, 0x0179, 0x0182, 0x0187, 0x018b, 0x0191, 0x0198,
0x01a0, 0x01a7, 0x01ac, 0x01af, 0x01b3, 0x01b8, 0x01bc, 0x01cd, 0x01de, 0x01f4, 0x01a0, 0x01a7, 0x01ac, 0x01af, 0x01b3, 0x01b8, 0x01bc, 0x01cd, 0x01de, 0x01f4,
@@ -50,7 +50,7 @@ static const uint16_t lit_character_pair_ranges[] JERRY_ATTR_CONST_DATA =
}; };
/* Interval lengths of start points in `character_pair_ranges` table. */ /* Interval lengths of start points in `character_pair_ranges` table. */
static const uint8_t lit_character_pair_range_lengths[] JERRY_ATTR_CONST_DATA = static const uint8_t lit_unicode_character_pair_range_lengths[] JERRY_ATTR_CONST_DATA =
{ {
0x0030, 0x0006, 0x0010, 0x002e, 0x0006, 0x0004, 0x0002, 0x0002, 0x0002, 0x0002, 0x0030, 0x0006, 0x0010, 0x002e, 0x0006, 0x0004, 0x0002, 0x0002, 0x0002, 0x0002,
0x0006, 0x0002, 0x0002, 0x0002, 0x0004, 0x0002, 0x0002, 0x0010, 0x0012, 0x0002, 0x0006, 0x0002, 0x0002, 0x0002, 0x0004, 0x0002, 0x0002, 0x0010, 0x0012, 0x0002,
@@ -61,7 +61,7 @@ static const uint8_t lit_character_pair_range_lengths[] JERRY_ATTR_CONST_DATA =
}; };
/* Contains lower/upper case bidirectional conversion pairs. */ /* Contains lower/upper case bidirectional conversion pairs. */
static const uint16_t lit_character_pairs[] JERRY_ATTR_CONST_DATA = static const uint16_t lit_unicode_character_pairs[] JERRY_ATTR_CONST_DATA =
{ {
0x0178, 0x00ff, 0x0181, 0x0253, 0x0186, 0x0254, 0x018e, 0x01dd, 0x018f, 0x0259, 0x0178, 0x00ff, 0x0181, 0x0253, 0x0186, 0x0254, 0x018e, 0x01dd, 0x018f, 0x0259,
0x0190, 0x025b, 0x0193, 0x0260, 0x0194, 0x0263, 0x0196, 0x0269, 0x0197, 0x0268, 0x0190, 0x025b, 0x0193, 0x0260, 0x0194, 0x0263, 0x0196, 0x0269, 0x0197, 0x0268,
@@ -81,20 +81,20 @@ static const uint16_t lit_character_pairs[] JERRY_ATTR_CONST_DATA =
/* Contains start points of one-to-two uppercase ranges where the second character /* Contains start points of one-to-two uppercase ranges where the second character
* is always the same. * is always the same.
*/ */
static const uint16_t lit_upper_case_special_ranges[] JERRY_ATTR_CONST_DATA = static const uint16_t lit_unicode_upper_case_special_ranges[] JERRY_ATTR_CONST_DATA =
{ {
0x1f80, 0x1f08, 0x0399, 0x1f88, 0x1f08, 0x0399, 0x1f90, 0x1f28, 0x0399, 0x1f98, 0x1f80, 0x1f08, 0x0399, 0x1f88, 0x1f08, 0x0399, 0x1f90, 0x1f28, 0x0399, 0x1f98,
0x1f28, 0x0399, 0x1fa0, 0x1f68, 0x0399, 0x1fa8, 0x1f68, 0x0399 0x1f28, 0x0399, 0x1fa0, 0x1f68, 0x0399, 0x1fa8, 0x1f68, 0x0399
}; };
/* Interval lengths for start points in `upper_case_special_ranges` table. */ /* Interval lengths for start points in `upper_case_special_ranges` table. */
static const uint8_t lit_upper_case_special_range_lengths[] JERRY_ATTR_CONST_DATA = static const uint8_t lit_unicode_upper_case_special_range_lengths[] JERRY_ATTR_CONST_DATA =
{ {
0x0007, 0x0007, 0x0007, 0x0007, 0x0007, 0x0007 0x0007, 0x0007, 0x0007, 0x0007, 0x0007, 0x0007
}; };
/* Contains start points of lowercase ranges. */ /* Contains start points of lowercase ranges. */
static const uint16_t lit_lower_case_ranges[] JERRY_ATTR_CONST_DATA = static const uint16_t lit_unicode_lower_case_ranges[] JERRY_ATTR_CONST_DATA =
{ {
0x1e96, 0x1e96, 0x1f80, 0x1f80, 0x1f88, 0x1f80, 0x1f90, 0x1f90, 0x1f98, 0x1f90, 0x1e96, 0x1e96, 0x1f80, 0x1f80, 0x1f88, 0x1f80, 0x1f90, 0x1f90, 0x1f98, 0x1f90,
0x1fa0, 0x1fa0, 0x1fa8, 0x1fa0, 0x1fb2, 0x1fb2, 0x1fb6, 0x1fb6, 0x1fc2, 0x1fc2, 0x1fa0, 0x1fa0, 0x1fa8, 0x1fa0, 0x1fb2, 0x1fb2, 0x1fb6, 0x1fb6, 0x1fc2, 0x1fc2,
@@ -103,14 +103,14 @@ static const uint16_t lit_lower_case_ranges[] JERRY_ATTR_CONST_DATA =
}; };
/* Interval lengths for start points in `lower_case_ranges` table. */ /* Interval lengths for start points in `lower_case_ranges` table. */
static const uint8_t lit_lower_case_range_lengths[] JERRY_ATTR_CONST_DATA = static const uint8_t lit_unicode_lower_case_range_lengths[] JERRY_ATTR_CONST_DATA =
{ {
0x0005, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0003, 0x0002, 0x0003, 0x0005, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0003, 0x0002, 0x0003,
0x0002, 0x0002, 0x0002, 0x0003, 0x0002, 0x0003, 0x0002, 0x0007, 0x0005 0x0002, 0x0002, 0x0002, 0x0003, 0x0002, 0x0003, 0x0002, 0x0007, 0x0005
}; };
/* The remaining lowercase conversions. The lowercase variant can be one-to-three character long. */ /* The remaining lowercase conversions. The lowercase variant can be one-to-three character long. */
static const uint16_t lit_lower_case_conversions[] JERRY_ATTR_CONST_DATA = static const uint16_t lit_unicode_lower_case_conversions[] JERRY_ATTR_CONST_DATA =
{ {
0x00df, 0x00df, 0x0149, 0x0149, 0x01c5, 0x01c6, 0x01c8, 0x01c9, 0x01cb, 0x01cc, 0x00df, 0x00df, 0x0149, 0x0149, 0x01c5, 0x01c6, 0x01c8, 0x01c9, 0x01cb, 0x01cc,
0x01f0, 0x01f0, 0x01f2, 0x01f3, 0x0390, 0x0390, 0x03b0, 0x03b0, 0x03f4, 0x03b8, 0x01f0, 0x01f0, 0x01f2, 0x01f3, 0x0390, 0x0390, 0x03b0, 0x03b0, 0x03f4, 0x03b8,
@@ -120,13 +120,13 @@ static const uint16_t lit_lower_case_conversions[] JERRY_ATTR_CONST_DATA =
}; };
/* Number of one-to-one, one-to-two, and one-to-three lowercase conversions. */ /* Number of one-to-one, one-to-two, and one-to-three lowercase conversions. */
static const uint8_t lit_lower_case_conversion_counters[] JERRY_ATTR_CONST_DATA = static const uint8_t lit_unicode_lower_case_conversion_counters[] JERRY_ATTR_CONST_DATA =
{ {
0x0016, 0x0001, 0x0000 0x0016, 0x0001, 0x0000
}; };
/* The remaining uppercase conversions. The uppercase variant can be one-to-three character long. */ /* The remaining uppercase conversions. The uppercase variant can be one-to-three character long. */
static const uint16_t lit_upper_case_conversions[] JERRY_ATTR_CONST_DATA = static const uint16_t lit_unicode_upper_case_conversions[] JERRY_ATTR_CONST_DATA =
{ {
0x00b5, 0x039c, 0x0130, 0x0130, 0x0131, 0x0049, 0x017f, 0x0053, 0x01c5, 0x01c4, 0x00b5, 0x039c, 0x0130, 0x0130, 0x0131, 0x0049, 0x017f, 0x0053, 0x01c5, 0x01c4,
0x01c8, 0x01c7, 0x01cb, 0x01ca, 0x01f2, 0x01f1, 0x0345, 0x0399, 0x03c2, 0x03a3, 0x01c8, 0x01c7, 0x01cb, 0x01ca, 0x01f2, 0x01f1, 0x0345, 0x0399, 0x03c2, 0x03a3,
@@ -157,7 +157,7 @@ static const uint16_t lit_upper_case_conversions[] JERRY_ATTR_CONST_DATA =
}; };
/* Number of one-to-one, one-to-two, and one-to-three uppercase conversions. */ /* Number of one-to-one, one-to-two, and one-to-three uppercase conversions. */
static const uint8_t lit_upper_case_conversion_counters[] JERRY_ATTR_CONST_DATA = static const uint8_t lit_unicode_upper_case_conversion_counters[] JERRY_ATTR_CONST_DATA =
{ {
0x001c, 0x002c, 0x0010 0x001c, 0x002c, 0x0010
}; };
+129
View File
@@ -0,0 +1,129 @@
/* Copyright JS Foundation and other contributors, http://js.foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* This file is automatically generated by the gen-unicode.py script
* from DerivedCoreProperties.txt. Do not edit! */
/**
* Character interval starting points for ID_Start.
*/
static const uint32_t lit_unicode_id_start_interval_starts_sup[] JERRY_ATTR_CONST_DATA =
{
0x010000, 0x01000d, 0x010028, 0x01003c, 0x01003f, 0x010050, 0x010080, 0x010140, 0x010280, 0x0102a0,
0x010300, 0x01032d, 0x010350, 0x010380, 0x0103a0, 0x0103c8, 0x0103d1, 0x010400, 0x0104b0, 0x0104d8,
0x010500, 0x010530, 0x010600, 0x010740, 0x010760, 0x010800, 0x01080a, 0x010837, 0x01083f, 0x010860,
0x010880, 0x0108e0, 0x0108f4, 0x010900, 0x010920, 0x010980, 0x0109be, 0x010a10, 0x010a15, 0x010a19,
0x010a60, 0x010a80, 0x010ac0, 0x010ac9, 0x010b00, 0x010b40, 0x010b60, 0x010b80, 0x010c00, 0x010c80,
0x010cc0, 0x010d00, 0x010e80, 0x010eb0, 0x010f00, 0x010f30, 0x010fb0, 0x010fe0, 0x011003, 0x011083,
0x0110d0, 0x011103, 0x011150, 0x011183, 0x0111c1, 0x011200, 0x011213, 0x011280, 0x01128a, 0x01128f,
0x01129f, 0x0112b0, 0x011305, 0x01130f, 0x011313, 0x01132a, 0x011332, 0x011335, 0x01135d, 0x011400,
0x011447, 0x01145f, 0x011480, 0x0114c4, 0x011580, 0x0115d8, 0x011600, 0x011680, 0x011700, 0x011800,
0x0118a0, 0x0118ff, 0x01190c, 0x011915, 0x011918, 0x0119a0, 0x0119aa, 0x011a0b, 0x011a5c, 0x011ac0,
0x011c00, 0x011c0a, 0x011c72, 0x011d00, 0x011d08, 0x011d0b, 0x011d60, 0x011d67, 0x011d6a, 0x011ee0,
0x012000, 0x012400, 0x012480, 0x013000, 0x014400, 0x016800, 0x016a40, 0x016ad0, 0x016b00, 0x016b40,
0x016b63, 0x016b7d, 0x016e40, 0x016f00, 0x016f93, 0x016fe0, 0x017000, 0x018800, 0x018d00, 0x01b000,
0x01b150, 0x01b164, 0x01b170, 0x01bc00, 0x01bc70, 0x01bc80, 0x01bc90, 0x01d400, 0x01d456, 0x01d49e,
0x01d4a5, 0x01d4a9, 0x01d4ae, 0x01d4bd, 0x01d4c5, 0x01d507, 0x01d50d, 0x01d516, 0x01d51e, 0x01d53b,
0x01d540, 0x01d54a, 0x01d552, 0x01d6a8, 0x01d6c2, 0x01d6dc, 0x01d6fc, 0x01d716, 0x01d736, 0x01d750,
0x01d770, 0x01d78a, 0x01d7aa, 0x01d7c4, 0x01e100, 0x01e137, 0x01e2c0, 0x01e800, 0x01e900, 0x01ee00,
0x01ee05, 0x01ee21, 0x01ee29, 0x01ee34, 0x01ee4d, 0x01ee51, 0x01ee61, 0x01ee67, 0x01ee6c, 0x01ee74,
0x01ee79, 0x01ee80, 0x01ee8b, 0x01eea1, 0x01eea5, 0x01eeab, 0x020000, 0x02a700, 0x02b740, 0x02b820,
0x02ceb0, 0x02f800, 0x030000
};
/**
* Character interval lengths for ID_Start.
*/
static const uint16_t lit_unicode_id_start_interval_lengths_sup[] JERRY_ATTR_CONST_DATA =
{
0x00000b, 0x000019, 0x000012, 0x000001, 0x00000e, 0x00000d, 0x00007a, 0x000034, 0x00001c, 0x000030,
0x00001f, 0x00001d, 0x000025, 0x00001d, 0x000023, 0x000007, 0x000004, 0x00009d, 0x000023, 0x000023,
0x000027, 0x000033, 0x000136, 0x000015, 0x000007, 0x000005, 0x00002b, 0x000001, 0x000016, 0x000016,
0x00001e, 0x000012, 0x000001, 0x000015, 0x000019, 0x000037, 0x000001, 0x000003, 0x000002, 0x00001c,
0x00001c, 0x00001c, 0x000007, 0x00001b, 0x000035, 0x000015, 0x000012, 0x000011, 0x000048, 0x000032,
0x000032, 0x000023, 0x000029, 0x000001, 0x00001c, 0x000015, 0x000014, 0x000016, 0x000034, 0x00002c,
0x000018, 0x000023, 0x000022, 0x00002f, 0x000003, 0x000011, 0x000018, 0x000006, 0x000003, 0x00000e,
0x000009, 0x00002e, 0x000007, 0x000001, 0x000015, 0x000006, 0x000001, 0x000004, 0x000004, 0x000034,
0x000003, 0x000002, 0x00002f, 0x000001, 0x00002e, 0x000003, 0x00002f, 0x00002a, 0x00001a, 0x00002b,
0x00003f, 0x000007, 0x000007, 0x000001, 0x000017, 0x000007, 0x000026, 0x000027, 0x00002d, 0x000038,
0x000008, 0x000024, 0x00001d, 0x000006, 0x000001, 0x000025, 0x000005, 0x000001, 0x00001f, 0x000012,
0x000399, 0x00006e, 0x0000c3, 0x00042e, 0x000246, 0x000238, 0x00001e, 0x00001d, 0x00002f, 0x000003,
0x000014, 0x000012, 0x00003f, 0x00004a, 0x00000c, 0x000001, 0x0017f7, 0x0004d5, 0x000008, 0x00011e,
0x000002, 0x000003, 0x00018b, 0x00006a, 0x00000c, 0x000008, 0x000009, 0x000054, 0x000046, 0x000001,
0x000001, 0x000003, 0x00000b, 0x000006, 0x000040, 0x000003, 0x000007, 0x000006, 0x00001b, 0x000003,
0x000004, 0x000006, 0x000153, 0x000018, 0x000018, 0x00001e, 0x000018, 0x00001e, 0x000018, 0x00001e,
0x000018, 0x00001e, 0x000018, 0x000007, 0x00002c, 0x000006, 0x00002b, 0x0000c4, 0x000043, 0x000003,
0x00001a, 0x000001, 0x000009, 0x000003, 0x000002, 0x000001, 0x000001, 0x000003, 0x000006, 0x000003,
0x000003, 0x000009, 0x000010, 0x000002, 0x000004, 0x000010, 0x00a6dd, 0x001034, 0x0000dd, 0x001681,
0x001d30, 0x00021d, 0x00134a
};
/**
* Non-interval characters for ID_Start.
*/
static const uint32_t lit_unicode_id_start_chars_sup[] JERRY_ATTR_CONST_DATA =
{
0x010808, 0x01083c, 0x010a00, 0x010f27, 0x011144, 0x011147, 0x011176, 0x0111da, 0x0111dc, 0x011288,
0x01133d, 0x011350, 0x0114c7, 0x011644, 0x0116b8, 0x011909, 0x01193f, 0x011941, 0x0119e1, 0x0119e3,
0x011a00, 0x011a3a, 0x011a50, 0x011a9d, 0x011c40, 0x011d46, 0x011d98, 0x011fb0, 0x016f50, 0x016fe3,
0x01d4a2, 0x01d4bb, 0x01d546, 0x01e14e, 0x01e94b, 0x01ee24, 0x01ee27, 0x01ee39, 0x01ee3b, 0x01ee42,
0x01ee47, 0x01ee49, 0x01ee4b, 0x01ee54, 0x01ee57, 0x01ee59, 0x01ee5b, 0x01ee5d, 0x01ee5f, 0x01ee64,
0x01ee7e
};
/**
* Character interval starting points for ID_Continue.
*/
static const uint32_t lit_unicode_id_continue_interval_starts_sup[] JERRY_ATTR_CONST_DATA =
{
0x010376, 0x0104a0, 0x010a01, 0x010a05, 0x010a0c, 0x010a38, 0x010ae5, 0x010d24, 0x010d30, 0x010eab,
0x010f46, 0x011000, 0x011038, 0x011066, 0x01107f, 0x0110b0, 0x0110f0, 0x011100, 0x011127, 0x011136,
0x011145, 0x011180, 0x0111b3, 0x0111c9, 0x0111ce, 0x01122c, 0x0112df, 0x0112f0, 0x011300, 0x01133b,
0x01133e, 0x011347, 0x01134b, 0x011362, 0x011366, 0x011370, 0x011435, 0x011450, 0x0114b0, 0x0114d0,
0x0115af, 0x0115b8, 0x0115dc, 0x011630, 0x011650, 0x0116ab, 0x0116c0, 0x01171d, 0x011730, 0x01182c,
0x0118e0, 0x011930, 0x011937, 0x01193b, 0x011942, 0x011950, 0x0119d1, 0x0119da, 0x011a01, 0x011a33,
0x011a3b, 0x011a51, 0x011a8a, 0x011c2f, 0x011c38, 0x011c50, 0x011c92, 0x011ca9, 0x011d31, 0x011d3c,
0x011d3f, 0x011d50, 0x011d8a, 0x011d90, 0x011d93, 0x011da0, 0x011ef3, 0x016a60, 0x016af0, 0x016b30,
0x016b50, 0x016f51, 0x016f8f, 0x016ff0, 0x01bc9d, 0x01d165, 0x01d16d, 0x01d17b, 0x01d185, 0x01d1aa,
0x01d242, 0x01d7ce, 0x01da00, 0x01da3b, 0x01da9b, 0x01daa1, 0x01e000, 0x01e008, 0x01e01b, 0x01e023,
0x01e026, 0x01e130, 0x01e140, 0x01e2ec, 0x01e8d0, 0x01e944, 0x01e950, 0x01fbf0, 0x0e0100
};
/**
* Character interval lengths for ID_Continue.
*/
static const uint16_t lit_unicode_id_continue_interval_lengths_sup[] JERRY_ATTR_CONST_DATA =
{
0x000004, 0x000009, 0x000002, 0x000001, 0x000003, 0x000002, 0x000001, 0x000003, 0x000009, 0x000001,
0x00000a, 0x000002, 0x00000e, 0x000009, 0x000003, 0x00000a, 0x000009, 0x000002, 0x00000d, 0x000009,
0x000001, 0x000002, 0x00000d, 0x000003, 0x00000b, 0x00000b, 0x00000b, 0x000009, 0x000003, 0x000001,
0x000006, 0x000001, 0x000002, 0x000001, 0x000006, 0x000004, 0x000011, 0x000009, 0x000013, 0x000009,
0x000006, 0x000008, 0x000001, 0x000010, 0x000009, 0x00000c, 0x000009, 0x00000e, 0x000009, 0x00000e,
0x000009, 0x000005, 0x000001, 0x000003, 0x000001, 0x000009, 0x000006, 0x000006, 0x000009, 0x000006,
0x000003, 0x00000a, 0x00000f, 0x000007, 0x000007, 0x000009, 0x000015, 0x00000d, 0x000005, 0x000001,
0x000006, 0x000009, 0x000004, 0x000001, 0x000004, 0x000009, 0x000003, 0x000009, 0x000004, 0x000006,
0x000009, 0x000036, 0x000003, 0x000001, 0x000001, 0x000004, 0x000005, 0x000007, 0x000006, 0x000003,
0x000002, 0x000031, 0x000036, 0x000031, 0x000004, 0x00000e, 0x000006, 0x000010, 0x000006, 0x000001,
0x000004, 0x000006, 0x000009, 0x00000d, 0x000006, 0x000006, 0x000009, 0x000009, 0x0000ef
};
/**
* Non-interval characters for ID_Continue.
*/
static const uint32_t lit_unicode_id_continue_chars_sup[] JERRY_ATTR_CONST_DATA =
{
0x0101fd, 0x0102e0, 0x010a3f, 0x011173, 0x01123e, 0x011357, 0x01145e, 0x011940, 0x0119e4, 0x011a47,
0x011d3a, 0x011d47, 0x016f4f, 0x016fe4, 0x01da75, 0x01da84
};
+107 -106
View File
@@ -14,15 +14,12 @@
*/ */
/* This file is automatically generated by the gen-unicode.py script /* This file is automatically generated by the gen-unicode.py script
* from UnicodeData-13.0.0d6.txt. Do not edit! */ * from DerivedCoreProperties.txt. Do not edit! */
/** /**
* Character interval starting points for the unicode letters. * Character interval starting points for ID_Start.
*
* The characters covered by these intervals are from
* the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl
*/ */
static const uint16_t lit_unicode_letter_interval_sps[] JERRY_ATTR_CONST_DATA = static const uint16_t lit_unicode_id_start_interval_starts[] JERRY_ATTR_CONST_DATA =
{ {
0x00c0, 0x00d8, 0x00f8, 0x01f8, 0x02c6, 0x02e0, 0x0370, 0x0376, 0x037a, 0x0388, 0x00c0, 0x00d8, 0x00f8, 0x01f8, 0x02c6, 0x02e0, 0x0370, 0x0376, 0x037a, 0x0388,
0x038e, 0x03a3, 0x03f7, 0x048a, 0x0531, 0x0560, 0x05d0, 0x05ef, 0x0620, 0x066e, 0x038e, 0x03a3, 0x03f7, 0x048a, 0x0531, 0x0560, 0x05d0, 0x05ef, 0x0620, 0x066e,
@@ -39,46 +36,43 @@ static const uint16_t lit_unicode_letter_interval_sps[] JERRY_ATTR_CONST_DATA =
0x10fc, 0x11fc, 0x124a, 0x1250, 0x125a, 0x1260, 0x128a, 0x1290, 0x12b2, 0x12b8, 0x10fc, 0x11fc, 0x124a, 0x1250, 0x125a, 0x1260, 0x128a, 0x1290, 0x12b2, 0x12b8,
0x12c2, 0x12c8, 0x12d8, 0x1312, 0x1318, 0x1380, 0x13a0, 0x13f8, 0x1401, 0x1501, 0x12c2, 0x12c8, 0x12d8, 0x1312, 0x1318, 0x1380, 0x13a0, 0x13f8, 0x1401, 0x1501,
0x1601, 0x166f, 0x1681, 0x16a0, 0x16ee, 0x1700, 0x170e, 0x1720, 0x1740, 0x1760, 0x1601, 0x166f, 0x1681, 0x16a0, 0x16ee, 0x1700, 0x170e, 0x1720, 0x1740, 0x1760,
0x176e, 0x1780, 0x1820, 0x1880, 0x1887, 0x18b0, 0x1900, 0x1950, 0x1970, 0x1980, 0x176e, 0x1780, 0x1820, 0x1880, 0x18b0, 0x1900, 0x1950, 0x1970, 0x1980, 0x19b0,
0x19b0, 0x1a00, 0x1a20, 0x1b05, 0x1b45, 0x1b83, 0x1bae, 0x1bba, 0x1c00, 0x1c4d, 0x1a00, 0x1a20, 0x1b05, 0x1b45, 0x1b83, 0x1bae, 0x1bba, 0x1c00, 0x1c4d, 0x1c5a,
0x1c5a, 0x1c80, 0x1c90, 0x1cbd, 0x1ce9, 0x1cee, 0x1cf5, 0x1d00, 0x1e00, 0x1f00, 0x1c80, 0x1c90, 0x1cbd, 0x1ce9, 0x1cee, 0x1cf5, 0x1d00, 0x1e00, 0x1f00, 0x1f18,
0x1f18, 0x1f20, 0x1f48, 0x1f50, 0x1f5f, 0x1f80, 0x1fb6, 0x1fc2, 0x1fc6, 0x1fd0, 0x1f20, 0x1f48, 0x1f50, 0x1f5f, 0x1f80, 0x1fb6, 0x1fc2, 0x1fc6, 0x1fd0, 0x1fd6,
0x1fd6, 0x1fe0, 0x1ff2, 0x1ff6, 0x2090, 0x210a, 0x2119, 0x212a, 0x212f, 0x213c, 0x1fe0, 0x1ff2, 0x1ff6, 0x2090, 0x210a, 0x2118, 0x212a, 0x213c, 0x2145, 0x2160,
0x2145, 0x2160, 0x2c00, 0x2c30, 0x2c60, 0x2ceb, 0x2cf2, 0x2d00, 0x2d30, 0x2d80, 0x2c00, 0x2c30, 0x2c60, 0x2ceb, 0x2cf2, 0x2d00, 0x2d30, 0x2d80, 0x2da0, 0x2da8,
0x2da0, 0x2da8, 0x2db0, 0x2db8, 0x2dc0, 0x2dc8, 0x2dd0, 0x2dd8, 0x3005, 0x3021, 0x2db0, 0x2db8, 0x2dc0, 0x2dc8, 0x2dd0, 0x2dd8, 0x3005, 0x3021, 0x3031, 0x3038,
0x3031, 0x3038, 0x3041, 0x309d, 0x30a1, 0x30fc, 0x3105, 0x3131, 0x31a0, 0x31f0, 0x3041, 0x309b, 0x30a1, 0x30fc, 0x3105, 0x3131, 0x31a0, 0x31f0, 0x3400, 0x3500,
0x3400, 0x3500, 0x3600, 0x3700, 0x3800, 0x3900, 0x3a00, 0x3b00, 0x3c00, 0x3d00, 0x3600, 0x3700, 0x3800, 0x3900, 0x3a00, 0x3b00, 0x3c00, 0x3d00, 0x3e00, 0x3f00,
0x3e00, 0x3f00, 0x4000, 0x4100, 0x4200, 0x4300, 0x4400, 0x4500, 0x4600, 0x4700, 0x4000, 0x4100, 0x4200, 0x4300, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900,
0x4800, 0x4900, 0x4a00, 0x4b00, 0x4c00, 0x4d00, 0x4e00, 0x4f00, 0x5000, 0x5100, 0x4a00, 0x4b00, 0x4c00, 0x4d00, 0x4e00, 0x4f00, 0x5000, 0x5100, 0x5200, 0x5300,
0x5200, 0x5300, 0x5400, 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5400, 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
0x5c00, 0x5d00, 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600, 0x6700,
0x6600, 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00, 0x7000, 0x7100,
0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, 0x7900, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, 0x7900, 0x7a00, 0x7b00,
0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00, 0x8000, 0x8100, 0x8200, 0x8300, 0x7c00, 0x7d00, 0x7e00, 0x7f00, 0x8000, 0x8100, 0x8200, 0x8300, 0x8400, 0x8500,
0x8400, 0x8500, 0x8600, 0x8700, 0x8800, 0x8900, 0x8a00, 0x8b00, 0x8c00, 0x8d00, 0x8600, 0x8700, 0x8800, 0x8900, 0x8a00, 0x8b00, 0x8c00, 0x8d00, 0x8e00, 0x8f00,
0x8e00, 0x8f00, 0x9000, 0x9100, 0x9200, 0x9300, 0x9400, 0x9500, 0x9600, 0x9700, 0x9000, 0x9100, 0x9200, 0x9300, 0x9400, 0x9500, 0x9600, 0x9700, 0x9800, 0x9900,
0x9800, 0x9900, 0x9a00, 0x9b00, 0x9c00, 0x9d00, 0x9e00, 0x9f00, 0xa000, 0xa100, 0x9a00, 0x9b00, 0x9c00, 0x9d00, 0x9e00, 0x9f00, 0xa000, 0xa100, 0xa200, 0xa300,
0xa200, 0xa300, 0xa400, 0xa4d0, 0xa500, 0xa600, 0xa610, 0xa62a, 0xa640, 0xa67f, 0xa400, 0xa4d0, 0xa500, 0xa600, 0xa610, 0xa62a, 0xa640, 0xa67f, 0xa6a0, 0xa717,
0xa6a0, 0xa717, 0xa722, 0xa78b, 0xa7c2, 0xa7f5, 0xa803, 0xa807, 0xa80c, 0xa840, 0xa722, 0xa78b, 0xa7c2, 0xa7f5, 0xa803, 0xa807, 0xa80c, 0xa840, 0xa882, 0xa8f2,
0xa882, 0xa8f2, 0xa8fd, 0xa90a, 0xa930, 0xa960, 0xa984, 0xa9e0, 0xa9e6, 0xa9fa, 0xa8fd, 0xa90a, 0xa930, 0xa960, 0xa984, 0xa9e0, 0xa9e6, 0xa9fa, 0xaa00, 0xaa40,
0xaa00, 0xaa40, 0xaa44, 0xaa60, 0xaa7e, 0xaab5, 0xaab9, 0xaadb, 0xaae0, 0xaaf2, 0xaa44, 0xaa60, 0xaa7e, 0xaab5, 0xaab9, 0xaadb, 0xaae0, 0xaaf2, 0xab01, 0xab09,
0xab01, 0xab09, 0xab11, 0xab20, 0xab28, 0xab30, 0xab5c, 0xab70, 0xac00, 0xad00, 0xab11, 0xab20, 0xab28, 0xab30, 0xab5c, 0xab70, 0xac00, 0xad00, 0xae00, 0xaf00,
0xae00, 0xaf00, 0xb000, 0xb100, 0xb200, 0xb300, 0xb400, 0xb500, 0xb600, 0xb700, 0xb000, 0xb100, 0xb200, 0xb300, 0xb400, 0xb500, 0xb600, 0xb700, 0xb800, 0xb900,
0xb800, 0xb900, 0xba00, 0xbb00, 0xbc00, 0xbd00, 0xbe00, 0xbf00, 0xc000, 0xc100, 0xba00, 0xbb00, 0xbc00, 0xbd00, 0xbe00, 0xbf00, 0xc000, 0xc100, 0xc200, 0xc300,
0xc200, 0xc300, 0xc400, 0xc500, 0xc600, 0xc700, 0xc800, 0xc900, 0xca00, 0xcb00, 0xc400, 0xc500, 0xc600, 0xc700, 0xc800, 0xc900, 0xca00, 0xcb00, 0xcc00, 0xcd00,
0xcc00, 0xcd00, 0xce00, 0xcf00, 0xd000, 0xd100, 0xd200, 0xd300, 0xd400, 0xd500, 0xce00, 0xcf00, 0xd000, 0xd100, 0xd200, 0xd300, 0xd400, 0xd500, 0xd600, 0xd700,
0xd600, 0xd700, 0xd7b0, 0xd7cb, 0xf900, 0xfa00, 0xfa70, 0xfb00, 0xfb13, 0xfb1f, 0xd7b0, 0xd7cb, 0xf900, 0xfa00, 0xfa70, 0xfb00, 0xfb13, 0xfb1f, 0xfb2a, 0xfb38,
0xfb2a, 0xfb38, 0xfb40, 0xfb43, 0xfb46, 0xfbd3, 0xfcd3, 0xfd50, 0xfd92, 0xfdf0, 0xfb40, 0xfb43, 0xfb46, 0xfbd3, 0xfcd3, 0xfd50, 0xfd92, 0xfdf0, 0xfe70, 0xfe76,
0xfe70, 0xfe76, 0xff21, 0xff41, 0xff66, 0xffc2, 0xffca, 0xffd2, 0xffda 0xff21, 0xff41, 0xff66, 0xffc2, 0xffca, 0xffd2, 0xffda
}; };
/** /**
* Character lengths for the unicode letters. * Character interval lengths for ID_Start.
*
* The characters covered by these intervals are from
* the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl
*/ */
static const uint8_t lit_unicode_letter_interval_lengths[] JERRY_ATTR_CONST_DATA = static const uint8_t lit_unicode_id_start_interval_lengths[] JERRY_ATTR_CONST_DATA =
{ {
0x0016, 0x001e, 0x00ff, 0x00c9, 0x000b, 0x0004, 0x0004, 0x0001, 0x0003, 0x0002, 0x0016, 0x001e, 0x00ff, 0x00c9, 0x000b, 0x0004, 0x0004, 0x0001, 0x0003, 0x0002,
0x0013, 0x0052, 0x008a, 0x00a5, 0x0025, 0x0028, 0x001a, 0x0003, 0x002a, 0x0001, 0x0013, 0x0052, 0x008a, 0x00a5, 0x0025, 0x0028, 0x001a, 0x0003, 0x002a, 0x0001,
@@ -95,17 +89,17 @@ static const uint8_t lit_unicode_letter_interval_lengths[] JERRY_ATTR_CONST_DATA
0x00ff, 0x004c, 0x0003, 0x0006, 0x0003, 0x0028, 0x0003, 0x0020, 0x0003, 0x0006, 0x00ff, 0x004c, 0x0003, 0x0006, 0x0003, 0x0028, 0x0003, 0x0020, 0x0003, 0x0006,
0x0003, 0x000e, 0x0038, 0x0003, 0x0042, 0x000f, 0x0055, 0x0005, 0x00ff, 0x00ff, 0x0003, 0x000e, 0x0038, 0x0003, 0x0042, 0x000f, 0x0055, 0x0005, 0x00ff, 0x00ff,
0x006b, 0x0010, 0x0019, 0x004a, 0x000a, 0x000c, 0x0003, 0x0011, 0x0011, 0x000c, 0x006b, 0x0010, 0x0019, 0x004a, 0x000a, 0x000c, 0x0003, 0x0011, 0x0011, 0x000c,
0x0002, 0x0033, 0x0058, 0x0004, 0x0021, 0x0045, 0x001e, 0x001d, 0x0004, 0x002b, 0x0002, 0x0033, 0x0058, 0x0028, 0x0045, 0x001e, 0x001d, 0x0004, 0x002b, 0x0019,
0x0019, 0x0016, 0x0034, 0x002e, 0x0006, 0x001d, 0x0001, 0x002b, 0x0023, 0x0002, 0x0016, 0x0034, 0x002e, 0x0006, 0x001d, 0x0001, 0x002b, 0x0023, 0x0002, 0x0023,
0x0023, 0x0008, 0x002a, 0x0002, 0x0003, 0x0005, 0x0001, 0x00bf, 0x00ff, 0x0015, 0x0008, 0x002a, 0x0002, 0x0003, 0x0005, 0x0001, 0x00bf, 0x00ff, 0x0015, 0x0005,
0x0005, 0x0025, 0x0005, 0x0007, 0x001e, 0x0034, 0x0006, 0x0002, 0x0006, 0x0003, 0x0025, 0x0005, 0x0007, 0x001e, 0x0034, 0x0006, 0x0002, 0x0006, 0x0003, 0x0005,
0x0005, 0x000c, 0x0002, 0x0006, 0x000c, 0x0009, 0x0004, 0x0003, 0x000a, 0x0003, 0x000c, 0x0002, 0x0006, 0x000c, 0x0009, 0x0005, 0x000f, 0x0003, 0x0004, 0x0028,
0x0004, 0x0028, 0x002e, 0x002e, 0x0084, 0x0003, 0x0001, 0x0025, 0x0037, 0x0016, 0x002e, 0x002e, 0x0084, 0x0003, 0x0001, 0x0025, 0x0037, 0x0016, 0x0006, 0x0006,
0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0002, 0x0008, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0002, 0x0008, 0x0004, 0x0004,
0x0004, 0x0004, 0x0055, 0x0002, 0x0059, 0x0003, 0x002a, 0x005d, 0x001f, 0x000f, 0x0055, 0x0004, 0x0059, 0x0003, 0x002a, 0x005d, 0x001f, 0x000f, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00bf, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00bf, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
@@ -113,29 +107,25 @@ static const uint8_t lit_unicode_letter_interval_lengths[] JERRY_ATTR_CONST_DATA
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00fc, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00fc, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x008c, 0x002d, 0x00ff, 0x000c, 0x000f, 0x0001, 0x002e, 0x001e, 0x008c, 0x002d, 0x00ff, 0x000c, 0x000f, 0x0001, 0x002e, 0x001e, 0x004f, 0x0008,
0x004f, 0x0008, 0x0066, 0x0034, 0x0008, 0x000c, 0x0002, 0x0003, 0x0016, 0x0033, 0x0066, 0x0034, 0x0008, 0x000c, 0x0002, 0x0003, 0x0016, 0x0033, 0x0031, 0x0005,
0x0031, 0x0005, 0x0001, 0x001b, 0x0016, 0x001c, 0x002e, 0x0004, 0x0009, 0x0004, 0x0001, 0x001b, 0x0016, 0x001c, 0x002e, 0x0004, 0x0009, 0x0004, 0x0028, 0x0002,
0x0028, 0x0002, 0x0007, 0x0016, 0x0031, 0x0001, 0x0004, 0x0002, 0x000a, 0x0002, 0x0007, 0x0016, 0x0031, 0x0001, 0x0004, 0x0002, 0x000a, 0x0002, 0x0005, 0x0005,
0x0005, 0x0005, 0x0005, 0x0006, 0x0006, 0x002a, 0x000d, 0x0072, 0x00ff, 0x00ff, 0x0005, 0x0006, 0x0006, 0x002a, 0x000d, 0x0072, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00a3,
0x00ff, 0x00a3, 0x0016, 0x0030, 0x00ff, 0x006d, 0x0069, 0x0006, 0x0004, 0x0009, 0x0016, 0x0030, 0x00ff, 0x006d, 0x0069, 0x0006, 0x0004, 0x0009, 0x000c, 0x0004,
0x000c, 0x0004, 0x0001, 0x0001, 0x006b, 0x00ff, 0x006a, 0x003f, 0x0035, 0x000b, 0x0001, 0x0001, 0x006b, 0x00ff, 0x006a, 0x003f, 0x0035, 0x000b, 0x0004, 0x0086,
0x0004, 0x0086, 0x0019, 0x0019, 0x0058, 0x0005, 0x0005, 0x0005, 0x0002 0x0019, 0x0019, 0x0058, 0x0005, 0x0005, 0x0005, 0x0002
}; };
/** /**
* Those unicode letter characters that are not inside any of * Non-interval characters for ID_Start.
* the intervals specified in lit_unicode_letter_interval_sps array.
*
* The characters are from the following Unicode categories:
* Lu, Ll, Lt, Lm, Lo, Nl
*/ */
static const uint16_t lit_unicode_letter_chars[] JERRY_ATTR_CONST_DATA = static const uint16_t lit_unicode_id_start_chars[] JERRY_ATTR_CONST_DATA =
{ {
0x00aa, 0x00b5, 0x00ba, 0x02ec, 0x02ee, 0x037f, 0x0386, 0x038c, 0x0559, 0x06d5, 0x00aa, 0x00b5, 0x00ba, 0x02ec, 0x02ee, 0x037f, 0x0386, 0x038c, 0x0559, 0x06d5,
0x06ff, 0x0710, 0x07b1, 0x07fa, 0x081a, 0x0824, 0x0828, 0x093d, 0x0950, 0x09b2, 0x06ff, 0x0710, 0x07b1, 0x07fa, 0x081a, 0x0824, 0x0828, 0x093d, 0x0950, 0x09b2,
@@ -144,18 +134,13 @@ static const uint16_t lit_unicode_letter_chars[] JERRY_ATTR_CONST_DATA =
0x0ea5, 0x0ebd, 0x0ec6, 0x0f00, 0x103f, 0x1061, 0x108e, 0x10c7, 0x10cd, 0x1258, 0x0ea5, 0x0ebd, 0x0ec6, 0x0f00, 0x103f, 0x1061, 0x108e, 0x10c7, 0x10cd, 0x1258,
0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7, 0x1cfa, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7, 0x1cfa, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe,
0x2071, 0x207f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x214e, 0x2d27, 0x2071, 0x207f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x214e, 0x2d27,
0x2d2d, 0x2d6f, 0x2e2f, 0x3400, 0x4e00, 0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaac0, 0x2d2d, 0x2d6f, 0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaac0, 0xaac2, 0xfb1d, 0xfb3e
0xaac2, 0xac00, 0xfb1d, 0xfb3e
}; };
/** /**
* Character interval starting points for non-letter character * Character interval starting points for ID_Continue.
* that can be used as a non-first character of an identifier.
*
* The characters covered by these intervals are from
* the following Unicode categories: Nd, Mn, Mc, Pc
*/ */
static const uint16_t lit_unicode_non_letter_ident_part_interval_sps[] JERRY_ATTR_CONST_DATA = static const uint16_t lit_unicode_id_continue_interval_starts[] JERRY_ATTR_CONST_DATA =
{ {
0x0300, 0x0483, 0x0591, 0x05c1, 0x05c4, 0x0610, 0x064b, 0x06d6, 0x06df, 0x06e7, 0x0300, 0x0483, 0x0591, 0x05c1, 0x05c4, 0x0610, 0x064b, 0x06d6, 0x06df, 0x06e7,
0x06ea, 0x06f0, 0x0730, 0x07a6, 0x07c0, 0x07eb, 0x0816, 0x081b, 0x0825, 0x0829, 0x06ea, 0x06f0, 0x0730, 0x07a6, 0x07c0, 0x07eb, 0x0816, 0x081b, 0x0825, 0x0829,
@@ -167,8 +152,8 @@ static const uint16_t lit_unicode_non_letter_ident_part_interval_sps[] JERRY_ATT
0x0ce6, 0x0d00, 0x0d3b, 0x0d3e, 0x0d46, 0x0d4a, 0x0d62, 0x0d66, 0x0d81, 0x0dcf, 0x0ce6, 0x0d00, 0x0d3b, 0x0d3e, 0x0d46, 0x0d4a, 0x0d62, 0x0d66, 0x0d81, 0x0dcf,
0x0dd8, 0x0de6, 0x0df2, 0x0e34, 0x0e47, 0x0e50, 0x0eb4, 0x0ec8, 0x0ed0, 0x0f18, 0x0dd8, 0x0de6, 0x0df2, 0x0e34, 0x0e47, 0x0e50, 0x0eb4, 0x0ec8, 0x0ed0, 0x0f18,
0x0f20, 0x0f3e, 0x0f71, 0x0f86, 0x0f8d, 0x0f99, 0x102b, 0x1040, 0x1056, 0x105e, 0x0f20, 0x0f3e, 0x0f71, 0x0f86, 0x0f8d, 0x0f99, 0x102b, 0x1040, 0x1056, 0x105e,
0x1062, 0x1067, 0x1071, 0x1082, 0x108f, 0x135d, 0x1712, 0x1732, 0x1752, 0x1772, 0x1062, 0x1067, 0x1071, 0x1082, 0x108f, 0x135d, 0x1369, 0x1712, 0x1732, 0x1752,
0x17b4, 0x17e0, 0x180b, 0x1810, 0x1885, 0x1920, 0x1930, 0x1946, 0x19d0, 0x1a17, 0x1772, 0x17b4, 0x17e0, 0x180b, 0x1810, 0x1920, 0x1930, 0x1946, 0x19d0, 0x1a17,
0x1a55, 0x1a60, 0x1a7f, 0x1a90, 0x1ab0, 0x1abf, 0x1b00, 0x1b34, 0x1b50, 0x1b6b, 0x1a55, 0x1a60, 0x1a7f, 0x1a90, 0x1ab0, 0x1abf, 0x1b00, 0x1b34, 0x1b50, 0x1b6b,
0x1b80, 0x1ba1, 0x1bb0, 0x1be6, 0x1c24, 0x1c40, 0x1c50, 0x1cd0, 0x1cd4, 0x1cf7, 0x1b80, 0x1ba1, 0x1bb0, 0x1be6, 0x1c24, 0x1c40, 0x1c50, 0x1cd0, 0x1cd4, 0x1cf7,
0x1dc0, 0x1dfb, 0x200c, 0x203f, 0x20d0, 0x20e5, 0x2cef, 0x2de0, 0x302a, 0x3099, 0x1dc0, 0x1dfb, 0x200c, 0x203f, 0x20d0, 0x20e5, 0x2cef, 0x2de0, 0x302a, 0x3099,
@@ -179,13 +164,9 @@ static const uint16_t lit_unicode_non_letter_ident_part_interval_sps[] JERRY_ATT
}; };
/** /**
* Character interval lengths for non-letter character * Character interval lengths for ID_Continue.
* that can be used as a non-first character of an identifier.
*
* The characters covered by these intervals are from
* the following Unicode categories: Nd, Mn, Mc, Pc
*/ */
static const uint8_t lit_unicode_non_letter_ident_part_interval_lengths[] JERRY_ATTR_CONST_DATA = static const uint8_t lit_unicode_id_continue_interval_lengths[] JERRY_ATTR_CONST_DATA =
{ {
0x006f, 0x0004, 0x002c, 0x0001, 0x0001, 0x000a, 0x001e, 0x0006, 0x0005, 0x0001, 0x006f, 0x0004, 0x002c, 0x0001, 0x0001, 0x000a, 0x001e, 0x0006, 0x0005, 0x0001,
0x0003, 0x0009, 0x001a, 0x000a, 0x0009, 0x0008, 0x0003, 0x0008, 0x0002, 0x0004, 0x0003, 0x0009, 0x001a, 0x000a, 0x0009, 0x0008, 0x0003, 0x0008, 0x0002, 0x0004,
@@ -197,8 +178,8 @@ static const uint8_t lit_unicode_non_letter_ident_part_interval_lengths[] JERRY_
0x0009, 0x0003, 0x0001, 0x0006, 0x0002, 0x0003, 0x0001, 0x0009, 0x0002, 0x0005, 0x0009, 0x0003, 0x0001, 0x0006, 0x0002, 0x0003, 0x0001, 0x0009, 0x0002, 0x0005,
0x0007, 0x0009, 0x0001, 0x0006, 0x0007, 0x0009, 0x0008, 0x0005, 0x0009, 0x0001, 0x0007, 0x0009, 0x0001, 0x0006, 0x0007, 0x0009, 0x0008, 0x0005, 0x0009, 0x0001,
0x0009, 0x0001, 0x0013, 0x0001, 0x000a, 0x0023, 0x0013, 0x0009, 0x0003, 0x0002, 0x0009, 0x0001, 0x0013, 0x0001, 0x000a, 0x0023, 0x0013, 0x0009, 0x0003, 0x0002,
0x0002, 0x0006, 0x0003, 0x000b, 0x000e, 0x0002, 0x0002, 0x0002, 0x0001, 0x0001, 0x0002, 0x0006, 0x0003, 0x000b, 0x000e, 0x0002, 0x0008, 0x0002, 0x0002, 0x0001,
0x001f, 0x0009, 0x0002, 0x0009, 0x0001, 0x000b, 0x000b, 0x0009, 0x0009, 0x0004, 0x0001, 0x001f, 0x0009, 0x0002, 0x0009, 0x000b, 0x000b, 0x0009, 0x000a, 0x0004,
0x0009, 0x001c, 0x000a, 0x0009, 0x000d, 0x0001, 0x0004, 0x0010, 0x0009, 0x0008, 0x0009, 0x001c, 0x000a, 0x0009, 0x000d, 0x0001, 0x0004, 0x0010, 0x0009, 0x0008,
0x0002, 0x000c, 0x0009, 0x000d, 0x0013, 0x0009, 0x0009, 0x0002, 0x0014, 0x0002, 0x0002, 0x000c, 0x0009, 0x000d, 0x0013, 0x0009, 0x0009, 0x0002, 0x0014, 0x0002,
0x0039, 0x0004, 0x0001, 0x0001, 0x000c, 0x000b, 0x0002, 0x001f, 0x0005, 0x0001, 0x0039, 0x0004, 0x0001, 0x0001, 0x000c, 0x000b, 0x0002, 0x001f, 0x0005, 0x0001,
@@ -209,45 +190,65 @@ static const uint8_t lit_unicode_non_letter_ident_part_interval_lengths[] JERRY_
}; };
/** /**
* Those non-letter characters that can be used as a non-first * Non-interval characters for ID_Continue.
* character of an identifier and not included in any of the intervals
* specified in lit_unicode_non_letter_ident_part_interval_sps array.
*
* The characters are from the following Unicode categories:
* Nd, Mn, Mc, Pc
*/ */
static const uint16_t lit_unicode_non_letter_ident_part_chars[] JERRY_ATTR_CONST_DATA = static const uint16_t lit_unicode_id_continue_chars[] JERRY_ATTR_CONST_DATA =
{ {
0x05bf, 0x05c7, 0x0670, 0x0711, 0x07fd, 0x09bc, 0x09d7, 0x09fe, 0x0a3c, 0x0a51, 0x00b7, 0x0387, 0x05bf, 0x05c7, 0x0670, 0x0711, 0x07fd, 0x09bc, 0x09d7, 0x09fe,
0x0a75, 0x0abc, 0x0b3c, 0x0b82, 0x0bd7, 0x0cbc, 0x0d57, 0x0dca, 0x0dd6, 0x0e31, 0x0a3c, 0x0a51, 0x0a75, 0x0abc, 0x0b3c, 0x0b82, 0x0bd7, 0x0cbc, 0x0d57, 0x0dca,
0x0eb1, 0x0f35, 0x0f37, 0x0f39, 0x0fc6, 0x17dd, 0x18a9, 0x1ced, 0x1cf4, 0x2054, 0x0dd6, 0x0e31, 0x0eb1, 0x0f35, 0x0f37, 0x0f39, 0x0fc6, 0x17dd, 0x18a9, 0x1ced,
0x20e1, 0x2d7f, 0xa66f, 0xa802, 0xa806, 0xa80b, 0xa82c, 0xa9e5, 0xaa43, 0xaab0, 0x1cf4, 0x2054, 0x20e1, 0x2d7f, 0xa66f, 0xa802, 0xa806, 0xa80b, 0xa82c, 0xa9e5,
0xaac1, 0xfb1e, 0xff3f 0xaa43, 0xaab0, 0xaac1, 0xfb1e, 0xff3f
}; };
#if ENABLED (JERRY_ESNEXT)
/** /**
* Unicode separator character interval starting points from Unicode category: Zs * Character interval starting points for White_Space.
*/ */
static const uint16_t lit_unicode_separator_char_interval_sps[] JERRY_ATTR_CONST_DATA = static const uint16_t lit_unicode_white_space_interval_starts[] JERRY_ATTR_CONST_DATA =
{ {
0x2000 0x2000
}; };
/** /**
* Unicode separator character interval lengths from Unicode category: Zs * Character interval lengths for White_Space.
*/ */
static const uint8_t lit_unicode_separator_char_interval_lengths[] JERRY_ATTR_CONST_DATA = static const uint8_t lit_unicode_white_space_interval_lengths[] JERRY_ATTR_CONST_DATA =
{
0x000a
};
/**
* Non-interval characters for White_Space.
*/
static const uint16_t lit_unicode_white_space_chars[] JERRY_ATTR_CONST_DATA =
{
0x00a0, 0x1680, 0x202f, 0x205f, 0x3000
};
#else /* !ENABLED (JERRY_ESNEXT) */
/**
* Character interval starting points for White_Space.
*/
static const uint16_t lit_unicode_white_space_interval_starts[] JERRY_ATTR_CONST_DATA =
{
0x2000
};
/**
* Character interval lengths for White_Space.
*/
static const uint8_t lit_unicode_white_space_interval_lengths[] JERRY_ATTR_CONST_DATA =
{ {
0x000b 0x000b
}; };
/** /**
* Unicode separator characters that are not in the * Non-interval characters for White_Space.
* lit_unicode_separator_char_intervals array.
*
* Unicode category: Zs
*/ */
static const uint16_t lit_unicode_separator_chars[] JERRY_ATTR_CONST_DATA = static const uint16_t lit_unicode_white_space_chars[] JERRY_ATTR_CONST_DATA =
{ {
0x1680, 0x180e, 0x202f, 0x205f, 0x3000 0x1680, 0x180e, 0x202f, 0x205f, 0x3000
}; };
#endif /* ENABLED (JERRY_ESNEXT) */
@@ -0,0 +1,66 @@
// Copyright JS Foundation and other contributors, http://js.foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Simple case-mapping ranges located in the Unicode supplementary planes.
// Each entry is [first uppercase code point, first lowercase code point, number of pairs]:
// the i-th uppercase letter of a range maps to the i-th lowercase letter and vice versa.
const case_pair_ranges = [
  [0x10400, 0x10428, 40], // Deseret
  [0x104B0, 0x104D8, 36], // Osage
  [0x10C80, 0x10CC0, 51], // Old Hungarian
  [0x118A0, 0x118C0, 32], // Warang Citi
  [0x16E40, 0x16E60, 32], // Medefaidrin
  [0x1E900, 0x1E922, 34]  // Adlam
];

for (let range of case_pair_ranges) {
  for (let i = 0; i < range[2]; i++) {
    let upper = String.fromCodePoint(range[0] + i);
    let lower = String.fromCodePoint(range[1] + i);

    // The conversion must produce the exact counterpart, not merely a different string.
    assert(upper.toLowerCase() === lower);
    assert(lower.toUpperCase() === upper);
  }
}

// An unpaired high surrogate is kept as it is, while the BMP letter after it is still converted.
assert("\ud801A".toLowerCase() === "\ud801a");
@@ -0,0 +1,32 @@
// Copyright JS Foundation and other contributors, http://js.foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// A supplementary-plane code point (U+102C0) written with the \u{...} escape
// must be accepted as an identifier, both when declared and when referenced.
var \u{102C0} = 2;
assert(\u{102C0} === 2);
// The escaped identifier used as a property name must denote the same key as
// the string spelling of that code point as the surrogate pair \ud800\udec0.
var o1 = { \u{102C0} : 3 };
assert(o1['\ud800\udec0'] === 3);
// The reverse direction: a surrogate-pair string key is reachable through the
// escaped identifier in a member access.
var o2 = { '\ud800\udec0' : 4 };
assert(o2.\u{102C0} === 4);
// Declaring a variable named with this character must be rejected with a
// SyntaxError; reaching assert(false) means the parser wrongly accepted it.
try {
eval('var ⸯ');
assert(false);
} catch(e) {
assert(e instanceof SyntaxError);
}
// A literal (unescaped) supplementary-plane character must also be usable as
// an identifier.
var 𐋀 = 5;
assert(𐋀 === 5);
@@ -0,0 +1,20 @@
// Copyright JS Foundation and other contributors, http://js.foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code points U+10400 and U+10428 form an upper/lowercase pair, but this test
// expects the engine to leave their surrogate pairs unconverted. Each string
// also starts with an unpaired high surrogate and ends with an unpaired low
// surrogate, which must be left untouched as well.
// NOTE(review): presumably this file is run with a profile where case
// conversion works on individual UTF-16 code units (e.g. ES5.1) - confirm.
assert ("\ud801\ud801\udc00\udc00".toLowerCase() == "\ud801\ud801\udc00\udc00");
assert ("\ud801\ud801\udc28\udc28".toUpperCase() == "\ud801\ud801\udc28\udc28");
@@ -84,13 +84,6 @@ assert ("0123456789abcdefghijklmnopqrstuvwxzyABCDEFGHIJKLMNOPQRSTUVWXYZ".toLower
assert ("0123456789abcdefghijklmnopqrstuvwxzyABCDEFGHIJKLMNOPQRSTUVWXYZ".toUpperCase() assert ("0123456789abcdefghijklmnopqrstuvwxzyABCDEFGHIJKLMNOPQRSTUVWXYZ".toUpperCase()
== "0123456789ABCDEFGHIJKLMNOPQRSTUVWXZYABCDEFGHIJKLMNOPQRSTUVWXYZ"); == "0123456789ABCDEFGHIJKLMNOPQRSTUVWXZYABCDEFGHIJKLMNOPQRSTUVWXYZ");
// Although codepoint 0x10400 and 0x10428 are an upper-lowercase pair,
// we must not do their conversion in JavaScript. We must also ignore
// stray surrogates.
assert ("\ud801\ud801\udc00\udc00".toLowerCase() == "\ud801\ud801\udc00\udc00");
assert ("\ud801\ud801\udc28\udc28".toUpperCase() == "\ud801\ud801\udc28\udc28");
// Conversion of non-string objects. // Conversion of non-string objects.
assert (String.prototype.toUpperCase.call(true) == "TRUE"); assert (String.prototype.toUpperCase.call(true) == "TRUE");
+12 -4
View File
@@ -123,11 +123,7 @@
<test id="built-ins/String/prototype/normalize/return-normalized-string.js"><reason></reason></test> <test id="built-ins/String/prototype/normalize/return-normalized-string.js"><reason></reason></test>
<test id="built-ins/String/prototype/normalize/return-normalized-string-using-default-parameter.js"><reason></reason></test> <test id="built-ins/String/prototype/normalize/return-normalized-string-using-default-parameter.js"><reason></reason></test>
<test id="built-ins/String/prototype/toLocaleLowerCase/special_casing_conditional.js"><reason></reason></test> <test id="built-ins/String/prototype/toLocaleLowerCase/special_casing_conditional.js"><reason></reason></test>
<test id="built-ins/String/prototype/toLocaleLowerCase/supplementary_plane.js"><reason></reason></test>
<test id="built-ins/String/prototype/toLocaleUpperCase/supplementary_plane.js"><reason></reason></test>
<test id="built-ins/String/prototype/toLowerCase/special_casing_conditional.js"><reason></reason></test> <test id="built-ins/String/prototype/toLowerCase/special_casing_conditional.js"><reason></reason></test>
<test id="built-ins/String/prototype/toLowerCase/supplementary_plane.js"><reason></reason></test>
<test id="built-ins/String/prototype/toUpperCase/supplementary_plane.js"><reason></reason></test>
<test id="intl402/6.2.2_a.js"><reason></reason></test> <test id="intl402/6.2.2_a.js"><reason></reason></test>
<test id="intl402/6.2.2_b.js"><reason></reason></test> <test id="intl402/6.2.2_b.js"><reason></reason></test>
<test id="intl402/6.2.2_c.js"><reason></reason></test> <test id="intl402/6.2.2_c.js"><reason></reason></test>
@@ -377,4 +373,16 @@
<test id="language/statements/generators/prototype-value.js"><reason></reason></test> <test id="language/statements/generators/prototype-value.js"><reason></reason></test>
<test id="language/statements/let/syntax/identifier-let-disallowed-as-boundname.js"><reason></reason></test> <test id="language/statements/let/syntax/identifier-let-disallowed-as-boundname.js"><reason></reason></test>
<test id="language/statements/try/S12.14_A16_T4.js"><reason>ES2019 change: catch without parameter is allowed</reason></test> <test id="language/statements/try/S12.14_A16_T4.js"><reason>ES2019 change: catch without parameter is allowed</reason></test>
<test id="built-ins/Number/S9.3.1_A2.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
<test id="built-ins/Number/S9.3.1_A3_T1.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
<test id="built-ins/Number/S9.3.1_A3_T2.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
<test id="built-ins/RegExp/S15.10.2.12_A1_T1.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
<test id="built-ins/RegExp/S15.10.2.12_A2_T1.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
<test id="built-ins/String/prototype/trim/15.5.4.20-3-2.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
<test id="built-ins/String/prototype/trim/15.5.4.20-3-3.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
<test id="built-ins/String/prototype/trim/15.5.4.20-3-4.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
<test id="built-ins/String/prototype/trim/15.5.4.20-3-5.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
<test id="built-ins/String/prototype/trim/15.5.4.20-3-6.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
<test id="built-ins/parseFloat/S15.1.2.3_A2_T10.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
<test id="built-ins/parseInt/S15.1.2.2_A2_T10.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
</excludeList> </excludeList>
+298 -367
View File
@@ -17,10 +17,10 @@
from __future__ import print_function from __future__ import print_function
import argparse import argparse
import bisect
import csv import csv
import itertools import itertools
import os import os
import re
import warnings import warnings
from gen_c_source import LICENSE, format_code from gen_c_source import LICENSE, format_code
@@ -28,268 +28,286 @@ from settings import PROJECT_DIR
RANGES_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-ranges.inc.h') RANGES_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-ranges.inc.h')
RANGES_SUP_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-ranges-sup.inc.h')
CONVERSIONS_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-conversions.inc.h') CONVERSIONS_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-conversions.inc.h')
CONVERSIONS_SUP_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-conversions-sup.inc.h')
UNICODE_PLANE_TYPE_BASIC = 0
UNICODE_PLANE_TYPE_SUPPLEMENTARY = 1
# For ES5.1 profile we use a predefined subset of whitespace characters
ES5_1_WHITE_SPACE_UNITS = [0x1680, 0x180e]
ES5_1_WHITE_SPACE_UNITS.extend(range(0x2000, 0x200c))
ES5_1_WHITE_SPACE_UNITS.extend([0x202f, 0x205f, 0x3000])
# common code generation # common code generation
class UnicodeBasicSource(object):
# pylint: disable=too-many-instance-attributes
def __init__(self, filepath, character_type="uint16_t", length_type="uint8_t"):
self._filepath = filepath
self._header = [LICENSE, ""]
self._data = []
self._table_name_suffix = ""
self.character_type = character_type
self.length_type = length_type
class UniCodeSource(object): self._range_table_types = [self.character_type,
def __init__(self, filepath): self.length_type,
self.__filepath = filepath self.character_type]
self.__header = [LICENSE, ""] self._range_table_names = ["interval_starts",
self.__data = [] "interval_lengths",
"chars"]
self._range_table_descriptions = ["Character interval starting points for",
"Character interval lengths for",
"Non-interval characters for"]
self._conversion_range_types = [self.character_type,
self.length_type]
self._conversion_range_names = ["ranges",
"range_lengths"]
def complete_header(self, completion): def complete_header(self, completion):
self.__header.append(completion) self._header.append(completion)
self.__header.append("") # for an extra empty line self._header.append("") # for an extra empty line
def add_table(self, table, table_name, table_type, table_descr): def add_whitepace_range(self, category, categorizer, units):
self.__data.append(table_descr) self._data.append("#if ENABLED (JERRY_ESNEXT)")
self.__data.append("static const %s lit_%s[] JERRY_ATTR_CONST_DATA =" % (table_type, table_name)) self.add_range(category, categorizer.create_tables(units))
self.__data.append("{") self._data.append("#else /* !ENABLED (JERRY_ESNEXT) */")
self.__data.append(format_code(table, 1)) self.add_range(category, categorizer.create_tables(ES5_1_WHITE_SPACE_UNITS))
self.__data.append("};") self._data.append("#endif /* ENABLED (JERRY_ESNEXT) */\n")
self.__data.append("") # for an extra empty line
def add_range(self, category, tables):
idx = 0
for table in tables:
self.add_table(table,
"/**\n * %s %s.\n */" % (self._range_table_descriptions[idx], category),
self._range_table_types[idx],
category,
self._range_table_names[idx])
idx += 1
def add_conversion_range(self, category, tables, descriptions):
self.add_named_conversion_range(category, tables, self._conversion_range_names, descriptions)
def add_named_conversion_range(self, category, tables, table_names, descriptions):
idx = 0
for table in tables:
self.add_table(table,
descriptions[idx],
self._conversion_range_types[idx],
category,
table_names[idx])
idx += 1
def add_table(self, table, description, table_type, category, table_name):
if table and sum(table) != 0:
self._data.append(description)
self._data.append("static const %s lit_unicode_%s%s%s[] JERRY_ATTR_CONST_DATA ="
% (table_type,
category.lower(),
"_" + table_name if table_name else "",
self._table_name_suffix))
self._data.append("{")
self._data.append(format_code(table, 1, 6 if self._table_name_suffix else 4))
self._data.append("};")
self._data.append("") # for an extra empty line
def generate(self): def generate(self):
with open(self.__filepath, 'w') as generated_source: with open(self._filepath, 'w') as generated_source:
generated_source.write("\n".join(self.__header)) generated_source.write("\n".join(self._header))
generated_source.write("\n".join(self.__data)) generated_source.write("\n".join(self._data))
class UnicodeCategorizer(object):
class UnicodeSupplementarySource(UnicodeBasicSource):
def __init__(self, filepath):
UnicodeBasicSource.__init__(self, filepath, "uint32_t", "uint16_t")
self._table_name_suffix = "_sup"
def add_whitepace_range(self, category, categorizer, units):
self.add_range(category, categorizer.create_tables(units))
class UnicodeBasicCategorizer(object):
def __init__(self): def __init__(self):
# unicode categories: Lu Ll Lt Mn Mc Me Nd Nl No Zs Zl Zp Cc Cf Cs self._length_limit = 0xff
# Co Lm Lo Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So self.extra_id_continue_units = set([0x200C, 0x200D])
# letter: Lu Ll Lt Lm Lo Nl
# non-letter-indent-part:
# digit: Nd
# punctuation mark: Mn Mc
# connector punctuation: Pc
# separators: Zs
self._unicode_categories = {
'letters_category' : ["Lu", "Ll", "Lt", "Lm", "Lo", "Nl"],
'non_letters_category' : ["Nd", "Mn", "Mc", "Pc"],
'separators_category' : ["Zs"]
}
self._categories = { #pylint: disable=no-self-use
'letters' : [], def in_range(self, i):
'non_letters' : [], return i >= 0x80 and i < 0x10000
'separators' : []
}
def _store_by_category(self, unicode_id, category): def _group_ranges(self, units):
"""
Store the given unicode_id by its category
"""
for target_category in self._categories:
if category in self._unicode_categories[target_category + '_category']:
self._categories[target_category].append(unicode_id)
def read_categories(self, unicode_data_file):
"""
Read the corresponding unicode values and store them in category lists.
:return: List of letters, non_letter and separators.
"""
range_start_id = 0
with open(unicode_data_file) as unicode_data:
for line in csv.reader(unicode_data, delimiter=';'):
unicode_id = int(line[0], 16)
# Skip supplementary planes and ascii chars
if unicode_id >= 0x10000 or unicode_id < 128:
continue
category = line[2]
if range_start_id != 0:
while range_start_id <= unicode_id:
self._store_by_category(range_start_id, category)
range_start_id += 1
range_start_id = 0
continue
if line[1].startswith('<'):
# Save the start position of the range
range_start_id = unicode_id
self._store_by_category(unicode_id, category)
# This separator char is handled separatly
separators = self._categories['separators']
non_breaking_space = 0x00A0
if non_breaking_space in separators:
separators.remove(int(non_breaking_space))
# These separator chars are not in the unicode data file or not in Zs category
mongolian_vowel_separator = 0x180E
medium_mathematical_space = 0x205F
zero_width_space = 0x200B
if mongolian_vowel_separator not in separators:
bisect.insort(separators, int(mongolian_vowel_separator))
if medium_mathematical_space not in separators:
bisect.insort(separators, int(medium_mathematical_space))
if zero_width_space not in separators:
bisect.insort(separators, int(zero_width_space))
# https://www.ecma-international.org/ecma-262/5.1/#sec-7.1 format-control characters
non_letters = self._categories['non_letters']
zero_width_non_joiner = 0x200C
zero_width_joiner = 0x200D
bisect.insort(non_letters, int(zero_width_non_joiner))
bisect.insort(non_letters, int(zero_width_joiner))
return self._categories['letters'], self._categories['non_letters'], self._categories['separators']
def group_ranges(i):
""" """
Convert an increasing list of integers into a range list Convert an increasing list of integers into a range list
:return: List of ranges. :return: List of ranges.
""" """
for _, group in itertools.groupby(enumerate(i), lambda q: (q[1] - q[0])): for _, group in itertools.groupby(enumerate(units), lambda q: (q[1] - q[0])):
group = list(group) group = list(group)
yield group[0][1], group[-1][1] yield group[0][1], group[-1][1]
def create_tables(self, units):
def split_list(category_list):
""" """
Split list of ranges into intervals and single char lists. Split list of ranges into intervals and single char lists.
:return: A tuple containing the following info:
:return: List of interval starting points, interval lengths and single chars - list of interval starting points
- list of interval lengths
- list of single chars
""" """
interval_sps = [] interval_sps = []
interval_lengths = [] interval_lengths = []
chars = [] chars = []
for element in category_list: for element in self._group_ranges(units):
interval_length = element[1] - element[0] interval_length = element[1] - element[0]
if interval_length == 0: if interval_length == 0:
chars.append(element[0]) chars.append(element[0])
elif interval_length > 255: elif interval_length > self._length_limit:
for i in range(element[0], element[1], 256): for i in range(element[0], element[1], self._length_limit + 1):
length = 255 if (element[1] - i > 255) else (element[1] - i) length = min(self._length_limit, element[1] - i)
interval_sps.append(i) interval_sps.append(i)
interval_lengths.append(length) interval_lengths.append(length)
else: else:
interval_sps.append(element[0]) interval_sps.append(element[0])
interval_lengths.append(element[1] - element[0]) interval_lengths.append(interval_length)
return interval_sps, interval_lengths, chars return interval_sps, interval_lengths, chars
def read_units(self, file_path, categories, subcategories=None):
"""
Read the Unicode Derived Core Properties file and extract the ranges
for the given categories.
def generate_ranges(script_args): :param file_path: Path to the Unicode "DerivedCoreProperties.txt" file.
categorizer = UnicodeCategorizer() :param categories: A list of category strings to extract from the Unicode file.
letters, non_letters, separators = categorizer.read_categories(script_args.unicode_data) :param subcategories: A list of subcategory strings to restrict categories.
:return: A dictionary each string from the :param categories: is a key and for each
key list of code points are stored.
"""
# Create a dictionary in the format: { category[0]: [ ], ..., category[N]: [ ] }
units = {}
for category in categories:
units[category] = []
letter_tables = split_list(list(group_ranges(letters))) # Formats to match:
non_letter_tables = split_list(list(group_ranges(non_letters))) # <HEX> ; <category> #
separator_tables = split_list(list(group_ranges(separators))) # <HEX>..<HEX> ; <category> # <subcategory>
matcher = r"(?P<start>[\dA-F]+)(?:\.\.(?P<end>[\dA-F]+))?\s+; (?P<category>[\w]+) # (?P<subcategory>[\w&]{2})"
c_source = UniCodeSource(RANGES_C_SOURCE) with open(file_path, "r") as src_file:
for line in src_file:
match = re.match(matcher, line)
if (match
and match.group("category") in categories
and (not subcategories or match.group("subcategory") in subcategories)):
start = int(match.group("start"), 16)
# if no "end" found use the "start"
end = int(match.group("end") or match.group("start"), 16)
matching_code_points = [
code_point for code_point in range(start, end + 1) if self.in_range(code_point)
]
units[match.group("category")].extend(matching_code_points)
return units
def read_case_mappings(self, unicode_data_file, special_casing_file):
    """
    Collect the lower and upper case conversions of every code point accepted by `self.in_range`.

    :param unicode_data_file: Path of the file with the default case mappings (one-to-one mappings).
    :param special_casing_file: Path of the file with the additional case mappings that are either
                                not one-to-one or which are context-sensitive.
    :return: Tuple of two dictionaries (lower case mapping, upper case mapping). Both are keyed by
             the numeric code point and hold the values returned by `parse_unicode_sequence`.
    """
    to_lower = {}
    to_upper = {}

    # Default mappings: field 12 is stored as the upper case variant, field 13 as the lower case one
    with open(unicode_data_file) as data_stream:
        for record in csv.reader(data_stream, delimiter=';'):
            code_point = int(record[0], 16)

            if self.in_range(code_point):
                upper_field = record[12]
                lower_field = record[13]

                if upper_field:
                    to_upper[code_point] = parse_unicode_sequence(upper_field)

                if lower_field:
                    to_lower[code_point] = parse_unicode_sequence(lower_field)

    # The special cases are processed afterwards, so they overwrite the default entries
    with open(special_casing_file) as casing_stream:
        for record in csv.reader(casing_stream, delimiter=';'):
            # Nothing to do for empty lines and for lines that start with a comment
            if len(record) == 0 or record[0].startswith('#'):
                continue

            # Blank out every field that contains a '#' character
            record = ['' if '#' in field else field for field in record]

            code_point = int(record[0], 16)
            conditions = record[4]

            # Only the unconditional mappings of the accepted code points are stored
            if self.in_range(code_point) and not conditions:
                lower_variant = parse_unicode_sequence(record[1])
                upper_variant = parse_unicode_sequence(record[3])

                to_lower[code_point] = lower_variant
                to_upper[code_point] = upper_variant

    return to_lower, to_upper
class UnicodeSupplementaryCategorizer(UnicodeBasicCategorizer):
    """
    Categorizer variant that accepts only the code points starting from 0x10000.
    """

    def __init__(self):
        UnicodeBasicCategorizer.__init__(self)
        # Both attributes are assigned after the base initializer has run
        self.extra_id_continue_units = set()
        self._length_limit = 0xffff

    def in_range(self, i):
        """
        Check whether a code point is handled by this categorizer.

        :param i: Numeric code point.
        :return: True if the code point is not below 0x10000, False otherwise.
        """
        return 0x10000 <= i
def generate_ranges(script_args, plane_type):
if plane_type == UNICODE_PLANE_TYPE_SUPPLEMENTARY:
c_source = UnicodeSupplementarySource(RANGES_SUP_C_SOURCE)
categorizer = UnicodeSupplementaryCategorizer()
else:
c_source = UnicodeBasicSource(RANGES_C_SOURCE)
categorizer = UnicodeBasicCategorizer()
header_completion = ["/* This file is automatically generated by the %s script" % os.path.basename(__file__), header_completion = ["/* This file is automatically generated by the %s script" % os.path.basename(__file__),
" * from %s. Do not edit! */" % os.path.basename(script_args.unicode_data), " * from %s. Do not edit! */" % os.path.basename(script_args.derived_core_properties),
""] ""]
c_source.complete_header("\n".join(header_completion)) c_source.complete_header("\n".join(header_completion))
c_source.add_table(letter_tables[0], units = categorizer.read_units(script_args.derived_core_properties, ["ID_Start", "ID_Continue"])
"unicode_letter_interval_sps",
"uint16_t",
("/**\n"
" * Character interval starting points for the unicode letters.\n"
" *\n"
" * The characters covered by these intervals are from\n"
" * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl\n"
" */"))
c_source.add_table(letter_tables[1], units["ID_Continue"] = sorted(set(units["ID_Continue"]).union(categorizer.extra_id_continue_units)
"unicode_letter_interval_lengths", - set(units["ID_Start"]))
"uint8_t",
("/**\n"
" * Character lengths for the unicode letters.\n"
" *\n"
" * The characters covered by these intervals are from\n"
" * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl\n"
" */"))
c_source.add_table(letter_tables[2], for category, unit in units.items():
"unicode_letter_chars", c_source.add_range(category, categorizer.create_tables(unit))
"uint16_t",
("/**\n"
" * Those unicode letter characters that are not inside any of\n"
" * the intervals specified in lit_unicode_letter_interval_sps array.\n"
" *\n"
" * The characters are from the following Unicode categories:\n"
" * Lu, Ll, Lt, Lm, Lo, Nl\n"
" */"))
c_source.add_table(non_letter_tables[0], white_space_units = categorizer.read_units(script_args.prop_list, ["White_Space"], ["Zs"])["White_Space"]
"unicode_non_letter_ident_part_interval_sps",
"uint16_t",
("/**\n"
" * Character interval starting points for non-letter character\n"
" * that can be used as a non-first character of an identifier.\n"
" *\n"
" * The characters covered by these intervals are from\n"
" * the following Unicode categories: Nd, Mn, Mc, Pc\n"
" */"))
c_source.add_table(non_letter_tables[1], c_source.add_whitepace_range("White_Space", categorizer, white_space_units)
"unicode_non_letter_ident_part_interval_lengths",
"uint8_t",
("/**\n"
" * Character interval lengths for non-letter character\n"
" * that can be used as a non-first character of an identifier.\n"
" *\n"
" * The characters covered by these intervals are from\n"
" * the following Unicode categories: Nd, Mn, Mc, Pc\n"
" */"))
c_source.add_table(non_letter_tables[2],
"unicode_non_letter_ident_part_chars",
"uint16_t",
("/**\n"
" * Those non-letter characters that can be used as a non-first\n"
" * character of an identifier and not included in any of the intervals\n"
" * specified in lit_unicode_non_letter_ident_part_interval_sps array.\n"
" *\n"
" * The characters are from the following Unicode categories:\n"
" * Nd, Mn, Mc, Pc\n"
" */"))
c_source.add_table(separator_tables[0],
"unicode_separator_char_interval_sps",
"uint16_t",
("/**\n"
" * Unicode separator character interval starting points from Unicode category: Zs\n"
" */"))
c_source.add_table(separator_tables[1],
"unicode_separator_char_interval_lengths",
"uint8_t",
("/**\n"
" * Unicode separator character interval lengths from Unicode category: Zs\n"
" */"))
c_source.add_table(separator_tables[2],
"unicode_separator_chars",
"uint16_t",
("/**\n"
" * Unicode separator characters that are not in the\n"
" * lit_unicode_separator_char_intervals array.\n"
" *\n"
" * Unicode category: Zs\n"
" */"))
c_source.generate() c_source.generate()
@@ -320,70 +338,6 @@ def parse_unicode_sequence(raw_data):
return result return result
def read_case_mappings(unicode_data_file, special_casing_file):
    """
    Collect the lower and upper case conversions of the code points in the [128, 0x10000) range.

    :param unicode_data_file: Path of the file with the default case mappings (one-to-one mappings).
    :param special_casing_file: Path of the file with the additional case mappings that are either
                                not one-to-one or which are context-sensitive.
    :return: Tuple of two dictionaries (lower case mapping, upper case mapping). Both are keyed by
             the numeric code point and hold the values returned by `parse_unicode_sequence`.
    """
    to_lower = {}
    to_upper = {}

    # Default mappings: field 12 is stored as the upper case variant, field 13 as the lower case one
    with open(unicode_data_file) as data_stream:
        for record in csv.reader(data_stream, delimiter=';'):
            code_point = int(record[0], 16)

            # Supplementary planes and ascii chars are not stored
            if 128 <= code_point < 0x10000:
                upper_field = record[12]
                lower_field = record[13]

                if upper_field:
                    to_upper[code_point] = parse_unicode_sequence(upper_field)

                if lower_field:
                    to_lower[code_point] = parse_unicode_sequence(lower_field)

    # The special cases are processed afterwards, so they overwrite the default entries
    with open(special_casing_file) as casing_stream:
        for record in csv.reader(casing_stream, delimiter=';'):
            # Nothing to do for empty lines and for lines that start with a comment
            if len(record) == 0 or record[0].startswith('#'):
                continue

            # Blank out every field that contains a '#' character
            record = ['' if '#' in field else field for field in record]

            code_point = int(record[0], 16)
            conditions = record[4]

            # Supplementary planes, ascii chars and conditional mappings are not stored
            if 128 <= code_point < 0x10000 and not conditions:
                lower_variant = parse_unicode_sequence(record[1])
                upper_variant = parse_unicode_sequence(record[3])

                to_lower[code_point] = lower_variant
                to_upper[code_point] = upper_variant

    return to_lower, to_upper
def extract_ranges(letter_case, reverse_letter_case=None): def extract_ranges(letter_case, reverse_letter_case=None):
""" """
Extract ranges from case mappings Extract ranges from case mappings
@@ -675,27 +629,13 @@ def calculate_conversion_distance(letter_case, letter_id):
return ord(letter_case[letter_id]) - letter_id return ord(letter_case[letter_id]) - letter_id
def generate_conversions(script_args): def generate_conversions(script_args, plane_type):
# Read the corresponding unicode values of lower and upper case letters and store these in tables if plane_type == UNICODE_PLANE_TYPE_SUPPLEMENTARY:
case_mappings = read_case_mappings(script_args.unicode_data, script_args.special_casing) c_source = UnicodeSupplementarySource(CONVERSIONS_SUP_C_SOURCE)
lower_case = case_mappings[0] categorizer = UnicodeSupplementaryCategorizer()
upper_case = case_mappings[1] else:
c_source = UnicodeBasicSource(CONVERSIONS_C_SOURCE)
character_case_ranges = extract_ranges(lower_case, upper_case) categorizer = UnicodeBasicCategorizer()
character_pair_ranges = extract_character_pair_ranges(lower_case, upper_case)
character_pairs = extract_character_pairs(lower_case, upper_case)
upper_case_special_ranges = extract_special_ranges(upper_case)
lower_case_ranges = extract_ranges(lower_case)
lower_case_conversions = extract_conversions(lower_case)
upper_case_conversions = extract_conversions(upper_case)
if lower_case:
warnings.warn('Not all elements extracted from the lowercase table!')
if upper_case:
warnings.warn('Not all elements extracted from the uppercase table!')
# Generate conversions output
c_source = UniCodeSource(CONVERSIONS_C_SOURCE)
unicode_file = os.path.basename(script_args.unicode_data) unicode_file = os.path.basename(script_args.unicode_data)
spec_casing_file = os.path.basename(script_args.special_casing) spec_casing_file = os.path.basename(script_args.special_casing)
@@ -706,75 +646,58 @@ def generate_conversions(script_args):
c_source.complete_header("\n".join(header_completion)) c_source.complete_header("\n".join(header_completion))
c_source.add_table(character_case_ranges[0], # Read the corresponding unicode values of lower and upper case letters and store these in tables
"character_case_ranges", lower_case, upper_case = categorizer.read_case_mappings(script_args.unicode_data, script_args.special_casing)
"uint16_t",
("/* Contains start points of character case ranges "
"(these are bidirectional conversions). */"))
c_source.add_table(character_case_ranges[1], c_source.add_conversion_range("character_case",
"character_case_range_lengths", extract_ranges(lower_case, upper_case),
"uint8_t", [("/* Contains start points of character case ranges "
"/* Interval lengths of start points in `character_case_ranges` table. */") "(these are bidirectional conversions). */"),
"/* Interval lengths of start points in `character_case_ranges` table. */"])
c_source.add_conversion_range("character_pair",
extract_character_pair_ranges(lower_case, upper_case),
["/* Contains the start points of bidirectional conversion ranges. */",
"/* Interval lengths of start points in `character_pair_ranges` table. */"])
c_source.add_table(character_pair_ranges[0], c_source.add_table(extract_character_pairs(lower_case, upper_case),
"character_pair_ranges", "/* Contains lower/upper case bidirectional conversion pairs. */",
"uint16_t", c_source.character_type,
"/* Contains the start points of bidirectional conversion ranges. */")
c_source.add_table(character_pair_ranges[1],
"character_pair_range_lengths",
"uint8_t",
"/* Interval lengths of start points in `character_pair_ranges` table. */")
c_source.add_table(character_pairs,
"character_pairs", "character_pairs",
"uint16_t", "")
"/* Contains lower/upper case bidirectional conversion pairs. */")
c_source.add_table(upper_case_special_ranges[0], c_source.add_conversion_range("upper_case_special",
"upper_case_special_ranges", extract_special_ranges(upper_case),
"uint16_t", [("/* Contains start points of one-to-two uppercase ranges where the "
("/* Contains start points of one-to-two uppercase ranges where the second character\n" "second character\n"
" * is always the same.\n" " * is always the same.\n"
" */")) " */"),
"/* Interval lengths for start points in `upper_case_special_ranges` table. */"])
c_source.add_table(upper_case_special_ranges[1], c_source.add_conversion_range("lower_case",
"upper_case_special_range_lengths", extract_ranges(lower_case),
"uint8_t", ["/* Contains start points of lowercase ranges. */",
"/* Interval lengths for start points in `upper_case_special_ranges` table. */") "/* Interval lengths for start points in `lower_case_ranges` table. */"])
c_source.add_table(lower_case_ranges[0], c_source.add_named_conversion_range("lower_case",
"lower_case_ranges", extract_conversions(lower_case),
"uint16_t", ["conversions", "conversion_counters"],
"/* Contains start points of lowercase ranges. */") [("/* The remaining lowercase conversions. The lowercase variant can "
"be one-to-three character long. */"),
("/* Number of one-to-one, one-to-two, and one-to-three lowercase "
"conversions. */")])
c_source.add_table(lower_case_ranges[1], c_source.add_named_conversion_range("upper_case",
"lower_case_range_lengths", extract_conversions(upper_case),
"uint8_t", ["conversions", "conversion_counters"],
"/* Interval lengths for start points in `lower_case_ranges` table. */") [("/* The remaining uppercase conversions. The uppercase variant can "
"be one-to-three character long. */"),
("/* Number of one-to-one, one-to-two, and one-to-three uppercase "
"conversions. */")])
c_source.add_table(lower_case_conversions[0], if lower_case:
"lower_case_conversions", warnings.warn('Not all elements extracted from the lowercase table!')
"uint16_t", if upper_case:
("/* The remaining lowercase conversions. The lowercase variant can " warnings.warn('Not all elements extracted from the uppercase table!')
"be one-to-three character long. */"))
c_source.add_table(lower_case_conversions[1],
"lower_case_conversion_counters",
"uint8_t",
"/* Number of one-to-one, one-to-two, and one-to-three lowercase conversions. */")
c_source.add_table(upper_case_conversions[0],
"upper_case_conversions",
"uint16_t",
("/* The remaining uppercase conversions. The uppercase variant can "
"be one-to-three character long. */"))
c_source.add_table(upper_case_conversions[1],
"upper_case_conversion_counters",
"uint8_t",
"/* Number of one-to-one, one-to-two, and one-to-three uppercase conversions. */")
c_source.generate() c_source.generate()
@@ -783,29 +706,37 @@ def generate_conversions(script_args):
def main(): def main():
parser = argparse.ArgumentParser(description='lit-unicode-{conversions,ranges}.inc.h generator', parser = argparse.ArgumentParser(description='lit-unicode-{conversions,ranges}-{sup}.inc.h generator',
epilog=''' epilog='''
The input files (UnicodeData.txt, SpecialCasing.txt) The input files:
- UnicodeData.txt
- SpecialCasing.txt
- DerivedCoreProperties.txt
- PropList.txt
must be retrieved from must be retrieved from
http://www.unicode.org/Public/<VERSION>/ucd/. http://www.unicode.org/Public/<VERSION>/ucd/.
The last known good version is 13.0.0. The last known good version is 13.0.0.
''') ''')
def check_file(path):
if not os.path.isfile(path) or not os.access(path, os.R_OK):
raise argparse.ArgumentTypeError('The %s file is missing or not readable!' % path)
return path
parser.add_argument('--unicode-data', metavar='FILE', action='store', required=True, parser.add_argument('--unicode-data', metavar='FILE', action='store', required=True,
help='specify the unicode data file') type=check_file, help='specify the unicode data file')
parser.add_argument('--special-casing', metavar='FILE', action='store', required=True, parser.add_argument('--special-casing', metavar='FILE', action='store', required=True,
help='specify the special casing file') type=check_file, help='specify the special casing file')
parser.add_argument('--prop-list', metavar='FILE', action='store', required=True,
type=check_file, help='specify the prop list file')
parser.add_argument('--derived-core-properties', metavar='FILE', action='store', required=True,
type=check_file, help='specify the DerivedCodeProperties file')
script_args = parser.parse_args() script_args = parser.parse_args()
if not os.path.isfile(script_args.unicode_data) or not os.access(script_args.unicode_data, os.R_OK): generate_ranges(script_args, UNICODE_PLANE_TYPE_BASIC)
parser.error('The %s file is missing or not readable!' % script_args.unicode_data) generate_ranges(script_args, UNICODE_PLANE_TYPE_SUPPLEMENTARY)
generate_conversions(script_args, UNICODE_PLANE_TYPE_BASIC)
if not os.path.isfile(script_args.special_casing) or not os.access(script_args.special_casing, os.R_OK): generate_conversions(script_args, UNICODE_PLANE_TYPE_SUPPLEMENTARY)
parser.error('The %s file is missing or not readable!' % script_args.special_casing)
generate_ranges(script_args)
generate_conversions(script_args)
if __name__ == "__main__": if __name__ == "__main__":