Support Unicode supplementary planes (#3928)
JerryScript-DCO-1.0-Signed-off-by: Robert Fancsik frobert@inf.u-szeged.hu
This commit is contained in:
@@ -2605,6 +2605,19 @@ ecma_stringbuilder_append_raw (ecma_stringbuilder_t *builder_p, /**< string buil
|
|||||||
memcpy (dest_p, data_p, data_size);
|
memcpy (dest_p, data_p, data_size);
|
||||||
} /* ecma_stringbuilder_append_raw */
|
} /* ecma_stringbuilder_append_raw */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append a codepoint to a string builder
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
ecma_stringbuilder_append_codepoint (ecma_stringbuilder_t *builder_p, /**< string builder */
|
||||||
|
lit_code_point_t cp) /**< code point */
|
||||||
|
{
|
||||||
|
const lit_utf8_size_t size = (lit_utf8_size_t) lit_code_point_get_cesu8_length (cp);
|
||||||
|
lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, size);
|
||||||
|
|
||||||
|
lit_code_point_to_cesu8_bytes (dest_p, cp);
|
||||||
|
} /* ecma_stringbuilder_append_codepoint */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Append an ecma_char_t to a string builder
|
* Append an ecma_char_t to a string builder
|
||||||
*/
|
*/
|
||||||
@@ -2612,10 +2625,7 @@ void
|
|||||||
ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, /**< string builder */
|
ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, /**< string builder */
|
||||||
const ecma_char_t c) /**< ecma char */
|
const ecma_char_t c) /**< ecma char */
|
||||||
{
|
{
|
||||||
const lit_utf8_size_t size = (lit_utf8_size_t) lit_code_point_get_cesu8_length (c);
|
ecma_stringbuilder_append_codepoint (builder_p, c);
|
||||||
lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, size);
|
|
||||||
|
|
||||||
lit_code_point_to_cesu8_bytes (dest_p, c);
|
|
||||||
} /* ecma_stringbuilder_append_char */
|
} /* ecma_stringbuilder_append_char */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -393,6 +393,7 @@ void ecma_stringbuilder_append_magic (ecma_stringbuilder_t *builder_p, const lit
|
|||||||
void ecma_stringbuilder_append_raw (ecma_stringbuilder_t *builder_p,
|
void ecma_stringbuilder_append_raw (ecma_stringbuilder_t *builder_p,
|
||||||
const lit_utf8_byte_t *data_p,
|
const lit_utf8_byte_t *data_p,
|
||||||
const lit_utf8_size_t data_size);
|
const lit_utf8_size_t data_size);
|
||||||
|
void ecma_stringbuilder_append_codepoint (ecma_stringbuilder_t *builder_p, lit_code_point_t cp);
|
||||||
void ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, const ecma_char_t c);
|
void ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, const ecma_char_t c);
|
||||||
void ecma_stringbuilder_append_byte (ecma_stringbuilder_t *builder_p, const lit_utf8_byte_t);
|
void ecma_stringbuilder_append_byte (ecma_stringbuilder_t *builder_p, const lit_utf8_byte_t);
|
||||||
ecma_string_t *ecma_stringbuilder_finalize (ecma_stringbuilder_t *builder_p);
|
ecma_string_t *ecma_stringbuilder_finalize (ecma_stringbuilder_t *builder_p);
|
||||||
|
|||||||
@@ -988,96 +988,42 @@ ecma_builtin_string_prototype_object_conversion_helper (ecma_string_t *input_str
|
|||||||
bool lower_case) /**< convert to lower (true)
|
bool lower_case) /**< convert to lower (true)
|
||||||
* or upper (false) case */
|
* or upper (false) case */
|
||||||
{
|
{
|
||||||
ecma_value_t ret_value = ECMA_VALUE_EMPTY;
|
ecma_stringbuilder_t builder = ecma_stringbuilder_create ();
|
||||||
|
|
||||||
/* 3. */
|
|
||||||
ECMA_STRING_TO_UTF8_STRING (input_string_p, input_start_p, input_start_size);
|
ECMA_STRING_TO_UTF8_STRING (input_string_p, input_start_p, input_start_size);
|
||||||
|
|
||||||
/*
|
const lit_utf8_byte_t *input_curr_p = input_start_p;
|
||||||
* The URI encoding has two major phases: first we compute
|
|
||||||
* the length of the lower case string, then we encode it.
|
|
||||||
*/
|
|
||||||
|
|
||||||
lit_utf8_size_t output_length = 0;
|
|
||||||
const lit_utf8_byte_t *input_str_curr_p = input_start_p;
|
|
||||||
const lit_utf8_byte_t *input_str_end_p = input_start_p + input_start_size;
|
const lit_utf8_byte_t *input_str_end_p = input_start_p + input_start_size;
|
||||||
|
|
||||||
while (input_str_curr_p < input_str_end_p)
|
while (input_curr_p < input_str_end_p)
|
||||||
{
|
{
|
||||||
ecma_char_t character = lit_cesu8_read_next (&input_str_curr_p);
|
lit_code_point_t cp = lit_cesu8_read_next (&input_curr_p);
|
||||||
ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
|
|
||||||
ecma_length_t character_length;
|
#if ENABLED (JERRY_ESNEXT)
|
||||||
lit_utf8_byte_t utf8_byte_buffer[LIT_CESU8_MAX_BYTES_IN_CODE_POINT];
|
if (lit_is_code_point_utf16_high_surrogate (cp))
|
||||||
|
{
|
||||||
|
const ecma_char_t next_ch = lit_cesu8_peek_next (input_curr_p);
|
||||||
|
if (lit_is_code_point_utf16_low_surrogate (next_ch))
|
||||||
|
{
|
||||||
|
cp = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) cp, next_ch);
|
||||||
|
input_curr_p += LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* ENABLED (JERRY_ESNEXT) */
|
||||||
|
|
||||||
if (lower_case)
|
if (lower_case)
|
||||||
{
|
{
|
||||||
character_length = lit_char_to_lower_case (character,
|
lit_char_to_lower_case (cp, &builder);
|
||||||
character_buffer,
|
|
||||||
LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
character_length = lit_char_to_upper_case (character,
|
lit_char_to_upper_case (cp, &builder);
|
||||||
character_buffer,
|
|
||||||
LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
|
||||||
}
|
|
||||||
|
|
||||||
JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
|
||||||
|
|
||||||
for (ecma_length_t i = 0; i < character_length; i++)
|
|
||||||
{
|
|
||||||
output_length += lit_code_unit_to_utf8 (character_buffer[i], utf8_byte_buffer);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Second phase. */
|
|
||||||
|
|
||||||
JMEM_DEFINE_LOCAL_ARRAY (output_start_p,
|
|
||||||
output_length,
|
|
||||||
lit_utf8_byte_t);
|
|
||||||
|
|
||||||
lit_utf8_byte_t *output_char_p = output_start_p;
|
|
||||||
|
|
||||||
/* Encoding the output. */
|
|
||||||
input_str_curr_p = input_start_p;
|
|
||||||
|
|
||||||
while (input_str_curr_p < input_str_end_p)
|
|
||||||
{
|
|
||||||
ecma_char_t character = lit_cesu8_read_next (&input_str_curr_p);
|
|
||||||
ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
|
|
||||||
ecma_length_t character_length;
|
|
||||||
|
|
||||||
if (lower_case)
|
|
||||||
{
|
|
||||||
character_length = lit_char_to_lower_case (character,
|
|
||||||
character_buffer,
|
|
||||||
LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
character_length = lit_char_to_upper_case (character,
|
|
||||||
character_buffer,
|
|
||||||
LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
|
||||||
}
|
|
||||||
|
|
||||||
JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
|
||||||
|
|
||||||
for (ecma_length_t i = 0; i < character_length; i++)
|
|
||||||
{
|
|
||||||
output_char_p += lit_code_unit_to_utf8 (character_buffer[i], output_char_p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
JERRY_ASSERT (output_start_p + output_length == output_char_p);
|
|
||||||
|
|
||||||
ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_length);
|
|
||||||
|
|
||||||
ret_value = ecma_make_string_value (output_string_p);
|
|
||||||
|
|
||||||
JMEM_FINALIZE_LOCAL_ARRAY (output_start_p);
|
|
||||||
ECMA_FINALIZE_UTF8_STRING (input_start_p, input_start_size);
|
ECMA_FINALIZE_UTF8_STRING (input_start_p, input_start_size);
|
||||||
|
|
||||||
return ret_value;
|
return ecma_make_string_value (ecma_stringbuilder_finalize (&builder));
|
||||||
} /* ecma_builtin_string_prototype_object_conversion_helper */
|
} /* ecma_builtin_string_prototype_object_conversion_helper */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -413,23 +413,13 @@ ecma_regexp_canonicalize_char (lit_code_point_t ch, /**< character */
|
|||||||
return ch;
|
return ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if ENABLED (JERRY_ESNEXT)
|
lit_code_point_t cu = lit_char_to_upper_case (ch, NULL);
|
||||||
/* TODO: Implement case folding for code points in the upper planes. */
|
|
||||||
if (JERRY_UNLIKELY (ch > LIT_UTF16_CODE_UNIT_MAX))
|
|
||||||
{
|
|
||||||
return ch;
|
|
||||||
}
|
|
||||||
#endif /* ENABLED (JERRY_ESNEXT) */
|
|
||||||
|
|
||||||
ecma_char_t u[LIT_MAXIMUM_OTHER_CASE_LENGTH];
|
if (cu == LIT_MULTIPLE_CU)
|
||||||
const ecma_length_t size = lit_char_to_upper_case ((ecma_char_t) ch, u, LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
|
||||||
|
|
||||||
if (size != 1)
|
|
||||||
{
|
{
|
||||||
return ch;
|
return ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
const ecma_char_t cu = u[0];
|
|
||||||
if (cu <= LIT_UTF8_1_BYTE_CODE_POINT_MAX && !unicode)
|
if (cu <= LIT_UTF8_1_BYTE_CODE_POINT_MAX && !unicode)
|
||||||
{
|
{
|
||||||
/* 6. */
|
/* 6. */
|
||||||
|
|||||||
+341
-259
@@ -14,12 +14,15 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
#include "ecma-helpers.h"
|
||||||
#include "lit-char-helpers.h"
|
#include "lit-char-helpers.h"
|
||||||
#include "lit-unicode-ranges.inc.h"
|
#include "lit-unicode-ranges.inc.h"
|
||||||
|
#include "lit-unicode-ranges-sup.inc.h"
|
||||||
#include "lit-strings.h"
|
#include "lit-strings.h"
|
||||||
|
|
||||||
#if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
|
#if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
|
||||||
#include "lit-unicode-conversions.inc.h"
|
#include "lit-unicode-conversions.inc.h"
|
||||||
|
#include "lit-unicode-conversions-sup.inc.h"
|
||||||
#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
|
#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
|
||||||
|
|
||||||
#define NUM_OF_ELEMENTS(array) (sizeof (array) / sizeof ((array)[0]))
|
#define NUM_OF_ELEMENTS(array) (sizeof (array) / sizeof ((array)[0]))
|
||||||
@@ -31,36 +34,43 @@
|
|||||||
* @return true - if the character is in the given array
|
* @return true - if the character is in the given array
|
||||||
* false - otherwise
|
* false - otherwise
|
||||||
*/
|
*/
|
||||||
static bool
|
#define LIT_SEARCH_CHAR_IN_ARRAY_FN(function_name, char_type, array_type) \
|
||||||
search_char_in_char_array (ecma_char_t c, /**< code unit */
|
static bool \
|
||||||
const ecma_char_t *array, /**< array */
|
function_name (char_type c, /**< code unit */ \
|
||||||
int size_of_array) /**< length of the array */
|
const array_type *array, /**< array */ \
|
||||||
{
|
int size_of_array) /**< length of the array */\
|
||||||
int bottom = 0;
|
{ \
|
||||||
int top = size_of_array - 1;
|
int bottom = 0; \
|
||||||
|
int top = size_of_array - 1; \
|
||||||
|
\
|
||||||
|
while (bottom <= top) \
|
||||||
|
{ \
|
||||||
|
int middle = (bottom + top) / 2; \
|
||||||
|
char_type current = array[middle]; \
|
||||||
|
\
|
||||||
|
if (current == c) \
|
||||||
|
{ \
|
||||||
|
return true; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
if (c < current) \
|
||||||
|
{ \
|
||||||
|
top = middle - 1; \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
bottom = middle + 1; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
return false; \
|
||||||
|
} /* __function_name */
|
||||||
|
|
||||||
while (bottom <= top)
|
LIT_SEARCH_CHAR_IN_ARRAY_FN (lit_search_char_in_array, ecma_char_t, uint16_t)
|
||||||
{
|
|
||||||
int middle = (bottom + top) / 2;
|
|
||||||
ecma_char_t current = array[middle];
|
|
||||||
|
|
||||||
if (current == c)
|
#if ENABLED (JERRY_ESNEXT)
|
||||||
{
|
LIT_SEARCH_CHAR_IN_ARRAY_FN (lit_search_codepoint_in_array, lit_code_point_t, uint32_t)
|
||||||
return true;
|
#endif /* ENABLED (JERRY_ESNEXT) */
|
||||||
}
|
|
||||||
|
|
||||||
if (c < current)
|
|
||||||
{
|
|
||||||
top = middle - 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
bottom = middle + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
} /* search_char_in_char_array */
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Binary search algorithm that searches a character in the given intervals.
|
* Binary search algorithm that searches a character in the given intervals.
|
||||||
@@ -70,37 +80,44 @@ search_char_in_char_array (ecma_char_t c, /**< code unit */
|
|||||||
* @return true - if the character is included (inclusively) in one of the intervals in the given array
|
* @return true - if the character is included (inclusively) in one of the intervals in the given array
|
||||||
* false - otherwise
|
* false - otherwise
|
||||||
*/
|
*/
|
||||||
static bool
|
#define LIT_SEARCH_CHAR_IN_INTERVAL_ARRAY_FN(function_name, char_type, array_type, interval_type) \
|
||||||
search_char_in_interval_array (ecma_char_t c, /**< code unit */
|
static bool \
|
||||||
const ecma_char_t *array_sp, /**< array of interval starting points */
|
function_name (char_type c, /**< code unit */ \
|
||||||
const uint8_t *lengths, /**< array of interval lengths */
|
const array_type *array_sp, /**< array of interval starting points */ \
|
||||||
int size_of_array) /**< length of the array */
|
const interval_type *lengths, /**< array of interval lengths */ \
|
||||||
{
|
int size_of_array) /**< length of the array */ \
|
||||||
int bottom = 0;
|
{ \
|
||||||
int top = size_of_array - 1;
|
int bottom = 0; \
|
||||||
|
int top = size_of_array - 1; \
|
||||||
|
\
|
||||||
|
while (bottom <= top) \
|
||||||
|
{ \
|
||||||
|
int middle = (bottom + top) / 2; \
|
||||||
|
char_type current_sp = array_sp[middle]; \
|
||||||
|
\
|
||||||
|
if (current_sp <= c && c <= current_sp + lengths[middle]) \
|
||||||
|
{ \
|
||||||
|
return true; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
if (c > current_sp) \
|
||||||
|
{ \
|
||||||
|
bottom = middle + 1; \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
top = middle - 1; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
return false; \
|
||||||
|
} /* function_name */
|
||||||
|
|
||||||
while (bottom <= top)
|
LIT_SEARCH_CHAR_IN_INTERVAL_ARRAY_FN (lit_search_char_in_interval_array, ecma_char_t, uint16_t, uint8_t)
|
||||||
{
|
|
||||||
int middle = (bottom + top) / 2;
|
|
||||||
ecma_char_t current_sp = array_sp[middle];
|
|
||||||
|
|
||||||
if (current_sp <= c && c <= current_sp + lengths[middle])
|
#if ENABLED (JERRY_ESNEXT)
|
||||||
{
|
LIT_SEARCH_CHAR_IN_INTERVAL_ARRAY_FN (lit_search_codepoint_in_interval_array, lit_code_point_t, uint32_t, uint16_t)
|
||||||
return true;
|
#endif /* ENABLED (JERRY_ESNEXT) */
|
||||||
}
|
|
||||||
|
|
||||||
if (c > current_sp)
|
|
||||||
{
|
|
||||||
bottom = middle + 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
top = middle - 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
} /* search_char_in_interval_array */
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if specified character is one of the Whitespace characters including those that fall into
|
* Check if specified character is one of the Whitespace characters including those that fall into
|
||||||
@@ -116,20 +133,18 @@ lit_char_is_white_space (lit_code_point_t c) /**< code point */
|
|||||||
{
|
{
|
||||||
return (c == LIT_CHAR_SP || (c >= LIT_CHAR_TAB && c <= LIT_CHAR_CR));
|
return (c == LIT_CHAR_SP || (c >= LIT_CHAR_TAB && c <= LIT_CHAR_CR));
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
if (c == LIT_CHAR_NBSP || c == LIT_CHAR_BOM || c == LIT_CHAR_LS || c == LIT_CHAR_PS)
|
if (c == LIT_CHAR_NBSP || c == LIT_CHAR_BOM || c == LIT_CHAR_LS || c == LIT_CHAR_PS)
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (c <= LIT_UTF16_CODE_UNIT_MAX
|
return (c <= LIT_UTF16_CODE_UNIT_MAX
|
||||||
&& ((c >= lit_unicode_separator_char_interval_sps[0]
|
&& ((c >= lit_unicode_white_space_interval_starts[0]
|
||||||
&& c < lit_unicode_separator_char_interval_sps[0] + lit_unicode_separator_char_interval_lengths[0])
|
&& c < lit_unicode_white_space_interval_starts[0] + lit_unicode_white_space_interval_lengths[0])
|
||||||
|| search_char_in_char_array ((ecma_char_t) c,
|
|| lit_search_char_in_array ((ecma_char_t) c,
|
||||||
lit_unicode_separator_chars,
|
lit_unicode_white_space_chars,
|
||||||
NUM_OF_ELEMENTS (lit_unicode_separator_chars))));
|
NUM_OF_ELEMENTS (lit_unicode_white_space_chars))));
|
||||||
}
|
|
||||||
} /* lit_char_is_white_space */
|
} /* lit_char_is_white_space */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -148,58 +163,84 @@ lit_char_is_line_terminator (ecma_char_t c) /**< code unit */
|
|||||||
} /* lit_char_is_line_terminator */
|
} /* lit_char_is_line_terminator */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if specified character is a unicode letter
|
* Check if specified character is a Unicode ID_Start
|
||||||
*
|
|
||||||
* Note:
|
|
||||||
* Unicode letter is a character, included into one of the following categories:
|
|
||||||
* - Uppercase letter (Lu);
|
|
||||||
* - Lowercase letter (Ll);
|
|
||||||
* - Titlecase letter (Lt);
|
|
||||||
* - Modifier letter (Lm);
|
|
||||||
* - Other letter (Lo);
|
|
||||||
* - Letter number (Nl).
|
|
||||||
*
|
*
|
||||||
* See also:
|
* See also:
|
||||||
* ECMA-262 v5, 7.6
|
* ECMA-262 v1, 11.6: UnicodeIDStart
|
||||||
*
|
*
|
||||||
* @return true - if specified character falls into one of the listed categories,
|
* @return true - if the codepoint has Unicode property "ID_Start"
|
||||||
* false - otherwise
|
* false - otherwise
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
lit_char_is_unicode_letter (ecma_char_t c) /**< code unit */
|
lit_char_is_unicode_id_start (lit_code_point_t code_point) /**< code unit */
|
||||||
{
|
{
|
||||||
return (search_char_in_interval_array (c,
|
#if ENABLED (JERRY_ESNEXT)
|
||||||
lit_unicode_letter_interval_sps,
|
if (JERRY_UNLIKELY (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN))
|
||||||
lit_unicode_letter_interval_lengths,
|
{
|
||||||
NUM_OF_ELEMENTS (lit_unicode_letter_interval_sps))
|
return (lit_search_codepoint_in_interval_array (code_point,
|
||||||
|| search_char_in_char_array (c, lit_unicode_letter_chars, NUM_OF_ELEMENTS (lit_unicode_letter_chars)));
|
lit_unicode_id_start_interval_starts_sup,
|
||||||
} /* lit_char_is_unicode_letter */
|
lit_unicode_id_start_interval_lengths_sup,
|
||||||
|
NUM_OF_ELEMENTS (lit_unicode_id_start_interval_starts_sup))
|
||||||
|
|| lit_search_codepoint_in_array (code_point,
|
||||||
|
lit_unicode_id_start_chars_sup,
|
||||||
|
NUM_OF_ELEMENTS (lit_unicode_id_start_chars_sup)));
|
||||||
|
}
|
||||||
|
#else /* !ENABLED (JERRY_ESNEXT) */
|
||||||
|
JERRY_ASSERT (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN);
|
||||||
|
#endif /* ENABLED (JERRY_ESNEXT) */
|
||||||
|
|
||||||
|
ecma_char_t c = (ecma_char_t) code_point;
|
||||||
|
|
||||||
|
return (lit_search_char_in_interval_array (c,
|
||||||
|
lit_unicode_id_start_interval_starts,
|
||||||
|
lit_unicode_id_start_interval_lengths,
|
||||||
|
NUM_OF_ELEMENTS (lit_unicode_id_start_interval_starts))
|
||||||
|
|| lit_search_char_in_array (c, lit_unicode_id_start_chars, NUM_OF_ELEMENTS (lit_unicode_id_start_chars)));
|
||||||
|
} /* lit_char_is_unicode_id_start */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if specified character is a non-letter character and can be used as a
|
* Check if specified character is a Unicode ID_Continue
|
||||||
* non-first character of an identifier.
|
|
||||||
* These characters are covered by the following unicode categories:
|
|
||||||
* - digit (Nd)
|
|
||||||
* - punctuation mark (Mn, Mc)
|
|
||||||
* - connector punctuation (Pc)
|
|
||||||
*
|
*
|
||||||
* See also:
|
* See also:
|
||||||
* ECMA-262 v5, 7.6
|
* ECMA-262 v1, 11.6: UnicodeIDContinue
|
||||||
*
|
*
|
||||||
* @return true - if specified character falls into one of the listed categories,
|
* @return true - if the codepoint has Unicode property "ID_Continue"
|
||||||
* false - otherwise
|
* false - otherwise
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
lit_char_is_unicode_non_letter_ident_part (ecma_char_t c) /**< code unit */
|
lit_char_is_unicode_id_continue (lit_code_point_t code_point) /**< code unit */
|
||||||
{
|
{
|
||||||
return (search_char_in_interval_array (c,
|
/* Each ID_Start codepoint is ID_Continue as well. */
|
||||||
lit_unicode_non_letter_ident_part_interval_sps,
|
if (lit_char_is_unicode_id_start (code_point))
|
||||||
lit_unicode_non_letter_ident_part_interval_lengths,
|
{
|
||||||
NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_interval_sps))
|
return true;
|
||||||
|| search_char_in_char_array (c,
|
}
|
||||||
lit_unicode_non_letter_ident_part_chars,
|
|
||||||
NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_chars)));
|
#if ENABLED (JERRY_ESNEXT)
|
||||||
} /* lit_char_is_unicode_non_letter_ident_part */
|
if (JERRY_UNLIKELY (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN))
|
||||||
|
{
|
||||||
|
return (lit_search_codepoint_in_interval_array (code_point,
|
||||||
|
lit_unicode_id_continue_interval_starts_sup,
|
||||||
|
lit_unicode_id_continue_interval_lengths_sup,
|
||||||
|
NUM_OF_ELEMENTS (lit_unicode_id_continue_interval_starts_sup))
|
||||||
|
|| lit_search_codepoint_in_array (code_point,
|
||||||
|
lit_unicode_id_continue_chars_sup,
|
||||||
|
NUM_OF_ELEMENTS (lit_unicode_id_continue_chars_sup)));
|
||||||
|
}
|
||||||
|
#else /* !ENABLED (JERRY_ESNEXT) */
|
||||||
|
JERRY_ASSERT (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN);
|
||||||
|
#endif /* ENABLED (JERRY_ESNEXT) */
|
||||||
|
|
||||||
|
ecma_char_t c = (ecma_char_t) code_point;
|
||||||
|
|
||||||
|
return (lit_search_char_in_interval_array (c,
|
||||||
|
lit_unicode_id_continue_interval_starts,
|
||||||
|
lit_unicode_id_continue_interval_lengths,
|
||||||
|
NUM_OF_ELEMENTS (lit_unicode_id_continue_interval_starts))
|
||||||
|
|| lit_search_char_in_array (c,
|
||||||
|
lit_unicode_id_continue_chars,
|
||||||
|
NUM_OF_ELEMENTS (lit_unicode_id_continue_chars)));
|
||||||
|
} /* lit_char_is_unicode_id_continue */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks whether the character is a valid identifier start.
|
* Checks whether the character is a valid identifier start.
|
||||||
@@ -218,17 +259,7 @@ lit_code_point_is_identifier_start (lit_code_point_t code_point) /**< code point
|
|||||||
|| code_point == LIT_CHAR_UNDERSCORE);
|
|| code_point == LIT_CHAR_UNDERSCORE);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if ENABLED (JERRY_ESNEXT)
|
return lit_char_is_unicode_id_start (code_point);
|
||||||
if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)
|
|
||||||
{
|
|
||||||
/* TODO: detect these ranges correctly. */
|
|
||||||
return (code_point >= 0x10C80 && code_point <= 0x10CF2);
|
|
||||||
}
|
|
||||||
#else /* !ENABLED (JERRY_ESNEXT) */
|
|
||||||
JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN);
|
|
||||||
#endif /* ENABLED (JERRY_ESNEXT) */
|
|
||||||
|
|
||||||
return lit_char_is_unicode_letter ((ecma_char_t) code_point);
|
|
||||||
} /* lit_code_point_is_identifier_start */
|
} /* lit_code_point_is_identifier_start */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -249,18 +280,7 @@ lit_code_point_is_identifier_part (lit_code_point_t code_point) /**< code point
|
|||||||
|| code_point == LIT_CHAR_UNDERSCORE);
|
|| code_point == LIT_CHAR_UNDERSCORE);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if ENABLED (JERRY_ESNEXT)
|
return lit_char_is_unicode_id_continue (code_point);
|
||||||
if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)
|
|
||||||
{
|
|
||||||
/* TODO: detect these ranges correctly. */
|
|
||||||
return (code_point >= 0x10C80 && code_point <= 0x10CF2);
|
|
||||||
}
|
|
||||||
#else /* !ENABLED (JERRY_ESNEXT) */
|
|
||||||
JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN);
|
|
||||||
#endif /* ENABLED (JERRY_ESNEXT) */
|
|
||||||
|
|
||||||
return (lit_char_is_unicode_letter ((ecma_char_t) code_point)
|
|
||||||
|| lit_char_is_unicode_non_letter_ident_part ((ecma_char_t) code_point));
|
|
||||||
} /* lit_code_point_is_identifier_part */
|
} /* lit_code_point_is_identifier_part */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -519,16 +539,27 @@ lit_char_is_word_char (lit_code_point_t c) /**< code point */
|
|||||||
/**
|
/**
|
||||||
* Check if the specified character is in one of those tables which contain bidirectional conversions.
|
* Check if the specified character is in one of those tables which contain bidirectional conversions.
|
||||||
*
|
*
|
||||||
* @return the mapped character sequence of an ecma character, if it's in the table.
|
* @return codepoint of the converted character if it is found in the tables
|
||||||
* 0 - otherwise.
|
* LIT_INVALID_CP - otherwise.
|
||||||
*/
|
*/
|
||||||
static ecma_length_t
|
static lit_code_point_t
|
||||||
search_in_bidirectional_conversion_tables (ecma_char_t character, /**< code unit */
|
lit_search_in_bidirectional_conversion_tables (lit_code_point_t cp, /**< code point */
|
||||||
ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
|
|
||||||
bool is_lowercase) /**< is lowercase conversion */
|
bool is_lowercase) /**< is lowercase conversion */
|
||||||
{
|
{
|
||||||
/* 1, Check if the specified character is part of the lit_character_case_ranges table. */
|
/* 1, Check if the specified character is part of the lit_unicode_character_case_ranges_{sup} table. */
|
||||||
int number_of_case_ranges = NUM_OF_ELEMENTS (lit_character_case_ranges);
|
int number_of_case_ranges;
|
||||||
|
#if ENABLED (JERRY_ESNEXT)
|
||||||
|
bool is_supplementary = cp > LIT_UTF16_CODE_UNIT_MAX;
|
||||||
|
if (is_supplementary)
|
||||||
|
{
|
||||||
|
number_of_case_ranges = NUM_OF_ELEMENTS (lit_unicode_character_case_ranges_sup);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif /* ENABLED (JERRY_ESNEXT) */
|
||||||
|
{
|
||||||
|
number_of_case_ranges = NUM_OF_ELEMENTS (lit_unicode_character_case_ranges);
|
||||||
|
}
|
||||||
|
|
||||||
int conv_counter = 0;
|
int conv_counter = 0;
|
||||||
|
|
||||||
for (int i = 0; i < number_of_case_ranges; i++)
|
for (int i = 0; i < number_of_case_ranges; i++)
|
||||||
@@ -538,54 +569,92 @@ search_in_bidirectional_conversion_tables (ecma_char_t character, /**< co
|
|||||||
conv_counter++;
|
conv_counter++;
|
||||||
}
|
}
|
||||||
|
|
||||||
int range_length = lit_character_case_range_lengths[conv_counter];
|
size_t range_length;
|
||||||
ecma_char_t start_point = lit_character_case_ranges[i];
|
lit_code_point_t start_point;
|
||||||
|
#if ENABLED (JERRY_ESNEXT)
|
||||||
|
if (is_supplementary)
|
||||||
|
{
|
||||||
|
range_length = lit_unicode_character_case_range_lengths_sup[conv_counter];
|
||||||
|
start_point = lit_unicode_character_case_ranges_sup[i];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif /* ENABLED (JERRY_ESNEXT) */
|
||||||
|
{
|
||||||
|
range_length = lit_unicode_character_case_range_lengths[conv_counter];
|
||||||
|
start_point = lit_unicode_character_case_ranges[i];
|
||||||
|
}
|
||||||
|
|
||||||
if (start_point > character || character >= start_point + range_length)
|
if (start_point > cp || cp >= start_point + range_length)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
int char_dist = character - start_point;
|
uint32_t char_dist = (uint32_t) cp - start_point;
|
||||||
|
int offset;
|
||||||
if (i % 2 == 0)
|
if (i % 2 == 0)
|
||||||
{
|
{
|
||||||
output_buffer_p[0] = is_lowercase ? (ecma_char_t) (lit_character_case_ranges[i + 1] + char_dist) : character;
|
if (!is_lowercase)
|
||||||
|
{
|
||||||
|
return cp;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = i + 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (lit_character_case_ranges[i - 1] + char_dist);
|
if (is_lowercase)
|
||||||
|
{
|
||||||
|
return cp;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
offset = i - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if ENABLED (JERRY_ESNEXT)
|
||||||
|
if (is_supplementary)
|
||||||
|
{
|
||||||
|
start_point = lit_unicode_character_case_ranges_sup[offset];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif /* ENABLED (JERRY_ESNEXT) */
|
||||||
|
{
|
||||||
|
start_point = lit_unicode_character_case_ranges[offset];
|
||||||
|
}
|
||||||
|
|
||||||
|
return (lit_code_point_t) (start_point + char_dist);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Note: After this point based on the latest unicode standard(13.0.0.6) no conversion characters are
|
||||||
|
defined for supplementary planes */
|
||||||
|
#if ENABLED (JERRY_ESNEXT)
|
||||||
|
if (is_supplementary)
|
||||||
|
{
|
||||||
|
return cp;
|
||||||
|
}
|
||||||
|
#endif /* ENABLED (JERRY_ESNEXT) */
|
||||||
|
|
||||||
/* 2, Check if the specified character is part of the character_pair_ranges table. */
|
/* 2, Check if the specified character is part of the character_pair_ranges table. */
|
||||||
int bottom = 0;
|
int bottom = 0;
|
||||||
int top = NUM_OF_ELEMENTS (lit_character_pair_ranges) - 1;
|
int top = NUM_OF_ELEMENTS (lit_unicode_character_pair_ranges) - 1;
|
||||||
|
|
||||||
while (bottom <= top)
|
while (bottom <= top)
|
||||||
{
|
{
|
||||||
int middle = (bottom + top) / 2;
|
int middle = (bottom + top) / 2;
|
||||||
ecma_char_t current_sp = lit_character_pair_ranges[middle];
|
lit_code_point_t current_sp = lit_unicode_character_pair_ranges[middle];
|
||||||
|
|
||||||
if (current_sp <= character && character < current_sp + lit_character_pair_range_lengths[middle])
|
if (current_sp <= cp && cp < current_sp + lit_unicode_character_pair_range_lengths[middle])
|
||||||
{
|
{
|
||||||
int char_dist = character - current_sp;
|
uint32_t char_dist = (uint32_t) (cp - current_sp);
|
||||||
|
|
||||||
if ((character - current_sp) % 2 == 0)
|
if ((cp - current_sp) % 2 == 0)
|
||||||
{
|
{
|
||||||
output_buffer_p[0] = is_lowercase ? (ecma_char_t) (current_sp + char_dist + 1) : character;
|
return is_lowercase ? (lit_code_point_t) (current_sp + char_dist + 1) : cp;
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (current_sp + char_dist - 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return is_lowercase ? cp : (lit_code_point_t) (current_sp + char_dist - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (character > current_sp)
|
if (cp > current_sp)
|
||||||
{
|
{
|
||||||
bottom = middle + 1;
|
bottom = middle + 1;
|
||||||
}
|
}
|
||||||
@@ -596,39 +665,35 @@ search_in_bidirectional_conversion_tables (ecma_char_t character, /**< co
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* 3, Check if the specified character is part of the character_pairs table. */
|
/* 3, Check if the specified character is part of the character_pairs table. */
|
||||||
int number_of_character_pairs = NUM_OF_ELEMENTS (lit_character_pairs);
|
int number_of_character_pairs = NUM_OF_ELEMENTS (lit_unicode_character_pairs);
|
||||||
|
|
||||||
for (int i = 0; i < number_of_character_pairs; i++)
|
for (int i = 0; i < number_of_character_pairs; i++)
|
||||||
{
|
{
|
||||||
if (character != lit_character_pairs[i])
|
if (cp != lit_unicode_character_pairs[i])
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i % 2 == 0)
|
if (i % 2 == 0)
|
||||||
{
|
{
|
||||||
output_buffer_p[0] = is_lowercase ? lit_character_pairs[i + 1] : character;
|
return is_lowercase ? lit_unicode_character_pairs[i + 1] : cp;
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
output_buffer_p[0] = is_lowercase ? character : lit_character_pairs[i - 1];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return is_lowercase ? cp : lit_unicode_character_pairs[i - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return LIT_INVALID_CP;
|
||||||
} /* search_in_bidirectional_conversion_tables */
|
} /* lit_search_in_bidirectional_conversion_tables */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if the specified character is in the given conversion table.
|
* Check if the specified character is in the given conversion table.
|
||||||
*
|
*
|
||||||
* @return the mapped character sequence of an ecma character, if it's in the table.
|
* @return LIT_MULTIPLE_CU if the converted character consists of more than a single code unit
|
||||||
* 0 - otherwise.
|
* converted code point - otherwise
|
||||||
*/
|
*/
|
||||||
static ecma_length_t
|
static lit_code_point_t
|
||||||
search_in_conversion_table (ecma_char_t character, /**< code unit */
|
lit_search_in_conversion_table (ecma_char_t character, /**< code unit */
|
||||||
ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
|
ecma_stringbuilder_t *builder_p, /**< string builder */
|
||||||
const ecma_char_t *array, /**< array */
|
const ecma_char_t *array, /**< array */
|
||||||
const uint8_t *counters) /**< case_values counter */
|
const uint8_t *counters) /**< case_values counter */
|
||||||
{
|
{
|
||||||
@@ -653,28 +718,21 @@ search_in_conversion_table (ecma_char_t character, /**< code unit */
|
|||||||
|
|
||||||
if (current == character)
|
if (current == character)
|
||||||
{
|
{
|
||||||
ecma_length_t char_sequence = 1;
|
if (builder_p != NULL)
|
||||||
|
{
|
||||||
|
ecma_stringbuilder_append_char (builder_p, array[middle + 1]);
|
||||||
|
|
||||||
switch (size_of_case_value)
|
if (size_of_case_value > 1)
|
||||||
{
|
{
|
||||||
case 3:
|
ecma_stringbuilder_append_char (builder_p, array[middle + 2]);
|
||||||
{
|
|
||||||
output_buffer_p[2] = array[middle + 3];
|
|
||||||
char_sequence++;
|
|
||||||
/* FALLTHRU */
|
|
||||||
}
|
}
|
||||||
case 2:
|
if (size_of_case_value > 2)
|
||||||
{
|
{
|
||||||
output_buffer_p[1] = array[middle + 2];
|
ecma_stringbuilder_append_char (builder_p, array[middle + 3]);
|
||||||
char_sequence++;
|
|
||||||
/* FALLTHRU */
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
output_buffer_p[0] = array[middle + 1];
|
|
||||||
return char_sequence;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return size_of_case_value == 1 ? array[middle + 1]: LIT_MULTIPLE_CU;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (character < current)
|
if (character < current)
|
||||||
@@ -688,127 +746,151 @@ search_in_conversion_table (ecma_char_t character, /**< code unit */
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
if (builder_p != NULL)
|
||||||
} /* search_in_conversion_table */
|
{
|
||||||
|
ecma_stringbuilder_append_char (builder_p, character);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (lit_code_point_t) character;
|
||||||
|
} /* lit_search_in_conversion_table */
|
||||||
#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
|
#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the lowercase character sequence of an ecma character.
|
* Append the converted lowercase code unit sequence of a given code point to the string builder, if one is provided.
|
||||||
*
|
*
|
||||||
* Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
|
* @return LIT_MULTIPLE_CU if the converted code point consists of more than a single code unit
|
||||||
*
|
* converted code point - otherwise
|
||||||
* @return the length of the lowercase character sequence
|
|
||||||
* which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
|
|
||||||
*/
|
*/
|
||||||
ecma_length_t
|
lit_code_point_t
|
||||||
lit_char_to_lower_case (ecma_char_t character, /**< input character value */
|
lit_char_to_lower_case (lit_code_point_t cp, /**< code point */
|
||||||
ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
|
ecma_stringbuilder_t *builder_p) /**< string builder */
|
||||||
ecma_length_t buffer_size) /**< buffer size */
|
|
||||||
{
|
{
|
||||||
JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
if (cp >= LIT_CHAR_UPPERCASE_A && cp <= LIT_CHAR_UPPERCASE_Z)
|
||||||
|
|
||||||
if (character >= LIT_CHAR_UPPERCASE_A && character <= LIT_CHAR_UPPERCASE_Z)
|
|
||||||
{
|
{
|
||||||
output_buffer_p[0] = (ecma_char_t) (character + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
|
lit_utf8_byte_t lowercase_char = (lit_utf8_byte_t) (cp + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
|
||||||
return 1;
|
|
||||||
|
if (builder_p != NULL)
|
||||||
|
{
|
||||||
|
ecma_stringbuilder_append_byte (builder_p, lowercase_char);
|
||||||
|
}
|
||||||
|
|
||||||
|
return lowercase_char;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
|
#if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
|
||||||
|
lit_code_point_t lowercase_cp = lit_search_in_bidirectional_conversion_tables (cp, true);
|
||||||
|
|
||||||
ecma_length_t lowercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, true);
|
if (lowercase_cp != LIT_INVALID_CP)
|
||||||
|
|
||||||
if (lowercase_sequence != 0)
|
|
||||||
{
|
{
|
||||||
return lowercase_sequence;
|
if (builder_p != NULL)
|
||||||
|
{
|
||||||
|
ecma_stringbuilder_append_codepoint (builder_p, lowercase_cp);
|
||||||
}
|
}
|
||||||
|
|
||||||
int num_of_lowercase_ranges = NUM_OF_ELEMENTS (lit_lower_case_ranges);
|
return lowercase_cp;
|
||||||
|
}
|
||||||
|
|
||||||
|
JERRY_ASSERT (cp < LIT_UTF8_4_BYTE_CODE_POINT_MIN);
|
||||||
|
|
||||||
|
int num_of_lowercase_ranges = NUM_OF_ELEMENTS (lit_unicode_lower_case_ranges);
|
||||||
|
|
||||||
for (int i = 0, j = 0; i < num_of_lowercase_ranges; i += 2, j++)
|
for (int i = 0, j = 0; i < num_of_lowercase_ranges; i += 2, j++)
|
||||||
{
|
{
|
||||||
int range_length = lit_lower_case_range_lengths[j] - 1;
|
JERRY_ASSERT (lit_unicode_lower_case_range_lengths[j] > 0);
|
||||||
ecma_char_t start_point = lit_lower_case_ranges[i];
|
uint32_t range_length = (uint32_t) (lit_unicode_lower_case_range_lengths[j] - 1);
|
||||||
|
lit_code_point_t start_point = lit_unicode_lower_case_ranges[i];
|
||||||
|
|
||||||
if (start_point <= character && character <= start_point + range_length)
|
if (start_point <= cp && cp <= start_point + range_length)
|
||||||
{
|
{
|
||||||
output_buffer_p[0] = (ecma_char_t) (lit_lower_case_ranges[i + 1] + (character - start_point));
|
lowercase_cp = lit_unicode_lower_case_ranges[i + 1] + (cp - start_point);
|
||||||
return 1;
|
if (builder_p != NULL)
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
lowercase_sequence = search_in_conversion_table (character,
|
|
||||||
output_buffer_p,
|
|
||||||
lit_lower_case_conversions,
|
|
||||||
lit_lower_case_conversion_counters);
|
|
||||||
|
|
||||||
if (lowercase_sequence != 0)
|
|
||||||
{
|
{
|
||||||
return lowercase_sequence;
|
ecma_stringbuilder_append_codepoint (builder_p, lowercase_cp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return lowercase_cp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return lit_search_in_conversion_table ((ecma_char_t) cp,
|
||||||
|
builder_p,
|
||||||
|
lit_unicode_lower_case_conversions,
|
||||||
|
lit_unicode_lower_case_conversion_counters);
|
||||||
|
#else /* !ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
|
||||||
|
if (builder_p != NULL)
|
||||||
|
{
|
||||||
|
ecma_stringbuilder_append_codepoint (builder_p, cp);
|
||||||
|
}
|
||||||
|
|
||||||
|
return cp;
|
||||||
#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
|
#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
|
||||||
|
|
||||||
output_buffer_p[0] = character;
|
|
||||||
return 1;
|
|
||||||
} /* lit_char_to_lower_case */
|
} /* lit_char_to_lower_case */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the uppercase character sequence of an ecma character.
|
* Append the converted uppercase code unit sequence of a given code point to the string builder, if one is provided.
|
||||||
*
|
*
|
||||||
* Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
|
* @return LIT_MULTIPLE_CU if the converted code point consists of more than a single code unit
|
||||||
*
|
* converted code point - otherwise
|
||||||
* @return the length of the uppercase character sequence
|
|
||||||
* which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
|
|
||||||
*/
|
*/
|
||||||
ecma_length_t
|
lit_code_point_t
|
||||||
lit_char_to_upper_case (ecma_char_t character, /**< input character value */
|
lit_char_to_upper_case (lit_code_point_t cp, /**< code point */
|
||||||
ecma_char_t *output_buffer_p, /**< buffer for the result characters */
|
ecma_stringbuilder_t *builder_p) /**< string builder */
|
||||||
ecma_length_t buffer_size) /**< buffer size */
|
|
||||||
{
|
{
|
||||||
JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
if (cp >= LIT_CHAR_LOWERCASE_A && cp <= LIT_CHAR_LOWERCASE_Z)
|
||||||
|
|
||||||
if (character >= LIT_CHAR_LOWERCASE_A && character <= LIT_CHAR_LOWERCASE_Z)
|
|
||||||
{
|
{
|
||||||
output_buffer_p[0] = (ecma_char_t) (character - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
|
lit_utf8_byte_t uppercase_char = (lit_utf8_byte_t) (cp - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
|
||||||
return 1;
|
|
||||||
|
if (builder_p != NULL)
|
||||||
|
{
|
||||||
|
ecma_stringbuilder_append_byte (builder_p, uppercase_char);
|
||||||
|
}
|
||||||
|
|
||||||
|
return uppercase_char;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
|
#if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
|
||||||
|
lit_code_point_t uppercase_cp = lit_search_in_bidirectional_conversion_tables (cp, false);
|
||||||
|
|
||||||
ecma_length_t uppercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, false);
|
if (uppercase_cp != LIT_INVALID_CP)
|
||||||
|
|
||||||
if (uppercase_sequence != 0)
|
|
||||||
{
|
{
|
||||||
return uppercase_sequence;
|
if (builder_p != NULL)
|
||||||
|
{
|
||||||
|
ecma_stringbuilder_append_codepoint (builder_p, uppercase_cp);
|
||||||
}
|
}
|
||||||
|
|
||||||
int num_of_upper_case_special_ranges = NUM_OF_ELEMENTS (lit_upper_case_special_ranges);
|
return uppercase_cp;
|
||||||
|
}
|
||||||
|
|
||||||
|
int num_of_upper_case_special_ranges = NUM_OF_ELEMENTS (lit_unicode_upper_case_special_ranges);
|
||||||
|
|
||||||
for (int i = 0, j = 0; i < num_of_upper_case_special_ranges; i += 3, j++)
|
for (int i = 0, j = 0; i < num_of_upper_case_special_ranges; i += 3, j++)
|
||||||
{
|
{
|
||||||
int range_length = lit_upper_case_special_range_lengths[j];
|
uint32_t range_length = lit_unicode_upper_case_special_range_lengths[j];
|
||||||
ecma_char_t start_point = lit_upper_case_special_ranges[i];
|
ecma_char_t start_point = lit_unicode_upper_case_special_ranges[i];
|
||||||
|
|
||||||
if (start_point <= character && character <= start_point + range_length)
|
if (start_point <= cp && cp <= start_point + range_length)
|
||||||
{
|
{
|
||||||
output_buffer_p[0] = (ecma_char_t) (lit_upper_case_special_ranges[i + 1] + (character - start_point));
|
if (builder_p != NULL)
|
||||||
output_buffer_p[1] = (ecma_char_t) (lit_upper_case_special_ranges[i + 2]);
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uppercase_sequence = search_in_conversion_table (character,
|
|
||||||
output_buffer_p,
|
|
||||||
lit_upper_case_conversions,
|
|
||||||
lit_upper_case_conversion_counters);
|
|
||||||
|
|
||||||
if (uppercase_sequence != 0)
|
|
||||||
{
|
{
|
||||||
return uppercase_sequence;
|
uppercase_cp = lit_unicode_upper_case_special_ranges[i + 1] + (cp - start_point);
|
||||||
|
ecma_stringbuilder_append_codepoint (builder_p, uppercase_cp);
|
||||||
|
ecma_stringbuilder_append_codepoint (builder_p, lit_unicode_upper_case_special_ranges[i + 2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return LIT_MULTIPLE_CU;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return lit_search_in_conversion_table ((ecma_char_t) cp,
|
||||||
|
builder_p,
|
||||||
|
lit_unicode_upper_case_conversions,
|
||||||
|
lit_unicode_upper_case_conversion_counters);
|
||||||
|
#else /* !ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
|
||||||
|
if (builder_p != NULL)
|
||||||
|
{
|
||||||
|
ecma_stringbuilder_append_codepoint (builder_p, cp);
|
||||||
|
}
|
||||||
|
|
||||||
|
return cp;
|
||||||
#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
|
#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
|
||||||
|
|
||||||
output_buffer_p[0] = character;
|
|
||||||
return 1;
|
|
||||||
} /* lit_char_to_upper_case */
|
} /* lit_char_to_upper_case */
|
||||||
|
|||||||
@@ -18,6 +18,16 @@
|
|||||||
|
|
||||||
#include "lit-globals.h"
|
#include "lit-globals.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Invalid character code point
|
||||||
|
*/
|
||||||
|
#define LIT_INVALID_CP 0xFFFFFFFF
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result of lit_char_to_lower_case/lit_char_to_upper_case consists of more than a single code unit
|
||||||
|
*/
|
||||||
|
#define LIT_MULTIPLE_CU 0xFFFFFFFE
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Format control characters (ECMA-262 v5, Table 1)
|
* Format control characters (ECMA-262 v5, Table 1)
|
||||||
*/
|
*/
|
||||||
@@ -234,12 +244,7 @@ bool lit_char_is_word_char (lit_code_point_t c);
|
|||||||
* Utility functions for uppercasing / lowercasing
|
* Utility functions for uppercasing / lowercasing
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
lit_code_point_t lit_char_to_lower_case (lit_code_point_t cp, ecma_stringbuilder_t *builder_p);
|
||||||
* Minimum buffer size for lit_char_to_lower_case / lit_char_to_upper_case functions.
|
lit_code_point_t lit_char_to_upper_case (lit_code_point_t cp, ecma_stringbuilder_t *builder_p);
|
||||||
*/
|
|
||||||
#define LIT_MAXIMUM_OTHER_CASE_LENGTH (3)
|
|
||||||
|
|
||||||
ecma_length_t lit_char_to_lower_case (ecma_char_t character, ecma_char_t *output_buffer_p, ecma_length_t buffer_size);
|
|
||||||
ecma_length_t lit_char_to_upper_case (ecma_char_t character, ecma_char_t *output_buffer_p, ecma_length_t buffer_size);
|
|
||||||
|
|
||||||
#endif /* !LIT_CHAR_HELPERS_H */
|
#endif /* !LIT_CHAR_HELPERS_H */
|
||||||
|
|||||||
@@ -0,0 +1,30 @@
|
|||||||
|
/* Copyright JS Foundation and other contributors, http://js.foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This file is automatically generated by the gen-unicode.py script
|
||||||
|
* from UnicodeData.txt and SpecialCasing.txt files. Do not edit! */
|
||||||
|
|
||||||
|
/* Contains start points of character case ranges (these are bidirectional conversions). */
|
||||||
|
static const uint32_t lit_unicode_character_case_ranges_sup[] JERRY_ATTR_CONST_DATA =
|
||||||
|
{
|
||||||
|
0x010400, 0x010428, 0x0104b0, 0x0104d8, 0x010c80, 0x010cc0, 0x0118a0, 0x0118c0, 0x016e40, 0x016e60,
|
||||||
|
0x01e900, 0x01e922
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Interval lengths of start points in `character_case_ranges` table. */
|
||||||
|
static const uint16_t lit_unicode_character_case_range_lengths_sup[] JERRY_ATTR_CONST_DATA =
|
||||||
|
{
|
||||||
|
0x000028, 0x000024, 0x000033, 0x000020, 0x000020, 0x000022
|
||||||
|
};
|
||||||
@@ -14,10 +14,10 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* This file is automatically generated by the gen-unicode.py script
|
/* This file is automatically generated by the gen-unicode.py script
|
||||||
* from UnicodeData-13.0.0d6.txt and SpecialCasing-13.0.0d1.txt files. Do not edit! */
|
* from UnicodeData.txt and SpecialCasing.txt files. Do not edit! */
|
||||||
|
|
||||||
/* Contains start points of character case ranges (these are bidirectional conversions). */
|
/* Contains start points of character case ranges (these are bidirectional conversions). */
|
||||||
static const uint16_t lit_character_case_ranges[] JERRY_ATTR_CONST_DATA =
|
static const uint16_t lit_unicode_character_case_ranges[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x00c0, 0x00e0, 0x00d8, 0x00f8, 0x0189, 0x0256, 0x01b1, 0x028a, 0x0388, 0x03ad,
|
0x00c0, 0x00e0, 0x00d8, 0x00f8, 0x0189, 0x0256, 0x01b1, 0x028a, 0x0388, 0x03ad,
|
||||||
0x038e, 0x03cd, 0x0391, 0x03b1, 0x03a3, 0x03c3, 0x03fd, 0x037b, 0x0400, 0x0450,
|
0x038e, 0x03cd, 0x0391, 0x03b1, 0x03a3, 0x03c3, 0x03fd, 0x037b, 0x0400, 0x0450,
|
||||||
@@ -30,7 +30,7 @@ static const uint16_t lit_character_case_ranges[] JERRY_ATTR_CONST_DATA =
|
|||||||
};
|
};
|
||||||
|
|
||||||
/* Interval lengths of start points in `character_case_ranges` table. */
|
/* Interval lengths of start points in `character_case_ranges` table. */
|
||||||
static const uint8_t lit_character_case_range_lengths[] JERRY_ATTR_CONST_DATA =
|
static const uint8_t lit_unicode_character_case_range_lengths[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x0017, 0x0007, 0x0002, 0x0002, 0x0003, 0x0002, 0x0011, 0x0009, 0x0003, 0x0010,
|
0x0017, 0x0007, 0x0002, 0x0002, 0x0003, 0x0002, 0x0011, 0x0009, 0x0003, 0x0010,
|
||||||
0x0020, 0x0026, 0x0026, 0x0050, 0x0006, 0x002b, 0x0003, 0x0008, 0x0006, 0x0008,
|
0x0020, 0x0026, 0x0026, 0x0050, 0x0006, 0x002b, 0x0003, 0x0008, 0x0006, 0x0008,
|
||||||
@@ -39,7 +39,7 @@ static const uint8_t lit_character_case_range_lengths[] JERRY_ATTR_CONST_DATA =
|
|||||||
};
|
};
|
||||||
|
|
||||||
/* Contains the start points of bidirectional conversion ranges. */
|
/* Contains the start points of bidirectional conversion ranges. */
|
||||||
static const uint16_t lit_character_pair_ranges[] JERRY_ATTR_CONST_DATA =
|
static const uint16_t lit_unicode_character_pair_ranges[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x0100, 0x0132, 0x0139, 0x014a, 0x0179, 0x0182, 0x0187, 0x018b, 0x0191, 0x0198,
|
0x0100, 0x0132, 0x0139, 0x014a, 0x0179, 0x0182, 0x0187, 0x018b, 0x0191, 0x0198,
|
||||||
0x01a0, 0x01a7, 0x01ac, 0x01af, 0x01b3, 0x01b8, 0x01bc, 0x01cd, 0x01de, 0x01f4,
|
0x01a0, 0x01a7, 0x01ac, 0x01af, 0x01b3, 0x01b8, 0x01bc, 0x01cd, 0x01de, 0x01f4,
|
||||||
@@ -50,7 +50,7 @@ static const uint16_t lit_character_pair_ranges[] JERRY_ATTR_CONST_DATA =
|
|||||||
};
|
};
|
||||||
|
|
||||||
/* Interval lengths of start points in `character_pair_ranges` table. */
|
/* Interval lengths of start points in `character_pair_ranges` table. */
|
||||||
static const uint8_t lit_character_pair_range_lengths[] JERRY_ATTR_CONST_DATA =
|
static const uint8_t lit_unicode_character_pair_range_lengths[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x0030, 0x0006, 0x0010, 0x002e, 0x0006, 0x0004, 0x0002, 0x0002, 0x0002, 0x0002,
|
0x0030, 0x0006, 0x0010, 0x002e, 0x0006, 0x0004, 0x0002, 0x0002, 0x0002, 0x0002,
|
||||||
0x0006, 0x0002, 0x0002, 0x0002, 0x0004, 0x0002, 0x0002, 0x0010, 0x0012, 0x0002,
|
0x0006, 0x0002, 0x0002, 0x0002, 0x0004, 0x0002, 0x0002, 0x0010, 0x0012, 0x0002,
|
||||||
@@ -61,7 +61,7 @@ static const uint8_t lit_character_pair_range_lengths[] JERRY_ATTR_CONST_DATA =
|
|||||||
};
|
};
|
||||||
|
|
||||||
/* Contains lower/upper case bidirectional conversion pairs. */
|
/* Contains lower/upper case bidirectional conversion pairs. */
|
||||||
static const uint16_t lit_character_pairs[] JERRY_ATTR_CONST_DATA =
|
static const uint16_t lit_unicode_character_pairs[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x0178, 0x00ff, 0x0181, 0x0253, 0x0186, 0x0254, 0x018e, 0x01dd, 0x018f, 0x0259,
|
0x0178, 0x00ff, 0x0181, 0x0253, 0x0186, 0x0254, 0x018e, 0x01dd, 0x018f, 0x0259,
|
||||||
0x0190, 0x025b, 0x0193, 0x0260, 0x0194, 0x0263, 0x0196, 0x0269, 0x0197, 0x0268,
|
0x0190, 0x025b, 0x0193, 0x0260, 0x0194, 0x0263, 0x0196, 0x0269, 0x0197, 0x0268,
|
||||||
@@ -81,20 +81,20 @@ static const uint16_t lit_character_pairs[] JERRY_ATTR_CONST_DATA =
|
|||||||
/* Contains start points of one-to-two uppercase ranges where the second character
|
/* Contains start points of one-to-two uppercase ranges where the second character
|
||||||
* is always the same.
|
* is always the same.
|
||||||
*/
|
*/
|
||||||
static const uint16_t lit_upper_case_special_ranges[] JERRY_ATTR_CONST_DATA =
|
static const uint16_t lit_unicode_upper_case_special_ranges[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x1f80, 0x1f08, 0x0399, 0x1f88, 0x1f08, 0x0399, 0x1f90, 0x1f28, 0x0399, 0x1f98,
|
0x1f80, 0x1f08, 0x0399, 0x1f88, 0x1f08, 0x0399, 0x1f90, 0x1f28, 0x0399, 0x1f98,
|
||||||
0x1f28, 0x0399, 0x1fa0, 0x1f68, 0x0399, 0x1fa8, 0x1f68, 0x0399
|
0x1f28, 0x0399, 0x1fa0, 0x1f68, 0x0399, 0x1fa8, 0x1f68, 0x0399
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Interval lengths for start points in `upper_case_special_ranges` table. */
|
/* Interval lengths for start points in `upper_case_special_ranges` table. */
|
||||||
static const uint8_t lit_upper_case_special_range_lengths[] JERRY_ATTR_CONST_DATA =
|
static const uint8_t lit_unicode_upper_case_special_range_lengths[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x0007, 0x0007, 0x0007, 0x0007, 0x0007, 0x0007
|
0x0007, 0x0007, 0x0007, 0x0007, 0x0007, 0x0007
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Contains start points of lowercase ranges. */
|
/* Contains start points of lowercase ranges. */
|
||||||
static const uint16_t lit_lower_case_ranges[] JERRY_ATTR_CONST_DATA =
|
static const uint16_t lit_unicode_lower_case_ranges[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x1e96, 0x1e96, 0x1f80, 0x1f80, 0x1f88, 0x1f80, 0x1f90, 0x1f90, 0x1f98, 0x1f90,
|
0x1e96, 0x1e96, 0x1f80, 0x1f80, 0x1f88, 0x1f80, 0x1f90, 0x1f90, 0x1f98, 0x1f90,
|
||||||
0x1fa0, 0x1fa0, 0x1fa8, 0x1fa0, 0x1fb2, 0x1fb2, 0x1fb6, 0x1fb6, 0x1fc2, 0x1fc2,
|
0x1fa0, 0x1fa0, 0x1fa8, 0x1fa0, 0x1fb2, 0x1fb2, 0x1fb6, 0x1fb6, 0x1fc2, 0x1fc2,
|
||||||
@@ -103,14 +103,14 @@ static const uint16_t lit_lower_case_ranges[] JERRY_ATTR_CONST_DATA =
|
|||||||
};
|
};
|
||||||
|
|
||||||
/* Interval lengths for start points in `lower_case_ranges` table. */
|
/* Interval lengths for start points in `lower_case_ranges` table. */
|
||||||
static const uint8_t lit_lower_case_range_lengths[] JERRY_ATTR_CONST_DATA =
|
static const uint8_t lit_unicode_lower_case_range_lengths[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x0005, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0003, 0x0002, 0x0003,
|
0x0005, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0003, 0x0002, 0x0003,
|
||||||
0x0002, 0x0002, 0x0002, 0x0003, 0x0002, 0x0003, 0x0002, 0x0007, 0x0005
|
0x0002, 0x0002, 0x0002, 0x0003, 0x0002, 0x0003, 0x0002, 0x0007, 0x0005
|
||||||
};
|
};
|
||||||
|
|
||||||
/* The remaining lowercase conversions. The lowercase variant can be one-to-three character long. */
|
/* The remaining lowercase conversions. The lowercase variant can be one-to-three character long. */
|
||||||
static const uint16_t lit_lower_case_conversions[] JERRY_ATTR_CONST_DATA =
|
static const uint16_t lit_unicode_lower_case_conversions[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x00df, 0x00df, 0x0149, 0x0149, 0x01c5, 0x01c6, 0x01c8, 0x01c9, 0x01cb, 0x01cc,
|
0x00df, 0x00df, 0x0149, 0x0149, 0x01c5, 0x01c6, 0x01c8, 0x01c9, 0x01cb, 0x01cc,
|
||||||
0x01f0, 0x01f0, 0x01f2, 0x01f3, 0x0390, 0x0390, 0x03b0, 0x03b0, 0x03f4, 0x03b8,
|
0x01f0, 0x01f0, 0x01f2, 0x01f3, 0x0390, 0x0390, 0x03b0, 0x03b0, 0x03f4, 0x03b8,
|
||||||
@@ -120,13 +120,13 @@ static const uint16_t lit_lower_case_conversions[] JERRY_ATTR_CONST_DATA =
|
|||||||
};
|
};
|
||||||
|
|
||||||
/* Number of one-to-one, one-to-two, and one-to-three lowercase conversions. */
|
/* Number of one-to-one, one-to-two, and one-to-three lowercase conversions. */
|
||||||
static const uint8_t lit_lower_case_conversion_counters[] JERRY_ATTR_CONST_DATA =
|
static const uint8_t lit_unicode_lower_case_conversion_counters[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x0016, 0x0001, 0x0000
|
0x0016, 0x0001, 0x0000
|
||||||
};
|
};
|
||||||
|
|
||||||
/* The remaining uppercase conversions. The uppercase variant can be one-to-three character long. */
|
/* The remaining uppercase conversions. The uppercase variant can be one-to-three character long. */
|
||||||
static const uint16_t lit_upper_case_conversions[] JERRY_ATTR_CONST_DATA =
|
static const uint16_t lit_unicode_upper_case_conversions[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x00b5, 0x039c, 0x0130, 0x0130, 0x0131, 0x0049, 0x017f, 0x0053, 0x01c5, 0x01c4,
|
0x00b5, 0x039c, 0x0130, 0x0130, 0x0131, 0x0049, 0x017f, 0x0053, 0x01c5, 0x01c4,
|
||||||
0x01c8, 0x01c7, 0x01cb, 0x01ca, 0x01f2, 0x01f1, 0x0345, 0x0399, 0x03c2, 0x03a3,
|
0x01c8, 0x01c7, 0x01cb, 0x01ca, 0x01f2, 0x01f1, 0x0345, 0x0399, 0x03c2, 0x03a3,
|
||||||
@@ -157,7 +157,7 @@ static const uint16_t lit_upper_case_conversions[] JERRY_ATTR_CONST_DATA =
|
|||||||
};
|
};
|
||||||
|
|
||||||
/* Number of one-to-one, one-to-two, and one-to-three uppercase conversions. */
|
/* Number of one-to-one, one-to-two, and one-to-three uppercase conversions. */
|
||||||
static const uint8_t lit_upper_case_conversion_counters[] JERRY_ATTR_CONST_DATA =
|
static const uint8_t lit_unicode_upper_case_conversion_counters[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x001c, 0x002c, 0x0010
|
0x001c, 0x002c, 0x0010
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -0,0 +1,129 @@
|
|||||||
|
/* Copyright JS Foundation and other contributors, http://js.foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This file is automatically generated by the gen-unicode.py script
|
||||||
|
* from DerivedCoreProperties.txt. Do not edit! */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Character interval starting points for ID_Start.
|
||||||
|
*/
|
||||||
|
static const uint32_t lit_unicode_id_start_interval_starts_sup[] JERRY_ATTR_CONST_DATA =
|
||||||
|
{
|
||||||
|
0x010000, 0x01000d, 0x010028, 0x01003c, 0x01003f, 0x010050, 0x010080, 0x010140, 0x010280, 0x0102a0,
|
||||||
|
0x010300, 0x01032d, 0x010350, 0x010380, 0x0103a0, 0x0103c8, 0x0103d1, 0x010400, 0x0104b0, 0x0104d8,
|
||||||
|
0x010500, 0x010530, 0x010600, 0x010740, 0x010760, 0x010800, 0x01080a, 0x010837, 0x01083f, 0x010860,
|
||||||
|
0x010880, 0x0108e0, 0x0108f4, 0x010900, 0x010920, 0x010980, 0x0109be, 0x010a10, 0x010a15, 0x010a19,
|
||||||
|
0x010a60, 0x010a80, 0x010ac0, 0x010ac9, 0x010b00, 0x010b40, 0x010b60, 0x010b80, 0x010c00, 0x010c80,
|
||||||
|
0x010cc0, 0x010d00, 0x010e80, 0x010eb0, 0x010f00, 0x010f30, 0x010fb0, 0x010fe0, 0x011003, 0x011083,
|
||||||
|
0x0110d0, 0x011103, 0x011150, 0x011183, 0x0111c1, 0x011200, 0x011213, 0x011280, 0x01128a, 0x01128f,
|
||||||
|
0x01129f, 0x0112b0, 0x011305, 0x01130f, 0x011313, 0x01132a, 0x011332, 0x011335, 0x01135d, 0x011400,
|
||||||
|
0x011447, 0x01145f, 0x011480, 0x0114c4, 0x011580, 0x0115d8, 0x011600, 0x011680, 0x011700, 0x011800,
|
||||||
|
0x0118a0, 0x0118ff, 0x01190c, 0x011915, 0x011918, 0x0119a0, 0x0119aa, 0x011a0b, 0x011a5c, 0x011ac0,
|
||||||
|
0x011c00, 0x011c0a, 0x011c72, 0x011d00, 0x011d08, 0x011d0b, 0x011d60, 0x011d67, 0x011d6a, 0x011ee0,
|
||||||
|
0x012000, 0x012400, 0x012480, 0x013000, 0x014400, 0x016800, 0x016a40, 0x016ad0, 0x016b00, 0x016b40,
|
||||||
|
0x016b63, 0x016b7d, 0x016e40, 0x016f00, 0x016f93, 0x016fe0, 0x017000, 0x018800, 0x018d00, 0x01b000,
|
||||||
|
0x01b150, 0x01b164, 0x01b170, 0x01bc00, 0x01bc70, 0x01bc80, 0x01bc90, 0x01d400, 0x01d456, 0x01d49e,
|
||||||
|
0x01d4a5, 0x01d4a9, 0x01d4ae, 0x01d4bd, 0x01d4c5, 0x01d507, 0x01d50d, 0x01d516, 0x01d51e, 0x01d53b,
|
||||||
|
0x01d540, 0x01d54a, 0x01d552, 0x01d6a8, 0x01d6c2, 0x01d6dc, 0x01d6fc, 0x01d716, 0x01d736, 0x01d750,
|
||||||
|
0x01d770, 0x01d78a, 0x01d7aa, 0x01d7c4, 0x01e100, 0x01e137, 0x01e2c0, 0x01e800, 0x01e900, 0x01ee00,
|
||||||
|
0x01ee05, 0x01ee21, 0x01ee29, 0x01ee34, 0x01ee4d, 0x01ee51, 0x01ee61, 0x01ee67, 0x01ee6c, 0x01ee74,
|
||||||
|
0x01ee79, 0x01ee80, 0x01ee8b, 0x01eea1, 0x01eea5, 0x01eeab, 0x020000, 0x02a700, 0x02b740, 0x02b820,
|
||||||
|
0x02ceb0, 0x02f800, 0x030000
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Character interval lengths for ID_Start.
|
||||||
|
*/
|
||||||
|
static const uint16_t lit_unicode_id_start_interval_lengths_sup[] JERRY_ATTR_CONST_DATA =
|
||||||
|
{
|
||||||
|
0x00000b, 0x000019, 0x000012, 0x000001, 0x00000e, 0x00000d, 0x00007a, 0x000034, 0x00001c, 0x000030,
|
||||||
|
0x00001f, 0x00001d, 0x000025, 0x00001d, 0x000023, 0x000007, 0x000004, 0x00009d, 0x000023, 0x000023,
|
||||||
|
0x000027, 0x000033, 0x000136, 0x000015, 0x000007, 0x000005, 0x00002b, 0x000001, 0x000016, 0x000016,
|
||||||
|
0x00001e, 0x000012, 0x000001, 0x000015, 0x000019, 0x000037, 0x000001, 0x000003, 0x000002, 0x00001c,
|
||||||
|
0x00001c, 0x00001c, 0x000007, 0x00001b, 0x000035, 0x000015, 0x000012, 0x000011, 0x000048, 0x000032,
|
||||||
|
0x000032, 0x000023, 0x000029, 0x000001, 0x00001c, 0x000015, 0x000014, 0x000016, 0x000034, 0x00002c,
|
||||||
|
0x000018, 0x000023, 0x000022, 0x00002f, 0x000003, 0x000011, 0x000018, 0x000006, 0x000003, 0x00000e,
|
||||||
|
0x000009, 0x00002e, 0x000007, 0x000001, 0x000015, 0x000006, 0x000001, 0x000004, 0x000004, 0x000034,
|
||||||
|
0x000003, 0x000002, 0x00002f, 0x000001, 0x00002e, 0x000003, 0x00002f, 0x00002a, 0x00001a, 0x00002b,
|
||||||
|
0x00003f, 0x000007, 0x000007, 0x000001, 0x000017, 0x000007, 0x000026, 0x000027, 0x00002d, 0x000038,
|
||||||
|
0x000008, 0x000024, 0x00001d, 0x000006, 0x000001, 0x000025, 0x000005, 0x000001, 0x00001f, 0x000012,
|
||||||
|
0x000399, 0x00006e, 0x0000c3, 0x00042e, 0x000246, 0x000238, 0x00001e, 0x00001d, 0x00002f, 0x000003,
|
||||||
|
0x000014, 0x000012, 0x00003f, 0x00004a, 0x00000c, 0x000001, 0x0017f7, 0x0004d5, 0x000008, 0x00011e,
|
||||||
|
0x000002, 0x000003, 0x00018b, 0x00006a, 0x00000c, 0x000008, 0x000009, 0x000054, 0x000046, 0x000001,
|
||||||
|
0x000001, 0x000003, 0x00000b, 0x000006, 0x000040, 0x000003, 0x000007, 0x000006, 0x00001b, 0x000003,
|
||||||
|
0x000004, 0x000006, 0x000153, 0x000018, 0x000018, 0x00001e, 0x000018, 0x00001e, 0x000018, 0x00001e,
|
||||||
|
0x000018, 0x00001e, 0x000018, 0x000007, 0x00002c, 0x000006, 0x00002b, 0x0000c4, 0x000043, 0x000003,
|
||||||
|
0x00001a, 0x000001, 0x000009, 0x000003, 0x000002, 0x000001, 0x000001, 0x000003, 0x000006, 0x000003,
|
||||||
|
0x000003, 0x000009, 0x000010, 0x000002, 0x000004, 0x000010, 0x00a6dd, 0x001034, 0x0000dd, 0x001681,
|
||||||
|
0x001d30, 0x00021d, 0x00134a
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Non-interval characters for ID_Start.
|
||||||
|
*/
|
||||||
|
static const uint32_t lit_unicode_id_start_chars_sup[] JERRY_ATTR_CONST_DATA =
|
||||||
|
{
|
||||||
|
0x010808, 0x01083c, 0x010a00, 0x010f27, 0x011144, 0x011147, 0x011176, 0x0111da, 0x0111dc, 0x011288,
|
||||||
|
0x01133d, 0x011350, 0x0114c7, 0x011644, 0x0116b8, 0x011909, 0x01193f, 0x011941, 0x0119e1, 0x0119e3,
|
||||||
|
0x011a00, 0x011a3a, 0x011a50, 0x011a9d, 0x011c40, 0x011d46, 0x011d98, 0x011fb0, 0x016f50, 0x016fe3,
|
||||||
|
0x01d4a2, 0x01d4bb, 0x01d546, 0x01e14e, 0x01e94b, 0x01ee24, 0x01ee27, 0x01ee39, 0x01ee3b, 0x01ee42,
|
||||||
|
0x01ee47, 0x01ee49, 0x01ee4b, 0x01ee54, 0x01ee57, 0x01ee59, 0x01ee5b, 0x01ee5d, 0x01ee5f, 0x01ee64,
|
||||||
|
0x01ee7e
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Character interval starting points for ID_Continue.
|
||||||
|
*/
|
||||||
|
static const uint32_t lit_unicode_id_continue_interval_starts_sup[] JERRY_ATTR_CONST_DATA =
|
||||||
|
{
|
||||||
|
0x010376, 0x0104a0, 0x010a01, 0x010a05, 0x010a0c, 0x010a38, 0x010ae5, 0x010d24, 0x010d30, 0x010eab,
|
||||||
|
0x010f46, 0x011000, 0x011038, 0x011066, 0x01107f, 0x0110b0, 0x0110f0, 0x011100, 0x011127, 0x011136,
|
||||||
|
0x011145, 0x011180, 0x0111b3, 0x0111c9, 0x0111ce, 0x01122c, 0x0112df, 0x0112f0, 0x011300, 0x01133b,
|
||||||
|
0x01133e, 0x011347, 0x01134b, 0x011362, 0x011366, 0x011370, 0x011435, 0x011450, 0x0114b0, 0x0114d0,
|
||||||
|
0x0115af, 0x0115b8, 0x0115dc, 0x011630, 0x011650, 0x0116ab, 0x0116c0, 0x01171d, 0x011730, 0x01182c,
|
||||||
|
0x0118e0, 0x011930, 0x011937, 0x01193b, 0x011942, 0x011950, 0x0119d1, 0x0119da, 0x011a01, 0x011a33,
|
||||||
|
0x011a3b, 0x011a51, 0x011a8a, 0x011c2f, 0x011c38, 0x011c50, 0x011c92, 0x011ca9, 0x011d31, 0x011d3c,
|
||||||
|
0x011d3f, 0x011d50, 0x011d8a, 0x011d90, 0x011d93, 0x011da0, 0x011ef3, 0x016a60, 0x016af0, 0x016b30,
|
||||||
|
0x016b50, 0x016f51, 0x016f8f, 0x016ff0, 0x01bc9d, 0x01d165, 0x01d16d, 0x01d17b, 0x01d185, 0x01d1aa,
|
||||||
|
0x01d242, 0x01d7ce, 0x01da00, 0x01da3b, 0x01da9b, 0x01daa1, 0x01e000, 0x01e008, 0x01e01b, 0x01e023,
|
||||||
|
0x01e026, 0x01e130, 0x01e140, 0x01e2ec, 0x01e8d0, 0x01e944, 0x01e950, 0x01fbf0, 0x0e0100
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Character interval lengths for ID_Continue.
|
||||||
|
*/
|
||||||
|
static const uint16_t lit_unicode_id_continue_interval_lengths_sup[] JERRY_ATTR_CONST_DATA =
|
||||||
|
{
|
||||||
|
0x000004, 0x000009, 0x000002, 0x000001, 0x000003, 0x000002, 0x000001, 0x000003, 0x000009, 0x000001,
|
||||||
|
0x00000a, 0x000002, 0x00000e, 0x000009, 0x000003, 0x00000a, 0x000009, 0x000002, 0x00000d, 0x000009,
|
||||||
|
0x000001, 0x000002, 0x00000d, 0x000003, 0x00000b, 0x00000b, 0x00000b, 0x000009, 0x000003, 0x000001,
|
||||||
|
0x000006, 0x000001, 0x000002, 0x000001, 0x000006, 0x000004, 0x000011, 0x000009, 0x000013, 0x000009,
|
||||||
|
0x000006, 0x000008, 0x000001, 0x000010, 0x000009, 0x00000c, 0x000009, 0x00000e, 0x000009, 0x00000e,
|
||||||
|
0x000009, 0x000005, 0x000001, 0x000003, 0x000001, 0x000009, 0x000006, 0x000006, 0x000009, 0x000006,
|
||||||
|
0x000003, 0x00000a, 0x00000f, 0x000007, 0x000007, 0x000009, 0x000015, 0x00000d, 0x000005, 0x000001,
|
||||||
|
0x000006, 0x000009, 0x000004, 0x000001, 0x000004, 0x000009, 0x000003, 0x000009, 0x000004, 0x000006,
|
||||||
|
0x000009, 0x000036, 0x000003, 0x000001, 0x000001, 0x000004, 0x000005, 0x000007, 0x000006, 0x000003,
|
||||||
|
0x000002, 0x000031, 0x000036, 0x000031, 0x000004, 0x00000e, 0x000006, 0x000010, 0x000006, 0x000001,
|
||||||
|
0x000004, 0x000006, 0x000009, 0x00000d, 0x000006, 0x000006, 0x000009, 0x000009, 0x0000ef
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Non-interval characters for ID_Continue.
|
||||||
|
*/
|
||||||
|
static const uint32_t lit_unicode_id_continue_chars_sup[] JERRY_ATTR_CONST_DATA =
|
||||||
|
{
|
||||||
|
0x0101fd, 0x0102e0, 0x010a3f, 0x011173, 0x01123e, 0x011357, 0x01145e, 0x011940, 0x0119e4, 0x011a47,
|
||||||
|
0x011d3a, 0x011d47, 0x016f4f, 0x016fe4, 0x01da75, 0x01da84
|
||||||
|
};
|
||||||
@@ -14,15 +14,12 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* This file is automatically generated by the gen-unicode.py script
|
/* This file is automatically generated by the gen-unicode.py script
|
||||||
* from UnicodeData-13.0.0d6.txt. Do not edit! */
|
* from DerivedCoreProperties.txt. Do not edit! */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Character interval starting points for the unicode letters.
|
* Character interval starting points for ID_Start.
|
||||||
*
|
|
||||||
* The characters covered by these intervals are from
|
|
||||||
* the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl
|
|
||||||
*/
|
*/
|
||||||
static const uint16_t lit_unicode_letter_interval_sps[] JERRY_ATTR_CONST_DATA =
|
static const uint16_t lit_unicode_id_start_interval_starts[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x00c0, 0x00d8, 0x00f8, 0x01f8, 0x02c6, 0x02e0, 0x0370, 0x0376, 0x037a, 0x0388,
|
0x00c0, 0x00d8, 0x00f8, 0x01f8, 0x02c6, 0x02e0, 0x0370, 0x0376, 0x037a, 0x0388,
|
||||||
0x038e, 0x03a3, 0x03f7, 0x048a, 0x0531, 0x0560, 0x05d0, 0x05ef, 0x0620, 0x066e,
|
0x038e, 0x03a3, 0x03f7, 0x048a, 0x0531, 0x0560, 0x05d0, 0x05ef, 0x0620, 0x066e,
|
||||||
@@ -39,46 +36,43 @@ static const uint16_t lit_unicode_letter_interval_sps[] JERRY_ATTR_CONST_DATA =
|
|||||||
0x10fc, 0x11fc, 0x124a, 0x1250, 0x125a, 0x1260, 0x128a, 0x1290, 0x12b2, 0x12b8,
|
0x10fc, 0x11fc, 0x124a, 0x1250, 0x125a, 0x1260, 0x128a, 0x1290, 0x12b2, 0x12b8,
|
||||||
0x12c2, 0x12c8, 0x12d8, 0x1312, 0x1318, 0x1380, 0x13a0, 0x13f8, 0x1401, 0x1501,
|
0x12c2, 0x12c8, 0x12d8, 0x1312, 0x1318, 0x1380, 0x13a0, 0x13f8, 0x1401, 0x1501,
|
||||||
0x1601, 0x166f, 0x1681, 0x16a0, 0x16ee, 0x1700, 0x170e, 0x1720, 0x1740, 0x1760,
|
0x1601, 0x166f, 0x1681, 0x16a0, 0x16ee, 0x1700, 0x170e, 0x1720, 0x1740, 0x1760,
|
||||||
0x176e, 0x1780, 0x1820, 0x1880, 0x1887, 0x18b0, 0x1900, 0x1950, 0x1970, 0x1980,
|
0x176e, 0x1780, 0x1820, 0x1880, 0x18b0, 0x1900, 0x1950, 0x1970, 0x1980, 0x19b0,
|
||||||
0x19b0, 0x1a00, 0x1a20, 0x1b05, 0x1b45, 0x1b83, 0x1bae, 0x1bba, 0x1c00, 0x1c4d,
|
0x1a00, 0x1a20, 0x1b05, 0x1b45, 0x1b83, 0x1bae, 0x1bba, 0x1c00, 0x1c4d, 0x1c5a,
|
||||||
0x1c5a, 0x1c80, 0x1c90, 0x1cbd, 0x1ce9, 0x1cee, 0x1cf5, 0x1d00, 0x1e00, 0x1f00,
|
0x1c80, 0x1c90, 0x1cbd, 0x1ce9, 0x1cee, 0x1cf5, 0x1d00, 0x1e00, 0x1f00, 0x1f18,
|
||||||
0x1f18, 0x1f20, 0x1f48, 0x1f50, 0x1f5f, 0x1f80, 0x1fb6, 0x1fc2, 0x1fc6, 0x1fd0,
|
0x1f20, 0x1f48, 0x1f50, 0x1f5f, 0x1f80, 0x1fb6, 0x1fc2, 0x1fc6, 0x1fd0, 0x1fd6,
|
||||||
0x1fd6, 0x1fe0, 0x1ff2, 0x1ff6, 0x2090, 0x210a, 0x2119, 0x212a, 0x212f, 0x213c,
|
0x1fe0, 0x1ff2, 0x1ff6, 0x2090, 0x210a, 0x2118, 0x212a, 0x213c, 0x2145, 0x2160,
|
||||||
0x2145, 0x2160, 0x2c00, 0x2c30, 0x2c60, 0x2ceb, 0x2cf2, 0x2d00, 0x2d30, 0x2d80,
|
0x2c00, 0x2c30, 0x2c60, 0x2ceb, 0x2cf2, 0x2d00, 0x2d30, 0x2d80, 0x2da0, 0x2da8,
|
||||||
0x2da0, 0x2da8, 0x2db0, 0x2db8, 0x2dc0, 0x2dc8, 0x2dd0, 0x2dd8, 0x3005, 0x3021,
|
0x2db0, 0x2db8, 0x2dc0, 0x2dc8, 0x2dd0, 0x2dd8, 0x3005, 0x3021, 0x3031, 0x3038,
|
||||||
0x3031, 0x3038, 0x3041, 0x309d, 0x30a1, 0x30fc, 0x3105, 0x3131, 0x31a0, 0x31f0,
|
0x3041, 0x309b, 0x30a1, 0x30fc, 0x3105, 0x3131, 0x31a0, 0x31f0, 0x3400, 0x3500,
|
||||||
0x3400, 0x3500, 0x3600, 0x3700, 0x3800, 0x3900, 0x3a00, 0x3b00, 0x3c00, 0x3d00,
|
0x3600, 0x3700, 0x3800, 0x3900, 0x3a00, 0x3b00, 0x3c00, 0x3d00, 0x3e00, 0x3f00,
|
||||||
0x3e00, 0x3f00, 0x4000, 0x4100, 0x4200, 0x4300, 0x4400, 0x4500, 0x4600, 0x4700,
|
0x4000, 0x4100, 0x4200, 0x4300, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900,
|
||||||
0x4800, 0x4900, 0x4a00, 0x4b00, 0x4c00, 0x4d00, 0x4e00, 0x4f00, 0x5000, 0x5100,
|
0x4a00, 0x4b00, 0x4c00, 0x4d00, 0x4e00, 0x4f00, 0x5000, 0x5100, 0x5200, 0x5300,
|
||||||
0x5200, 0x5300, 0x5400, 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00,
|
0x5400, 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
|
||||||
0x5c00, 0x5d00, 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500,
|
0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600, 0x6700,
|
||||||
0x6600, 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
|
0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00, 0x7000, 0x7100,
|
||||||
0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, 0x7900,
|
0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, 0x7900, 0x7a00, 0x7b00,
|
||||||
0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00, 0x8000, 0x8100, 0x8200, 0x8300,
|
0x7c00, 0x7d00, 0x7e00, 0x7f00, 0x8000, 0x8100, 0x8200, 0x8300, 0x8400, 0x8500,
|
||||||
0x8400, 0x8500, 0x8600, 0x8700, 0x8800, 0x8900, 0x8a00, 0x8b00, 0x8c00, 0x8d00,
|
0x8600, 0x8700, 0x8800, 0x8900, 0x8a00, 0x8b00, 0x8c00, 0x8d00, 0x8e00, 0x8f00,
|
||||||
0x8e00, 0x8f00, 0x9000, 0x9100, 0x9200, 0x9300, 0x9400, 0x9500, 0x9600, 0x9700,
|
0x9000, 0x9100, 0x9200, 0x9300, 0x9400, 0x9500, 0x9600, 0x9700, 0x9800, 0x9900,
|
||||||
0x9800, 0x9900, 0x9a00, 0x9b00, 0x9c00, 0x9d00, 0x9e00, 0x9f00, 0xa000, 0xa100,
|
0x9a00, 0x9b00, 0x9c00, 0x9d00, 0x9e00, 0x9f00, 0xa000, 0xa100, 0xa200, 0xa300,
|
||||||
0xa200, 0xa300, 0xa400, 0xa4d0, 0xa500, 0xa600, 0xa610, 0xa62a, 0xa640, 0xa67f,
|
0xa400, 0xa4d0, 0xa500, 0xa600, 0xa610, 0xa62a, 0xa640, 0xa67f, 0xa6a0, 0xa717,
|
||||||
0xa6a0, 0xa717, 0xa722, 0xa78b, 0xa7c2, 0xa7f5, 0xa803, 0xa807, 0xa80c, 0xa840,
|
0xa722, 0xa78b, 0xa7c2, 0xa7f5, 0xa803, 0xa807, 0xa80c, 0xa840, 0xa882, 0xa8f2,
|
||||||
0xa882, 0xa8f2, 0xa8fd, 0xa90a, 0xa930, 0xa960, 0xa984, 0xa9e0, 0xa9e6, 0xa9fa,
|
0xa8fd, 0xa90a, 0xa930, 0xa960, 0xa984, 0xa9e0, 0xa9e6, 0xa9fa, 0xaa00, 0xaa40,
|
||||||
0xaa00, 0xaa40, 0xaa44, 0xaa60, 0xaa7e, 0xaab5, 0xaab9, 0xaadb, 0xaae0, 0xaaf2,
|
0xaa44, 0xaa60, 0xaa7e, 0xaab5, 0xaab9, 0xaadb, 0xaae0, 0xaaf2, 0xab01, 0xab09,
|
||||||
0xab01, 0xab09, 0xab11, 0xab20, 0xab28, 0xab30, 0xab5c, 0xab70, 0xac00, 0xad00,
|
0xab11, 0xab20, 0xab28, 0xab30, 0xab5c, 0xab70, 0xac00, 0xad00, 0xae00, 0xaf00,
|
||||||
0xae00, 0xaf00, 0xb000, 0xb100, 0xb200, 0xb300, 0xb400, 0xb500, 0xb600, 0xb700,
|
0xb000, 0xb100, 0xb200, 0xb300, 0xb400, 0xb500, 0xb600, 0xb700, 0xb800, 0xb900,
|
||||||
0xb800, 0xb900, 0xba00, 0xbb00, 0xbc00, 0xbd00, 0xbe00, 0xbf00, 0xc000, 0xc100,
|
0xba00, 0xbb00, 0xbc00, 0xbd00, 0xbe00, 0xbf00, 0xc000, 0xc100, 0xc200, 0xc300,
|
||||||
0xc200, 0xc300, 0xc400, 0xc500, 0xc600, 0xc700, 0xc800, 0xc900, 0xca00, 0xcb00,
|
0xc400, 0xc500, 0xc600, 0xc700, 0xc800, 0xc900, 0xca00, 0xcb00, 0xcc00, 0xcd00,
|
||||||
0xcc00, 0xcd00, 0xce00, 0xcf00, 0xd000, 0xd100, 0xd200, 0xd300, 0xd400, 0xd500,
|
0xce00, 0xcf00, 0xd000, 0xd100, 0xd200, 0xd300, 0xd400, 0xd500, 0xd600, 0xd700,
|
||||||
0xd600, 0xd700, 0xd7b0, 0xd7cb, 0xf900, 0xfa00, 0xfa70, 0xfb00, 0xfb13, 0xfb1f,
|
0xd7b0, 0xd7cb, 0xf900, 0xfa00, 0xfa70, 0xfb00, 0xfb13, 0xfb1f, 0xfb2a, 0xfb38,
|
||||||
0xfb2a, 0xfb38, 0xfb40, 0xfb43, 0xfb46, 0xfbd3, 0xfcd3, 0xfd50, 0xfd92, 0xfdf0,
|
0xfb40, 0xfb43, 0xfb46, 0xfbd3, 0xfcd3, 0xfd50, 0xfd92, 0xfdf0, 0xfe70, 0xfe76,
|
||||||
0xfe70, 0xfe76, 0xff21, 0xff41, 0xff66, 0xffc2, 0xffca, 0xffd2, 0xffda
|
0xff21, 0xff41, 0xff66, 0xffc2, 0xffca, 0xffd2, 0xffda
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Character lengths for the unicode letters.
|
* Character interval lengths for ID_Start.
|
||||||
*
|
|
||||||
* The characters covered by these intervals are from
|
|
||||||
* the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl
|
|
||||||
*/
|
*/
|
||||||
static const uint8_t lit_unicode_letter_interval_lengths[] JERRY_ATTR_CONST_DATA =
|
static const uint8_t lit_unicode_id_start_interval_lengths[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x0016, 0x001e, 0x00ff, 0x00c9, 0x000b, 0x0004, 0x0004, 0x0001, 0x0003, 0x0002,
|
0x0016, 0x001e, 0x00ff, 0x00c9, 0x000b, 0x0004, 0x0004, 0x0001, 0x0003, 0x0002,
|
||||||
0x0013, 0x0052, 0x008a, 0x00a5, 0x0025, 0x0028, 0x001a, 0x0003, 0x002a, 0x0001,
|
0x0013, 0x0052, 0x008a, 0x00a5, 0x0025, 0x0028, 0x001a, 0x0003, 0x002a, 0x0001,
|
||||||
@@ -95,17 +89,17 @@ static const uint8_t lit_unicode_letter_interval_lengths[] JERRY_ATTR_CONST_DATA
|
|||||||
0x00ff, 0x004c, 0x0003, 0x0006, 0x0003, 0x0028, 0x0003, 0x0020, 0x0003, 0x0006,
|
0x00ff, 0x004c, 0x0003, 0x0006, 0x0003, 0x0028, 0x0003, 0x0020, 0x0003, 0x0006,
|
||||||
0x0003, 0x000e, 0x0038, 0x0003, 0x0042, 0x000f, 0x0055, 0x0005, 0x00ff, 0x00ff,
|
0x0003, 0x000e, 0x0038, 0x0003, 0x0042, 0x000f, 0x0055, 0x0005, 0x00ff, 0x00ff,
|
||||||
0x006b, 0x0010, 0x0019, 0x004a, 0x000a, 0x000c, 0x0003, 0x0011, 0x0011, 0x000c,
|
0x006b, 0x0010, 0x0019, 0x004a, 0x000a, 0x000c, 0x0003, 0x0011, 0x0011, 0x000c,
|
||||||
0x0002, 0x0033, 0x0058, 0x0004, 0x0021, 0x0045, 0x001e, 0x001d, 0x0004, 0x002b,
|
0x0002, 0x0033, 0x0058, 0x0028, 0x0045, 0x001e, 0x001d, 0x0004, 0x002b, 0x0019,
|
||||||
0x0019, 0x0016, 0x0034, 0x002e, 0x0006, 0x001d, 0x0001, 0x002b, 0x0023, 0x0002,
|
0x0016, 0x0034, 0x002e, 0x0006, 0x001d, 0x0001, 0x002b, 0x0023, 0x0002, 0x0023,
|
||||||
0x0023, 0x0008, 0x002a, 0x0002, 0x0003, 0x0005, 0x0001, 0x00bf, 0x00ff, 0x0015,
|
0x0008, 0x002a, 0x0002, 0x0003, 0x0005, 0x0001, 0x00bf, 0x00ff, 0x0015, 0x0005,
|
||||||
0x0005, 0x0025, 0x0005, 0x0007, 0x001e, 0x0034, 0x0006, 0x0002, 0x0006, 0x0003,
|
0x0025, 0x0005, 0x0007, 0x001e, 0x0034, 0x0006, 0x0002, 0x0006, 0x0003, 0x0005,
|
||||||
0x0005, 0x000c, 0x0002, 0x0006, 0x000c, 0x0009, 0x0004, 0x0003, 0x000a, 0x0003,
|
0x000c, 0x0002, 0x0006, 0x000c, 0x0009, 0x0005, 0x000f, 0x0003, 0x0004, 0x0028,
|
||||||
0x0004, 0x0028, 0x002e, 0x002e, 0x0084, 0x0003, 0x0001, 0x0025, 0x0037, 0x0016,
|
0x002e, 0x002e, 0x0084, 0x0003, 0x0001, 0x0025, 0x0037, 0x0016, 0x0006, 0x0006,
|
||||||
0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0002, 0x0008,
|
0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0006, 0x0002, 0x0008, 0x0004, 0x0004,
|
||||||
0x0004, 0x0004, 0x0055, 0x0002, 0x0059, 0x0003, 0x002a, 0x005d, 0x001f, 0x000f,
|
0x0055, 0x0004, 0x0059, 0x0003, 0x002a, 0x005d, 0x001f, 0x000f, 0x00ff, 0x00ff,
|
||||||
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
||||||
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
||||||
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00bf, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
0x00ff, 0x00ff, 0x00ff, 0x00bf, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
||||||
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
||||||
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
||||||
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
||||||
@@ -113,29 +107,25 @@ static const uint8_t lit_unicode_letter_interval_lengths[] JERRY_ATTR_CONST_DATA
|
|||||||
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
||||||
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
||||||
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
||||||
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00fc, 0x00ff, 0x00ff,
|
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00fc, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
||||||
0x00ff, 0x00ff, 0x008c, 0x002d, 0x00ff, 0x000c, 0x000f, 0x0001, 0x002e, 0x001e,
|
0x008c, 0x002d, 0x00ff, 0x000c, 0x000f, 0x0001, 0x002e, 0x001e, 0x004f, 0x0008,
|
||||||
0x004f, 0x0008, 0x0066, 0x0034, 0x0008, 0x000c, 0x0002, 0x0003, 0x0016, 0x0033,
|
0x0066, 0x0034, 0x0008, 0x000c, 0x0002, 0x0003, 0x0016, 0x0033, 0x0031, 0x0005,
|
||||||
0x0031, 0x0005, 0x0001, 0x001b, 0x0016, 0x001c, 0x002e, 0x0004, 0x0009, 0x0004,
|
0x0001, 0x001b, 0x0016, 0x001c, 0x002e, 0x0004, 0x0009, 0x0004, 0x0028, 0x0002,
|
||||||
0x0028, 0x0002, 0x0007, 0x0016, 0x0031, 0x0001, 0x0004, 0x0002, 0x000a, 0x0002,
|
0x0007, 0x0016, 0x0031, 0x0001, 0x0004, 0x0002, 0x000a, 0x0002, 0x0005, 0x0005,
|
||||||
0x0005, 0x0005, 0x0005, 0x0006, 0x0006, 0x002a, 0x000d, 0x0072, 0x00ff, 0x00ff,
|
0x0005, 0x0006, 0x0006, 0x002a, 0x000d, 0x0072, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
||||||
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
||||||
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
||||||
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
||||||
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff,
|
0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00a3,
|
||||||
0x00ff, 0x00a3, 0x0016, 0x0030, 0x00ff, 0x006d, 0x0069, 0x0006, 0x0004, 0x0009,
|
0x0016, 0x0030, 0x00ff, 0x006d, 0x0069, 0x0006, 0x0004, 0x0009, 0x000c, 0x0004,
|
||||||
0x000c, 0x0004, 0x0001, 0x0001, 0x006b, 0x00ff, 0x006a, 0x003f, 0x0035, 0x000b,
|
0x0001, 0x0001, 0x006b, 0x00ff, 0x006a, 0x003f, 0x0035, 0x000b, 0x0004, 0x0086,
|
||||||
0x0004, 0x0086, 0x0019, 0x0019, 0x0058, 0x0005, 0x0005, 0x0005, 0x0002
|
0x0019, 0x0019, 0x0058, 0x0005, 0x0005, 0x0005, 0x0002
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Those unicode letter characters that are not inside any of
|
* Non-interval characters for ID_Start.
|
||||||
* the intervals specified in lit_unicode_letter_interval_sps array.
|
|
||||||
*
|
|
||||||
* The characters are from the following Unicode categories:
|
|
||||||
* Lu, Ll, Lt, Lm, Lo, Nl
|
|
||||||
*/
|
*/
|
||||||
static const uint16_t lit_unicode_letter_chars[] JERRY_ATTR_CONST_DATA =
|
static const uint16_t lit_unicode_id_start_chars[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x00aa, 0x00b5, 0x00ba, 0x02ec, 0x02ee, 0x037f, 0x0386, 0x038c, 0x0559, 0x06d5,
|
0x00aa, 0x00b5, 0x00ba, 0x02ec, 0x02ee, 0x037f, 0x0386, 0x038c, 0x0559, 0x06d5,
|
||||||
0x06ff, 0x0710, 0x07b1, 0x07fa, 0x081a, 0x0824, 0x0828, 0x093d, 0x0950, 0x09b2,
|
0x06ff, 0x0710, 0x07b1, 0x07fa, 0x081a, 0x0824, 0x0828, 0x093d, 0x0950, 0x09b2,
|
||||||
@@ -144,18 +134,13 @@ static const uint16_t lit_unicode_letter_chars[] JERRY_ATTR_CONST_DATA =
|
|||||||
0x0ea5, 0x0ebd, 0x0ec6, 0x0f00, 0x103f, 0x1061, 0x108e, 0x10c7, 0x10cd, 0x1258,
|
0x0ea5, 0x0ebd, 0x0ec6, 0x0f00, 0x103f, 0x1061, 0x108e, 0x10c7, 0x10cd, 0x1258,
|
||||||
0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7, 0x1cfa, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe,
|
0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7, 0x1cfa, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe,
|
||||||
0x2071, 0x207f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x214e, 0x2d27,
|
0x2071, 0x207f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x214e, 0x2d27,
|
||||||
0x2d2d, 0x2d6f, 0x2e2f, 0x3400, 0x4e00, 0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaac0,
|
0x2d2d, 0x2d6f, 0xa8fb, 0xa9cf, 0xaa7a, 0xaab1, 0xaac0, 0xaac2, 0xfb1d, 0xfb3e
|
||||||
0xaac2, 0xac00, 0xfb1d, 0xfb3e
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Character interval starting points for non-letter character
|
* Character interval starting points for ID_Continue.
|
||||||
* that can be used as a non-first character of an identifier.
|
|
||||||
*
|
|
||||||
* The characters covered by these intervals are from
|
|
||||||
* the following Unicode categories: Nd, Mn, Mc, Pc
|
|
||||||
*/
|
*/
|
||||||
static const uint16_t lit_unicode_non_letter_ident_part_interval_sps[] JERRY_ATTR_CONST_DATA =
|
static const uint16_t lit_unicode_id_continue_interval_starts[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x0300, 0x0483, 0x0591, 0x05c1, 0x05c4, 0x0610, 0x064b, 0x06d6, 0x06df, 0x06e7,
|
0x0300, 0x0483, 0x0591, 0x05c1, 0x05c4, 0x0610, 0x064b, 0x06d6, 0x06df, 0x06e7,
|
||||||
0x06ea, 0x06f0, 0x0730, 0x07a6, 0x07c0, 0x07eb, 0x0816, 0x081b, 0x0825, 0x0829,
|
0x06ea, 0x06f0, 0x0730, 0x07a6, 0x07c0, 0x07eb, 0x0816, 0x081b, 0x0825, 0x0829,
|
||||||
@@ -167,8 +152,8 @@ static const uint16_t lit_unicode_non_letter_ident_part_interval_sps[] JERRY_ATT
|
|||||||
0x0ce6, 0x0d00, 0x0d3b, 0x0d3e, 0x0d46, 0x0d4a, 0x0d62, 0x0d66, 0x0d81, 0x0dcf,
|
0x0ce6, 0x0d00, 0x0d3b, 0x0d3e, 0x0d46, 0x0d4a, 0x0d62, 0x0d66, 0x0d81, 0x0dcf,
|
||||||
0x0dd8, 0x0de6, 0x0df2, 0x0e34, 0x0e47, 0x0e50, 0x0eb4, 0x0ec8, 0x0ed0, 0x0f18,
|
0x0dd8, 0x0de6, 0x0df2, 0x0e34, 0x0e47, 0x0e50, 0x0eb4, 0x0ec8, 0x0ed0, 0x0f18,
|
||||||
0x0f20, 0x0f3e, 0x0f71, 0x0f86, 0x0f8d, 0x0f99, 0x102b, 0x1040, 0x1056, 0x105e,
|
0x0f20, 0x0f3e, 0x0f71, 0x0f86, 0x0f8d, 0x0f99, 0x102b, 0x1040, 0x1056, 0x105e,
|
||||||
0x1062, 0x1067, 0x1071, 0x1082, 0x108f, 0x135d, 0x1712, 0x1732, 0x1752, 0x1772,
|
0x1062, 0x1067, 0x1071, 0x1082, 0x108f, 0x135d, 0x1369, 0x1712, 0x1732, 0x1752,
|
||||||
0x17b4, 0x17e0, 0x180b, 0x1810, 0x1885, 0x1920, 0x1930, 0x1946, 0x19d0, 0x1a17,
|
0x1772, 0x17b4, 0x17e0, 0x180b, 0x1810, 0x1920, 0x1930, 0x1946, 0x19d0, 0x1a17,
|
||||||
0x1a55, 0x1a60, 0x1a7f, 0x1a90, 0x1ab0, 0x1abf, 0x1b00, 0x1b34, 0x1b50, 0x1b6b,
|
0x1a55, 0x1a60, 0x1a7f, 0x1a90, 0x1ab0, 0x1abf, 0x1b00, 0x1b34, 0x1b50, 0x1b6b,
|
||||||
0x1b80, 0x1ba1, 0x1bb0, 0x1be6, 0x1c24, 0x1c40, 0x1c50, 0x1cd0, 0x1cd4, 0x1cf7,
|
0x1b80, 0x1ba1, 0x1bb0, 0x1be6, 0x1c24, 0x1c40, 0x1c50, 0x1cd0, 0x1cd4, 0x1cf7,
|
||||||
0x1dc0, 0x1dfb, 0x200c, 0x203f, 0x20d0, 0x20e5, 0x2cef, 0x2de0, 0x302a, 0x3099,
|
0x1dc0, 0x1dfb, 0x200c, 0x203f, 0x20d0, 0x20e5, 0x2cef, 0x2de0, 0x302a, 0x3099,
|
||||||
@@ -179,13 +164,9 @@ static const uint16_t lit_unicode_non_letter_ident_part_interval_sps[] JERRY_ATT
|
|||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Character interval lengths for non-letter character
|
* Character interval lengths for ID_Continue.
|
||||||
* that can be used as a non-first character of an identifier.
|
|
||||||
*
|
|
||||||
* The characters covered by these intervals are from
|
|
||||||
* the following Unicode categories: Nd, Mn, Mc, Pc
|
|
||||||
*/
|
*/
|
||||||
static const uint8_t lit_unicode_non_letter_ident_part_interval_lengths[] JERRY_ATTR_CONST_DATA =
|
static const uint8_t lit_unicode_id_continue_interval_lengths[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x006f, 0x0004, 0x002c, 0x0001, 0x0001, 0x000a, 0x001e, 0x0006, 0x0005, 0x0001,
|
0x006f, 0x0004, 0x002c, 0x0001, 0x0001, 0x000a, 0x001e, 0x0006, 0x0005, 0x0001,
|
||||||
0x0003, 0x0009, 0x001a, 0x000a, 0x0009, 0x0008, 0x0003, 0x0008, 0x0002, 0x0004,
|
0x0003, 0x0009, 0x001a, 0x000a, 0x0009, 0x0008, 0x0003, 0x0008, 0x0002, 0x0004,
|
||||||
@@ -197,8 +178,8 @@ static const uint8_t lit_unicode_non_letter_ident_part_interval_lengths[] JERRY_
|
|||||||
0x0009, 0x0003, 0x0001, 0x0006, 0x0002, 0x0003, 0x0001, 0x0009, 0x0002, 0x0005,
|
0x0009, 0x0003, 0x0001, 0x0006, 0x0002, 0x0003, 0x0001, 0x0009, 0x0002, 0x0005,
|
||||||
0x0007, 0x0009, 0x0001, 0x0006, 0x0007, 0x0009, 0x0008, 0x0005, 0x0009, 0x0001,
|
0x0007, 0x0009, 0x0001, 0x0006, 0x0007, 0x0009, 0x0008, 0x0005, 0x0009, 0x0001,
|
||||||
0x0009, 0x0001, 0x0013, 0x0001, 0x000a, 0x0023, 0x0013, 0x0009, 0x0003, 0x0002,
|
0x0009, 0x0001, 0x0013, 0x0001, 0x000a, 0x0023, 0x0013, 0x0009, 0x0003, 0x0002,
|
||||||
0x0002, 0x0006, 0x0003, 0x000b, 0x000e, 0x0002, 0x0002, 0x0002, 0x0001, 0x0001,
|
0x0002, 0x0006, 0x0003, 0x000b, 0x000e, 0x0002, 0x0008, 0x0002, 0x0002, 0x0001,
|
||||||
0x001f, 0x0009, 0x0002, 0x0009, 0x0001, 0x000b, 0x000b, 0x0009, 0x0009, 0x0004,
|
0x0001, 0x001f, 0x0009, 0x0002, 0x0009, 0x000b, 0x000b, 0x0009, 0x000a, 0x0004,
|
||||||
0x0009, 0x001c, 0x000a, 0x0009, 0x000d, 0x0001, 0x0004, 0x0010, 0x0009, 0x0008,
|
0x0009, 0x001c, 0x000a, 0x0009, 0x000d, 0x0001, 0x0004, 0x0010, 0x0009, 0x0008,
|
||||||
0x0002, 0x000c, 0x0009, 0x000d, 0x0013, 0x0009, 0x0009, 0x0002, 0x0014, 0x0002,
|
0x0002, 0x000c, 0x0009, 0x000d, 0x0013, 0x0009, 0x0009, 0x0002, 0x0014, 0x0002,
|
||||||
0x0039, 0x0004, 0x0001, 0x0001, 0x000c, 0x000b, 0x0002, 0x001f, 0x0005, 0x0001,
|
0x0039, 0x0004, 0x0001, 0x0001, 0x000c, 0x000b, 0x0002, 0x001f, 0x0005, 0x0001,
|
||||||
@@ -209,45 +190,65 @@ static const uint8_t lit_unicode_non_letter_ident_part_interval_lengths[] JERRY_
|
|||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Those non-letter characters that can be used as a non-first
|
* Non-interval characters for ID_Continue.
|
||||||
* character of an identifier and not included in any of the intervals
|
|
||||||
* specified in lit_unicode_non_letter_ident_part_interval_sps array.
|
|
||||||
*
|
|
||||||
* The characters are from the following Unicode categories:
|
|
||||||
* Nd, Mn, Mc, Pc
|
|
||||||
*/
|
*/
|
||||||
static const uint16_t lit_unicode_non_letter_ident_part_chars[] JERRY_ATTR_CONST_DATA =
|
static const uint16_t lit_unicode_id_continue_chars[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x05bf, 0x05c7, 0x0670, 0x0711, 0x07fd, 0x09bc, 0x09d7, 0x09fe, 0x0a3c, 0x0a51,
|
0x00b7, 0x0387, 0x05bf, 0x05c7, 0x0670, 0x0711, 0x07fd, 0x09bc, 0x09d7, 0x09fe,
|
||||||
0x0a75, 0x0abc, 0x0b3c, 0x0b82, 0x0bd7, 0x0cbc, 0x0d57, 0x0dca, 0x0dd6, 0x0e31,
|
0x0a3c, 0x0a51, 0x0a75, 0x0abc, 0x0b3c, 0x0b82, 0x0bd7, 0x0cbc, 0x0d57, 0x0dca,
|
||||||
0x0eb1, 0x0f35, 0x0f37, 0x0f39, 0x0fc6, 0x17dd, 0x18a9, 0x1ced, 0x1cf4, 0x2054,
|
0x0dd6, 0x0e31, 0x0eb1, 0x0f35, 0x0f37, 0x0f39, 0x0fc6, 0x17dd, 0x18a9, 0x1ced,
|
||||||
0x20e1, 0x2d7f, 0xa66f, 0xa802, 0xa806, 0xa80b, 0xa82c, 0xa9e5, 0xaa43, 0xaab0,
|
0x1cf4, 0x2054, 0x20e1, 0x2d7f, 0xa66f, 0xa802, 0xa806, 0xa80b, 0xa82c, 0xa9e5,
|
||||||
0xaac1, 0xfb1e, 0xff3f
|
0xaa43, 0xaab0, 0xaac1, 0xfb1e, 0xff3f
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if ENABLED (JERRY_ESNEXT)
|
||||||
/**
|
/**
|
||||||
* Unicode separator character interval starting points from Unicode category: Zs
|
* Character interval starting points for White_Space.
|
||||||
*/
|
*/
|
||||||
static const uint16_t lit_unicode_separator_char_interval_sps[] JERRY_ATTR_CONST_DATA =
|
static const uint16_t lit_unicode_white_space_interval_starts[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x2000
|
0x2000
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unicode separator character interval lengths from Unicode category: Zs
|
* Character interval lengths for White_Space.
|
||||||
*/
|
*/
|
||||||
static const uint8_t lit_unicode_separator_char_interval_lengths[] JERRY_ATTR_CONST_DATA =
|
static const uint8_t lit_unicode_white_space_interval_lengths[] JERRY_ATTR_CONST_DATA =
|
||||||
|
{
|
||||||
|
0x000a
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Non-interval characters for White_Space.
|
||||||
|
*/
|
||||||
|
static const uint16_t lit_unicode_white_space_chars[] JERRY_ATTR_CONST_DATA =
|
||||||
|
{
|
||||||
|
0x00a0, 0x1680, 0x202f, 0x205f, 0x3000
|
||||||
|
};
|
||||||
|
|
||||||
|
#else /* !ENABLED (JERRY_ESNEXT) */
|
||||||
|
/**
|
||||||
|
* Character interval starting points for White_Space.
|
||||||
|
*/
|
||||||
|
static const uint16_t lit_unicode_white_space_interval_starts[] JERRY_ATTR_CONST_DATA =
|
||||||
|
{
|
||||||
|
0x2000
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Character interval lengths for White_Space.
|
||||||
|
*/
|
||||||
|
static const uint8_t lit_unicode_white_space_interval_lengths[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x000b
|
0x000b
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unicode separator characters that are not in the
|
* Non-interval characters for White_Space.
|
||||||
* lit_unicode_separator_char_intervals array.
|
|
||||||
*
|
|
||||||
* Unicode category: Zs
|
|
||||||
*/
|
*/
|
||||||
static const uint16_t lit_unicode_separator_chars[] JERRY_ATTR_CONST_DATA =
|
static const uint16_t lit_unicode_white_space_chars[] JERRY_ATTR_CONST_DATA =
|
||||||
{
|
{
|
||||||
0x1680, 0x180e, 0x202f, 0x205f, 0x3000
|
0x1680, 0x180e, 0x202f, 0x205f, 0x3000
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#endif /* ENABLED (JERRY_ESNEXT) */
|
||||||
|
|||||||
@@ -0,0 +1,66 @@
|
|||||||
|
// Copyright JS Foundation and other contributors, http://js.foundation
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
let start = 0x10000
|
||||||
|
let end = 0x10FFFF
|
||||||
|
|
||||||
|
const lower_expected = [66560, 66561, 66562, 66563, 66564, 66565, 66566, 66567, 66568, 66569, 66570, 66571, 66572,
|
||||||
|
66573, 66574, 66575, 66576, 66577, 66578, 66579, 66580, 66581, 66582, 66583, 66584, 66585,
|
||||||
|
66586, 66587, 66588, 66589, 66590, 66591, 66592, 66593, 66594, 66595, 66596, 66597, 66598,
|
||||||
|
66599, 66736, 66737, 66738, 66739, 66740, 66741, 66742, 66743, 66744, 66745, 66746, 66747,
|
||||||
|
66748, 66749, 66750, 66751, 66752, 66753, 66754, 66755, 66756, 66757, 66758, 66759, 66760,
|
||||||
|
66761, 66762, 66763, 66764, 66765, 66766, 66767, 66768, 66769, 66770, 66771, 68736, 68737,
|
||||||
|
68738, 68739, 68740, 68741, 68742, 68743, 68744, 68745, 68746, 68747, 68748, 68749, 68750,
|
||||||
|
68751, 68752, 68753, 68754, 68755, 68756, 68757, 68758, 68759, 68760, 68761, 68762, 68763,
|
||||||
|
68764, 68765, 68766, 68767, 68768, 68769, 68770, 68771, 68772, 68773, 68774, 68775, 68776,
|
||||||
|
68777, 68778, 68779, 68780, 68781, 68782, 68783, 68784, 68785, 68786, 71840, 71841, 71842,
|
||||||
|
71843, 71844, 71845, 71846, 71847, 71848, 71849, 71850, 71851, 71852, 71853, 71854, 71855,
|
||||||
|
71856, 71857, 71858, 71859, 71860, 71861, 71862, 71863, 71864, 71865, 71866, 71867, 71868,
|
||||||
|
71869, 71870, 71871, 93760, 93761, 93762, 93763, 93764, 93765, 93766, 93767, 93768, 93769,
|
||||||
|
93770, 93771, 93772, 93773, 93774, 93775, 93776, 93777, 93778, 93779, 93780, 93781, 93782,
|
||||||
|
93783, 93784, 93785, 93786, 93787, 93788, 93789, 93790, 93791, 125184, 125185, 125186, 125187,
|
||||||
|
125188, 125189, 125190, 125191, 125192, 125193, 125194, 125195, 125196, 125197, 125198, 125199,
|
||||||
|
125200, 125201, 125202, 125203, 125204, 125205, 125206, 125207, 125208, 125209, 125210, 125211,
|
||||||
|
125212, 125213, 125214, 125215, 125216, 125217];
|
||||||
|
|
||||||
|
const upper_expected = [66600, 66601, 66602, 66603, 66604, 66605, 66606, 66607, 66608, 66609, 66610, 66611, 66612,
|
||||||
|
66613, 66614, 66615, 66616, 66617, 66618, 66619, 66620, 66621, 66622, 66623, 66624, 66625,
|
||||||
|
66626, 66627, 66628, 66629, 66630, 66631, 66632, 66633, 66634, 66635, 66636, 66637, 66638,
|
||||||
|
66639, 66776, 66777, 66778, 66779, 66780, 66781, 66782, 66783, 66784, 66785, 66786, 66787,
|
||||||
|
66788, 66789, 66790, 66791, 66792, 66793, 66794, 66795, 66796, 66797, 66798, 66799, 66800,
|
||||||
|
66801, 66802, 66803, 66804, 66805, 66806, 66807, 66808, 66809, 66810, 66811, 68800, 68801,
|
||||||
|
68802, 68803, 68804, 68805, 68806, 68807, 68808, 68809, 68810, 68811, 68812, 68813, 68814,
|
||||||
|
68815, 68816, 68817, 68818, 68819, 68820, 68821, 68822, 68823, 68824, 68825, 68826, 68827,
|
||||||
|
68828, 68829, 68830, 68831, 68832, 68833, 68834, 68835, 68836, 68837, 68838, 68839, 68840,
|
||||||
|
68841, 68842, 68843, 68844, 68845, 68846, 68847, 68848, 68849, 68850, 71872, 71873, 71874,
|
||||||
|
71875, 71876, 71877, 71878, 71879, 71880, 71881, 71882, 71883, 71884, 71885, 71886, 71887,
|
||||||
|
71888, 71889, 71890, 71891, 71892, 71893, 71894, 71895, 71896, 71897, 71898, 71899, 71900,
|
||||||
|
71901, 71902, 71903, 93792, 93793, 93794, 93795, 93796, 93797, 93798, 93799, 93800, 93801,
|
||||||
|
93802, 93803, 93804, 93805, 93806, 93807, 93808, 93809, 93810, 93811, 93812, 93813, 93814,
|
||||||
|
93815, 93816, 93817, 93818, 93819, 93820, 93821, 93822, 93823, 125218, 125219, 125220, 125221,
|
||||||
|
125222, 125223, 125224, 125225, 125226, 125227, 125228, 125229, 125230, 125231, 125232, 125233,
|
||||||
|
125234, 125235, 125236, 125237, 125238, 125239, 125240, 125241, 125242, 125243, 125244, 125245,
|
||||||
|
125246, 125247, 125248, 125249, 125250, 125251];
|
||||||
|
|
||||||
|
for (let iter of lower_expected) {
|
||||||
|
let cp = String.fromCodePoint(iter);
|
||||||
|
assert(cp !== cp.toLowerCase());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (let iter of upper_expected) {
|
||||||
|
let cp = String.fromCodePoint(iter);
|
||||||
|
assert(cp !== cp.toUpperCase());
|
||||||
|
}
|
||||||
|
|
||||||
|
assert("\ud801A".toLowerCase() === "\ud801a");
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
// Copyright JS Foundation and other contributors, http://js.foundation
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
var \u{102C0} = 2;
|
||||||
|
assert(\u{102C0} === 2);
|
||||||
|
|
||||||
|
var o1 = { \u{102C0} : 3 };
|
||||||
|
assert(o1['\ud800\udec0'] === 3);
|
||||||
|
|
||||||
|
var o2 = { '\ud800\udec0' : 4 };
|
||||||
|
assert(o2.\u{102C0} === 4);
|
||||||
|
|
||||||
|
try {
|
||||||
|
eval('var ⸯ');
|
||||||
|
assert(false);
|
||||||
|
} catch(e) {
|
||||||
|
assert(e instanceof SyntaxError);
|
||||||
|
}
|
||||||
|
|
||||||
|
var 𐋀 = 5;
|
||||||
|
assert(𐋀 === 5);
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
// Copyright JS Foundation and other contributors, http://js.foundation
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Although codepoint 0x10400 and 0x10428 are an upper-lowercase pair,
|
||||||
|
// we must not do their conversion in JavaScript. We must also ignore
|
||||||
|
// stray surrogates.
|
||||||
|
|
||||||
|
assert ("\ud801\ud801\udc00\udc00".toLowerCase() == "\ud801\ud801\udc00\udc00");
|
||||||
|
assert ("\ud801\ud801\udc28\udc28".toUpperCase() == "\ud801\ud801\udc28\udc28");
|
||||||
@@ -84,13 +84,6 @@ assert ("0123456789abcdefghijklmnopqrstuvwxzyABCDEFGHIJKLMNOPQRSTUVWXYZ".toLower
|
|||||||
assert ("0123456789abcdefghijklmnopqrstuvwxzyABCDEFGHIJKLMNOPQRSTUVWXYZ".toUpperCase()
|
assert ("0123456789abcdefghijklmnopqrstuvwxzyABCDEFGHIJKLMNOPQRSTUVWXYZ".toUpperCase()
|
||||||
== "0123456789ABCDEFGHIJKLMNOPQRSTUVWXZYABCDEFGHIJKLMNOPQRSTUVWXYZ");
|
== "0123456789ABCDEFGHIJKLMNOPQRSTUVWXZYABCDEFGHIJKLMNOPQRSTUVWXYZ");
|
||||||
|
|
||||||
// Although codepoint 0x10400 and 0x10428 are an upper-lowercase pair,
|
|
||||||
// we must not do their conversion in JavaScript. We must also ignore
|
|
||||||
// stray surrogates.
|
|
||||||
|
|
||||||
assert ("\ud801\ud801\udc00\udc00".toLowerCase() == "\ud801\ud801\udc00\udc00");
|
|
||||||
assert ("\ud801\ud801\udc28\udc28".toUpperCase() == "\ud801\ud801\udc28\udc28");
|
|
||||||
|
|
||||||
// Conversion of non-string objects.
|
// Conversion of non-string objects.
|
||||||
|
|
||||||
assert (String.prototype.toUpperCase.call(true) == "TRUE");
|
assert (String.prototype.toUpperCase.call(true) == "TRUE");
|
||||||
|
|||||||
@@ -123,11 +123,7 @@
|
|||||||
<test id="built-ins/String/prototype/normalize/return-normalized-string.js"><reason></reason></test>
|
<test id="built-ins/String/prototype/normalize/return-normalized-string.js"><reason></reason></test>
|
||||||
<test id="built-ins/String/prototype/normalize/return-normalized-string-using-default-parameter.js"><reason></reason></test>
|
<test id="built-ins/String/prototype/normalize/return-normalized-string-using-default-parameter.js"><reason></reason></test>
|
||||||
<test id="built-ins/String/prototype/toLocaleLowerCase/special_casing_conditional.js"><reason></reason></test>
|
<test id="built-ins/String/prototype/toLocaleLowerCase/special_casing_conditional.js"><reason></reason></test>
|
||||||
<test id="built-ins/String/prototype/toLocaleLowerCase/supplementary_plane.js"><reason></reason></test>
|
|
||||||
<test id="built-ins/String/prototype/toLocaleUpperCase/supplementary_plane.js"><reason></reason></test>
|
|
||||||
<test id="built-ins/String/prototype/toLowerCase/special_casing_conditional.js"><reason></reason></test>
|
<test id="built-ins/String/prototype/toLowerCase/special_casing_conditional.js"><reason></reason></test>
|
||||||
<test id="built-ins/String/prototype/toLowerCase/supplementary_plane.js"><reason></reason></test>
|
|
||||||
<test id="built-ins/String/prototype/toUpperCase/supplementary_plane.js"><reason></reason></test>
|
|
||||||
<test id="intl402/6.2.2_a.js"><reason></reason></test>
|
<test id="intl402/6.2.2_a.js"><reason></reason></test>
|
||||||
<test id="intl402/6.2.2_b.js"><reason></reason></test>
|
<test id="intl402/6.2.2_b.js"><reason></reason></test>
|
||||||
<test id="intl402/6.2.2_c.js"><reason></reason></test>
|
<test id="intl402/6.2.2_c.js"><reason></reason></test>
|
||||||
@@ -377,4 +373,16 @@
|
|||||||
<test id="language/statements/generators/prototype-value.js"><reason></reason></test>
|
<test id="language/statements/generators/prototype-value.js"><reason></reason></test>
|
||||||
<test id="language/statements/let/syntax/identifier-let-disallowed-as-boundname.js"><reason></reason></test>
|
<test id="language/statements/let/syntax/identifier-let-disallowed-as-boundname.js"><reason></reason></test>
|
||||||
<test id="language/statements/try/S12.14_A16_T4.js"><reason>ES2019 change: catch without parameter is allowed</reason></test>
|
<test id="language/statements/try/S12.14_A16_T4.js"><reason>ES2019 change: catch without parameter is allowed</reason></test>
|
||||||
|
<test id="built-ins/Number/S9.3.1_A2.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
|
||||||
|
<test id="built-ins/Number/S9.3.1_A3_T1.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
|
||||||
|
<test id="built-ins/Number/S9.3.1_A3_T2.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
|
||||||
|
<test id="built-ins/RegExp/S15.10.2.12_A1_T1.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
|
||||||
|
<test id="built-ins/RegExp/S15.10.2.12_A2_T1.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
|
||||||
|
<test id="built-ins/String/prototype/trim/15.5.4.20-3-2.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
|
||||||
|
<test id="built-ins/String/prototype/trim/15.5.4.20-3-3.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
|
||||||
|
<test id="built-ins/String/prototype/trim/15.5.4.20-3-4.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
|
||||||
|
<test id="built-ins/String/prototype/trim/15.5.4.20-3-5.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
|
||||||
|
<test id="built-ins/String/prototype/trim/15.5.4.20-3-6.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
|
||||||
|
<test id="built-ins/parseFloat/S15.1.2.3_A2_T10.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
|
||||||
|
<test id="built-ins/parseInt/S15.1.2.2_A2_T10.js"><reason>Unicode 13: 0x180E is no longer whitespace character</reason></test>
|
||||||
</excludeList>
|
</excludeList>
|
||||||
|
|||||||
+298
-367
@@ -17,10 +17,10 @@
|
|||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import bisect
|
|
||||||
import csv
|
import csv
|
||||||
import itertools
|
import itertools
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from gen_c_source import LICENSE, format_code
|
from gen_c_source import LICENSE, format_code
|
||||||
@@ -28,268 +28,286 @@ from settings import PROJECT_DIR
|
|||||||
|
|
||||||
|
|
||||||
RANGES_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-ranges.inc.h')
|
RANGES_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-ranges.inc.h')
|
||||||
|
RANGES_SUP_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-ranges-sup.inc.h')
|
||||||
CONVERSIONS_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-conversions.inc.h')
|
CONVERSIONS_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-conversions.inc.h')
|
||||||
|
CONVERSIONS_SUP_C_SOURCE = os.path.join(PROJECT_DIR, 'jerry-core/lit/lit-unicode-conversions-sup.inc.h')
|
||||||
|
|
||||||
|
UNICODE_PLANE_TYPE_BASIC = 0
|
||||||
|
UNICODE_PLANE_TYPE_SUPPLEMENTARY = 1
|
||||||
|
|
||||||
|
# For ES5.1 profile we use a predefined subset of whitespace characters
|
||||||
|
ES5_1_WHITE_SPACE_UNITS = [0x1680, 0x180e]
|
||||||
|
ES5_1_WHITE_SPACE_UNITS.extend(range(0x2000, 0x200c))
|
||||||
|
ES5_1_WHITE_SPACE_UNITS.extend([0x202f, 0x205f, 0x3000])
|
||||||
|
|
||||||
# common code generation
|
# common code generation
|
||||||
|
|
||||||
|
class UnicodeBasicSource(object):
|
||||||
|
# pylint: disable=too-many-instance-attributes
|
||||||
|
def __init__(self, filepath, character_type="uint16_t", length_type="uint8_t"):
|
||||||
|
self._filepath = filepath
|
||||||
|
self._header = [LICENSE, ""]
|
||||||
|
self._data = []
|
||||||
|
self._table_name_suffix = ""
|
||||||
|
self.character_type = character_type
|
||||||
|
self.length_type = length_type
|
||||||
|
|
||||||
class UniCodeSource(object):
|
self._range_table_types = [self.character_type,
|
||||||
def __init__(self, filepath):
|
self.length_type,
|
||||||
self.__filepath = filepath
|
self.character_type]
|
||||||
self.__header = [LICENSE, ""]
|
self._range_table_names = ["interval_starts",
|
||||||
self.__data = []
|
"interval_lengths",
|
||||||
|
"chars"]
|
||||||
|
self._range_table_descriptions = ["Character interval starting points for",
|
||||||
|
"Character interval lengths for",
|
||||||
|
"Non-interval characters for"]
|
||||||
|
|
||||||
|
self._conversion_range_types = [self.character_type,
|
||||||
|
self.length_type]
|
||||||
|
self._conversion_range_names = ["ranges",
|
||||||
|
"range_lengths"]
|
||||||
|
|
||||||
def complete_header(self, completion):
|
def complete_header(self, completion):
|
||||||
self.__header.append(completion)
|
self._header.append(completion)
|
||||||
self.__header.append("") # for an extra empty line
|
self._header.append("") # for an extra empty line
|
||||||
|
|
||||||
def add_table(self, table, table_name, table_type, table_descr):
|
def add_whitepace_range(self, category, categorizer, units):
|
||||||
self.__data.append(table_descr)
|
self._data.append("#if ENABLED (JERRY_ESNEXT)")
|
||||||
self.__data.append("static const %s lit_%s[] JERRY_ATTR_CONST_DATA =" % (table_type, table_name))
|
self.add_range(category, categorizer.create_tables(units))
|
||||||
self.__data.append("{")
|
self._data.append("#else /* !ENABLED (JERRY_ESNEXT) */")
|
||||||
self.__data.append(format_code(table, 1))
|
self.add_range(category, categorizer.create_tables(ES5_1_WHITE_SPACE_UNITS))
|
||||||
self.__data.append("};")
|
self._data.append("#endif /* ENABLED (JERRY_ESNEXT) */\n")
|
||||||
self.__data.append("") # for an extra empty line
|
|
||||||
|
def add_range(self, category, tables):
|
||||||
|
idx = 0
|
||||||
|
for table in tables:
|
||||||
|
self.add_table(table,
|
||||||
|
"/**\n * %s %s.\n */" % (self._range_table_descriptions[idx], category),
|
||||||
|
self._range_table_types[idx],
|
||||||
|
category,
|
||||||
|
self._range_table_names[idx])
|
||||||
|
idx += 1
|
||||||
|
|
||||||
|
def add_conversion_range(self, category, tables, descriptions):
|
||||||
|
self.add_named_conversion_range(category, tables, self._conversion_range_names, descriptions)
|
||||||
|
|
||||||
|
def add_named_conversion_range(self, category, tables, table_names, descriptions):
|
||||||
|
idx = 0
|
||||||
|
for table in tables:
|
||||||
|
self.add_table(table,
|
||||||
|
descriptions[idx],
|
||||||
|
self._conversion_range_types[idx],
|
||||||
|
category,
|
||||||
|
table_names[idx])
|
||||||
|
idx += 1
|
||||||
|
|
||||||
|
def add_table(self, table, description, table_type, category, table_name):
|
||||||
|
if table and sum(table) != 0:
|
||||||
|
self._data.append(description)
|
||||||
|
self._data.append("static const %s lit_unicode_%s%s%s[] JERRY_ATTR_CONST_DATA ="
|
||||||
|
% (table_type,
|
||||||
|
category.lower(),
|
||||||
|
"_" + table_name if table_name else "",
|
||||||
|
self._table_name_suffix))
|
||||||
|
self._data.append("{")
|
||||||
|
self._data.append(format_code(table, 1, 6 if self._table_name_suffix else 4))
|
||||||
|
self._data.append("};")
|
||||||
|
self._data.append("") # for an extra empty line
|
||||||
|
|
||||||
def generate(self):
|
def generate(self):
|
||||||
with open(self.__filepath, 'w') as generated_source:
|
with open(self._filepath, 'w') as generated_source:
|
||||||
generated_source.write("\n".join(self.__header))
|
generated_source.write("\n".join(self._header))
|
||||||
generated_source.write("\n".join(self.__data))
|
generated_source.write("\n".join(self._data))
|
||||||
|
|
||||||
class UnicodeCategorizer(object):
|
|
||||||
|
class UnicodeSupplementarySource(UnicodeBasicSource):
|
||||||
|
def __init__(self, filepath):
|
||||||
|
UnicodeBasicSource.__init__(self, filepath, "uint32_t", "uint16_t")
|
||||||
|
self._table_name_suffix = "_sup"
|
||||||
|
|
||||||
|
def add_whitepace_range(self, category, categorizer, units):
|
||||||
|
self.add_range(category, categorizer.create_tables(units))
|
||||||
|
|
||||||
|
class UnicodeBasicCategorizer(object):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
# unicode categories: Lu Ll Lt Mn Mc Me Nd Nl No Zs Zl Zp Cc Cf Cs
|
self._length_limit = 0xff
|
||||||
# Co Lm Lo Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So
|
self.extra_id_continue_units = set([0x200C, 0x200D])
|
||||||
# letter: Lu Ll Lt Lm Lo Nl
|
|
||||||
# non-letter-indent-part:
|
|
||||||
# digit: Nd
|
|
||||||
# punctuation mark: Mn Mc
|
|
||||||
# connector punctuation: Pc
|
|
||||||
# separators: Zs
|
|
||||||
self._unicode_categories = {
|
|
||||||
'letters_category' : ["Lu", "Ll", "Lt", "Lm", "Lo", "Nl"],
|
|
||||||
'non_letters_category' : ["Nd", "Mn", "Mc", "Pc"],
|
|
||||||
'separators_category' : ["Zs"]
|
|
||||||
}
|
|
||||||
|
|
||||||
self._categories = {
|
#pylint: disable=no-self-use
|
||||||
'letters' : [],
|
def in_range(self, i):
|
||||||
'non_letters' : [],
|
return i >= 0x80 and i < 0x10000
|
||||||
'separators' : []
|
|
||||||
}
|
|
||||||
|
|
||||||
def _store_by_category(self, unicode_id, category):
|
def _group_ranges(self, units):
|
||||||
"""
|
|
||||||
Store the given unicode_id by its category
|
|
||||||
"""
|
|
||||||
for target_category in self._categories:
|
|
||||||
if category in self._unicode_categories[target_category + '_category']:
|
|
||||||
self._categories[target_category].append(unicode_id)
|
|
||||||
|
|
||||||
def read_categories(self, unicode_data_file):
|
|
||||||
"""
|
|
||||||
Read the corresponding unicode values and store them in category lists.
|
|
||||||
|
|
||||||
:return: List of letters, non_letter and separators.
|
|
||||||
"""
|
|
||||||
|
|
||||||
range_start_id = 0
|
|
||||||
|
|
||||||
with open(unicode_data_file) as unicode_data:
|
|
||||||
for line in csv.reader(unicode_data, delimiter=';'):
|
|
||||||
unicode_id = int(line[0], 16)
|
|
||||||
|
|
||||||
# Skip supplementary planes and ascii chars
|
|
||||||
if unicode_id >= 0x10000 or unicode_id < 128:
|
|
||||||
continue
|
|
||||||
|
|
||||||
category = line[2]
|
|
||||||
|
|
||||||
if range_start_id != 0:
|
|
||||||
while range_start_id <= unicode_id:
|
|
||||||
self._store_by_category(range_start_id, category)
|
|
||||||
range_start_id += 1
|
|
||||||
range_start_id = 0
|
|
||||||
continue
|
|
||||||
|
|
||||||
if line[1].startswith('<'):
|
|
||||||
# Save the start position of the range
|
|
||||||
range_start_id = unicode_id
|
|
||||||
|
|
||||||
self._store_by_category(unicode_id, category)
|
|
||||||
|
|
||||||
# This separator char is handled separatly
|
|
||||||
separators = self._categories['separators']
|
|
||||||
non_breaking_space = 0x00A0
|
|
||||||
if non_breaking_space in separators:
|
|
||||||
separators.remove(int(non_breaking_space))
|
|
||||||
|
|
||||||
# These separator chars are not in the unicode data file or not in Zs category
|
|
||||||
mongolian_vowel_separator = 0x180E
|
|
||||||
medium_mathematical_space = 0x205F
|
|
||||||
zero_width_space = 0x200B
|
|
||||||
|
|
||||||
if mongolian_vowel_separator not in separators:
|
|
||||||
bisect.insort(separators, int(mongolian_vowel_separator))
|
|
||||||
if medium_mathematical_space not in separators:
|
|
||||||
bisect.insort(separators, int(medium_mathematical_space))
|
|
||||||
if zero_width_space not in separators:
|
|
||||||
bisect.insort(separators, int(zero_width_space))
|
|
||||||
|
|
||||||
# https://www.ecma-international.org/ecma-262/5.1/#sec-7.1 format-control characters
|
|
||||||
non_letters = self._categories['non_letters']
|
|
||||||
zero_width_non_joiner = 0x200C
|
|
||||||
zero_width_joiner = 0x200D
|
|
||||||
|
|
||||||
bisect.insort(non_letters, int(zero_width_non_joiner))
|
|
||||||
bisect.insort(non_letters, int(zero_width_joiner))
|
|
||||||
|
|
||||||
return self._categories['letters'], self._categories['non_letters'], self._categories['separators']
|
|
||||||
|
|
||||||
|
|
||||||
def group_ranges(i):
|
|
||||||
"""
|
"""
|
||||||
Convert an increasing list of integers into a range list
|
Convert an increasing list of integers into a range list
|
||||||
|
|
||||||
:return: List of ranges.
|
:return: List of ranges.
|
||||||
"""
|
"""
|
||||||
for _, group in itertools.groupby(enumerate(i), lambda q: (q[1] - q[0])):
|
for _, group in itertools.groupby(enumerate(units), lambda q: (q[1] - q[0])):
|
||||||
group = list(group)
|
group = list(group)
|
||||||
yield group[0][1], group[-1][1]
|
yield group[0][1], group[-1][1]
|
||||||
|
|
||||||
|
def create_tables(self, units):
|
||||||
def split_list(category_list):
|
|
||||||
"""
|
"""
|
||||||
Split list of ranges into intervals and single char lists.
|
Split list of ranges into intervals and single char lists.
|
||||||
|
:return: A tuple containing the following info:
|
||||||
:return: List of interval starting points, interval lengths and single chars
|
- list of interval starting points
|
||||||
|
- list of interval lengths
|
||||||
|
- list of single chars
|
||||||
"""
|
"""
|
||||||
|
|
||||||
interval_sps = []
|
interval_sps = []
|
||||||
interval_lengths = []
|
interval_lengths = []
|
||||||
chars = []
|
chars = []
|
||||||
|
|
||||||
for element in category_list:
|
for element in self._group_ranges(units):
|
||||||
interval_length = element[1] - element[0]
|
interval_length = element[1] - element[0]
|
||||||
if interval_length == 0:
|
if interval_length == 0:
|
||||||
chars.append(element[0])
|
chars.append(element[0])
|
||||||
elif interval_length > 255:
|
elif interval_length > self._length_limit:
|
||||||
for i in range(element[0], element[1], 256):
|
for i in range(element[0], element[1], self._length_limit + 1):
|
||||||
length = 255 if (element[1] - i > 255) else (element[1] - i)
|
length = min(self._length_limit, element[1] - i)
|
||||||
interval_sps.append(i)
|
interval_sps.append(i)
|
||||||
interval_lengths.append(length)
|
interval_lengths.append(length)
|
||||||
else:
|
else:
|
||||||
interval_sps.append(element[0])
|
interval_sps.append(element[0])
|
||||||
interval_lengths.append(element[1] - element[0])
|
interval_lengths.append(interval_length)
|
||||||
|
|
||||||
return interval_sps, interval_lengths, chars
|
return interval_sps, interval_lengths, chars
|
||||||
|
|
||||||
|
def read_units(self, file_path, categories, subcategories=None):
|
||||||
|
"""
|
||||||
|
Read the Unicode Derived Core Properties file and extract the ranges
|
||||||
|
for the given categories.
|
||||||
|
|
||||||
def generate_ranges(script_args):
|
:param file_path: Path to the Unicode "DerivedCoreProperties.txt" file.
|
||||||
categorizer = UnicodeCategorizer()
|
:param categories: A list of category strings to extract from the Unicode file.
|
||||||
letters, non_letters, separators = categorizer.read_categories(script_args.unicode_data)
|
:param subcategories: A list of subcategory strings to restrict categories.
|
||||||
|
:return: A dictionary each string from the :param categories: is a key and for each
|
||||||
|
key list of code points are stored.
|
||||||
|
"""
|
||||||
|
# Create a dictionary in the format: { category[0]: [ ], ..., category[N]: [ ] }
|
||||||
|
units = {}
|
||||||
|
for category in categories:
|
||||||
|
units[category] = []
|
||||||
|
|
||||||
letter_tables = split_list(list(group_ranges(letters)))
|
# Formats to match:
|
||||||
non_letter_tables = split_list(list(group_ranges(non_letters)))
|
# <HEX> ; <category> #
|
||||||
separator_tables = split_list(list(group_ranges(separators)))
|
# <HEX>..<HEX> ; <category> # <subcategory>
|
||||||
|
matcher = r"(?P<start>[\dA-F]+)(?:\.\.(?P<end>[\dA-F]+))?\s+; (?P<category>[\w]+) # (?P<subcategory>[\w&]{2})"
|
||||||
|
|
||||||
c_source = UniCodeSource(RANGES_C_SOURCE)
|
with open(file_path, "r") as src_file:
|
||||||
|
for line in src_file:
|
||||||
|
match = re.match(matcher, line)
|
||||||
|
|
||||||
|
if (match
|
||||||
|
and match.group("category") in categories
|
||||||
|
and (not subcategories or match.group("subcategory") in subcategories)):
|
||||||
|
start = int(match.group("start"), 16)
|
||||||
|
# if no "end" found use the "start"
|
||||||
|
end = int(match.group("end") or match.group("start"), 16)
|
||||||
|
|
||||||
|
matching_code_points = [
|
||||||
|
code_point for code_point in range(start, end + 1) if self.in_range(code_point)
|
||||||
|
]
|
||||||
|
|
||||||
|
units[match.group("category")].extend(matching_code_points)
|
||||||
|
|
||||||
|
return units
|
||||||
|
|
||||||
|
def read_case_mappings(self, unicode_data_file, special_casing_file):
|
||||||
|
"""
|
||||||
|
Read the corresponding unicode values of lower and upper case letters and store these in tables.
|
||||||
|
|
||||||
|
:param unicode_data_file: Contains the default case mappings (one-to-one mappings).
|
||||||
|
:param special_casing_file: Contains additional informative case mappings that are either not one-to-one
|
||||||
|
or which are context-sensitive.
|
||||||
|
:return: Upper and lower case mappings.
|
||||||
|
"""
|
||||||
|
|
||||||
|
lower_case_mapping = {}
|
||||||
|
upper_case_mapping = {}
|
||||||
|
|
||||||
|
# Add one-to-one mappings
|
||||||
|
with open(unicode_data_file) as unicode_data:
|
||||||
|
reader = csv.reader(unicode_data, delimiter=';')
|
||||||
|
|
||||||
|
for line in reader:
|
||||||
|
letter_id = int(line[0], 16)
|
||||||
|
|
||||||
|
if not self.in_range(letter_id):
|
||||||
|
continue
|
||||||
|
|
||||||
|
capital_letter = line[12]
|
||||||
|
small_letter = line[13]
|
||||||
|
|
||||||
|
if capital_letter:
|
||||||
|
upper_case_mapping[letter_id] = parse_unicode_sequence(capital_letter)
|
||||||
|
|
||||||
|
if small_letter:
|
||||||
|
lower_case_mapping[letter_id] = parse_unicode_sequence(small_letter)
|
||||||
|
|
||||||
|
# Update the conversion tables with the special cases
|
||||||
|
with open(special_casing_file) as special_casing:
|
||||||
|
reader = csv.reader(special_casing, delimiter=';')
|
||||||
|
|
||||||
|
for line in reader:
|
||||||
|
# Skip comment sections and empty lines
|
||||||
|
if not line or line[0].startswith('#'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Replace '#' character with empty string
|
||||||
|
for idx, fragment in enumerate(line):
|
||||||
|
if fragment.find('#') >= 0:
|
||||||
|
line[idx] = ''
|
||||||
|
|
||||||
|
letter_id = int(line[0], 16)
|
||||||
|
condition_list = line[4]
|
||||||
|
|
||||||
|
if not self.in_range(letter_id) or condition_list:
|
||||||
|
continue
|
||||||
|
|
||||||
|
small_letter = parse_unicode_sequence(line[1])
|
||||||
|
capital_letter = parse_unicode_sequence(line[3])
|
||||||
|
|
||||||
|
lower_case_mapping[letter_id] = small_letter
|
||||||
|
upper_case_mapping[letter_id] = capital_letter
|
||||||
|
|
||||||
|
return lower_case_mapping, upper_case_mapping
|
||||||
|
|
||||||
|
class UnicodeSupplementaryCategorizer(UnicodeBasicCategorizer):
|
||||||
|
def __init__(self):
|
||||||
|
UnicodeBasicCategorizer.__init__(self)
|
||||||
|
self._length_limit = 0xffff
|
||||||
|
self.extra_id_continue_units = set()
|
||||||
|
|
||||||
|
def in_range(self, i):
|
||||||
|
return i >= 0x10000
|
||||||
|
|
||||||
|
def generate_ranges(script_args, plane_type):
|
||||||
|
if plane_type == UNICODE_PLANE_TYPE_SUPPLEMENTARY:
|
||||||
|
c_source = UnicodeSupplementarySource(RANGES_SUP_C_SOURCE)
|
||||||
|
categorizer = UnicodeSupplementaryCategorizer()
|
||||||
|
else:
|
||||||
|
c_source = UnicodeBasicSource(RANGES_C_SOURCE)
|
||||||
|
categorizer = UnicodeBasicCategorizer()
|
||||||
|
|
||||||
header_completion = ["/* This file is automatically generated by the %s script" % os.path.basename(__file__),
|
header_completion = ["/* This file is automatically generated by the %s script" % os.path.basename(__file__),
|
||||||
" * from %s. Do not edit! */" % os.path.basename(script_args.unicode_data),
|
" * from %s. Do not edit! */" % os.path.basename(script_args.derived_core_properties),
|
||||||
""]
|
""]
|
||||||
|
|
||||||
c_source.complete_header("\n".join(header_completion))
|
c_source.complete_header("\n".join(header_completion))
|
||||||
|
|
||||||
c_source.add_table(letter_tables[0],
|
units = categorizer.read_units(script_args.derived_core_properties, ["ID_Start", "ID_Continue"])
|
||||||
"unicode_letter_interval_sps",
|
|
||||||
"uint16_t",
|
|
||||||
("/**\n"
|
|
||||||
" * Character interval starting points for the unicode letters.\n"
|
|
||||||
" *\n"
|
|
||||||
" * The characters covered by these intervals are from\n"
|
|
||||||
" * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl\n"
|
|
||||||
" */"))
|
|
||||||
|
|
||||||
c_source.add_table(letter_tables[1],
|
units["ID_Continue"] = sorted(set(units["ID_Continue"]).union(categorizer.extra_id_continue_units)
|
||||||
"unicode_letter_interval_lengths",
|
- set(units["ID_Start"]))
|
||||||
"uint8_t",
|
|
||||||
("/**\n"
|
|
||||||
" * Character lengths for the unicode letters.\n"
|
|
||||||
" *\n"
|
|
||||||
" * The characters covered by these intervals are from\n"
|
|
||||||
" * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl\n"
|
|
||||||
" */"))
|
|
||||||
|
|
||||||
c_source.add_table(letter_tables[2],
|
for category, unit in units.items():
|
||||||
"unicode_letter_chars",
|
c_source.add_range(category, categorizer.create_tables(unit))
|
||||||
"uint16_t",
|
|
||||||
("/**\n"
|
|
||||||
" * Those unicode letter characters that are not inside any of\n"
|
|
||||||
" * the intervals specified in lit_unicode_letter_interval_sps array.\n"
|
|
||||||
" *\n"
|
|
||||||
" * The characters are from the following Unicode categories:\n"
|
|
||||||
" * Lu, Ll, Lt, Lm, Lo, Nl\n"
|
|
||||||
" */"))
|
|
||||||
|
|
||||||
c_source.add_table(non_letter_tables[0],
|
white_space_units = categorizer.read_units(script_args.prop_list, ["White_Space"], ["Zs"])["White_Space"]
|
||||||
"unicode_non_letter_ident_part_interval_sps",
|
|
||||||
"uint16_t",
|
|
||||||
("/**\n"
|
|
||||||
" * Character interval starting points for non-letter character\n"
|
|
||||||
" * that can be used as a non-first character of an identifier.\n"
|
|
||||||
" *\n"
|
|
||||||
" * The characters covered by these intervals are from\n"
|
|
||||||
" * the following Unicode categories: Nd, Mn, Mc, Pc\n"
|
|
||||||
" */"))
|
|
||||||
|
|
||||||
c_source.add_table(non_letter_tables[1],
|
c_source.add_whitepace_range("White_Space", categorizer, white_space_units)
|
||||||
"unicode_non_letter_ident_part_interval_lengths",
|
|
||||||
"uint8_t",
|
|
||||||
("/**\n"
|
|
||||||
" * Character interval lengths for non-letter character\n"
|
|
||||||
" * that can be used as a non-first character of an identifier.\n"
|
|
||||||
" *\n"
|
|
||||||
" * The characters covered by these intervals are from\n"
|
|
||||||
" * the following Unicode categories: Nd, Mn, Mc, Pc\n"
|
|
||||||
" */"))
|
|
||||||
|
|
||||||
c_source.add_table(non_letter_tables[2],
|
|
||||||
"unicode_non_letter_ident_part_chars",
|
|
||||||
"uint16_t",
|
|
||||||
("/**\n"
|
|
||||||
" * Those non-letter characters that can be used as a non-first\n"
|
|
||||||
" * character of an identifier and not included in any of the intervals\n"
|
|
||||||
" * specified in lit_unicode_non_letter_ident_part_interval_sps array.\n"
|
|
||||||
" *\n"
|
|
||||||
" * The characters are from the following Unicode categories:\n"
|
|
||||||
" * Nd, Mn, Mc, Pc\n"
|
|
||||||
" */"))
|
|
||||||
|
|
||||||
c_source.add_table(separator_tables[0],
|
|
||||||
"unicode_separator_char_interval_sps",
|
|
||||||
"uint16_t",
|
|
||||||
("/**\n"
|
|
||||||
" * Unicode separator character interval starting points from Unicode category: Zs\n"
|
|
||||||
" */"))
|
|
||||||
|
|
||||||
c_source.add_table(separator_tables[1],
|
|
||||||
"unicode_separator_char_interval_lengths",
|
|
||||||
"uint8_t",
|
|
||||||
("/**\n"
|
|
||||||
" * Unicode separator character interval lengths from Unicode category: Zs\n"
|
|
||||||
" */"))
|
|
||||||
|
|
||||||
c_source.add_table(separator_tables[2],
|
|
||||||
"unicode_separator_chars",
|
|
||||||
"uint16_t",
|
|
||||||
("/**\n"
|
|
||||||
" * Unicode separator characters that are not in the\n"
|
|
||||||
" * lit_unicode_separator_char_intervals array.\n"
|
|
||||||
" *\n"
|
|
||||||
" * Unicode category: Zs\n"
|
|
||||||
" */"))
|
|
||||||
|
|
||||||
c_source.generate()
|
c_source.generate()
|
||||||
|
|
||||||
@@ -320,70 +338,6 @@ def parse_unicode_sequence(raw_data):
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def read_case_mappings(unicode_data_file, special_casing_file):
|
|
||||||
"""
|
|
||||||
Read the corresponding unicode values of lower and upper case letters and store these in tables.
|
|
||||||
|
|
||||||
:param unicode_data_file: Contains the default case mappings (one-to-one mappings).
|
|
||||||
:param special_casing_file: Contains additional informative case mappings that are either not one-to-one
|
|
||||||
or which are context-sensitive.
|
|
||||||
:return: Upper and lower case mappings.
|
|
||||||
"""
|
|
||||||
|
|
||||||
lower_case_mapping = {}
|
|
||||||
upper_case_mapping = {}
|
|
||||||
|
|
||||||
# Add one-to-one mappings
|
|
||||||
with open(unicode_data_file) as unicode_data:
|
|
||||||
unicode_data_reader = csv.reader(unicode_data, delimiter=';')
|
|
||||||
|
|
||||||
for line in unicode_data_reader:
|
|
||||||
letter_id = int(line[0], 16)
|
|
||||||
|
|
||||||
# Skip supplementary planes and ascii chars
|
|
||||||
if letter_id >= 0x10000 or letter_id < 128:
|
|
||||||
continue
|
|
||||||
|
|
||||||
capital_letter = line[12]
|
|
||||||
small_letter = line[13]
|
|
||||||
|
|
||||||
if capital_letter:
|
|
||||||
upper_case_mapping[letter_id] = parse_unicode_sequence(capital_letter)
|
|
||||||
|
|
||||||
if small_letter:
|
|
||||||
lower_case_mapping[letter_id] = parse_unicode_sequence(small_letter)
|
|
||||||
|
|
||||||
# Update the conversion tables with the special cases
|
|
||||||
with open(special_casing_file) as special_casing:
|
|
||||||
special_casing_reader = csv.reader(special_casing, delimiter=';')
|
|
||||||
|
|
||||||
for line in special_casing_reader:
|
|
||||||
# Skip comment sections and empty lines
|
|
||||||
if not line or line[0].startswith('#'):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Replace '#' character with empty string
|
|
||||||
for idx, i in enumerate(line):
|
|
||||||
if i.find('#') >= 0:
|
|
||||||
line[idx] = ''
|
|
||||||
|
|
||||||
letter_id = int(line[0], 16)
|
|
||||||
condition_list = line[4]
|
|
||||||
|
|
||||||
# Skip supplementary planes, ascii chars, and condition_list
|
|
||||||
if letter_id >= 0x10000 or letter_id < 128 or condition_list:
|
|
||||||
continue
|
|
||||||
|
|
||||||
small_letter = parse_unicode_sequence(line[1])
|
|
||||||
capital_letter = parse_unicode_sequence(line[3])
|
|
||||||
|
|
||||||
lower_case_mapping[letter_id] = small_letter
|
|
||||||
upper_case_mapping[letter_id] = capital_letter
|
|
||||||
|
|
||||||
return lower_case_mapping, upper_case_mapping
|
|
||||||
|
|
||||||
|
|
||||||
def extract_ranges(letter_case, reverse_letter_case=None):
|
def extract_ranges(letter_case, reverse_letter_case=None):
|
||||||
"""
|
"""
|
||||||
Extract ranges from case mappings
|
Extract ranges from case mappings
|
||||||
@@ -675,27 +629,13 @@ def calculate_conversion_distance(letter_case, letter_id):
|
|||||||
return ord(letter_case[letter_id]) - letter_id
|
return ord(letter_case[letter_id]) - letter_id
|
||||||
|
|
||||||
|
|
||||||
def generate_conversions(script_args):
|
def generate_conversions(script_args, plane_type):
|
||||||
# Read the corresponding unicode values of lower and upper case letters and store these in tables
|
if plane_type == UNICODE_PLANE_TYPE_SUPPLEMENTARY:
|
||||||
case_mappings = read_case_mappings(script_args.unicode_data, script_args.special_casing)
|
c_source = UnicodeSupplementarySource(CONVERSIONS_SUP_C_SOURCE)
|
||||||
lower_case = case_mappings[0]
|
categorizer = UnicodeSupplementaryCategorizer()
|
||||||
upper_case = case_mappings[1]
|
else:
|
||||||
|
c_source = UnicodeBasicSource(CONVERSIONS_C_SOURCE)
|
||||||
character_case_ranges = extract_ranges(lower_case, upper_case)
|
categorizer = UnicodeBasicCategorizer()
|
||||||
character_pair_ranges = extract_character_pair_ranges(lower_case, upper_case)
|
|
||||||
character_pairs = extract_character_pairs(lower_case, upper_case)
|
|
||||||
upper_case_special_ranges = extract_special_ranges(upper_case)
|
|
||||||
lower_case_ranges = extract_ranges(lower_case)
|
|
||||||
lower_case_conversions = extract_conversions(lower_case)
|
|
||||||
upper_case_conversions = extract_conversions(upper_case)
|
|
||||||
|
|
||||||
if lower_case:
|
|
||||||
warnings.warn('Not all elements extracted from the lowercase table!')
|
|
||||||
if upper_case:
|
|
||||||
warnings.warn('Not all elements extracted from the uppercase table!')
|
|
||||||
|
|
||||||
# Generate conversions output
|
|
||||||
c_source = UniCodeSource(CONVERSIONS_C_SOURCE)
|
|
||||||
|
|
||||||
unicode_file = os.path.basename(script_args.unicode_data)
|
unicode_file = os.path.basename(script_args.unicode_data)
|
||||||
spec_casing_file = os.path.basename(script_args.special_casing)
|
spec_casing_file = os.path.basename(script_args.special_casing)
|
||||||
@@ -706,75 +646,58 @@ def generate_conversions(script_args):
|
|||||||
|
|
||||||
c_source.complete_header("\n".join(header_completion))
|
c_source.complete_header("\n".join(header_completion))
|
||||||
|
|
||||||
c_source.add_table(character_case_ranges[0],
|
# Read the corresponding unicode values of lower and upper case letters and store these in tables
|
||||||
"character_case_ranges",
|
lower_case, upper_case = categorizer.read_case_mappings(script_args.unicode_data, script_args.special_casing)
|
||||||
"uint16_t",
|
|
||||||
("/* Contains start points of character case ranges "
|
|
||||||
"(these are bidirectional conversions). */"))
|
|
||||||
|
|
||||||
c_source.add_table(character_case_ranges[1],
|
c_source.add_conversion_range("character_case",
|
||||||
"character_case_range_lengths",
|
extract_ranges(lower_case, upper_case),
|
||||||
"uint8_t",
|
[("/* Contains start points of character case ranges "
|
||||||
"/* Interval lengths of start points in `character_case_ranges` table. */")
|
"(these are bidirectional conversions). */"),
|
||||||
|
"/* Interval lengths of start points in `character_case_ranges` table. */"])
|
||||||
|
c_source.add_conversion_range("character_pair",
|
||||||
|
extract_character_pair_ranges(lower_case, upper_case),
|
||||||
|
["/* Contains the start points of bidirectional conversion ranges. */",
|
||||||
|
"/* Interval lengths of start points in `character_pair_ranges` table. */"])
|
||||||
|
|
||||||
c_source.add_table(character_pair_ranges[0],
|
c_source.add_table(extract_character_pairs(lower_case, upper_case),
|
||||||
"character_pair_ranges",
|
"/* Contains lower/upper case bidirectional conversion pairs. */",
|
||||||
"uint16_t",
|
c_source.character_type,
|
||||||
"/* Contains the start points of bidirectional conversion ranges. */")
|
|
||||||
|
|
||||||
c_source.add_table(character_pair_ranges[1],
|
|
||||||
"character_pair_range_lengths",
|
|
||||||
"uint8_t",
|
|
||||||
"/* Interval lengths of start points in `character_pair_ranges` table. */")
|
|
||||||
|
|
||||||
c_source.add_table(character_pairs,
|
|
||||||
"character_pairs",
|
"character_pairs",
|
||||||
"uint16_t",
|
"")
|
||||||
"/* Contains lower/upper case bidirectional conversion pairs. */")
|
|
||||||
|
|
||||||
c_source.add_table(upper_case_special_ranges[0],
|
c_source.add_conversion_range("upper_case_special",
|
||||||
"upper_case_special_ranges",
|
extract_special_ranges(upper_case),
|
||||||
"uint16_t",
|
[("/* Contains start points of one-to-two uppercase ranges where the "
|
||||||
("/* Contains start points of one-to-two uppercase ranges where the second character\n"
|
"second character\n"
|
||||||
" * is always the same.\n"
|
" * is always the same.\n"
|
||||||
" */"))
|
" */"),
|
||||||
|
"/* Interval lengths for start points in `upper_case_special_ranges` table. */"])
|
||||||
|
|
||||||
c_source.add_table(upper_case_special_ranges[1],
|
c_source.add_conversion_range("lower_case",
|
||||||
"upper_case_special_range_lengths",
|
extract_ranges(lower_case),
|
||||||
"uint8_t",
|
["/* Contains start points of lowercase ranges. */",
|
||||||
"/* Interval lengths for start points in `upper_case_special_ranges` table. */")
|
"/* Interval lengths for start points in `lower_case_ranges` table. */"])
|
||||||
|
|
||||||
c_source.add_table(lower_case_ranges[0],
|
c_source.add_named_conversion_range("lower_case",
|
||||||
"lower_case_ranges",
|
extract_conversions(lower_case),
|
||||||
"uint16_t",
|
["conversions", "conversion_counters"],
|
||||||
"/* Contains start points of lowercase ranges. */")
|
[("/* The remaining lowercase conversions. The lowercase variant can "
|
||||||
|
"be one-to-three character long. */"),
|
||||||
|
("/* Number of one-to-one, one-to-two, and one-to-three lowercase "
|
||||||
|
"conversions. */")])
|
||||||
|
|
||||||
c_source.add_table(lower_case_ranges[1],
|
c_source.add_named_conversion_range("upper_case",
|
||||||
"lower_case_range_lengths",
|
extract_conversions(upper_case),
|
||||||
"uint8_t",
|
["conversions", "conversion_counters"],
|
||||||
"/* Interval lengths for start points in `lower_case_ranges` table. */")
|
[("/* The remaining uppercase conversions. The uppercase variant can "
|
||||||
|
"be one-to-three character long. */"),
|
||||||
|
("/* Number of one-to-one, one-to-two, and one-to-three uppercase "
|
||||||
|
"conversions. */")])
|
||||||
|
|
||||||
c_source.add_table(lower_case_conversions[0],
|
if lower_case:
|
||||||
"lower_case_conversions",
|
warnings.warn('Not all elements extracted from the lowercase table!')
|
||||||
"uint16_t",
|
if upper_case:
|
||||||
("/* The remaining lowercase conversions. The lowercase variant can "
|
warnings.warn('Not all elements extracted from the uppercase table!')
|
||||||
"be one-to-three character long. */"))
|
|
||||||
|
|
||||||
c_source.add_table(lower_case_conversions[1],
|
|
||||||
"lower_case_conversion_counters",
|
|
||||||
"uint8_t",
|
|
||||||
"/* Number of one-to-one, one-to-two, and one-to-three lowercase conversions. */")
|
|
||||||
|
|
||||||
c_source.add_table(upper_case_conversions[0],
|
|
||||||
"upper_case_conversions",
|
|
||||||
"uint16_t",
|
|
||||||
("/* The remaining uppercase conversions. The uppercase variant can "
|
|
||||||
"be one-to-three character long. */"))
|
|
||||||
|
|
||||||
c_source.add_table(upper_case_conversions[1],
|
|
||||||
"upper_case_conversion_counters",
|
|
||||||
"uint8_t",
|
|
||||||
"/* Number of one-to-one, one-to-two, and one-to-three uppercase conversions. */")
|
|
||||||
|
|
||||||
c_source.generate()
|
c_source.generate()
|
||||||
|
|
||||||
@@ -783,29 +706,37 @@ def generate_conversions(script_args):
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description='lit-unicode-{conversions,ranges}.inc.h generator',
|
parser = argparse.ArgumentParser(description='lit-unicode-{conversions,ranges}-{sup}.inc.h generator',
|
||||||
epilog='''
|
epilog='''
|
||||||
The input files (UnicodeData.txt, SpecialCasing.txt)
|
The input files:
|
||||||
|
- UnicodeData.txt
|
||||||
|
- SpecialCasing.txt
|
||||||
|
- DerivedCoreProperties.txt
|
||||||
|
- PropList.txt
|
||||||
must be retrieved from
|
must be retrieved from
|
||||||
http://www.unicode.org/Public/<VERSION>/ucd/.
|
http://www.unicode.org/Public/<VERSION>/ucd/.
|
||||||
The last known good version is 13.0.0.
|
The last known good version is 13.0.0.
|
||||||
''')
|
''')
|
||||||
|
def check_file(path):
|
||||||
|
if not os.path.isfile(path) or not os.access(path, os.R_OK):
|
||||||
|
raise argparse.ArgumentTypeError('The %s file is missing or not readable!' % path)
|
||||||
|
return path
|
||||||
|
|
||||||
parser.add_argument('--unicode-data', metavar='FILE', action='store', required=True,
|
parser.add_argument('--unicode-data', metavar='FILE', action='store', required=True,
|
||||||
help='specify the unicode data file')
|
type=check_file, help='specify the unicode data file')
|
||||||
parser.add_argument('--special-casing', metavar='FILE', action='store', required=True,
|
parser.add_argument('--special-casing', metavar='FILE', action='store', required=True,
|
||||||
help='specify the special casing file')
|
type=check_file, help='specify the special casing file')
|
||||||
|
parser.add_argument('--prop-list', metavar='FILE', action='store', required=True,
|
||||||
|
type=check_file, help='specify the prop list file')
|
||||||
|
parser.add_argument('--derived-core-properties', metavar='FILE', action='store', required=True,
|
||||||
|
type=check_file, help='specify the DerivedCodeProperties file')
|
||||||
|
|
||||||
script_args = parser.parse_args()
|
script_args = parser.parse_args()
|
||||||
|
|
||||||
if not os.path.isfile(script_args.unicode_data) or not os.access(script_args.unicode_data, os.R_OK):
|
generate_ranges(script_args, UNICODE_PLANE_TYPE_BASIC)
|
||||||
parser.error('The %s file is missing or not readable!' % script_args.unicode_data)
|
generate_ranges(script_args, UNICODE_PLANE_TYPE_SUPPLEMENTARY)
|
||||||
|
generate_conversions(script_args, UNICODE_PLANE_TYPE_BASIC)
|
||||||
if not os.path.isfile(script_args.special_casing) or not os.access(script_args.special_casing, os.R_OK):
|
generate_conversions(script_args, UNICODE_PLANE_TYPE_SUPPLEMENTARY)
|
||||||
parser.error('The %s file is missing or not readable!' % script_args.special_casing)
|
|
||||||
|
|
||||||
generate_ranges(script_args)
|
|
||||||
generate_conversions(script_args)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user