Improve toLowerCase and toUpperCase functions. (#1575)
Language-sensitive mappings are currently not processed. Fixes #323. JerryScript-DCO-1.0-Signed-off-by: Robert Sipka rsipka.uszeged@partner.samsung.com
This commit is contained in:
committed by
László Langó
parent
025a99ccbb
commit
1b5f839db9
@@ -17,6 +17,10 @@
|
||||
#include "lit/lit-unicode-ranges.inc.h"
|
||||
#include "lit-strings.h"
|
||||
|
||||
#ifndef CONFIG_DISABLE_UNICODE_CASE_CONVERSION
|
||||
#include "lit-unicode-conversions.inc.h"
|
||||
#endif /* !CONFIG_DISABLE_UNICODE_CASE_CONVERSION */
|
||||
|
||||
#define NUM_OF_ELEMENTS(array) (sizeof (array) / sizeof ((array)[0]))
|
||||
|
||||
/**
|
||||
@@ -458,6 +462,184 @@ lit_char_is_word_char (ecma_char_t c) /**< code unit */
|
||||
|| c == LIT_CHAR_UNDERSCORE);
|
||||
} /* lit_char_is_word_char */
|
||||
|
||||
#ifndef CONFIG_DISABLE_UNICODE_CASE_CONVERSION
|
||||
|
||||
/**
|
||||
* Check if the specified character is in one of those tables which contain bidirectional conversions.
|
||||
*
|
||||
* @return the mapped character sequence of an ecma character, if it's in the table.
|
||||
* 0 - otherwise.
|
||||
*/
|
||||
static ecma_length_t
|
||||
search_in_bidirectional_conversion_tables (ecma_char_t character, /**< code unit */
|
||||
ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
|
||||
bool is_lowercase) /**< is lowercase conversion */
|
||||
{
|
||||
/* 1, Check if the specified character is part of the jerry_character_case_ranges table. */
|
||||
int number_of_case_ranges = NUM_OF_ELEMENTS (jerry_character_case_ranges);
|
||||
int conv_counter = 0;
|
||||
|
||||
for (int i = 0; i < number_of_case_ranges; i++)
|
||||
{
|
||||
if (i % 2 == 0 && i > 0)
|
||||
{
|
||||
conv_counter++;
|
||||
}
|
||||
|
||||
int range_length = jerry_character_case_range_lengths[conv_counter];
|
||||
ecma_char_t start_point = jerry_character_case_ranges[i];
|
||||
|
||||
if (start_point > character || character >= start_point + range_length)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
int char_dist = character - start_point;
|
||||
|
||||
if (i % 2 == 0)
|
||||
{
|
||||
output_buffer_p[0] = is_lowercase ? (ecma_char_t) (jerry_character_case_ranges[i + 1] + char_dist) : character;
|
||||
}
|
||||
else
|
||||
{
|
||||
output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (jerry_character_case_ranges[i - 1] + char_dist);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* 2, Check if the specified character is part of the character_pair_ranges table. */
|
||||
int bottom = 0;
|
||||
int top = NUM_OF_ELEMENTS (jerry_character_pair_ranges) - 1;
|
||||
|
||||
while (bottom <= top)
|
||||
{
|
||||
int middle = (bottom + top) / 2;
|
||||
ecma_char_t current_sp = jerry_character_pair_ranges[middle];
|
||||
|
||||
if (current_sp <= character && character < current_sp + jerry_character_pair_range_lengths[middle])
|
||||
{
|
||||
int char_dist = character - current_sp;
|
||||
|
||||
if ((character - current_sp) % 2 == 0)
|
||||
{
|
||||
output_buffer_p[0] = is_lowercase ? (ecma_char_t) (current_sp + char_dist + 1) : character;
|
||||
}
|
||||
else
|
||||
{
|
||||
output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (current_sp + char_dist - 1);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (character > current_sp)
|
||||
{
|
||||
bottom = middle + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
top = middle - 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* 3, Check if the specified character is part of the character_pairs table. */
|
||||
int number_of_character_pairs = NUM_OF_ELEMENTS (jerry_character_pairs);
|
||||
|
||||
for (int i = 0; i < number_of_character_pairs; i++)
|
||||
{
|
||||
if (character != jerry_character_pairs[i])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (i % 2 == 0)
|
||||
{
|
||||
output_buffer_p[0] = is_lowercase ? jerry_character_pairs[i + 1] : character;
|
||||
}
|
||||
else
|
||||
{
|
||||
output_buffer_p[0] = is_lowercase ? character : jerry_character_pairs[i - 1];
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
} /* search_in_bidirectional_conversion_tables */
|
||||
|
||||
/**
|
||||
* Check if the specified character is in the given conversion table.
|
||||
*
|
||||
* @return the mapped character sequence of an ecma character, if it's in the table.
|
||||
* 0 - otherwise.
|
||||
*/
|
||||
static ecma_length_t
|
||||
search_in_conversion_table (ecma_char_t character, /**< code unit */
|
||||
ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
|
||||
const ecma_char_t *array, /**< array */
|
||||
const uint8_t *counters) /**< case_values counter */
|
||||
{
|
||||
int end_point = 0;
|
||||
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
int start_point = end_point;
|
||||
int size_of_case_value = i + 1;
|
||||
end_point += counters[i] * (size_of_case_value + 1);
|
||||
|
||||
int bottom = start_point;
|
||||
int top = end_point - size_of_case_value;
|
||||
|
||||
while (bottom <= top)
|
||||
{
|
||||
int middle = (bottom + top) / 2;
|
||||
|
||||
middle -= ((middle - bottom) % (size_of_case_value + 1));
|
||||
|
||||
ecma_char_t current = array[middle];
|
||||
|
||||
if (current == character)
|
||||
{
|
||||
ecma_length_t char_sequence = 1;
|
||||
|
||||
switch (size_of_case_value)
|
||||
{
|
||||
case 3:
|
||||
{
|
||||
output_buffer_p[2] = array[middle + 3];
|
||||
char_sequence++;
|
||||
/* FALLTHRU */
|
||||
}
|
||||
case 2:
|
||||
{
|
||||
output_buffer_p[1] = array[middle + 2];
|
||||
char_sequence++;
|
||||
/* FALLTHRU */
|
||||
}
|
||||
default:
|
||||
{
|
||||
output_buffer_p[0] = array[middle + 1];
|
||||
return char_sequence;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (character < current)
|
||||
{
|
||||
top = middle - (size_of_case_value + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
bottom = middle + (size_of_case_value + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
} /* search_in_conversion_table */
|
||||
#endif /* !CONFIG_DISABLE_UNICODE_CASE_CONVERSION */
|
||||
|
||||
/**
|
||||
* Returns the lowercase character sequence of an ecma character.
|
||||
*
|
||||
@@ -471,8 +653,6 @@ lit_char_to_lower_case (ecma_char_t character, /**< input character value */
|
||||
ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
|
||||
ecma_length_t buffer_size) /**< buffer size */
|
||||
{
|
||||
/* TODO: Needs a proper lower case implementation. See issue #323. */
|
||||
|
||||
JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||
|
||||
if (character >= LIT_CHAR_UPPERCASE_A && character <= LIT_CHAR_UPPERCASE_Z)
|
||||
@@ -481,6 +661,41 @@ lit_char_to_lower_case (ecma_char_t character, /**< input character value */
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_DISABLE_UNICODE_CASE_CONVERSION
|
||||
|
||||
ecma_length_t lowercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, true);
|
||||
|
||||
if (lowercase_sequence != 0)
|
||||
{
|
||||
return lowercase_sequence;
|
||||
}
|
||||
|
||||
int num_of_lowercase_ranges = NUM_OF_ELEMENTS (jerry_lower_case_ranges);
|
||||
|
||||
for (int i = 0, j = 0; i < num_of_lowercase_ranges; i += 2, j++)
|
||||
{
|
||||
int range_length = jerry_lower_case_range_lengths[j] - 1;
|
||||
ecma_char_t start_point = jerry_lower_case_ranges[i];
|
||||
|
||||
if (start_point <= character && character <= start_point + range_length)
|
||||
{
|
||||
output_buffer_p[0] = (ecma_char_t) (jerry_lower_case_ranges[i + 1] + (character - start_point));
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
lowercase_sequence = search_in_conversion_table (character,
|
||||
output_buffer_p,
|
||||
jerry_lower_case_conversions,
|
||||
jerry_lower_case_conversion_counters);
|
||||
|
||||
if (lowercase_sequence != 0)
|
||||
{
|
||||
return lowercase_sequence;
|
||||
}
|
||||
|
||||
#endif /* !CONFIG_DISABLE_UNICODE_CASE_CONVERSION */
|
||||
|
||||
output_buffer_p[0] = character;
|
||||
return 1;
|
||||
} /* lit_char_to_lower_case */
|
||||
@@ -498,8 +713,6 @@ lit_char_to_upper_case (ecma_char_t character, /**< input character value */
|
||||
ecma_char_t *output_buffer_p, /**< buffer for the result characters */
|
||||
ecma_length_t buffer_size) /**< buffer size */
|
||||
{
|
||||
/* TODO: Needs a proper upper case implementation. See issue #323. */
|
||||
|
||||
JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||
|
||||
if (character >= LIT_CHAR_LOWERCASE_A && character <= LIT_CHAR_LOWERCASE_Z)
|
||||
@@ -508,6 +721,42 @@ lit_char_to_upper_case (ecma_char_t character, /**< input character value */
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_DISABLE_UNICODE_CASE_CONVERSION
|
||||
|
||||
ecma_length_t uppercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, false);
|
||||
|
||||
if (uppercase_sequence != 0)
|
||||
{
|
||||
return uppercase_sequence;
|
||||
}
|
||||
|
||||
int num_of_upper_case_special_ranges = NUM_OF_ELEMENTS (jerry_upper_case_special_ranges);
|
||||
|
||||
for (int i = 0, j = 0; i < num_of_upper_case_special_ranges; i += 3, j++)
|
||||
{
|
||||
int range_length = jerry_upper_case_special_range_lengths[j];
|
||||
ecma_char_t start_point = jerry_upper_case_special_ranges[i];
|
||||
|
||||
if (start_point <= character && character <= start_point + range_length)
|
||||
{
|
||||
output_buffer_p[0] = (ecma_char_t) (jerry_upper_case_special_ranges[i + 1] + (character - start_point));
|
||||
output_buffer_p[1] = (ecma_char_t) (jerry_upper_case_special_ranges[i + 2]);
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
uppercase_sequence = search_in_conversion_table (character,
|
||||
output_buffer_p,
|
||||
jerry_upper_case_conversions,
|
||||
jerry_upper_case_conversion_counters);
|
||||
|
||||
if (uppercase_sequence != 0)
|
||||
{
|
||||
return uppercase_sequence;
|
||||
}
|
||||
|
||||
#endif /* !CONFIG_DISABLE_UNICODE_CASE_CONVERSION */
|
||||
|
||||
output_buffer_p[0] = character;
|
||||
return 1;
|
||||
} /* lit_char_to_upper_case */
|
||||
|
||||
Reference in New Issue
Block a user