Implement toLowerCase and toUpperCase built-in functions.
Related issue: #323

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg@inf.u-szeged.hu
This commit is contained in:
@@ -27,6 +27,7 @@
|
|||||||
#include "ecma-try-catch-macro.h"
|
#include "ecma-try-catch-macro.h"
|
||||||
#include "jrt.h"
|
#include "jrt.h"
|
||||||
#include "jrt-libc-includes.h"
|
#include "jrt-libc-includes.h"
|
||||||
|
#include "lit-char-helpers.h"
|
||||||
|
|
||||||
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_STRING_BUILTIN
|
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_STRING_BUILTIN
|
||||||
|
|
||||||
@@ -507,6 +508,164 @@ ecma_builtin_string_prototype_object_substring (ecma_value_t this_arg, /**< this
|
|||||||
return ret_value;
|
return ret_value;
|
||||||
} /* ecma_builtin_string_prototype_object_substring */
|
} /* ecma_builtin_string_prototype_object_substring */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper function to convert a string to upper or lower case.
|
||||||
|
*
|
||||||
|
* @return completion value
|
||||||
|
* Returned value must be freed with ecma_free_completion_value.
|
||||||
|
*/
|
||||||
|
static ecma_completion_value_t
|
||||||
|
ecma_builtin_string_prototype_object_conversion_helper (ecma_value_t this_arg, /**< this argument */
|
||||||
|
bool lower_case) /**< convert to lower (true)
|
||||||
|
* or upper (false) case */
|
||||||
|
{
|
||||||
|
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
|
||||||
|
|
||||||
|
/* 1. */
|
||||||
|
ECMA_TRY_CATCH (check_coercible_val,
|
||||||
|
ecma_op_check_object_coercible (this_arg),
|
||||||
|
ret_value);
|
||||||
|
|
||||||
|
/* 2. */
|
||||||
|
ECMA_TRY_CATCH (to_string_val,
|
||||||
|
ecma_op_to_string (this_arg),
|
||||||
|
ret_value);
|
||||||
|
|
||||||
|
/* 3. */
|
||||||
|
ecma_string_t *input_string_p = ecma_get_string_from_value (to_string_val);
|
||||||
|
lit_utf8_size_t input_size = ecma_string_get_size (input_string_p);
|
||||||
|
|
||||||
|
MEM_DEFINE_LOCAL_ARRAY (input_start_p,
|
||||||
|
input_size,
|
||||||
|
lit_utf8_byte_t);
|
||||||
|
|
||||||
|
ecma_string_to_utf8_string (input_string_p,
|
||||||
|
input_start_p,
|
||||||
|
(ssize_t) (input_size));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The URI encoding has two major phases: first we compute
|
||||||
|
* the length of the lower case string, then we encode it.
|
||||||
|
*/
|
||||||
|
|
||||||
|
lit_utf8_size_t output_length = 0;
|
||||||
|
lit_utf8_iterator_t input_iterator = lit_utf8_iterator_create (input_start_p, input_size);
|
||||||
|
|
||||||
|
while (!lit_utf8_iterator_is_eos (&input_iterator))
|
||||||
|
{
|
||||||
|
ecma_char_t character = lit_utf8_iterator_read_next (&input_iterator);
|
||||||
|
ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
|
||||||
|
lit_utf8_byte_t utf8_byte_buffer[LIT_UTF8_MAX_BYTES_IN_CODE_POINT];
|
||||||
|
lit_utf8_size_t character_length;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to keep surrogate pairs. Surrogates are never converted,
|
||||||
|
* regardless they form a valid pair or not.
|
||||||
|
*/
|
||||||
|
if (lit_is_code_unit_high_surrogate (character))
|
||||||
|
{
|
||||||
|
ecma_char_t next_character = lit_utf8_iterator_peek_next (&input_iterator);
|
||||||
|
|
||||||
|
if (lit_is_code_unit_low_surrogate (next_character))
|
||||||
|
{
|
||||||
|
lit_code_point_t surrogate_code_point = lit_convert_surrogate_pair_to_code_point (character, next_character);
|
||||||
|
output_length += lit_code_point_to_utf8 (surrogate_code_point, utf8_byte_buffer);
|
||||||
|
lit_utf8_iterator_incr (&input_iterator);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lower_case)
|
||||||
|
{
|
||||||
|
character_length = lit_char_to_lower_case (character,
|
||||||
|
character_buffer,
|
||||||
|
LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
character_length = lit_char_to_upper_case (character,
|
||||||
|
character_buffer,
|
||||||
|
LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||||
|
}
|
||||||
|
|
||||||
|
JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||||
|
|
||||||
|
for (lit_utf8_size_t i = 0; i < character_length; i++)
|
||||||
|
{
|
||||||
|
output_length += lit_code_unit_to_utf8 (character_buffer[i], utf8_byte_buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Second phase. */
|
||||||
|
|
||||||
|
MEM_DEFINE_LOCAL_ARRAY (output_start_p,
|
||||||
|
output_length,
|
||||||
|
lit_utf8_byte_t);
|
||||||
|
|
||||||
|
lit_utf8_byte_t *output_char_p = output_start_p;
|
||||||
|
|
||||||
|
/* Encoding the output. */
|
||||||
|
lit_utf8_iterator_seek_bos (&input_iterator);
|
||||||
|
|
||||||
|
while (!lit_utf8_iterator_is_eos (&input_iterator))
|
||||||
|
{
|
||||||
|
ecma_char_t character = lit_utf8_iterator_read_next (&input_iterator);
|
||||||
|
ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
|
||||||
|
lit_utf8_size_t character_length;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to keep surrogate pairs. Surrogates are never converted,
|
||||||
|
* regardless they form a valid pair or not.
|
||||||
|
*/
|
||||||
|
if (lit_is_code_unit_high_surrogate (character))
|
||||||
|
{
|
||||||
|
ecma_char_t next_character = lit_utf8_iterator_peek_next (&input_iterator);
|
||||||
|
|
||||||
|
if (lit_is_code_unit_low_surrogate (next_character))
|
||||||
|
{
|
||||||
|
lit_code_point_t surrogate_code_point = lit_convert_surrogate_pair_to_code_point (character, next_character);
|
||||||
|
output_char_p += lit_code_point_to_utf8 (surrogate_code_point, output_char_p);
|
||||||
|
lit_utf8_iterator_incr (&input_iterator);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lower_case)
|
||||||
|
{
|
||||||
|
character_length = lit_char_to_lower_case (character,
|
||||||
|
character_buffer,
|
||||||
|
LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
character_length = lit_char_to_upper_case (character,
|
||||||
|
character_buffer,
|
||||||
|
LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||||
|
}
|
||||||
|
|
||||||
|
JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||||
|
|
||||||
|
for (lit_utf8_size_t i = 0; i < character_length; i++)
|
||||||
|
{
|
||||||
|
output_char_p += lit_code_point_to_utf8 (character_buffer[i], output_char_p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
JERRY_ASSERT (output_start_p + output_length == output_char_p);
|
||||||
|
|
||||||
|
ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_length);
|
||||||
|
|
||||||
|
ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_string_p));
|
||||||
|
|
||||||
|
MEM_FINALIZE_LOCAL_ARRAY (output_start_p);
|
||||||
|
MEM_FINALIZE_LOCAL_ARRAY (input_start_p);
|
||||||
|
|
||||||
|
ECMA_FINALIZE (to_string_val);
|
||||||
|
ECMA_FINALIZE (check_coercible_val);
|
||||||
|
|
||||||
|
return ret_value;
|
||||||
|
} /* ecma_builtin_string_prototype_object_conversion_helper */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The String.prototype object's 'toLowerCase' routine
|
* The String.prototype object's 'toLowerCase' routine
|
||||||
*
|
*
|
||||||
@@ -519,7 +678,7 @@ ecma_builtin_string_prototype_object_substring (ecma_value_t this_arg, /**< this
|
|||||||
static ecma_completion_value_t
|
static ecma_completion_value_t
|
||||||
ecma_builtin_string_prototype_object_to_lower_case (ecma_value_t this_arg) /**< this argument */
|
ecma_builtin_string_prototype_object_to_lower_case (ecma_value_t this_arg) /**< this argument */
|
||||||
{
|
{
|
||||||
ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg);
|
return ecma_builtin_string_prototype_object_conversion_helper (this_arg, true);
|
||||||
} /* ecma_builtin_string_prototype_object_to_lower_case */
|
} /* ecma_builtin_string_prototype_object_to_lower_case */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -534,7 +693,7 @@ ecma_builtin_string_prototype_object_to_lower_case (ecma_value_t this_arg) /**<
|
|||||||
static ecma_completion_value_t
|
static ecma_completion_value_t
|
||||||
ecma_builtin_string_prototype_object_to_locale_lower_case (ecma_value_t this_arg) /**< this argument */
|
ecma_builtin_string_prototype_object_to_locale_lower_case (ecma_value_t this_arg) /**< this argument */
|
||||||
{
|
{
|
||||||
ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg);
|
return ecma_builtin_string_prototype_object_conversion_helper (this_arg, true);
|
||||||
} /* ecma_builtin_string_prototype_object_to_locale_lower_case */
|
} /* ecma_builtin_string_prototype_object_to_locale_lower_case */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -549,7 +708,7 @@ ecma_builtin_string_prototype_object_to_locale_lower_case (ecma_value_t this_arg
|
|||||||
static ecma_completion_value_t
|
static ecma_completion_value_t
|
||||||
ecma_builtin_string_prototype_object_to_upper_case (ecma_value_t this_arg) /**< this argument */
|
ecma_builtin_string_prototype_object_to_upper_case (ecma_value_t this_arg) /**< this argument */
|
||||||
{
|
{
|
||||||
ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg);
|
return ecma_builtin_string_prototype_object_conversion_helper (this_arg, false);
|
||||||
} /* ecma_builtin_string_prototype_object_to_upper_case */
|
} /* ecma_builtin_string_prototype_object_to_upper_case */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -564,7 +723,7 @@ ecma_builtin_string_prototype_object_to_upper_case (ecma_value_t this_arg) /**<
|
|||||||
static ecma_completion_value_t
|
static ecma_completion_value_t
|
||||||
ecma_builtin_string_prototype_object_to_locale_upper_case (ecma_value_t this_arg) /**< this argument */
|
ecma_builtin_string_prototype_object_to_locale_upper_case (ecma_value_t this_arg) /**< this argument */
|
||||||
{
|
{
|
||||||
ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg);
|
return ecma_builtin_string_prototype_object_conversion_helper (this_arg, false);
|
||||||
} /* ecma_builtin_string_prototype_object_to_locale_upper_case */
|
} /* ecma_builtin_string_prototype_object_to_locale_upper_case */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -328,3 +328,79 @@ lit_char_is_word_char (ecma_char_t c) /**< code unit */
|
|||||||
|| (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
|
|| (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
|
||||||
|| c == LIT_CHAR_UNDERSCORE);
|
|| c == LIT_CHAR_UNDERSCORE);
|
||||||
} /* lit_char_is_word_char */
|
} /* lit_char_is_word_char */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the lowercase character sequence of an ecma character.
|
||||||
|
*
|
||||||
|
* Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
|
||||||
|
*
|
||||||
|
* @return the length of the lowercase character sequence
|
||||||
|
* which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
|
||||||
|
*/
|
||||||
|
lit_utf8_size_t
|
||||||
|
lit_char_to_lower_case (ecma_char_t character, /**< input character value */
|
||||||
|
ecma_char_t *output_buffer_p, /**< buffer for the result characters */
|
||||||
|
size_t buffer_size) /**< buffer size */
|
||||||
|
{
|
||||||
|
TODO ("Needs a proper lower case implementation. See issue #323.");
|
||||||
|
|
||||||
|
JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||||
|
|
||||||
|
if (character >= LIT_CHAR_UPPERCASE_A && character <= LIT_CHAR_UPPERCASE_Z)
|
||||||
|
{
|
||||||
|
output_buffer_p[0] = (ecma_char_t) (character + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (character == 0x130)
|
||||||
|
{
|
||||||
|
output_buffer_p[0] = LIT_CHAR_LOWERCASE_I;
|
||||||
|
output_buffer_p[1] = 0x307;
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
output_buffer_p[0] = character;
|
||||||
|
return 1;
|
||||||
|
} /* lit_char_to_lower_case */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the uppercase character sequence of an ecma character.
|
||||||
|
*
|
||||||
|
* Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
|
||||||
|
*
|
||||||
|
* @return the length of the uppercase character sequence
|
||||||
|
* which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
|
||||||
|
*/
|
||||||
|
lit_utf8_size_t
|
||||||
|
lit_char_to_upper_case (ecma_char_t character, /**< input character value */
|
||||||
|
ecma_char_t *output_buffer_p, /**< buffer for the result characters */
|
||||||
|
size_t buffer_size) /**< buffer size */
|
||||||
|
{
|
||||||
|
TODO ("Needs a proper upper case implementation. See issue #323.");
|
||||||
|
|
||||||
|
JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||||
|
|
||||||
|
if (character >= LIT_CHAR_LOWERCASE_A && character <= LIT_CHAR_LOWERCASE_Z)
|
||||||
|
{
|
||||||
|
output_buffer_p[0] = (ecma_char_t) (character - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (character == 0xdf)
|
||||||
|
{
|
||||||
|
output_buffer_p[0] = LIT_CHAR_UPPERCASE_S;
|
||||||
|
output_buffer_p[1] = LIT_CHAR_UPPERCASE_S;
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (character == 0x1fd7)
|
||||||
|
{
|
||||||
|
output_buffer_p[0] = 0x399;
|
||||||
|
output_buffer_p[1] = 0x308;
|
||||||
|
output_buffer_p[2] = 0x342;
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
output_buffer_p[0] = character;
|
||||||
|
return 1;
|
||||||
|
} /* lit_char_to_upper_case */
|
||||||
|
|||||||
@@ -220,4 +220,16 @@ extern uint32_t lit_char_hex_to_int (ecma_char_t);
|
|||||||
*/
|
*/
|
||||||
extern bool lit_char_is_word_char (ecma_char_t);
|
extern bool lit_char_is_word_char (ecma_char_t);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Utility functions for uppercasing / lowercasing
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Minimum buffer size for lit_char_to_lower_case / lit_char_to_upper_case functions.
|
||||||
|
*/
|
||||||
|
#define LIT_MAXIMUM_OTHER_CASE_LENGTH (3)
|
||||||
|
|
||||||
|
lit_utf8_size_t lit_char_to_lower_case (ecma_char_t, ecma_char_t *, size_t);
|
||||||
|
lit_utf8_size_t lit_char_to_upper_case (ecma_char_t, ecma_char_t *, size_t);
|
||||||
|
|
||||||
#endif /* LIT_CHAR_HELPERS_H */
|
#endif /* LIT_CHAR_HELPERS_H */
|
||||||
|
|||||||
@@ -753,7 +753,7 @@ lit_code_point_to_utf8 (lit_code_point_t code_point, /**< code point */
|
|||||||
buf[3] = LIT_UTF8_EXTRA_BYTE_MARKER | fourth_byte_bits;
|
buf[3] = LIT_UTF8_EXTRA_BYTE_MARKER | fourth_byte_bits;
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
} /* lit_code_unit_to_utf8 */
|
} /* lit_code_point_to_utf8 */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert surrogate pair to code point
|
* Convert surrogate pair to code point
|
||||||
|
|||||||
@@ -0,0 +1,55 @@
|
|||||||
|
// Copyright 2015 University of Szeged
|
||||||
|
// Copyright 2015 Samsung Electronics Co., Ltd.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Conversion

// Each entry is [input, expected result of toLowerCase()].
var lower_case_tests = [
  ["0123456789abcdefghijklmnopqrstuvwxzyABCDEFGHIJKLMNOPQRSTUVWXYZ",
   "0123456789abcdefghijklmnopqrstuvwxzyabcdefghijklmnopqrstuvwxyz"],
  ["\u0130", "i\u0307"],
  ["H\u0130-+", "hi\u0307-+"],
  ["\u0130\u0130\u0130", "i\u0307i\u0307i\u0307"],
  // Although codepoint 0x10400 and 0x10428 are an upper-lowercase pair,
  // we must not do their conversion in JavaScript. We must also ignore
  // stray surrogates.
  ["\ud801\ud801\udc00\udc00", "\ud801\ud801\udc00\udc00"]
];

// Each entry is [input, expected result of toUpperCase()].
var upper_case_tests = [
  ["0123456789abcdefghijklmnopqrstuvwxzyABCDEFGHIJKLMNOPQRSTUVWXYZ",
   "0123456789ABCDEFGHIJKLMNOPQRSTUVWXZYABCDEFGHIJKLMNOPQRSTUVWXYZ"],
  ["\xdf", "SS"],
  ["\u1fd7", "\u0399\u0308\u0342"],
  ["\xdf\u1fd7\xdf", "SS\u0399\u0308\u0342SS"],
  // Surrogates (paired or stray) must be left untouched here as well.
  ["\ud801\ud801\udc28\udc28", "\ud801\ud801\udc28\udc28"]
];

var idx;

for (idx = 0; idx < lower_case_tests.length; idx++)
{
  assert (lower_case_tests[idx][0].toLowerCase() == lower_case_tests[idx][1]);
}

for (idx = 0; idx < upper_case_tests.length; idx++)
{
  assert (upper_case_tests[idx][0].toUpperCase() == upper_case_tests[idx][1]);
}

// Conversion of non-string objects.

var to_upper = String.prototype.toUpperCase;
var to_lower = String.prototype.toLowerCase;

assert (to_upper.call(true) == "TRUE");
assert (to_lower.call(-23) == "-23");

var custom_object = { toString : function() { return "<sTr>"; } };
assert (to_upper.call(custom_object) == "<STR>");
assert (to_lower.call(custom_object) == "<str>");

// A null 'this' value is not coercible to an object, so a TypeError is expected.

var caught_error = null;

try
{
  to_upper.call(null);
}
catch (e)
{
  caught_error = e;
}

assert (caught_error instanceof TypeError);
|
||||||
Reference in New Issue
Block a user