diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-global.c b/jerry-core/ecma/builtin-objects/ecma-builtin-global.c index 7c60e487b..b834e97fc 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-global.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-global.c @@ -494,11 +494,49 @@ ecma_builtin_global_object_escape (lit_utf8_byte_t *input_start_p, /**< routine' return ecma_make_string_value (ecma_stringbuilder_finalize (&builder)); } /* ecma_builtin_global_object_escape */ +/** + * Utility method to resolve a single escape sequence for the 'unescape' method. + * + * Expected formats: %uxxxx or %yy; buffer_p points just past the '%' (the 'u' is skipped, not verified). + * + * @return number of bytes of buffer_p forming the sequence (5 for uxxxx, 2 for yy), 0 if a non-hex digit was found + */ +static uint8_t +ecma_builtin_global_object_unescape_resolve_escape (const lit_utf8_byte_t *buffer_p, /**< character buffer */ + bool unicode_sequence, /**< true if unescaping unicode sequence */ + ecma_char_t *out_result_p) /**< [out] resolved character */ +{ + JERRY_ASSERT (buffer_p != NULL); + JERRY_ASSERT (out_result_p != NULL); + + ecma_char_t unescaped_chr = 0; + uint8_t sequence_length = unicode_sequence ? 5 : 2; + uint8_t start = unicode_sequence ? 1 : 0; + + for (uint8_t i = start; i < sequence_length; i++) + { + const lit_utf8_byte_t current_char = buffer_p[i]; + + if (!lit_char_is_hex_digit (current_char)) + { + /* Not a valid escape sequence: report zero length and leave the output character untouched */ + return 0; + } + + unescaped_chr = (ecma_char_t) ((unescaped_chr << 4) + (ecma_char_t) lit_char_hex_to_int (current_char)); + } + + *out_result_p = unescaped_chr; + + return sequence_length; +} /* ecma_builtin_global_object_unescape_resolve_escape */ + /** * The Global object's 'unescape' routine * * See also: * ECMA-262 v5, B.2.2 + * ECMA-262 v11, B.2.1.2 * * @return ecma value * Returned value must be freed with ecma_free_value. 
@@ -509,76 +547,40 @@ ecma_builtin_global_object_unescape (lit_utf8_byte_t *input_start_p, /**< routin lit_utf8_size_t input_size) /**< routine's first argument's * string buffer's size */ { - const lit_utf8_byte_t *input_curr_p = input_start_p; - const lit_utf8_byte_t *input_end_p = input_start_p + input_size; - /* 4. */ - /* The length of input string is always greater than output string - * so we re-use the input string buffer. - * The %xx is three byte long, and the maximum encoded value is 0xff, - * which maximum encoded length is two byte. Similar to this, the maximum - * encoded length of %uxxxx is four byte. */ - lit_utf8_byte_t *output_char_p = input_start_p; - - /* The state of parsing that tells us where we are in an escape pattern. - * 0 we are outside of pattern, - * 1 found '%', start of pattern, - * 2 found first hex digit of '%xy' pattern - * 3 found valid '%xy' pattern - * 4 found 'u', start of '%uwxyz' pattern - * 5-7 found hex digits of '%uwxyz' pattern - * 8 found valid '%uwxyz' pattern - */ - uint8_t status = 0; - ecma_char_t hex_digits = 0; - /* 5. */ - while (input_curr_p < input_end_p) + if (input_size == 0) { - /* 6. */ - ecma_char_t chr = lit_cesu8_read_next (&input_curr_p); - - /* 7-8. */ - if (status == 0 && chr == LIT_CHAR_PERCENT) - { - /* Found '%' char, start of escape sequence. */ - status = 1; - } - /* 9-10. */ - else if (status == 1 && chr == LIT_CHAR_LOWERCASE_U) - { - /* Found 'u' char after '%'. */ - status = 4; - } - else if (status > 0 && lit_char_is_hex_digit (chr)) - { - /* Found hexadecimal digit in escape sequence. */ - hex_digits = (ecma_char_t) (hex_digits * 16 + (ecma_char_t) lit_char_hex_to_int (chr)); - status++; - } - else - { - /* Previously found hexadecimal digit in escape sequence but it's not valid '%xy' pattern - * so essentially it was only a simple character. */ - status = 0; - } - - /* 11-17. Found valid '%uwxyz' or '%xy' escape. */ - if (status == 8 || status == 3) - { - output_char_p -= (status == 3) ? 
2 : 5; - status = 0; - chr = hex_digits; - hex_digits = 0; - } - - /* Copying character. */ - lit_utf8_size_t lit_size = lit_code_unit_to_utf8 (chr, output_char_p); - output_char_p += lit_size; - JERRY_ASSERT (output_char_p <= input_curr_p); + return ecma_make_magic_string_value (LIT_MAGIC_STRING__EMPTY); } - lit_utf8_size_t output_length = (lit_utf8_size_t) (output_char_p - input_start_p); - ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (input_start_p, output_length); - return ecma_make_string_value (output_string_p); + const lit_utf8_byte_t *input_curr_p = input_start_p; + const lit_utf8_byte_t *input_end_p = input_start_p + input_size; + ecma_stringbuilder_t builder = ecma_stringbuilder_create (); + + while (input_curr_p < input_end_p) + { + ecma_char_t chr = lit_cesu8_read_next (&input_curr_p); + + // a '%' may start an escape sequence; if none is resolved below, the '%' itself is appended + if (chr == LIT_CHAR_PERCENT) + { + const lit_utf8_size_t chars_leftover = (lit_utf8_size_t) (input_end_p - input_curr_p); + + // potential %uxxxx sequence: the 'u' plus four hex digits need at least 5 more bytes + if (chars_leftover >= 5 && input_curr_p[0] == LIT_CHAR_LOWERCASE_U) + { + input_curr_p += ecma_builtin_global_object_unescape_resolve_escape (input_curr_p, true, &chr); + } + // potential %yy sequence: two hex digits need at least 2 more bytes + else if (chars_leftover >= 2) + { + input_curr_p += ecma_builtin_global_object_unescape_resolve_escape (input_curr_p, false, &chr); + } + } + + ecma_stringbuilder_append_char (&builder, chr); + } + + return ecma_make_string_value (ecma_stringbuilder_finalize (&builder)); } /* ecma_builtin_global_object_unescape */ #endif /* ENABLED (JERRY_BUILTIN_ANNEXB) */ diff --git a/tests/jerry/es.next/global-unescape.js b/tests/jerry/es.next/global-unescape.js new file mode 100644 index 000000000..94801798a --- /dev/null +++ b/tests/jerry/es.next/global-unescape.js @@ -0,0 +1,120 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with 
the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Copyright (C) 2016 the V8 project authors. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +assert(unescape('%U0000') === '%U0000'); +assert(unescape('%t0000') === '%t0000'); +assert(unescape('%v0000') ==='%v0000'); +assert(unescape('%%0000') === '%\x0000'); + +// tests for two-digit hexadecimal (%yy) unescape +assert(unescape('%0%0000') === '%0\x0000'); +assert(unescape('%0%0100') === '%0\x0100'); + +assert(unescape('%0%2900') === '%0)00'); +assert(unescape('%0%2a00') === '%0*00'); +assert(unescape('%0%2A00') === '%0*00'); +assert(unescape('%0%2b00') === '%0+00'); +assert(unescape('%0%2B00') === '%0+00'); +assert(unescape('%0%2c00') === '%0,00'); +assert(unescape('%0%2C00') === '%0,00'); +assert(unescape('%0%2d00') === '%0-00'); +assert(unescape('%0%2D00') === '%0-00'); + +assert(unescape('%0%3900') === '%0900'); +assert(unescape('%0%3a00') === '%0:00'); +assert(unescape('%0%3A00') === '%0:00'); + +assert(unescape('%0%3f00') === '%0?00'); +assert(unescape('%0%3F00') === '%0?00'); +assert(unescape('%0%4000') === '%0@00'); + +assert(unescape('%0%5a00') === '%0Z00'); +assert(unescape('%0%5A00') === '%0Z00'); +assert(unescape('%0%5b00') === '%0[00'); +assert(unescape('%0%5B00') === '%0[00'); + +assert(unescape('%0%5e00') === '%0^00'); +assert(unescape('%0%5E00') === '%0^00'); +assert(unescape('%0%5f00') === '%0_00'); +assert(unescape('%0%5F00') === '%0_00'); +assert(unescape('%0%6000') === '%0`00'); +assert(unescape('%0%6100') === '%0a00'); + +assert(unescape('%0%7a00') === '%0z00'); 
+assert(unescape('%0%7A00') === '%0z00'); +assert(unescape('%0%7b00') === '%0{00'); +assert(unescape('%0%7B00') === '%0{00'); + +assert(unescape('%0%fe00') === '%0\xfe00'); +assert(unescape('%0%Fe00') === '%0\xfe00'); +assert(unescape('%0%fE00') === '%0\xfe00'); +assert(unescape('%0%FE00') === '%0\xfe00'); + +assert(unescape('%0%ff00') === '%0\xff00'); +assert(unescape('%0%Ff00') === '%0\xff00'); +assert(unescape('%0%fF00') === '%0\xff00'); +assert(unescape('%0%FF00') === '%0\xff00'); + +// tests for four-digit unicode (%uxxxx) unescape +assert(unescape('%0%u00290') === '%0)0'); +assert(unescape('%0%u002a0') === '%0*0'); +assert(unescape('%0%u002A0') === '%0*0'); +assert(unescape('%0%u002b0') === '%0+0'); +assert(unescape('%0%u002B0') === '%0+0'); +assert(unescape('%0%u002c0') === '%0,0'); +assert(unescape('%0%u002C0') === '%0,0'); +assert(unescape('%0%u002d0') === '%0-0'); +assert(unescape('%0%u002D0') === '%0-0'); + +assert(unescape('%0%u00390') === '%090'); +assert(unescape('%0%u003a0') === '%0:0'); +assert(unescape('%0%u003A0') === '%0:0'); + +assert(unescape('%0%u003f0') === '%0?0'); +assert(unescape('%0%u003F0') === '%0?0'); +assert(unescape('%0%u00400') === '%0@0'); + +assert(unescape('%0%u005a0') === '%0Z0'); +assert(unescape('%0%u005A0') === '%0Z0'); +assert(unescape('%0%u005b0') === '%0[0'); +assert(unescape('%0%u005B0') === '%0[0'); + +assert(unescape('%0%u005e0') === '%0^0'); +assert(unescape('%0%u005E0') === '%0^0'); +assert(unescape('%0%u005f0') === '%0_0'); +assert(unescape('%0%u005F0') === '%0_0'); +assert(unescape('%0%u00600') === '%0`0'); +assert(unescape('%0%u00610') === '%0a0'); + +assert(unescape('%0%u007a0') === '%0z0'); +assert(unescape('%0%u007A0') === '%0z0'); +assert(unescape('%0%u007b0') === '%0{0'); +assert(unescape('%0%u007B0') === '%0{0'); + +assert(unescape('%0%ufffe0') === '%0\ufffe0'); +assert(unescape('%0%uFffe0') === '%0\ufffe0'); +assert(unescape('%0%ufFfe0') === '%0\ufffe0'); +assert(unescape('%0%uffFe0') === '%0\ufffe0'); 
+assert(unescape('%0%ufffE0') === '%0\ufffe0'); +assert(unescape('%0%uFFFE0') === '%0\ufffe0'); + +assert(unescape('%0%uffff0') === '%0\uffff0'); +assert(unescape('%0%uFfff0') === '%0\uffff0'); +assert(unescape('%0%ufFff0') === '%0\uffff0'); +assert(unescape('%0%uffFf0') === '%0\uffff0'); +assert(unescape('%0%ufffF0') === '%0\uffff0'); +assert(unescape('%0%uFFFF0') === '%0\uffff0'); diff --git a/tests/test262-esnext-excludelist.xml b/tests/test262-esnext-excludelist.xml index 3739499b0..255a48906 100644 --- a/tests/test262-esnext-excludelist.xml +++ b/tests/test262-esnext-excludelist.xml @@ -1168,9 +1168,6 @@ - - -