Optimize encode/decode URI for valid UTF-8 input.
JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg@inf.u-szeged.hu
This commit is contained in:
@@ -25,6 +25,7 @@
|
|||||||
#include "ecma-try-catch-macro.h"
|
#include "ecma-try-catch-macro.h"
|
||||||
#include "jrt.h"
|
#include "jrt.h"
|
||||||
#include "lit-magic-strings.h"
|
#include "lit-magic-strings.h"
|
||||||
|
#include "lit-strings.h"
|
||||||
#include "vm.h"
|
#include "vm.h"
|
||||||
#include "jrt-libc-includes.h"
|
#include "jrt-libc-includes.h"
|
||||||
|
|
||||||
@@ -511,7 +512,12 @@ static uint8_t unescaped_uri_component_set[16] =
|
|||||||
0xfe, 0xff, 0xff, 0x87, 0xfe, 0xff, 0xff, 0x47
|
0xfe, 0xff, 0xff, 0x87, 0xfe, 0xff, 0xff, 0x47
|
||||||
};
|
};
|
||||||
|
|
||||||
#define ECMA_BUILTIN_HEX_TO_BYTE_ERROR 0x100
|
/*
|
||||||
|
* Format is a percent sign followed by two hex digits.
|
||||||
|
*/
|
||||||
|
#define URI_ENCODED_BYTE_SIZE (3)
|
||||||
|
|
||||||
|
#define ECMA_BUILTIN_HEX_TO_BYTE_ERROR (0x100)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper function to decode a hexadecimal byte from a string.
|
* Helper function to decode a hexadecimal byte from a string.
|
||||||
@@ -598,7 +604,11 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
|
|||||||
|
|
||||||
while (input_char_p < input_end_p)
|
while (input_char_p < input_end_p)
|
||||||
{
|
{
|
||||||
/* Input validation. */
|
/*
|
||||||
|
* We expect that the input is a valid UTF-8 sequence,
|
||||||
|
* so characters >= 0x80 can be let through.
|
||||||
|
*/
|
||||||
|
|
||||||
if (*input_char_p != '%')
|
if (*input_char_p != '%')
|
||||||
{
|
{
|
||||||
output_size++;
|
output_size++;
|
||||||
@@ -613,9 +623,9 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
input_char_p += 3;
|
input_char_p += URI_ENCODED_BYTE_SIZE;
|
||||||
|
|
||||||
if (decoded_byte <= 0x7f)
|
if (decoded_byte <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* We don't decode those bytes, which are part of reserved_uri_bitset
|
* We don't decode those bytes, which are part of reserved_uri_bitset
|
||||||
@@ -624,81 +634,16 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
|
|||||||
if (ecma_builtin_global_object_character_is_in (decoded_byte, reserved_uri_bitset)
|
if (ecma_builtin_global_object_character_is_in (decoded_byte, reserved_uri_bitset)
|
||||||
&& !ecma_builtin_global_object_character_is_in (decoded_byte, unescaped_uri_component_set))
|
&& !ecma_builtin_global_object_character_is_in (decoded_byte, unescaped_uri_component_set))
|
||||||
{
|
{
|
||||||
output_size += 3;
|
output_size += URI_ENCODED_BYTE_SIZE;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
output_size++;
|
output_size++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (decoded_byte < 0xc0 || decoded_byte >= 0xf8)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Invalid UTF-8 starting bytes:
|
|
||||||
* 10xx xxxx - UTF continuation byte
|
|
||||||
* 1111 1xxx - maximum length is 4 bytes
|
|
||||||
*/
|
|
||||||
ret_value = ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_URI));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
uint32_t count;
|
output_size++;
|
||||||
uint32_t min;
|
|
||||||
uint32_t character;
|
|
||||||
|
|
||||||
if (decoded_byte < 0xe0)
|
|
||||||
{
|
|
||||||
count = 1;
|
|
||||||
min = 0x80;
|
|
||||||
character = decoded_byte & 0x1f;
|
|
||||||
}
|
|
||||||
else if (decoded_byte < 0xf0)
|
|
||||||
{
|
|
||||||
count = 2;
|
|
||||||
min = 0x800;
|
|
||||||
character = decoded_byte & 0x0f;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
count = 3;
|
|
||||||
min = 0x1000;
|
|
||||||
character = decoded_byte & 0x07;
|
|
||||||
}
|
|
||||||
|
|
||||||
output_size += (count + 1);
|
|
||||||
|
|
||||||
do
|
|
||||||
{
|
|
||||||
decoded_byte = ecma_builtin_global_object_hex_to_byte (input_char_p);
|
|
||||||
if (decoded_byte == ECMA_BUILTIN_HEX_TO_BYTE_ERROR
|
|
||||||
|| (decoded_byte & 0xc0) != 0x80)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
character = (character << 6) + (decoded_byte & 0x3f);
|
|
||||||
input_char_p += 3;
|
|
||||||
}
|
|
||||||
while (--count > 0);
|
|
||||||
|
|
||||||
if (count != 0
|
|
||||||
/*
|
|
||||||
* Explanation of the character < min check: according to
|
|
||||||
* the UTF standard, each character must be encoded
|
|
||||||
* with the minimum number of bytes. We need to reject
|
|
||||||
* those characters that do not satisfy this condition.
|
|
||||||
*/
|
|
||||||
|| character < min
|
|
||||||
/*
|
|
||||||
* Not allowed character ranges.
|
|
||||||
*/
|
|
||||||
|| character > 0x10ffff
|
|
||||||
|| (character >= 0xd800 && character <= 0xdfff))
|
|
||||||
{
|
|
||||||
ret_value = ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_URI));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -723,9 +668,9 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
|
|||||||
}
|
}
|
||||||
|
|
||||||
uint32_t decoded_byte = ecma_builtin_global_object_hex_to_byte (input_char_p);
|
uint32_t decoded_byte = ecma_builtin_global_object_hex_to_byte (input_char_p);
|
||||||
input_char_p += 3;
|
input_char_p += URI_ENCODED_BYTE_SIZE;
|
||||||
|
|
||||||
if (decoded_byte <= 0x7f)
|
if (decoded_byte <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||||
{
|
{
|
||||||
if (ecma_builtin_global_object_character_is_in (decoded_byte, reserved_uri_bitset)
|
if (ecma_builtin_global_object_character_is_in (decoded_byte, reserved_uri_bitset)
|
||||||
&& !ecma_builtin_global_object_character_is_in (decoded_byte, unescaped_uri_component_set))
|
&& !ecma_builtin_global_object_character_is_in (decoded_byte, unescaped_uri_component_set))
|
||||||
@@ -742,47 +687,40 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
uint32_t count;
|
*output_char_p = (lit_utf8_byte_t) decoded_byte;
|
||||||
uint32_t character;
|
output_char_p++;
|
||||||
|
|
||||||
/* The validator already checked this before. */
|
|
||||||
JERRY_ASSERT (decoded_byte >= 0xc0 && decoded_byte < 0xf8);
|
|
||||||
|
|
||||||
if (decoded_byte < 0xe0)
|
|
||||||
{
|
|
||||||
count = 1;
|
|
||||||
character = decoded_byte & 0x1f;
|
|
||||||
}
|
|
||||||
else if (decoded_byte < 0xf0)
|
|
||||||
{
|
|
||||||
count = 2;
|
|
||||||
character = decoded_byte & 0x0f;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
count = 3;
|
|
||||||
character = decoded_byte & 0x07;
|
|
||||||
}
|
|
||||||
|
|
||||||
do
|
|
||||||
{
|
|
||||||
decoded_byte = ecma_builtin_global_object_hex_to_byte (input_char_p);
|
|
||||||
JERRY_ASSERT (decoded_byte != ECMA_BUILTIN_HEX_TO_BYTE_ERROR
|
|
||||||
&& (decoded_byte & 0xc0) == 0x80);
|
|
||||||
character = (character << 6) + (decoded_byte & 0x3f);
|
|
||||||
input_char_p += 3;
|
|
||||||
}
|
|
||||||
while (--count > 0);
|
|
||||||
|
|
||||||
output_char_p += lit_code_point_to_utf8 (character, output_char_p);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
JERRY_ASSERT (output_start_p + output_size == output_char_p);
|
JERRY_ASSERT (output_start_p + output_size == output_char_p);
|
||||||
|
|
||||||
ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_size);
|
bool valid_utf8 = lit_is_utf8_string_valid (output_start_p, output_size);
|
||||||
|
|
||||||
ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_string_p));
|
if (valid_utf8)
|
||||||
|
{
|
||||||
|
lit_utf8_iterator_t characters = lit_utf8_iterator_create (output_start_p, output_size);
|
||||||
|
while (!lit_utf8_iterator_is_eos (&characters))
|
||||||
|
{
|
||||||
|
ecma_char_t character = lit_utf8_iterator_read_next (&characters);
|
||||||
|
|
||||||
|
/* Surrogate fragments are allowed in JS, but not accepted by URI decoding. */
|
||||||
|
if (character >= LIT_UTF16_HIGH_SURROGATE_MIN && character <= LIT_UTF16_LOW_SURROGATE_MAX)
|
||||||
|
{
|
||||||
|
valid_utf8 = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (valid_utf8)
|
||||||
|
{
|
||||||
|
ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_size);
|
||||||
|
ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_string_p));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ret_value = ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_URI));
|
||||||
|
}
|
||||||
|
|
||||||
MEM_FINALIZE_LOCAL_ARRAY (output_start_p);
|
MEM_FINALIZE_LOCAL_ARRAY (output_start_p);
|
||||||
}
|
}
|
||||||
@@ -864,11 +802,9 @@ ecma_builtin_global_object_encode_uri_helper (ecma_value_t uri, /**< uri argumen
|
|||||||
lit_utf8_size_t input_size = ecma_string_get_size (input_string_p);
|
lit_utf8_size_t input_size = ecma_string_get_size (input_string_p);
|
||||||
|
|
||||||
MEM_DEFINE_LOCAL_ARRAY (input_start_p,
|
MEM_DEFINE_LOCAL_ARRAY (input_start_p,
|
||||||
input_size + 1,
|
input_size,
|
||||||
lit_utf8_byte_t);
|
lit_utf8_byte_t);
|
||||||
|
|
||||||
input_start_p[input_size] = LIT_BYTE_NULL;
|
|
||||||
|
|
||||||
ecma_string_to_utf8_string (input_string_p,
|
ecma_string_to_utf8_string (input_string_p,
|
||||||
input_start_p,
|
input_start_p,
|
||||||
(ssize_t) (input_size));
|
(ssize_t) (input_size));
|
||||||
@@ -878,49 +814,51 @@ ecma_builtin_global_object_encode_uri_helper (ecma_value_t uri, /**< uri argumen
|
|||||||
* and compute the length of the output, then we encode the input.
|
* and compute the length of the output, then we encode the input.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
lit_utf8_iterator_t iter = lit_utf8_iterator_create (input_start_p, input_size);
|
lit_utf8_byte_t *input_char_p = input_start_p;
|
||||||
lit_utf8_size_t output_length = 1;
|
lit_utf8_byte_t *input_end_p = input_start_p + input_size;
|
||||||
while (!lit_utf8_iterator_is_eos (&iter))
|
lit_utf8_size_t output_length = 0;
|
||||||
{
|
|
||||||
/* Input validation. */
|
|
||||||
lit_code_point_t character = lit_utf8_iterator_read_next (&iter);
|
|
||||||
|
|
||||||
if (character <= 0x7f)
|
while (input_char_p < input_end_p)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We expect that the input is a valid UTF-8 sequence,
|
||||||
|
* so we only need to reject stray (unpaired) surrogates.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Input validation. */
|
||||||
|
if (*input_char_p <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||||
{
|
{
|
||||||
if (ecma_builtin_global_object_character_is_in (character, unescaped_uri_bitset))
|
if (ecma_builtin_global_object_character_is_in (*input_char_p, unescaped_uri_bitset))
|
||||||
{
|
{
|
||||||
output_length++;
|
output_length++;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
output_length += 3;
|
output_length += URI_ENCODED_BYTE_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (character <= 0x7ff)
|
else if (*input_char_p == (LIT_UTF8_3_BYTE_MARKER + (LIT_UTF16_HIGH_SURROGATE_MARKER >> 12)))
|
||||||
{
|
{
|
||||||
output_length += 6;
|
/* The next character is in the [0xd000, 0xdfff] range. */
|
||||||
}
|
output_length += URI_ENCODED_BYTE_SIZE;
|
||||||
else if (character <= 0xffff)
|
input_char_p++;
|
||||||
{
|
JERRY_ASSERT (input_char_p < input_end_p);
|
||||||
if (character >= 0xd800 && character <= 0xdfff)
|
JERRY_ASSERT ((*input_char_p & LIT_UTF8_EXTRA_BYTE_MASK) == LIT_UTF8_EXTRA_BYTE_MARKER);
|
||||||
|
|
||||||
|
/* If this condition is true, the next character is >= LIT_UTF16_HIGH_SURROGATE_MIN. */
|
||||||
|
if (*input_char_p & 0x20)
|
||||||
{
|
{
|
||||||
ret_value = ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_URI));
|
ret_value = ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_URI));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else
|
output_length += URI_ENCODED_BYTE_SIZE;
|
||||||
{
|
|
||||||
output_length += 9;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (character <= 0x10ffff)
|
|
||||||
{
|
|
||||||
output_length += 12;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ret_value = ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_URI));
|
output_length += URI_ENCODED_BYTE_SIZE;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
input_char_p++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ecma_is_completion_value_empty (ret_value))
|
if (ecma_is_completion_value_empty (ret_value))
|
||||||
@@ -929,58 +867,37 @@ ecma_builtin_global_object_encode_uri_helper (ecma_value_t uri, /**< uri argumen
|
|||||||
output_length,
|
output_length,
|
||||||
lit_utf8_byte_t);
|
lit_utf8_byte_t);
|
||||||
|
|
||||||
lit_utf8_iterator_t iter = lit_utf8_iterator_create (input_start_p, input_size);
|
|
||||||
lit_utf8_byte_t *output_char_p = output_start_p;
|
lit_utf8_byte_t *output_char_p = output_start_p;
|
||||||
while (!lit_utf8_iterator_is_eos (&iter))
|
input_char_p = input_start_p;
|
||||||
|
|
||||||
|
while (input_char_p < input_end_p)
|
||||||
{
|
{
|
||||||
/* Input decode. */
|
/* Input decode. */
|
||||||
lit_code_point_t character = lit_utf8_iterator_read_next (&iter);
|
|
||||||
|
|
||||||
if (character <= 0x7f)
|
if (*input_char_p <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||||
{
|
{
|
||||||
if (ecma_builtin_global_object_character_is_in (character, unescaped_uri_bitset))
|
if (ecma_builtin_global_object_character_is_in (*input_char_p, unescaped_uri_bitset))
|
||||||
{
|
{
|
||||||
*output_char_p++ = (lit_utf8_byte_t) character;
|
*output_char_p++ = *input_char_p;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ecma_builtin_global_object_byte_to_hex (output_char_p, character);
|
ecma_builtin_global_object_byte_to_hex (output_char_p, *input_char_p);
|
||||||
output_char_p += 3;
|
output_char_p += URI_ENCODED_BYTE_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (character <= 0x7ff)
|
|
||||||
{
|
|
||||||
ecma_builtin_global_object_byte_to_hex (output_char_p, 0xc0 | (character >> 6));
|
|
||||||
output_char_p += 3;
|
|
||||||
ecma_builtin_global_object_byte_to_hex (output_char_p, 0x80 | (character & 0x3f));
|
|
||||||
output_char_p += 3;
|
|
||||||
}
|
|
||||||
else if (character <= 0xffff)
|
|
||||||
{
|
|
||||||
ecma_builtin_global_object_byte_to_hex (output_char_p, 0xe0 | (character >> 12));
|
|
||||||
output_char_p += 3;
|
|
||||||
ecma_builtin_global_object_byte_to_hex (output_char_p, 0x80 | ((character >> 6) & 0x3f));
|
|
||||||
output_char_p += 3;
|
|
||||||
ecma_builtin_global_object_byte_to_hex (output_char_p, 0x80 | (character & 0x3f));
|
|
||||||
output_char_p += 3;
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ecma_builtin_global_object_byte_to_hex (output_char_p, 0xf0 | (character >> 18));
|
ecma_builtin_global_object_byte_to_hex (output_char_p, *input_char_p);
|
||||||
output_char_p += 3;
|
output_char_p += URI_ENCODED_BYTE_SIZE;
|
||||||
ecma_builtin_global_object_byte_to_hex (output_char_p, 0x80 | ((character >> 12) & 0x3f));
|
|
||||||
output_char_p += 3;
|
|
||||||
ecma_builtin_global_object_byte_to_hex (output_char_p, 0x80 | ((character >> 6) & 0x3f));
|
|
||||||
output_char_p += 3;
|
|
||||||
ecma_builtin_global_object_byte_to_hex (output_char_p, 0x80 | (character & 0x3f));
|
|
||||||
output_char_p += 3;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
input_char_p++;
|
||||||
}
|
}
|
||||||
|
|
||||||
*output_char_p = '\0';
|
JERRY_ASSERT (output_start_p + output_length == output_char_p);
|
||||||
JERRY_ASSERT (output_start_p + output_length == output_char_p + 1);
|
|
||||||
|
|
||||||
ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_length - 1);
|
ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_length);
|
||||||
|
|
||||||
ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_string_p));
|
ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_string_p));
|
||||||
|
|
||||||
|
|||||||
@@ -47,6 +47,7 @@
|
|||||||
#define LIT_UTF8_2_BYTE_MARKER (0xC0)
|
#define LIT_UTF8_2_BYTE_MARKER (0xC0)
|
||||||
#define LIT_UTF8_3_BYTE_MARKER (0xE0)
|
#define LIT_UTF8_3_BYTE_MARKER (0xE0)
|
||||||
#define LIT_UTF8_4_BYTE_MARKER (0xF0)
|
#define LIT_UTF8_4_BYTE_MARKER (0xF0)
|
||||||
|
#define LIT_UTF8_5_BYTE_MARKER (0xF8)
|
||||||
#define LIT_UTF8_EXTRA_BYTE_MARKER (0x80)
|
#define LIT_UTF8_EXTRA_BYTE_MARKER (0x80)
|
||||||
|
|
||||||
#define LIT_UTF8_1_BYTE_MASK (0x80)
|
#define LIT_UTF8_1_BYTE_MASK (0x80)
|
||||||
@@ -83,6 +84,11 @@
|
|||||||
*/
|
*/
|
||||||
#define LIT_ITERATOR_OFFSET_MASK ((1ull << LIT_ITERATOR_OFFSET_WIDTH) - 1)
|
#define LIT_ITERATOR_OFFSET_MASK ((1ull << LIT_ITERATOR_OFFSET_WIDTH) - 1)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Byte values >= LIT_UTF8_FIRST_BYTE_MAX are not allowed in internal strings
|
||||||
|
*/
|
||||||
|
#define LIT_UTF8_FIRST_BYTE_MAX LIT_UTF8_5_BYTE_MARKER
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents position of the iterator
|
* Represents position of the iterator
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -15,8 +15,18 @@
|
|||||||
|
|
||||||
// URI encoding
|
// URI encoding
|
||||||
|
|
||||||
assert (encodeURI ("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f") ===
|
function checkEncodeURIParseError (str)
|
||||||
"%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F");
|
{
|
||||||
|
try {
|
||||||
|
encodeURI (str);
|
||||||
|
assert (false);
|
||||||
|
} catch(e) {
|
||||||
|
assert(e instanceof URIError);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert (encodeURI ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f") ===
|
||||||
|
"%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F");
|
||||||
assert (encodeURI ("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f") ===
|
assert (encodeURI ("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f") ===
|
||||||
"%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F");
|
"%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F");
|
||||||
assert (encodeURI (" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN") ===
|
assert (encodeURI (" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN") ===
|
||||||
@@ -24,8 +34,8 @@ assert (encodeURI (" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN") ===
|
|||||||
assert (encodeURI ("OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}\x7F") ===
|
assert (encodeURI ("OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}\x7F") ===
|
||||||
"OPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7F");
|
"OPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7F");
|
||||||
|
|
||||||
assert (encodeURIComponent ("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f") ===
|
assert (encodeURIComponent ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f") ===
|
||||||
"%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F");
|
"%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F");
|
||||||
assert (encodeURIComponent ("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f") ===
|
assert (encodeURIComponent ("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f") ===
|
||||||
"%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F");
|
"%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F");
|
||||||
assert (encodeURIComponent (" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN") ===
|
assert (encodeURIComponent (" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN") ===
|
||||||
@@ -33,9 +43,12 @@ assert (encodeURIComponent (" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN")
|
|||||||
assert (encodeURIComponent ("OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}\x7F") ===
|
assert (encodeURIComponent ("OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}\x7F") ===
|
||||||
"OPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7F");
|
"OPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7F");
|
||||||
|
|
||||||
// TODO: we need tests for characters greater than 0xff and equal to 0x0
|
|
||||||
|
|
||||||
assert (encodeURI ("\xe9") == "%C3%A9");
|
assert (encodeURI ("\xe9") == "%C3%A9");
|
||||||
|
assert (encodeURI ("\ud7ff") == "%ED%9F%BF");
|
||||||
|
assert (encodeURI ("\ue000") == "%EE%80%80");
|
||||||
|
|
||||||
|
checkEncodeURIParseError ("\ud800");
|
||||||
|
checkEncodeURIParseError ("\udfff");
|
||||||
|
|
||||||
// URI decoding
|
// URI decoding
|
||||||
|
|
||||||
@@ -49,8 +62,8 @@ function checkDecodeURIParseError (str)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert (decodeURI ("%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F") ===
|
assert (decodeURI ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F") ===
|
||||||
"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f");
|
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f");
|
||||||
assert (decodeURI ("%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F") ===
|
assert (decodeURI ("%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F") ===
|
||||||
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f");
|
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f");
|
||||||
assert (decodeURI ("%20%21%22%23%24%25%26%27%28%29%2a%2b%2c%2d%2e%2f") ===
|
assert (decodeURI ("%20%21%22%23%24%25%26%27%28%29%2a%2b%2c%2d%2e%2f") ===
|
||||||
@@ -66,8 +79,8 @@ assert (decodeURI ("%60%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f") ===
|
|||||||
assert (decodeURI ("%70%71%72%73%74%75%76%77%78%79%7a%7b%7c%7d%7e") ===
|
assert (decodeURI ("%70%71%72%73%74%75%76%77%78%79%7a%7b%7c%7d%7e") ===
|
||||||
"pqrstuvwxyz{|}~");
|
"pqrstuvwxyz{|}~");
|
||||||
|
|
||||||
assert (decodeURIComponent ("%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F") ===
|
assert (decodeURIComponent ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F") ===
|
||||||
"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f");
|
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f");
|
||||||
assert (decodeURIComponent ("%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F") ===
|
assert (decodeURIComponent ("%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F") ===
|
||||||
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f");
|
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f");
|
||||||
assert (decodeURIComponent ("%20%21%22%23%24%25%26%27%28%29%2a%2b%2c%2d%2e%2f") ===
|
assert (decodeURIComponent ("%20%21%22%23%24%25%26%27%28%29%2a%2b%2c%2d%2e%2f") ===
|
||||||
@@ -83,9 +96,10 @@ assert (decodeURIComponent ("%60%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f")
|
|||||||
assert (decodeURIComponent ("%70%71%72%73%74%75%76%77%78%79%7a%7b%7c%7d%7e") ===
|
assert (decodeURIComponent ("%70%71%72%73%74%75%76%77%78%79%7a%7b%7c%7d%7e") ===
|
||||||
"pqrstuvwxyz{|}~");
|
"pqrstuvwxyz{|}~");
|
||||||
|
|
||||||
|
|
||||||
assert (decodeURI ("%6A%6B%6C%6D%6E%6F") === "jklmno");
|
assert (decodeURI ("%6A%6B%6C%6D%6E%6F") === "jklmno");
|
||||||
assert (decodeURI ("%C3%A9") === "\xe9");
|
assert (decodeURI ("%C3%A9") === "\xe9");
|
||||||
|
assert (decodeURI ("%e2%b1%a5") === "\u2c65");
|
||||||
|
/* assert (decodeURI ("%f0%90%90%a8") === "\ud801\udc28"); */
|
||||||
|
|
||||||
checkDecodeURIParseError ("13%");
|
checkDecodeURIParseError ("13%");
|
||||||
checkDecodeURIParseError ("%0g");
|
checkDecodeURIParseError ("%0g");
|
||||||
@@ -106,6 +120,3 @@ assert (decodeURI ({ x:1 }) === "[object Object]");
|
|||||||
assert (encodeURI (void 0) === "undefined");
|
assert (encodeURI (void 0) === "undefined");
|
||||||
assert (encodeURI (216.000e1) === "2160");
|
assert (encodeURI (216.000e1) === "2160");
|
||||||
|
|
||||||
// TODO: we need tests for characters greater than 0xff and equal to 0x0
|
|
||||||
|
|
||||||
assert (decodeURI ("%f0%9f%9f%8f").length === 2);
|
|
||||||
|
|||||||
Reference in New Issue
Block a user