Fixes for URI decoding.
JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg@inf.u-szeged.hu
This commit is contained in:
@@ -747,6 +747,13 @@ static uint8_t unescaped_uri_component_set[16] =
|
|||||||
*/
|
*/
|
||||||
#define URI_ENCODED_BYTE_SIZE (3)
|
#define URI_ENCODED_BYTE_SIZE (3)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* These two types show whether the byte is present in
|
||||||
|
* the original stream or decoded from a %xx sequence.
|
||||||
|
*/
|
||||||
|
#define URI_DECODE_ORIGINAL_BYTE 0
|
||||||
|
#define URI_DECODE_DECODED_BYTE 1
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper function to decode URI.
|
* Helper function to decode URI.
|
||||||
*
|
*
|
||||||
@@ -835,23 +842,27 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
|
|||||||
if (ecma_is_completion_value_empty (ret_value))
|
if (ecma_is_completion_value_empty (ret_value))
|
||||||
{
|
{
|
||||||
MEM_DEFINE_LOCAL_ARRAY (output_start_p,
|
MEM_DEFINE_LOCAL_ARRAY (output_start_p,
|
||||||
output_size,
|
output_size * 2,
|
||||||
lit_utf8_byte_t);
|
lit_utf8_byte_t);
|
||||||
|
|
||||||
input_char_p = input_start_p;
|
input_char_p = input_start_p;
|
||||||
lit_utf8_byte_t *output_char_p = output_start_p;
|
lit_utf8_byte_t *output_char_p = output_start_p;
|
||||||
|
lit_utf8_byte_t *output_type_p = output_start_p + output_size;
|
||||||
|
|
||||||
while (input_char_p < input_end_p)
|
while (input_char_p < input_end_p)
|
||||||
{
|
{
|
||||||
/* Input decode. */
|
/* Input decode. */
|
||||||
if (*input_char_p != '%')
|
if (*input_char_p != '%')
|
||||||
{
|
{
|
||||||
|
*output_type_p++ = URI_DECODE_ORIGINAL_BYTE;
|
||||||
*output_char_p = *input_char_p;
|
*output_char_p = *input_char_p;
|
||||||
output_char_p++;
|
output_char_p++;
|
||||||
input_char_p++;
|
input_char_p++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
*output_type_p++ = URI_DECODE_DECODED_BYTE;
|
||||||
|
|
||||||
lit_code_point_t decoded_byte;
|
lit_code_point_t decoded_byte;
|
||||||
|
|
||||||
lit_read_code_point_from_hex (input_char_p + 1, 2, &decoded_byte);
|
lit_read_code_point_from_hex (input_char_p + 1, 2, &decoded_byte);
|
||||||
@@ -886,16 +897,38 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
|
|||||||
if (valid_utf8)
|
if (valid_utf8)
|
||||||
{
|
{
|
||||||
lit_utf8_iterator_t characters = lit_utf8_iterator_create (output_start_p, output_size);
|
lit_utf8_iterator_t characters = lit_utf8_iterator_create (output_start_p, output_size);
|
||||||
|
output_type_p = output_start_p + output_size;
|
||||||
|
|
||||||
while (!lit_utf8_iterator_is_eos (&characters))
|
while (!lit_utf8_iterator_is_eos (&characters))
|
||||||
{
|
{
|
||||||
|
bool original_byte = output_type_p[characters.buf_pos.offset] == URI_DECODE_ORIGINAL_BYTE;
|
||||||
|
|
||||||
ecma_char_t character = lit_utf8_iterator_read_next (&characters);
|
ecma_char_t character = lit_utf8_iterator_read_next (&characters);
|
||||||
|
|
||||||
/* Surrogate fragments are allowed in JS, but not accepted by URI decoding. */
|
/* Surrogate fragments are allowed in JS, but not accepted by URI decoding. */
|
||||||
if (lit_is_code_unit_low_surrogate (character)
|
if (!original_byte)
|
||||||
|| lit_is_code_unit_high_surrogate (character))
|
|
||||||
{
|
{
|
||||||
valid_utf8 = false;
|
if (lit_is_code_unit_high_surrogate (character))
|
||||||
break;
|
{
|
||||||
|
/* Note: unpaired high/low surrogates are not allowed in the stream. */
|
||||||
|
if (lit_utf8_iterator_is_eos (&characters))
|
||||||
|
{
|
||||||
|
valid_utf8 = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (output_type_p[characters.buf_pos.offset] == URI_DECODE_ORIGINAL_BYTE
|
||||||
|
|| !lit_is_code_unit_low_surrogate (lit_utf8_iterator_read_next (&characters)))
|
||||||
|
{
|
||||||
|
valid_utf8 = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (lit_is_code_unit_low_surrogate (character))
|
||||||
|
{
|
||||||
|
valid_utf8 = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -120,3 +120,12 @@ assert (decodeURI ({ x:1 }) === "[object Object]");
|
|||||||
assert (encodeURI (void 0) === "undefined");
|
assert (encodeURI (void 0) === "undefined");
|
||||||
assert (encodeURI (216.000e1) === "2160");
|
assert (encodeURI (216.000e1) === "2160");
|
||||||
|
|
||||||
|
// Combining surrogate fragments
|
||||||
|
|
||||||
|
assert (decodeURI("\ud800\udc00 \ud800 \udc00") === "\ud800\udc00 \ud800 \udc00");
|
||||||
|
assert (decodeURI("%f0%90%80%80") === "\ud800\udc00");
|
||||||
|
assert (decodeURI("\ud800%f0%90%80%80\ud800") === "\ud800\ud800\udc00\ud800");
|
||||||
|
assert (decodeURI("\udc00%f0%90%80%80\udc00") === "\udc00\ud800\udc00\udc00");
|
||||||
|
|
||||||
|
checkDecodeURIParseError ("\ud800%ed%b0%80");
|
||||||
|
checkDecodeURIParseError ("%ed%a0%80\udc00");
|
||||||
|
|||||||
Reference in New Issue
Block a user