Fixes for URI decoding.

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg@inf.u-szeged.hu
This commit is contained in:
Zoltan Herczeg
2015-08-03 06:27:26 -07:00
parent caeae0f621
commit 2630048ecc
2 changed files with 47 additions and 5 deletions
@@ -747,6 +747,13 @@ static uint8_t unescaped_uri_component_set[16] =
*/
#define URI_ENCODED_BYTE_SIZE (3)
/*
* These two types show whether a byte was present in
* the original stream or was decoded from a %xx sequence.
*/
/* Byte copied unchanged from the input stream. */
#define URI_DECODE_ORIGINAL_BYTE (0)
/* Byte produced by decoding a %xx escape sequence. */
#define URI_DECODE_DECODED_BYTE (1)
/**
* Helper function to decode URI.
*
@@ -835,23 +842,27 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
if (ecma_is_completion_value_empty (ret_value))
{
MEM_DEFINE_LOCAL_ARRAY (output_start_p,
output_size,
output_size * 2,
lit_utf8_byte_t);
input_char_p = input_start_p;
lit_utf8_byte_t *output_char_p = output_start_p;
lit_utf8_byte_t *output_type_p = output_start_p + output_size;
while (input_char_p < input_end_p)
{
/* Input decode. */
if (*input_char_p != '%')
{
*output_type_p++ = URI_DECODE_ORIGINAL_BYTE;
*output_char_p = *input_char_p;
output_char_p++;
input_char_p++;
continue;
}
*output_type_p++ = URI_DECODE_DECODED_BYTE;
lit_code_point_t decoded_byte;
lit_read_code_point_from_hex (input_char_p + 1, 2, &decoded_byte);
@@ -886,16 +897,38 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
if (valid_utf8)
{
lit_utf8_iterator_t characters = lit_utf8_iterator_create (output_start_p, output_size);
output_type_p = output_start_p + output_size;
while (!lit_utf8_iterator_is_eos (&characters))
{
bool original_byte = output_type_p[characters.buf_pos.offset] == URI_DECODE_ORIGINAL_BYTE;
ecma_char_t character = lit_utf8_iterator_read_next (&characters);
/* Surrogate fragments are allowed in JS, but not accepted by URI decoding. */
if (lit_is_code_unit_low_surrogate (character)
|| lit_is_code_unit_high_surrogate (character))
if (!original_byte)
{
valid_utf8 = false;
break;
if (lit_is_code_unit_high_surrogate (character))
{
/* Note: stray high/low surrogate pairs are not allowed in the stream. */
if (lit_utf8_iterator_is_eos (&characters))
{
valid_utf8 = false;
break;
}
if (output_type_p[characters.buf_pos.offset] == URI_DECODE_ORIGINAL_BYTE
|| !lit_is_code_unit_low_surrogate (lit_utf8_iterator_read_next (&characters)))
{
valid_utf8 = false;
break;
}
}
else if (lit_is_code_unit_low_surrogate (character))
{
valid_utf8 = false;
break;
}
}
}
}