Implement UnicodeEscape abstract method (#3959)
Also refactored ecma_builtin_json_quote to use the method above.

JerryScript-DCO-1.0-Signed-off-by: Adam Szilagyi aszilagy@inf.u-szeged.hu
This commit is contained in:
@@ -849,7 +849,29 @@ ecma_builtin_json_quote (ecma_stringbuilder_t *builder_p, /**< builder for the r
|
||||
|
||||
while (str_p < str_end_p)
|
||||
{
|
||||
lit_utf8_byte_t c = *str_p++;
|
||||
ecma_char_t c = lit_cesu8_read_next (&str_p);
|
||||
|
||||
bool should_escape = false;
|
||||
|
||||
#if ENABLED (JERRY_ESNEXT)
|
||||
if (lit_is_code_point_utf16_high_surrogate (c))
|
||||
{
|
||||
const ecma_char_t next_ch = lit_cesu8_peek_next (str_p);
|
||||
if (lit_is_code_point_utf16_low_surrogate (next_ch))
|
||||
{
|
||||
str_p += LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
should_escape = true;
|
||||
}
|
||||
}
|
||||
else if (lit_is_code_point_utf16_low_surrogate (c))
|
||||
{
|
||||
should_escape = true;
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ESNEXT) */
|
||||
|
||||
if (c == LIT_CHAR_BACKSLASH || c == LIT_CHAR_DOUBLE_QUOTE)
|
||||
{
|
||||
@@ -858,56 +880,52 @@ ecma_builtin_json_quote (ecma_stringbuilder_t *builder_p, /**< builder for the r
|
||||
(lit_utf8_size_t) (str_p - regular_str_start_p - 1));
|
||||
regular_str_start_p = str_p;
|
||||
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_BACKSLASH);
|
||||
ecma_stringbuilder_append_byte (builder_p, c);
|
||||
ecma_stringbuilder_append_byte (builder_p, (lit_utf8_byte_t) c);
|
||||
}
|
||||
else if (c < LIT_CHAR_SP)
|
||||
else if (c < LIT_CHAR_SP || should_escape)
|
||||
{
|
||||
/**
|
||||
* In ES10 we should escape high or low surrogate characters,
|
||||
* so we shouldn't append the unescaped character to the stringbuilder
|
||||
*/
|
||||
uint8_t offset = should_escape ? LIT_UTF8_MAX_BYTES_IN_CODE_UNIT : 1;
|
||||
|
||||
ecma_stringbuilder_append_raw (builder_p,
|
||||
regular_str_start_p,
|
||||
(lit_utf8_size_t) (str_p - regular_str_start_p - 1));
|
||||
(lit_utf8_size_t) (str_p - regular_str_start_p - offset));
|
||||
|
||||
regular_str_start_p = str_p;
|
||||
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_BACKSLASH);
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case LIT_CHAR_BS:
|
||||
{
|
||||
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_B);
|
||||
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\b", 2);
|
||||
break;
|
||||
}
|
||||
case LIT_CHAR_FF:
|
||||
{
|
||||
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_F);
|
||||
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\f", 2);
|
||||
break;
|
||||
}
|
||||
case LIT_CHAR_LF:
|
||||
{
|
||||
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_N);
|
||||
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\n", 2);
|
||||
break;
|
||||
}
|
||||
case LIT_CHAR_CR:
|
||||
{
|
||||
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_R);
|
||||
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\r", 2);
|
||||
break;
|
||||
}
|
||||
case LIT_CHAR_TAB:
|
||||
{
|
||||
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_T);
|
||||
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\t", 2);
|
||||
break;
|
||||
}
|
||||
default: /* Hexadecimal. */
|
||||
{
|
||||
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_U);
|
||||
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_0);
|
||||
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_0);
|
||||
|
||||
/* Max range 0-9, hex digits unnecessary. */
|
||||
ecma_stringbuilder_append_byte (builder_p, (lit_utf8_byte_t) (LIT_CHAR_0 + (c >> 4)));
|
||||
|
||||
lit_utf8_byte_t c2 = (c & 0xf);
|
||||
ecma_stringbuilder_append_byte (builder_p,
|
||||
(lit_utf8_byte_t) (c2 + ((c2 <= 9)
|
||||
? LIT_CHAR_0
|
||||
: (LIT_CHAR_LOWERCASE_A - 10))));
|
||||
lit_char_unicode_escape (builder_p, c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -331,6 +331,26 @@ lit_char_is_binary_digit (ecma_char_t c) /** code unit */
|
||||
} /* lit_char_is_binary_digit */
|
||||
#endif /* ENABLED (JERRY_ESNEXT) */
|
||||
|
||||
/**
|
||||
* UnicodeEscape abstract method
|
||||
*
|
||||
* See also: ECMA-262 v10, 24.5.2.3
|
||||
*/
|
||||
void
|
||||
lit_char_unicode_escape (ecma_stringbuilder_t *builder_p, /**< stringbuilder to append */
|
||||
ecma_char_t c) /**< code unit to convert */
|
||||
{
|
||||
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\u", 2);
|
||||
|
||||
for (int8_t i = 3; i >= 0; i--)
|
||||
{
|
||||
int32_t result_char = (c >> (i * 4)) & 0xF;
|
||||
ecma_stringbuilder_append_byte (builder_p, (lit_utf8_byte_t) (result_char + (result_char <= 9
|
||||
? LIT_CHAR_0
|
||||
: (LIT_CHAR_LOWERCASE_A - 10))));
|
||||
}
|
||||
} /* lit_char_unicode_escape */
|
||||
|
||||
/**
|
||||
* Convert a HexDigit character to its numeric value, as defined in ECMA-262 v5, 7.8.3
|
||||
*
|
||||
|
||||
@@ -223,6 +223,7 @@ bool lit_char_is_hex_digit (ecma_char_t c);
|
||||
#if ENABLED (JERRY_ESNEXT)
|
||||
bool lit_char_is_binary_digit (ecma_char_t c);
|
||||
#endif /* ENABLED (JERRY_ESNEXT) */
|
||||
void lit_char_unicode_escape (ecma_stringbuilder_t *builder_p, ecma_char_t c);
|
||||
uint32_t lit_char_hex_to_int (ecma_char_t c);
|
||||
size_t lit_code_point_to_cesu8_bytes (uint8_t *dst_p, lit_code_point_t code_point);
|
||||
size_t lit_code_point_get_cesu8_length (lit_code_point_t code_point);
|
||||
|
||||
@@ -46,3 +46,8 @@ try {
|
||||
} catch (e) {
|
||||
assert(e instanceof TypeError);
|
||||
}
|
||||
|
||||
// Checking quoting strings
|
||||
assert(JSON.stringify("ab𬄕c") === '"ab𬄕\\u001fc"');
|
||||
assert(JSON.stringify("ab\uDC01cd") === '"ab\\udc01c\\u001fd"');
|
||||
assert(JSON.stringify("ab\uDC01cd\uD8331e") === '"ab\\udc01c\\u001fd\\ud8331e"');
|
||||
|
||||
Reference in New Issue
Block a user