Implement UnicodeEscape abstract method (#3959)

Also refactored ecma_builtin_json_quote to use the method above

JerryScript-DCO-1.0-Signed-off-by: Adam Szilagyi aszilagy@inf.u-szeged.hu
This commit is contained in:
Szilagyi Adam
2020-07-20 13:51:06 +02:00
committed by GitHub
parent b162e27418
commit 33359ac506
4 changed files with 66 additions and 22 deletions
@@ -849,7 +849,29 @@ ecma_builtin_json_quote (ecma_stringbuilder_t *builder_p, /**< builder for the r
while (str_p < str_end_p)
{
lit_utf8_byte_t c = *str_p++;
ecma_char_t c = lit_cesu8_read_next (&str_p);
bool should_escape = false;
#if ENABLED (JERRY_ESNEXT)
if (lit_is_code_point_utf16_high_surrogate (c))
{
const ecma_char_t next_ch = lit_cesu8_peek_next (str_p);
if (lit_is_code_point_utf16_low_surrogate (next_ch))
{
str_p += LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;
continue;
}
else
{
should_escape = true;
}
}
else if (lit_is_code_point_utf16_low_surrogate (c))
{
should_escape = true;
}
#endif /* ENABLED (JERRY_ESNEXT) */
if (c == LIT_CHAR_BACKSLASH || c == LIT_CHAR_DOUBLE_QUOTE)
{
@@ -858,56 +880,52 @@ ecma_builtin_json_quote (ecma_stringbuilder_t *builder_p, /**< builder for the r
(lit_utf8_size_t) (str_p - regular_str_start_p - 1));
regular_str_start_p = str_p;
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_BACKSLASH);
ecma_stringbuilder_append_byte (builder_p, c);
ecma_stringbuilder_append_byte (builder_p, (lit_utf8_byte_t) c);
}
else if (c < LIT_CHAR_SP)
else if (c < LIT_CHAR_SP || should_escape)
{
/**
* In ES10 we should escape high or low surrogate characters,
* so we shouldn't append the unescaped character to the stringbuilder
*/
uint8_t offset = should_escape ? LIT_UTF8_MAX_BYTES_IN_CODE_UNIT : 1;
ecma_stringbuilder_append_raw (builder_p,
regular_str_start_p,
(lit_utf8_size_t) (str_p - regular_str_start_p - 1));
(lit_utf8_size_t) (str_p - regular_str_start_p - offset));
regular_str_start_p = str_p;
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_BACKSLASH);
switch (c)
{
case LIT_CHAR_BS:
{
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_B);
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\b", 2);
break;
}
case LIT_CHAR_FF:
{
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_F);
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\f", 2);
break;
}
case LIT_CHAR_LF:
{
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_N);
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\n", 2);
break;
}
case LIT_CHAR_CR:
{
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_R);
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\r", 2);
break;
}
case LIT_CHAR_TAB:
{
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_T);
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\t", 2);
break;
}
default: /* Hexadecimal. */
{
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_U);
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_0);
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_0);
/* Max range 0-9, hex digits unnecessary. */
ecma_stringbuilder_append_byte (builder_p, (lit_utf8_byte_t) (LIT_CHAR_0 + (c >> 4)));
lit_utf8_byte_t c2 = (c & 0xf);
ecma_stringbuilder_append_byte (builder_p,
(lit_utf8_byte_t) (c2 + ((c2 <= 9)
? LIT_CHAR_0
: (LIT_CHAR_LOWERCASE_A - 10))));
lit_char_unicode_escape (builder_p, c);
break;
}
}
+20
View File
@@ -331,6 +331,26 @@ lit_char_is_binary_digit (ecma_char_t c) /** code unit */
} /* lit_char_is_binary_digit */
#endif /* ENABLED (JERRY_ESNEXT) */
/**
 * UnicodeEscape abstract operation: append the escaped form of a code unit to a string builder
 *
 * The appended text is a backslash, the letter 'u' and four hexadecimal digits taken from
 * bits 15..0 of the code unit, most significant digit first. Digit values 0-9 are offset
 * from LIT_CHAR_0, digit values 10-15 are offset from LIT_CHAR_LOWERCASE_A.
 *
 * See also: ECMA-262 v10, 24.5.2.3
 */
void
lit_char_unicode_escape (ecma_stringbuilder_t *builder_p, /**< stringbuilder to append */
                         ecma_char_t c) /**< code unit to convert */
{
  ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\u", 2);

  /* Walk the four nibbles from the highest one (shift 12) down to the lowest one (shift 0). */
  for (int8_t shift = 12; shift >= 0; shift = (int8_t) (shift - 4))
  {
    int32_t nibble = (c >> shift) & 0xF;
    int32_t digit_base = LIT_CHAR_LOWERCASE_A - 10;

    if (nibble <= 9)
    {
      digit_base = LIT_CHAR_0;
    }

    ecma_stringbuilder_append_byte (builder_p, (lit_utf8_byte_t) (nibble + digit_base));
  }
} /* lit_char_unicode_escape */
/**
* Convert a HexDigit character to its numeric value, as defined in ECMA-262 v5, 7.8.3
*
+1
View File
@@ -223,6 +223,7 @@ bool lit_char_is_hex_digit (ecma_char_t c);
#if ENABLED (JERRY_ESNEXT)
bool lit_char_is_binary_digit (ecma_char_t c);
#endif /* ENABLED (JERRY_ESNEXT) */
/* Appends the backslash-u escape (four hexadecimal digits) of code unit c to builder_p. */
void lit_char_unicode_escape (ecma_stringbuilder_t *builder_p, ecma_char_t c);
uint32_t lit_char_hex_to_int (ecma_char_t c);
size_t lit_code_point_to_cesu8_bytes (uint8_t *dst_p, lit_code_point_t code_point);
size_t lit_code_point_get_cesu8_length (lit_code_point_t code_point);
+5
View File
@@ -46,3 +46,8 @@ try {
} catch (e) {
assert(e instanceof TypeError);
}
// Checking quoting strings
// The U+001F control character is written as an explicit \u001f escape in every input,
// so the test does not depend on an invisible raw byte surviving editors and tooling.

// A well-formed surrogate pair is kept as-is, the control character is escaped.
assert(JSON.stringify("ab𬄕\u001fc") === '"ab𬄕\\u001fc"');
// A lone low surrogate is escaped, and so is the control character.
assert(JSON.stringify("ab\uDC01c\u001fd") === '"ab\\udc01c\\u001fd"');
// A high surrogate followed by a non-surrogate ("1") is lone as well and must be escaped.
assert(JSON.stringify("ab\uDC01c\u001fd\uD8331e") === '"ab\\udc01c\\u001fd\\ud8331e"');