Implement UnicodeEscape abstract method (#3959)

Also refactored ecma_builtin_json_quote to use the method above

JerryScript-DCO-1.0-Signed-off-by: Adam Szilagyi aszilagy@inf.u-szeged.hu
This commit is contained in:
Szilagyi Adam
2020-07-20 13:51:06 +02:00
committed by GitHub
parent b162e27418
commit 33359ac506
4 changed files with 66 additions and 22 deletions
@@ -849,7 +849,29 @@ ecma_builtin_json_quote (ecma_stringbuilder_t *builder_p, /**< builder for the r
while (str_p < str_end_p)
{
lit_utf8_byte_t c = *str_p++;
ecma_char_t c = lit_cesu8_read_next (&str_p);
bool should_escape = false;
#if ENABLED (JERRY_ESNEXT)
if (lit_is_code_point_utf16_high_surrogate (c))
{
const ecma_char_t next_ch = lit_cesu8_peek_next (str_p);
if (lit_is_code_point_utf16_low_surrogate (next_ch))
{
str_p += LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;
continue;
}
else
{
should_escape = true;
}
}
else if (lit_is_code_point_utf16_low_surrogate (c))
{
should_escape = true;
}
#endif /* ENABLED (JERRY_ESNEXT) */
if (c == LIT_CHAR_BACKSLASH || c == LIT_CHAR_DOUBLE_QUOTE)
{
@@ -858,56 +880,52 @@ ecma_builtin_json_quote (ecma_stringbuilder_t *builder_p, /**< builder for the r
(lit_utf8_size_t) (str_p - regular_str_start_p - 1));
regular_str_start_p = str_p;
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_BACKSLASH);
ecma_stringbuilder_append_byte (builder_p, c);
ecma_stringbuilder_append_byte (builder_p, (lit_utf8_byte_t) c);
}
else if (c < LIT_CHAR_SP)
else if (c < LIT_CHAR_SP || should_escape)
{
/**
In ES10 only unpaired (lone) high or low surrogates are escaped; a valid
surrogate pair is left as-is. The bytes of the lone surrogate just read must
therefore be excluded from the raw copy appended to the stringbuilder.
*/
uint8_t offset = should_escape ? LIT_UTF8_MAX_BYTES_IN_CODE_UNIT : 1;
ecma_stringbuilder_append_raw (builder_p,
regular_str_start_p,
(lit_utf8_size_t) (str_p - regular_str_start_p - 1));
(lit_utf8_size_t) (str_p - regular_str_start_p - offset));
regular_str_start_p = str_p;
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_BACKSLASH);
switch (c)
{
case LIT_CHAR_BS:
{
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_B);
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\b", 2);
break;
}
case LIT_CHAR_FF:
{
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_F);
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\f", 2);
break;
}
case LIT_CHAR_LF:
{
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_N);
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\n", 2);
break;
}
case LIT_CHAR_CR:
{
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_R);
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\r", 2);
break;
}
case LIT_CHAR_TAB:
{
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_T);
ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\t", 2);
break;
}
default: /* Hexadecimal. */
{
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_U);
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_0);
ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_0);
/* Max range 0-9, hex digits unnecessary. */
ecma_stringbuilder_append_byte (builder_p, (lit_utf8_byte_t) (LIT_CHAR_0 + (c >> 4)));
lit_utf8_byte_t c2 = (c & 0xf);
ecma_stringbuilder_append_byte (builder_p,
(lit_utf8_byte_t) (c2 + ((c2 <= 9)
? LIT_CHAR_0
: (LIT_CHAR_LOWERCASE_A - 10))));
lit_char_unicode_escape (builder_p, c);
break;
}
}