diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-json.c b/jerry-core/ecma/builtin-objects/ecma-builtin-json.c
index f0f0daee6..c18da71f6 100644
--- a/jerry-core/ecma/builtin-objects/ecma-builtin-json.c
+++ b/jerry-core/ecma/builtin-objects/ecma-builtin-json.c
@@ -849,7 +849,29 @@ ecma_builtin_json_quote (ecma_stringbuilder_t *builder_p, /**< builder for the r
 
   while (str_p < str_end_p)
   {
-    lit_utf8_byte_t c = *str_p++;
+    ecma_char_t c = lit_cesu8_read_next (&str_p);
+
+    bool should_escape = false;
+
+#if ENABLED (JERRY_ESNEXT)
+    if (lit_is_code_point_utf16_high_surrogate (c))
+    {
+      /* A high surrogate may end the string, so only peek while input remains. */
+      if (str_p < str_end_p
+          && lit_is_code_point_utf16_low_surrogate (lit_cesu8_peek_next (str_p)))
+      {
+        /* Well-formed pair: step over the low surrogate without escaping. */
+        str_p += LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;
+        continue;
+      }
+
+      should_escape = true;
+    }
+    else if (lit_is_code_point_utf16_low_surrogate (c))
+    {
+      should_escape = true;
+    }
+#endif /* ENABLED (JERRY_ESNEXT) */
 
     if (c == LIT_CHAR_BACKSLASH || c == LIT_CHAR_DOUBLE_QUOTE)
     {
@@ -858,56 +880,52 @@ ecma_builtin_json_quote (ecma_stringbuilder_t *builder_p, /**< builder for the r
                                      (lit_utf8_size_t) (str_p - regular_str_start_p - 1));
       regular_str_start_p = str_p;
       ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_BACKSLASH);
-      ecma_stringbuilder_append_byte (builder_p, c);
+      ecma_stringbuilder_append_byte (builder_p, (lit_utf8_byte_t) c);
     }
-    else if (c < LIT_CHAR_SP)
+    else if (c < LIT_CHAR_SP || should_escape)
     {
+      /**
+       * In ES10 we should escape high or low surrogate characters,
+       * so we shouldn't append the unescaped character to the stringbuilder
+       */
+      uint8_t offset = should_escape ? LIT_UTF8_MAX_BYTES_IN_CODE_UNIT : 1;
+
       ecma_stringbuilder_append_raw (builder_p,
                                      regular_str_start_p,
-                                     (lit_utf8_size_t) (str_p - regular_str_start_p - 1));
+                                     (lit_utf8_size_t) (str_p - regular_str_start_p - offset));
+
       regular_str_start_p = str_p;
-      ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_BACKSLASH);
+
       switch (c)
       {
         case LIT_CHAR_BS:
         {
-          ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_B);
+          ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\b", 2);
           break;
         }
         case LIT_CHAR_FF:
         {
-          ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_F);
+          ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\f", 2);
           break;
         }
         case LIT_CHAR_LF:
         {
-          ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_N);
+          ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\n", 2);
           break;
         }
         case LIT_CHAR_CR:
         {
-          ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_R);
+          ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\r", 2);
           break;
         }
         case LIT_CHAR_TAB:
         {
-          ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_T);
+          ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\t", 2);
           break;
         }
         default: /* Hexadecimal. */
         {
-          ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_LOWERCASE_U);
-          ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_0);
-          ecma_stringbuilder_append_byte (builder_p, LIT_CHAR_0);
-
-          /* Max range 0-9, hex digits unnecessary. */
-          ecma_stringbuilder_append_byte (builder_p, (lit_utf8_byte_t) (LIT_CHAR_0 + (c >> 4)));
-
-          lit_utf8_byte_t c2 = (c & 0xf);
-          ecma_stringbuilder_append_byte (builder_p,
-                                          (lit_utf8_byte_t) (c2 + ((c2 <= 9)
-                                                                   ? LIT_CHAR_0
-                                                                   : (LIT_CHAR_LOWERCASE_A - 10))));
+          lit_char_unicode_escape (builder_p, c);
           break;
         }
       }
diff --git a/jerry-core/lit/lit-char-helpers.c b/jerry-core/lit/lit-char-helpers.c
index 6ecab2d13..9e808827b 100644
--- a/jerry-core/lit/lit-char-helpers.c
+++ b/jerry-core/lit/lit-char-helpers.c
@@ -331,6 +331,29 @@ lit_char_is_binary_digit (ecma_char_t c) /** code unit */
 } /* lit_char_is_binary_digit */
 #endif /* ENABLED (JERRY_ESNEXT) */
 
+/**
+ * UnicodeEscape abstract method
+ *
+ * Appends a backslash, 'u' and the four lowercase hexadecimal digits of the
+ * code unit (most significant digit first) to the stringbuilder.
+ *
+ * See also: ECMA-262 v10, 24.5.2.3
+ */
+void
+lit_char_unicode_escape (ecma_stringbuilder_t *builder_p, /**< stringbuilder to append */
+                         ecma_char_t c) /**< code unit to convert */
+{
+  ecma_stringbuilder_append_raw (builder_p, (lit_utf8_byte_t *) "\\u", 2);
+
+  for (int8_t i = 3; i >= 0; i--)
+  {
+    int32_t result_char = (c >> (i * 4)) & 0xF;
+    ecma_stringbuilder_append_byte (builder_p, (lit_utf8_byte_t) (result_char + (result_char <= 9
+                                                                                 ? LIT_CHAR_0
+                                                                                 : (LIT_CHAR_LOWERCASE_A - 10))));
+  }
+} /* lit_char_unicode_escape */
+
 /**
  * Convert a HexDigit character to its numeric value, as defined in ECMA-262 v5, 7.8.3
  *
diff --git a/jerry-core/lit/lit-char-helpers.h b/jerry-core/lit/lit-char-helpers.h
index d3a6dc444..49eae214d 100644
--- a/jerry-core/lit/lit-char-helpers.h
+++ b/jerry-core/lit/lit-char-helpers.h
@@ -223,6 +223,7 @@ bool lit_char_is_hex_digit (ecma_char_t c);
 #if ENABLED (JERRY_ESNEXT)
 bool lit_char_is_binary_digit (ecma_char_t c);
 #endif /* ENABLED (JERRY_ESNEXT) */
+void lit_char_unicode_escape (ecma_stringbuilder_t *builder_p, ecma_char_t c);
 uint32_t lit_char_hex_to_int (ecma_char_t c);
 size_t lit_code_point_to_cesu8_bytes (uint8_t *dst_p, lit_code_point_t code_point);
 size_t lit_code_point_get_cesu8_length (lit_code_point_t code_point);
diff --git a/tests/jerry/es.next/json-stringify.js b/tests/jerry/es.next/json-stringify.js
index afb490906..70f1d29d8 100644
--- a/tests/jerry/es.next/json-stringify.js
+++ b/tests/jerry/es.next/json-stringify.js
@@ -46,3 +46,11 @@ try {
 } catch (e) {
   assert(e instanceof TypeError);
 }
+
+// Checking quoting strings
+assert(JSON.stringify("ab𬄕\u001fc") === '"ab𬄕\\u001fc"');
+assert(JSON.stringify("ab\uDC01c\u001fd") === '"ab\\udc01c\\u001fd"');
+assert(JSON.stringify("ab\uDC01c\u001fd\uD8331e") === '"ab\\udc01c\\u001fd\\ud8331e"');
+
+// A lone high surrogate at the very end of the string must be escaped as well
+assert(JSON.stringify("ab\uD833") === '"ab\\ud833"');