From b6f2ff1ba7e628d97f1e1b64fe9b77050a248152 Mon Sep 17 00:00:00 2001 From: Szilagyi Adam Date: Tue, 17 Dec 2019 11:42:29 +0100 Subject: [PATCH] Implement binary literal parsing (#3439) This patch will allow the user to use binary literals starting with 0b or 0B; these literals will be evaluated at parse time, resulting in an integer. Co-authored-by: Robert Fancsik frobert@inf.u-szeged.hu JerryScript-DCO-1.0-Signed-off-by: Adam Szilagyi aszilagy@inf.u-szeged.hu --- jerry-core/lit/lit-char-helpers.c | 13 +++++++ jerry-core/lit/lit-char-helpers.h | 3 ++ jerry-core/parser/js/js-lexer.c | 37 +++++++++++++++++-- jerry-core/parser/js/js-lexer.h | 1 + jerry-core/parser/js/js-parser-util.c | 6 ++++ jerry-core/parser/js/js-parser.h | 3 ++ tests/jerry/es2015/binary-literal.js | 52 +++++++++++++++++++++++++++ 7 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 tests/jerry/es2015/binary-literal.js diff --git a/jerry-core/lit/lit-char-helpers.c b/jerry-core/lit/lit-char-helpers.c index c87a8185a..df1273274 100644 --- a/jerry-core/lit/lit-char-helpers.c +++ b/jerry-core/lit/lit-char-helpers.c @@ -293,6 +293,19 @@ lit_char_is_hex_digit (ecma_char_t c) /**< code unit */ && LEXER_TO_ASCII_LOWERCASE (c) <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)); } /* lit_char_is_hex_digit */ +#if ENABLED (JERRY_ES2015) +/** + * Check if specified character is one of BinaryDigits characters (ECMA-262 v6, 11.8.3) + * + * @return true / false + */ +bool +lit_char_is_binary_digit (ecma_char_t c) /**< code unit */ +{ + return (c == LIT_CHAR_0 || c == LIT_CHAR_1); +} /* lit_char_is_binary_digit */ +#endif /* ENABLED (JERRY_ES2015) */ + /** * Convert a HexDigit character to its numeric value, as defined in ECMA-262 v5, 7.8.3 * diff --git a/jerry-core/lit/lit-char-helpers.h b/jerry-core/lit/lit-char-helpers.h index c1d511091..e6dbe6c58 100644 --- a/jerry-core/lit/lit-char-helpers.h +++ b/jerry-core/lit/lit-char-helpers.h @@ -212,6 +212,9 @@ bool lit_code_point_is_identifier_part 
(lit_code_point_t code_point); bool lit_char_is_octal_digit (ecma_char_t c); bool lit_char_is_decimal_digit (ecma_char_t c); bool lit_char_is_hex_digit (ecma_char_t c); +#if ENABLED (JERRY_ES2015) +bool lit_char_is_binary_digit (ecma_char_t c); +#endif /* ENABLED (JERRY_ES2015) */ uint32_t lit_char_hex_to_int (ecma_char_t c); size_t lit_code_point_to_cesu8_bytes (uint8_t *dst_p, lit_code_point_t code_point); size_t lit_code_point_get_cesu8_length (lit_code_point_t code_point); diff --git a/jerry-core/parser/js/js-lexer.c b/jerry-core/parser/js/js-lexer.c index cb1e7a41e..db94b3e0e 100644 --- a/jerry-core/parser/js/js-lexer.c +++ b/jerry-core/parser/js/js-lexer.c @@ -33,6 +33,9 @@ * @{ */ +JERRY_STATIC_ASSERT (LEXER_NUMBER_BINARY > LEXER_NUMBER_OCTAL, + lexer_number_binary_must_be_greater_than_lexer_number_octal); + /** * Check whether the UTF-8 intermediate is an octet or not */ @@ -1255,6 +1258,28 @@ lexer_parse_number (parser_context_t *context_p) /**< context */ { parser_raise_error (context_p, PARSER_ERR_INVALID_NUMBER); } +#if ENABLED (JERRY_ES2015) + else if (LEXER_TO_ASCII_LOWERCASE (source_p[1]) == LIT_CHAR_LOWERCASE_B) + { + context_p->token.extra_value = LEXER_NUMBER_BINARY; + context_p->token.lit_location.char_p++; + context_p->source_p++; + source_p += 2; + + if (source_p >= source_end_p + || !lit_char_is_binary_digit (source_p[0])) + { + parser_raise_error (context_p, PARSER_ERR_INVALID_BIN_DIGIT); + } + + do + { + source_p++; + } + while (source_p < source_end_p + && lit_char_is_binary_digit (source_p[0])); + } +#endif /* ENABLED (JERRY_ES2015) */ else { can_be_float = true; @@ -2201,7 +2226,7 @@ lexer_construct_number_object (parser_context_t *context_p, /**< context */ uint32_t literal_index = 0; prop_length_t length = context_p->token.lit_location.length; - if (context_p->token.extra_value != LEXER_NUMBER_OCTAL) + if (context_p->token.extra_value < LEXER_NUMBER_OCTAL) { num = ecma_utf8_string_to_number (context_p->token.lit_location.char_p, 
length); @@ -2210,12 +2235,20 @@ lexer_construct_number_object (parser_context_t *context_p, /**< context */ { const uint8_t *src_p = context_p->token.lit_location.char_p; const uint8_t *src_end_p = src_p + length - 1; + ecma_number_t multiplier = 8.0; + +#if ENABLED (JERRY_ES2015) + if (context_p->token.extra_value == LEXER_NUMBER_BINARY) + { + multiplier = 2.0; + } +#endif /* ENABLED (JERRY_ES2015) */ num = 0; do { src_p++; - num = num * 8 + (ecma_number_t) (*src_p - LIT_CHAR_0); + num = num * multiplier + (ecma_number_t) (*src_p - LIT_CHAR_0); } while (src_p < src_end_p); } diff --git a/jerry-core/parser/js/js-lexer.h b/jerry-core/parser/js/js-lexer.h index 00e18990b..fc94fe288 100644 --- a/jerry-core/parser/js/js-lexer.h +++ b/jerry-core/parser/js/js-lexer.h @@ -248,6 +248,7 @@ typedef enum LEXER_NUMBER_DECIMAL, /**< decimal number */ LEXER_NUMBER_HEXADECIMAL, /**< hexadecimal number */ LEXER_NUMBER_OCTAL, /**< octal number */ + LEXER_NUMBER_BINARY, /**< binary number */ } lexer_number_type_t; /** diff --git a/jerry-core/parser/js/js-parser-util.c b/jerry-core/parser/js/js-parser-util.c index 8e63786c7..9be865046 100644 --- a/jerry-core/parser/js/js-parser-util.c +++ b/jerry-core/parser/js/js-parser-util.c @@ -836,6 +836,12 @@ parser_error_to_string (parser_error_t error) /**< error code */ { return "Invalid hexadecimal digit."; } +#if ENABLED (JERRY_ES2015) + case PARSER_ERR_INVALID_BIN_DIGIT: + { + return "Invalid binary digit."; + } +#endif /* ENABLED (JERRY_ES2015) */ case PARSER_ERR_INVALID_ESCAPE_SEQUENCE: { return "Invalid escape sequence."; diff --git a/jerry-core/parser/js/js-parser.h b/jerry-core/parser/js/js-parser.h index 505d2c4f7..6643dab33 100644 --- a/jerry-core/parser/js/js-parser.h +++ b/jerry-core/parser/js/js-parser.h @@ -44,6 +44,9 @@ typedef enum PARSER_ERR_INVALID_CHARACTER, /**< unexpected character */ PARSER_ERR_INVALID_OCTAL_DIGIT, /**< invalid octal digit */ PARSER_ERR_INVALID_HEX_DIGIT, /**< invalid hexadecimal digit */ +#if ENABLED 
(JERRY_ES2015) + PARSER_ERR_INVALID_BIN_DIGIT, /**< invalid binary digit */ +#endif /* ENABLED (JERRY_ES2015) */ PARSER_ERR_INVALID_ESCAPE_SEQUENCE, /**< invalid escape sequence */ PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE, /**< invalid unicode escape sequence */ PARSER_ERR_INVALID_IDENTIFIER_START, /**< character cannot be start of an identifier */ diff --git a/tests/jerry/es2015/binary-literal.js b/tests/jerry/es2015/binary-literal.js new file mode 100644 index 000000000..ac70d0140 --- /dev/null +++ b/tests/jerry/es2015/binary-literal.js @@ -0,0 +1,52 @@ +/* Copyright JS Foundation and other contributors, http://js.foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +function checkSyntaxError (str) { + try { + eval(str); + assert(false); + } catch (e) { + assert(e instanceof SyntaxError); + } +} + +// Test with invalid literals +checkSyntaxError("0c"); +checkSyntaxError("0b"); +checkSyntaxError("0b0123456"); +checkSyntaxError("0b2"); + +checkSyntaxError("0C"); +checkSyntaxError("0B"); +checkSyntaxError("0B2"); + +checkSyntaxError("000b01010101"); +checkSyntaxError("010b01010101"); +checkSyntaxError("11 0b01010101"); + +// Test with valid literals +assert(0b111 === 7); +assert(0b111110111 === 503); +assert(0b111101010101 === 3925); +assert(0b00000000000001 === 1); +assert(0b00000000000000 === 0); +assert(0b1101001 === parseInt ("1101001", 2)); + +assert(0B111 === 7); +assert(0B111110111 === 503); +assert(0B111101010101 === 3925); +assert(0B00000000000001 === 1); +assert(0B00000000000000 === 0); +assert(0B1101001 === parseInt ("1101001", 2));