Added RegExp dotAll flag (#4000)

JerryScript-DCO-1.0-Signed-off-by: bence gabor kis kisbg@inf.u-szeged.hu
This commit is contained in:
kisbg
2020-07-24 13:42:57 +02:00
committed by GitHub
parent cf097ca16b
commit d39a076b2e
11 changed files with 87 additions and 11 deletions
+2 -1
View File
@@ -1357,7 +1357,8 @@ void
ecma_bytecode_deref (ecma_compiled_code_t *bytecode_p) /**< byte code pointer */
{
JERRY_ASSERT (bytecode_p->refs > 0);
JERRY_ASSERT (!(bytecode_p->status_flags & CBC_CODE_FLAGS_STATIC_FUNCTION));
JERRY_ASSERT (!CBC_IS_FUNCTION (bytecode_p->status_flags)
|| !(bytecode_p->status_flags & CBC_CODE_FLAGS_STATIC_FUNCTION));
bytecode_p->refs--;
@@ -55,6 +55,7 @@ enum
ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_MULTILINE,
ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_STICKY,
ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_UNICODE,
ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_DOT_ALL,
#endif /* ENABLED (JERRY_ESNEXT) */
#if ENABLED (JERRY_BUILTIN_ANNEXB)
ECMA_REGEXP_PROTOTYPE_ROUTINE_COMPILE,
@@ -107,7 +108,8 @@ ecma_builtin_regexp_prototype_flags_helper (ecma_extended_object_t *re_obj_p, /*
RE_FLAG_IGNORE_CASE,
RE_FLAG_MULTILINE,
RE_FLAG_STICKY,
RE_FLAG_UNICODE
RE_FLAG_UNICODE,
RE_FLAG_DOTALL,
};
uint16_t offset = (uint16_t) (builtin_routine_id - ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_GLOBAL);
@@ -133,6 +135,7 @@ ecma_builtin_regexp_prototype_get_flags (ecma_object_t *object_p) /**< this obje
LIT_MAGIC_STRING_GLOBAL,
LIT_MAGIC_STRING_IGNORECASE_UL,
LIT_MAGIC_STRING_MULTILINE,
LIT_MAGIC_STRING_DOTALL,
LIT_MAGIC_STRING_UNICODE,
LIT_MAGIC_STRING_STICKY
};
@@ -142,6 +145,7 @@ ecma_builtin_regexp_prototype_get_flags (ecma_object_t *object_p) /**< this obje
LIT_CHAR_LOWERCASE_G,
LIT_CHAR_LOWERCASE_I,
LIT_CHAR_LOWERCASE_M,
LIT_CHAR_LOWERCASE_S,
LIT_CHAR_LOWERCASE_U,
LIT_CHAR_LOWERCASE_Y
};
@@ -607,6 +611,7 @@ ecma_builtin_regexp_prototype_dispatch_routine (uint16_t builtin_routine_id, /**
case ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_MULTILINE:
case ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_STICKY:
case ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_UNICODE:
case ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_DOT_ALL:
{
ecma_extended_object_t *re_obj_p = (ecma_extended_object_t *) obj_p;
@@ -43,6 +43,10 @@ ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_IGNORECASE_UL,
ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_IGNORE_CASE,
ECMA_PROPERTY_FLAG_CONFIGURABLE)
ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_DOTALL,
ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_DOT_ALL,
ECMA_PROPERTY_FLAG_CONFIGURABLE)
ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_MULTILINE,
ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_MULTILINE,
ECMA_PROPERTY_FLAG_CONFIGURABLE)
@@ -98,6 +98,13 @@ ecma_regexp_parse_flags (ecma_string_t *flags_str_p, /**< Input string with flag
flag = RE_FLAG_UNICODE;
break;
}
#if ENABLED (JERRY_ESNEXT)
case 's':
{
flag = RE_FLAG_DOTALL;
break;
}
#endif /* ENABLED (JERRY_ESNEXT) */
default:
{
flag = RE_FLAG_EMPTY;
@@ -1505,7 +1512,9 @@ class_found:
const lit_code_point_t cp = ecma_regexp_unicode_advance (&str_curr_p, re_ctx_p->input_end_p);
if (JERRY_UNLIKELY (cp <= LIT_UTF16_CODE_UNIT_MAX && lit_char_is_line_terminator ((ecma_char_t) cp)))
if (!(re_ctx_p->flags & RE_FLAG_DOTALL)
&& JERRY_UNLIKELY (cp <= LIT_UTF16_CODE_UNIT_MAX
&& lit_char_is_line_terminator ((ecma_char_t) cp)))
{
goto fail;
}
@@ -1521,8 +1530,13 @@ class_found:
}
const ecma_char_t ch = lit_cesu8_read_next (&str_curr_p);
#if !ENABLED (JERRY_ESNEXT)
bool has_dot_all_flag = false;
#else /* ENABLED (JERRY_ESNEXT) */
bool has_dot_all_flag = (re_ctx_p->flags & RE_FLAG_DOTALL) != 0;
#endif /* !ENABLED (JERRY_ESNEXT) */
if (lit_char_is_line_terminator (ch))
if (!has_dot_all_flag && lit_char_is_line_terminator (ch))
{
goto fail;
}
@@ -40,7 +40,8 @@ typedef enum
RE_FLAG_IGNORE_CASE = (1u << 2), /**< ECMA-262 v5, 15.10.7.3 */
RE_FLAG_MULTILINE = (1u << 3), /**< ECMA-262 v5, 15.10.7.4 */
RE_FLAG_STICKY = (1u << 4), /**< ECMA-262 v6, 21.2.5.12 */
RE_FLAG_UNICODE = (1u << 5) /**< ECMA-262 v6, 21.2.5.15 */
RE_FLAG_UNICODE = (1u << 5), /**< ECMA-262 v6, 21.2.5.15 */
RE_FLAG_DOTALL = (1u << 6) /**< ECMA-262 v9, 21.2.5.3 */
/* Bits from bit 13 are reserved for function types (see CBC_FUNCTION_TYPE_SHIFT). */
} ecma_regexp_flags_t;
+3
View File
@@ -338,6 +338,9 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CREATE, "create")
|| ENABLED (JERRY_BUILTIN_WEAKSET)
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DELETE, "delete")
#endif
#if ENABLED (JERRY_BUILTIN_REGEXP) && ENABLED (JERRY_ESNEXT)
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DOTALL, "dotAll")
#endif
#if ENABLED (JERRY_BUILTIN_ANNEXB)
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ESCAPE, "escape")
#endif
+1
View File
@@ -182,6 +182,7 @@ LIT_MAGIC_STRING_SQRT1_2_U = "SQRT1_2"
LIT_MAGIC_STRING_BOOLEAN = "boolean"
LIT_MAGIC_STRING_COMPILE = "compile"
LIT_MAGIC_STRING_DEFAULT = "default"
LIT_MAGIC_STRING_DOTALL = "dotAll"
LIT_MAGIC_STRING_FOR_EACH_UL = "forEach"
LIT_MAGIC_STRING_GET_DATE_UL = "getDate"
LIT_MAGIC_STRING_GET_INT8_UL = "getInt8"
+6
View File
@@ -2816,6 +2816,12 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
{
flag = RE_FLAG_STICKY;
}
#if ENABLED (JERRY_ESNEXT)
else if (source_p[0] == LIT_CHAR_LOWERCASE_S)
{
flag = RE_FLAG_DOTALL;
}
#endif /* ENABLED (JERRY_ESNEXT) */
if (flag == 0)
{
-5
View File
@@ -36,11 +36,6 @@
*/
#define RE_CACHE_SIZE 8u
/**
* RegExp flags mask (first 10 bits are for reference count and the rest for the actual RegExp flags)
*/
#define RE_FLAGS_MASK 0x3F
/**
* Maximum value that can be encoded in the RegExp bytecode as a single byte.
*/
+1 -1
View File
@@ -61,7 +61,7 @@ re_cache_lookup (ecma_string_t *pattern_str_p, /**< pattern string */
ecma_string_t *cached_pattern_str_p = ecma_get_string_from_value (cached_bytecode_p->source);
if ((cached_bytecode_p->header.status_flags & RE_FLAGS_MASK) == flags
if (cached_bytecode_p->header.status_flags == flags
&& ecma_compare_ecma_strings (cached_pattern_str_p, pattern_str_p))
{
return cached_bytecode_p;
+46
View File
@@ -0,0 +1,46 @@
// Copyright JS Foundation and other contributors, http://js.foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Regression test for the RegExp dotAll ('s') flag.
//
// str1 has a line feed exactly where the pattern's '.' sits, so it can only
// match when '.' accepts line terminators. str2 has the line feed one
// character later, so the pattern does not match it with or without the flag.
var str1 = 'bar\nexample foo example';
var str2 = 'bare\nxample foo example';

// The same pattern built through the constructor, once with 's' and once without.
var regex_with_dotAll_flag = new RegExp ('bar.example', 's');
var regex_without_dotAll_flag = new RegExp ('bar.example');

// RegExp.prototype.dotAll reports whether the 's' flag was supplied.
assert (regex_with_dotAll_flag.dotAll == true);
assert (regex_without_dotAll_flag.dotAll == false);

// With 's' the "bar\nexample" prefix is matched and removed.
assert (str1.replace (regex_with_dotAll_flag, '') == " foo example");
// Without 's' nothing matches, so the input comes back unchanged.
assert (str1.replace (regex_without_dotAll_flag, '') == str1);
// The misplaced line feed in str2 defeats the pattern even with 's'.
assert (str2.replace (regex_with_dotAll_flag, "") == str2);

// Combined with 'u' (and 'm'), a lone '.' must accept every single code point:
// ordinary characters, all four line terminators, a supplementary-plane
// character and unpaired surrogates.
for (let re of [/^.$/su, /^.$/sum])
{
  for (let input of ["a", "3", "π", "\u2027", "\u0085", "\v", "\f", "\u180E",
                     "\u{10300}", "\n", "\r", "\u2028", "\u2029", "\uD800", "\uDFFF"])
  {
    assert (re.test (input));
  }
}