Support \u200C \u200D unicode characters (#3266)
JerryScript-DCO-1.0-Signed-off-by: Robert Fancsik frobert@inf.u-szeged.hu
This commit is contained in:
@@ -137,6 +137,14 @@ class UnicodeCategorizer(object):
|
||||
if zero_width_space not in separators:
|
||||
bisect.insort(separators, int(zero_width_space))
|
||||
|
||||
# https://www.ecma-international.org/ecma-262/5.1/#sec-7.1 format-control characters
|
||||
non_letters = self._categories['non_letters']
|
||||
zero_width_non_joiner = 0x200C
|
||||
zero_width_joiner = 0x200D
|
||||
|
||||
bisect.insort(non_letters, int(zero_width_non_joiner))
|
||||
bisect.insort(non_letters, int(zero_width_joiner))
|
||||
|
||||
return self._categories['letters'], self._categories['non_letters'], self._categories['separators']
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user