Support the \u200C (zero-width non-joiner) and \u200D (zero-width joiner) Unicode characters (#3266)

JerryScript-DCO-1.0-Signed-off-by: Robert Fancsik <frobert@inf.u-szeged.hu>
This commit is contained in:
Robert Fancsik
2019-10-31 11:14:13 +01:00
committed by GitHub
parent 6a342fcdd6
commit eee41ec734
3 changed files with 49 additions and 10 deletions
+8
View File
@@ -137,6 +137,14 @@ class UnicodeCategorizer(object):
if zero_width_space not in separators:
bisect.insort(separators, int(zero_width_space))
# https://www.ecma-international.org/ecma-262/5.1/#sec-7.1 format-control characters
non_letters = self._categories['non_letters']
zero_width_non_joiner = 0x200C
zero_width_joiner = 0x200D
bisect.insort(non_letters, int(zero_width_non_joiner))
bisect.insort(non_letters, int(zero_width_joiner))
return self._categories['letters'], self._categories['non_letters'], self._categories['separators']