From afa7b78b451032d70afa693bd52c34e7825dd7ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20K=C3=A1d=C3=A1r?= Date: Mon, 15 Feb 2016 16:31:48 +0100 Subject: [PATCH] New unicode character handling using array data structure. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JerryScript-DCO-1.0-Signed-off-by: István Kádár ikadar@inf.u-szeged.hu --- jerry-core/lit/lit-char-helpers.c | 275 +- jerry-core/lit/lit-char-helpers.h | 5 +- jerry-core/lit/lit-unicode-ranges.inc.h | 4500 +---------------------- jerry-core/parser/js/common.c | 4 +- tools/print-unicode-ranges.sh | 282 +- 5 files changed, 464 insertions(+), 4602 deletions(-) diff --git a/jerry-core/lit/lit-char-helpers.c b/jerry-core/lit/lit-char-helpers.c index 9da945ec5..b86b27ccd 100644 --- a/jerry-core/lit/lit-char-helpers.c +++ b/jerry-core/lit/lit-char-helpers.c @@ -14,9 +14,89 @@ */ #include "lit-char-helpers.h" - +#include "lit/lit-unicode-ranges.inc.h" #include "lit-strings.h" +#define NUM_OF_ELEMENTS(array) (sizeof (array) / sizeof ((array)[0])) + +/** + * Binary search algorithm that searches the a + * character in the given char array. + * + * @return true - if the character is in the given array + * false - otherwise + */ +static bool +search_char_in_char_array (ecma_char_t c, /**< code unit */ + const ecma_char_t *array, /**< array */ + int size_of_array) /**< length of the array */ +{ + int bottom = 0; + int top = size_of_array - 1; + + while (bottom <= top) + { + int middle = (bottom + top) / 2; + ecma_char_t current = array[middle]; + + if (current == c) + { + return true; + } + + if (c < current) + { + top = middle - 1; + } + else + { + bottom = middle + 1; + } + } + + return false; +} /* search_char_in_char_array */ + +/** + * Binary search algorithm that searches a character in the given intervals. + * Intervals specifed by two arrays. The first one contains the starting points + * of the intervals, the second one contains the length of them. 
+ * + * @return true - if the character is included (inclusively) in one of the intervals in the given array + * false - otherwise + */ +static bool +search_char_in_interval_array (ecma_char_t c, /**< code unit */ + const ecma_char_t *array_sp, /**< array of interval starting points */ + const uint8_t *lengths, /**< array of interval lengths */ + int size_of_array) /**< length of the array */ +{ + int bottom = 0; + int top = size_of_array - 1; + + while (bottom <= top) + { + int middle = (bottom + top) / 2; + ecma_char_t current_sp = array_sp[middle]; + + if (current_sp <= c && c <= current_sp + lengths[middle]) + { + return true; + } + + if (c > current_sp) + { + bottom = middle + 1; + } + else + { + top = middle - 1; + } + } + + return false; +} /* search_char_in_interval_array */ + /** * Check if specified character is one of the Format-Control characters * @@ -32,30 +112,8 @@ lit_char_is_format_control (ecma_char_t c) /**< code unit */ } /* lit_char_is_format_control */ /** - * Check if specified character is the Space Separator character - * - * See also: - * ECMA-262 v5, Table 2 - * - * @return true - if the character falls into "Space, Separator" ("Zs") character category, - * false - otherwise. - */ -bool -lit_char_is_space_separator (ecma_char_t c) /**< code unit */ -{ - /* Zs */ -#define LIT_UNICODE_RANGE_ZS(range_begin, range_end) \ - if (c >= (range_begin) && c <= (range_end)) \ - { \ - return true; \ - } -#include "lit-unicode-ranges.inc.h" - - return false; -} /* lit_char_is_space_separator */ - -/** - * Check if specified character is one of the Whitespace characters + * Check if specified character is one of the Whitespace characters including those + * that fall into "Space, Separator" ("Zs") Unicode character category. * * @return true - if the character is one of characters, listed in ECMA-262 v5, Table 2, * false - otherwise.
@@ -63,13 +121,21 @@ lit_char_is_space_separator (ecma_char_t c) /**< code unit */ bool lit_char_is_white_space (ecma_char_t c) /**< code unit */ { - return (c == LIT_CHAR_TAB - || c == LIT_CHAR_VTAB - || c == LIT_CHAR_FF - || c == LIT_CHAR_SP - || c == LIT_CHAR_NBSP - || c == LIT_CHAR_BOM - || lit_char_is_space_separator (c)); + if (c <= 127) + { + return (c == LIT_CHAR_TAB + || c == LIT_CHAR_VTAB + || c == LIT_CHAR_FF + || c == LIT_CHAR_SP); + } + else + { + return (c == LIT_CHAR_NBSP + || c == LIT_CHAR_BOM + || (c >= unicode_separator_char_interv_sps[0] + && c <= unicode_separator_char_interv_sps[0] + unicode_separator_char_interv_lens[0]) + || search_char_in_char_array (c, unicode_separator_chars, NUM_OF_ELEMENTS (unicode_separator_chars))); + } } /* lit_char_is_white_space */ /** @@ -119,60 +185,18 @@ lit_char_is_unicode_letter (ecma_char_t c) /**< code unit */ return false; } - /* Lu */ -#define LIT_UNICODE_RANGE_LU(range_begin, range_end) \ - if (c >= (range_begin) && c <= (range_end)) \ - { \ - return true; \ - } - - /* Ll */ -#define LIT_UNICODE_RANGE_LL(range_begin, range_end) \ - if (c >= (range_begin) && c <= (range_end)) \ - { \ - return true; \ - } - - /* Lt */ -#define LIT_UNICODE_RANGE_LT(range_begin, range_end) \ - if (c >= (range_begin) && c <= (range_end)) \ - { \ - return true; \ - } - - /* Lm */ -#define LIT_UNICODE_RANGE_LM(range_begin, range_end) \ - if (c >= (range_begin) && c <= (range_end)) \ - { \ - return true; \ - } - - /* Lo */ -#define LIT_UNICODE_RANGE_LO(range_begin, range_end) \ - if (c >= (range_begin) && c <= (range_end)) \ - { \ - return true; \ - } - - /* Nl */ -#define LIT_UNICODE_RANGE_NL(range_begin, range_end) \ - if (c >= (range_begin) && c <= (range_end)) \ - { \ - return true; \ - } - -#include "lit-unicode-ranges.inc.h" - - return false; + return (search_char_in_interval_array (c, unicode_letter_interv_sps, unicode_letter_interv_lens, + NUM_OF_ELEMENTS (unicode_letter_interv_sps)) + || search_char_in_char_array (c, 
unicode_letter_chars, NUM_OF_ELEMENTS (unicode_letter_chars))); } /* lit_char_is_unicode_letter */ /** - * Check if specified character is a unicode combining mark - * - * Note: - * Unicode combining mark is a character, included into one of the following categories: - * - Non-spacing mark (Mn); - * - Combining spacing mark (Mc). + * Check if specified character is a non-letter character and can be used as a + * non-first character of an identifier. + * These characters are covered by the following unicode categories: + * - digit (Nd) + * - combining mark (Mn, Mc) + * - connector punctuation (Pc) * * See also: * ECMA-262 v5, 7.6 @@ -181,82 +205,21 @@ lit_char_is_unicode_letter (ecma_char_t c) /**< code unit */ * false - otherwise. */ bool -lit_char_is_unicode_combining_mark (ecma_char_t c) /**< code unit */ +lit_char_is_unicode_non_letter_ident_part (ecma_char_t c) /**< code unit */ { - /* Mn */ -#define LIT_UNICODE_RANGE_MN(range_begin, range_end) \ - if (c >= (range_begin) && c <= (range_end)) \ - { \ - return true; \ + if (c <= 127) + { + return LIT_CHAR_ASCII_DIGITS_BEGIN <= c && c <= LIT_CHAR_ASCII_DIGITS_END; } - - /* Mc */ -#define LIT_UNICODE_RANGE_MC(range_begin, range_end) \ - if (c >= (range_begin) && c <= (range_end)) \ - { \ - return true; \ + else + { + return (search_char_in_interval_array (c, unicode_non_letter_ident_part_interv_sps, + unicode_non_letter_ident_part_interv_lens, + NUM_OF_ELEMENTS (unicode_non_letter_ident_part_interv_sps)) + || search_char_in_char_array (c, unicode_non_letter_ident_part_chars, + NUM_OF_ELEMENTS (unicode_non_letter_ident_part_chars))); } - -#include "lit-unicode-ranges.inc.h" - - return false; -} /* lit_char_is_unicode_combining_mark */ - -/** - * Check if specified character is a unicode digit - * - * Note: - * Unicode digit is a character, included into the following category: - * - Decimal number (Nd).
- * - * See also: - * ECMA-262 v5, 7.6 - * - * @return true - if specified character falls into the specified category, - * false - otherwise. - */ -bool -lit_char_is_unicode_digit (ecma_char_t c) /**< code unit */ -{ - /* Nd */ -#define LIT_UNICODE_RANGE_ND(range_begin, range_end) \ - if (c >= (range_begin) && c <= (range_end)) \ - { \ - return true; \ - } - -#include "lit-unicode-ranges.inc.h" - - return false; -} /* lit_char_is_unicode_digit */ - -/** - * Check if specified character is a unicode connector punctuation - * - * Note: - * Unicode connector punctuation is a character, included into the following category: - * - Connector punctuation (Pc). - * - * See also: - * ECMA-262 v5, 7.6 - * - * @return true - if specified character falls into the specified category, - * false - otherwise. - */ -bool -lit_char_is_unicode_connector_punctuation (ecma_char_t c) /**< code unit */ -{ - /* Pc */ -#define LIT_UNICODE_RANGE_PC(range_begin, range_end) \ - if (c >= (range_begin) && c <= (range_end)) \ - { \ - return true; \ - } - -#include "lit-unicode-ranges.inc.h" - - return false; -} /* lit_char_is_unicode_connector_punctuation */ +} /* lit_char_is_unicode_non_letter_ident_part */ /** * Check if specified character is one of OctalDigit characters (ECMA-262 v5, B.1.2) diff --git a/jerry-core/lit/lit-char-helpers.h b/jerry-core/lit/lit-char-helpers.h index 63e460281..a01c2aed2 100644 --- a/jerry-core/lit/lit-char-helpers.h +++ b/jerry-core/lit/lit-char-helpers.h @@ -37,7 +37,6 @@ extern bool lit_char_is_format_control (ecma_char_t); #define LIT_CHAR_NBSP ((ecma_char_t) 0x00A0) /* no-break space */ /* LIT_CHAR_BOM is defined above */ -extern bool lit_char_is_space_separator (ecma_char_t); extern bool lit_char_is_white_space (ecma_char_t); /* @@ -77,9 +76,7 @@ extern bool lit_char_is_line_terminator (ecma_char_t); /* LIT_CHAR_BACKSLASH defined above */ extern bool lit_char_is_unicode_letter (ecma_char_t); -extern bool lit_char_is_unicode_combining_mark (ecma_char_t); 
-extern bool lit_char_is_unicode_digit (ecma_char_t); -extern bool lit_char_is_unicode_connector_punctuation (ecma_char_t); +extern bool lit_char_is_unicode_non_letter_ident_part (ecma_char_t); /* * Punctuator characters (ECMA-262 v5, 7.7) diff --git a/jerry-core/lit/lit-unicode-ranges.inc.h b/jerry-core/lit/lit-unicode-ranges.inc.h index 233343b89..4e54c5810 100644 --- a/jerry-core/lit/lit-unicode-ranges.inc.h +++ b/jerry-core/lit/lit-unicode-ranges.inc.h @@ -1,4 +1,5 @@ -/* Copyright 2015 Samsung Electronics Co., Ltd. +/* Copyright 2015-2016 Samsung Electronics Co., Ltd. + * Copyright 2015-2016 University of Szeged. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -11,4343 +12,200 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ - -/** - * List of unicode character ranges per category * + * + * Unicode characters and ranges generated by tools/print-unicode-ranges.sh + * from UnicodeData-3.0.0.txt. * See also: * http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt * http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html + */ + +#ifndef LIT_UNICODE_RANGES_INC_H_ +#define LIT_UNICODE_RANGES_INC_H_ + +/** + * Character interval starting points for the unicode letters. 
* - * The range lists were generated using tools/print-unicode-ranges.sh script from UnicodeData-3.0.0.txt + * The characters covered by these intervalse are from + * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl + */ +static const uint16_t unicode_letter_interv_sps[] = +{ +/* + * these are handled separetely + * 0x0041, len 25 + * 0x0061, len 25 + */ + 0x00C0, 0x00D8, 0XF8, 0X1F8, 0x0222, 0x0250, 0x02B0, 0x02BB, + 0x02D0, 0x02E0, 0x0388, 0x038E, 0x03A3, 0x03D0, 0x03DA, 0x0400, 0x048C, 0x04C7, + 0x04CB, 0x04D0, 0x04F8, 0x0531, 0x0561, 0x05D0, 0x05F0, 0x0621, 0x0640, 0x0671, + 0x06E5, 0x06FA, 0x0712, 0x0780, 0x0905, 0x0958, 0x0985, 0x098F, 0x0993, 0x09AA, + 0x09B6, 0x09DC, 0x09DF, 0x09F0, 0x0A05, 0x0A0F, 0x0A13, 0x0A2A, 0x0A32, 0x0A35, + 0x0A38, 0x0A59, 0x0A72, 0x0A85, 0x0A8F, 0x0A93, 0x0AAA, 0x0AB2, 0x0AB5, 0x0B05, + 0x0B0F, 0x0B13, 0x0B2A, 0x0B32, 0x0B36, 0x0B5C, 0x0B5F, 0x0B85, 0x0B8E, 0x0B92, + 0x0B99, 0x0B9E, 0x0BA3, 0x0BA8, 0x0BAE, 0x0BB7, 0x0C05, 0x0C0E, 0x0C12, 0x0C2A, + 0x0C35, 0x0C60, 0x0C85, 0x0C8E, 0x0C92, 0x0CAA, 0x0CB5, 0x0CE0, 0x0D05, 0x0D0E, + 0x0D12, 0x0D2A, 0x0D60, 0x0D85, 0x0D9A, 0x0DB3, 0x0DC0, 0x0E01, 0x0E32, 0x0E40, + 0x0E81, 0x0E87, 0x0E94, 0x0E99, 0x0EA1, 0x0EAA, 0x0EAD, 0x0EB2, 0x0EC0, 0x0EDC, + 0x0F40, 0x0F49, 0x0F88, 0x1000, 0x1023, 0x1029, 0x1050, 0x10A0, 0x10D0, 0x1100, + 0x115F, 0x11A8, 0x1200, 0x1208, 0x124A, 0x1250, 0x125A, 0x1260, 0x128A, 0x1290, + 0x12B2, 0x12B8, 0x12C2, 0x12C8, 0x12D0, 0x12D8, 0x12F0, 0x1312, 0x1318, 0x1320, + 0x1348, 0x13A0, 0X1401, 0X1501, 0X1601, 0x166F, 0x1681, 0x16A0, 0x1780, 0x1820, + 0x1880, 0x1E00, 0x1EA0, 0x1F00, 0x1F18, 0x1F20, 0x1F48, 0x1F50, 0x1F5F, 0x1F80, + 0x1FB6, 0x1FC2, 0x1FC6, 0x1FD0, 0x1FD6, 0x1FE0, 0x1FF2, 0x1FF6, 0x210A, 0x2119, + 0x212A, 0x212F, 0x2133, 0x2160, 0x3005, 0x3021, 0x3031, 0x3038, 0x3041, 0x309D, + 0x30A1, 0x30FC, 0x3105, 0x3131, 0x31A0, 0XA000, 0XA100, 0XA200, 0XA300, 0XA400, + 0XF900, 0XFA00, 0xFB00, 0xFB13, 0xFB1F, 0xFB2A, 0xFB38, 0xFB40, 0xFB43, 0xFB46, + 0XFBD3, 
0XFCD3, 0xFD50, 0xFD92, 0xFDF0, 0xFE70, 0xFE76, 0xFF21, 0xFF41, 0xFF66, + 0xFFC2, 0xFFCA, 0xFFD2, 0xFFDA +}; + +/** + * Character lengths for the unicode letters. * - * NOTE: - * Some ranges in "Separator, Space" category were added manually, see the according definitions. + * The characters covered by these intervalse are from + * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl */ +static const uint8_t unicode_letter_interv_lens[] = +{ + 22, 30, 255, 39, 17, 93, 8, 6, + 1, 4, 2, 19, 43, 7, 25, 129, 56, 1, + 1, 37, 1, 37, 38, 26, 2, 25, 10, 98, + 1, 2, 26, 37, 52, 9, 7, 1, 21, 6, + 3, 1, 2, 1, 5, 1, 21, 6, 1, 1, + 1, 3, 2, 6, 2, 21, 6, 1, 4, 7, + 1, 21, 6, 1, 3, 1, 2, 5, 2, 3, + 1, 1, 1, 2, 7, 2, 7, 2, 22, 9, + 4, 1, 7, 2, 22, 9, 4, 1, 7, 2, + 22, 15, 1, 17, 23, 8, 6, 47, 1, 6, + 1, 1, 3, 6, 2, 1, 3, 1, 4, 1, + 7, 33, 3, 33, 4, 1, 5, 37, 38, 89, + 67, 81, 6, 62, 3, 6, 3, 38, 3, 30, + 3, 6, 3, 6, 6, 22, 30, 3, 6, 38, + 18, 84, 255, 255, 107, 7, 25, 74, 51, 87, + 40, 155, 89, 21, 5, 37, 5, 7, 30, 52, + 6, 2, 6, 3, 5, 12, 2, 6, 9, 4, + 3, 2, 6, 35, 2, 8, 4, 2, 83, 1, + 89, 2, 39, 93, 23, 255, 255, 255, 255, 140, + 255, 45, 6, 4, 9, 12, 4, 1, 1, 107, + 255, 106, 63, 53, 11, 2, 134, 25, 25, 88, + 5, 5, 5, 2 +}; /** - * "Letter, Uppercase" category + * Those unicode letter characters that are not inside any of + * the intervals specified in unicode_letter_intervals array. 
+ * + * The characters are from the following Unicode categories: + * Lu, Ll, Lt, Lm, Lo, Nl */ -#ifndef LIT_UNICODE_RANGE_LU -# define LIT_UNICODE_RANGE_LU(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_LU */ -LIT_UNICODE_RANGE_LU (0x0041, 0x005A) /* LATIN CAPITAL LETTER A - <---> - LATIN CAPITAL LETTER Z */ - -LIT_UNICODE_RANGE_LU (0x00C0, 0x00D6) /* LATIN CAPITAL LETTER A WITH GRAVE - <---> - LATIN CAPITAL LETTER O WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x00D8, 0x00DE) /* LATIN CAPITAL LETTER O WITH STROKE - <---> - LATIN CAPITAL LETTER THORN */ - -LIT_UNICODE_RANGE_LU (0x0100, 0x0100) /* LATIN CAPITAL LETTER A WITH MACRON */ - -LIT_UNICODE_RANGE_LU (0x0102, 0x0102) /* LATIN CAPITAL LETTER A WITH BREVE */ - -LIT_UNICODE_RANGE_LU (0x0104, 0x0104) /* LATIN CAPITAL LETTER A WITH OGONEK */ - -LIT_UNICODE_RANGE_LU (0x0106, 0x0106) /* LATIN CAPITAL LETTER C WITH ACUTE */ - -LIT_UNICODE_RANGE_LU (0x0108, 0x0108) /* LATIN CAPITAL LETTER C WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LU (0x010A, 0x010A) /* LATIN CAPITAL LETTER C WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x010C, 0x010C) /* LATIN CAPITAL LETTER C WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x010E, 0x010E) /* LATIN CAPITAL LETTER D WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x0110, 0x0110) /* LATIN CAPITAL LETTER D WITH STROKE */ - -LIT_UNICODE_RANGE_LU (0x0112, 0x0112) /* LATIN CAPITAL LETTER E WITH MACRON */ - -LIT_UNICODE_RANGE_LU (0x0114, 0x0114) /* LATIN CAPITAL LETTER E WITH BREVE */ - -LIT_UNICODE_RANGE_LU (0x0116, 0x0116) /* LATIN CAPITAL LETTER E WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x0118, 0x0118) /* LATIN CAPITAL LETTER E WITH OGONEK */ - -LIT_UNICODE_RANGE_LU (0x011A, 0x011A) /* LATIN CAPITAL LETTER E WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x011C, 0x011C) /* LATIN CAPITAL LETTER G WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LU (0x011E, 0x011E) /* LATIN CAPITAL LETTER G WITH BREVE */ - -LIT_UNICODE_RANGE_LU (0x0120, 0x0120) /* LATIN CAPITAL LETTER G WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU 
(0x0122, 0x0122) /* LATIN CAPITAL LETTER G WITH CEDILLA */ - -LIT_UNICODE_RANGE_LU (0x0124, 0x0124) /* LATIN CAPITAL LETTER H WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LU (0x0126, 0x0126) /* LATIN CAPITAL LETTER H WITH STROKE */ - -LIT_UNICODE_RANGE_LU (0x0128, 0x0128) /* LATIN CAPITAL LETTER I WITH TILDE */ - -LIT_UNICODE_RANGE_LU (0x012A, 0x012A) /* LATIN CAPITAL LETTER I WITH MACRON */ - -LIT_UNICODE_RANGE_LU (0x012C, 0x012C) /* LATIN CAPITAL LETTER I WITH BREVE */ - -LIT_UNICODE_RANGE_LU (0x012E, 0x012E) /* LATIN CAPITAL LETTER I WITH OGONEK */ - -LIT_UNICODE_RANGE_LU (0x0130, 0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x0132, 0x0132) /* LATIN CAPITAL LIGATURE IJ */ - -LIT_UNICODE_RANGE_LU (0x0134, 0x0134) /* LATIN CAPITAL LETTER J WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LU (0x0136, 0x0136) /* LATIN CAPITAL LETTER K WITH CEDILLA */ - -LIT_UNICODE_RANGE_LU (0x0139, 0x0139) /* LATIN CAPITAL LETTER L WITH ACUTE */ - -LIT_UNICODE_RANGE_LU (0x013B, 0x013B) /* LATIN CAPITAL LETTER L WITH CEDILLA */ - -LIT_UNICODE_RANGE_LU (0x013D, 0x013D) /* LATIN CAPITAL LETTER L WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x013F, 0x013F) /* LATIN CAPITAL LETTER L WITH MIDDLE DOT */ - -LIT_UNICODE_RANGE_LU (0x0141, 0x0141) /* LATIN CAPITAL LETTER L WITH STROKE */ - -LIT_UNICODE_RANGE_LU (0x0143, 0x0143) /* LATIN CAPITAL LETTER N WITH ACUTE */ - -LIT_UNICODE_RANGE_LU (0x0145, 0x0145) /* LATIN CAPITAL LETTER N WITH CEDILLA */ - -LIT_UNICODE_RANGE_LU (0x0147, 0x0147) /* LATIN CAPITAL LETTER N WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x014A, 0x014A) /* LATIN CAPITAL LETTER ENG */ - -LIT_UNICODE_RANGE_LU (0x014C, 0x014C) /* LATIN CAPITAL LETTER O WITH MACRON */ - -LIT_UNICODE_RANGE_LU (0x014E, 0x014E) /* LATIN CAPITAL LETTER O WITH BREVE */ - -LIT_UNICODE_RANGE_LU (0x0150, 0x0150) /* LATIN CAPITAL LETTER O WITH DOUBLE ACUTE */ - -LIT_UNICODE_RANGE_LU (0x0152, 0x0152) /* LATIN CAPITAL LIGATURE OE */ - -LIT_UNICODE_RANGE_LU (0x0154, 0x0154) /* LATIN CAPITAL LETTER 
R WITH ACUTE */ - -LIT_UNICODE_RANGE_LU (0x0156, 0x0156) /* LATIN CAPITAL LETTER R WITH CEDILLA */ - -LIT_UNICODE_RANGE_LU (0x0158, 0x0158) /* LATIN CAPITAL LETTER R WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x015A, 0x015A) /* LATIN CAPITAL LETTER S WITH ACUTE */ - -LIT_UNICODE_RANGE_LU (0x015C, 0x015C) /* LATIN CAPITAL LETTER S WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LU (0x015E, 0x015E) /* LATIN CAPITAL LETTER S WITH CEDILLA */ - -LIT_UNICODE_RANGE_LU (0x0160, 0x0160) /* LATIN CAPITAL LETTER S WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x0162, 0x0162) /* LATIN CAPITAL LETTER T WITH CEDILLA */ - -LIT_UNICODE_RANGE_LU (0x0164, 0x0164) /* LATIN CAPITAL LETTER T WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x0166, 0x0166) /* LATIN CAPITAL LETTER T WITH STROKE */ - -LIT_UNICODE_RANGE_LU (0x0168, 0x0168) /* LATIN CAPITAL LETTER U WITH TILDE */ - -LIT_UNICODE_RANGE_LU (0x016A, 0x016A) /* LATIN CAPITAL LETTER U WITH MACRON */ - -LIT_UNICODE_RANGE_LU (0x016C, 0x016C) /* LATIN CAPITAL LETTER U WITH BREVE */ - -LIT_UNICODE_RANGE_LU (0x016E, 0x016E) /* LATIN CAPITAL LETTER U WITH RING ABOVE */ - -LIT_UNICODE_RANGE_LU (0x0170, 0x0170) /* LATIN CAPITAL LETTER U WITH DOUBLE ACUTE */ - -LIT_UNICODE_RANGE_LU (0x0172, 0x0172) /* LATIN CAPITAL LETTER U WITH OGONEK */ - -LIT_UNICODE_RANGE_LU (0x0174, 0x0174) /* LATIN CAPITAL LETTER W WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LU (0x0176, 0x0176) /* LATIN CAPITAL LETTER Y WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LU (0x0178, 0x0179) /* LATIN CAPITAL LETTER Y WITH DIAERESIS - <---> - LATIN CAPITAL LETTER Z WITH ACUTE */ - -LIT_UNICODE_RANGE_LU (0x017B, 0x017B) /* LATIN CAPITAL LETTER Z WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x017D, 0x017D) /* LATIN CAPITAL LETTER Z WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x0181, 0x0182) /* LATIN CAPITAL LETTER B WITH HOOK - <---> - LATIN CAPITAL LETTER B WITH TOPBAR */ - -LIT_UNICODE_RANGE_LU (0x0184, 0x0184) /* LATIN CAPITAL LETTER TONE SIX */ - -LIT_UNICODE_RANGE_LU (0x0186, 0x0187) /* LATIN CAPITAL LETTER OPEN O - 
<---> - LATIN CAPITAL LETTER C WITH HOOK */ - -LIT_UNICODE_RANGE_LU (0x0189, 0x018B) /* LATIN CAPITAL LETTER AFRICAN D - <---> - LATIN CAPITAL LETTER D WITH TOPBAR */ - -LIT_UNICODE_RANGE_LU (0x018E, 0x0191) /* LATIN CAPITAL LETTER REVERSED E - <---> - LATIN CAPITAL LETTER F WITH HOOK */ - -LIT_UNICODE_RANGE_LU (0x0193, 0x0194) /* LATIN CAPITAL LETTER G WITH HOOK - <---> - LATIN CAPITAL LETTER GAMMA */ - -LIT_UNICODE_RANGE_LU (0x0196, 0x0198) /* LATIN CAPITAL LETTER IOTA - <---> - LATIN CAPITAL LETTER K WITH HOOK */ - -LIT_UNICODE_RANGE_LU (0x019C, 0x019D) /* LATIN CAPITAL LETTER TURNED M - <---> - LATIN CAPITAL LETTER N WITH LEFT HOOK */ - -LIT_UNICODE_RANGE_LU (0x019F, 0x01A0) /* LATIN CAPITAL LETTER O WITH MIDDLE TILDE - <---> - LATIN CAPITAL LETTER O WITH HORN */ - -LIT_UNICODE_RANGE_LU (0x01A2, 0x01A2) /* LATIN CAPITAL LETTER OI */ - -LIT_UNICODE_RANGE_LU (0x01A4, 0x01A4) /* LATIN CAPITAL LETTER P WITH HOOK */ - -LIT_UNICODE_RANGE_LU (0x01A6, 0x01A7) /* LATIN LETTER YR - <---> - LATIN CAPITAL LETTER TONE TWO */ - -LIT_UNICODE_RANGE_LU (0x01A9, 0x01A9) /* LATIN CAPITAL LETTER ESH */ - -LIT_UNICODE_RANGE_LU (0x01AC, 0x01AC) /* LATIN CAPITAL LETTER T WITH HOOK */ - -LIT_UNICODE_RANGE_LU (0x01AE, 0x01AF) /* LATIN CAPITAL LETTER T WITH RETROFLEX HOOK - <---> - LATIN CAPITAL LETTER U WITH HORN */ - -LIT_UNICODE_RANGE_LU (0x01B1, 0x01B3) /* LATIN CAPITAL LETTER UPSILON - <---> - LATIN CAPITAL LETTER Y WITH HOOK */ - -LIT_UNICODE_RANGE_LU (0x01B5, 0x01B5) /* LATIN CAPITAL LETTER Z WITH STROKE */ - -LIT_UNICODE_RANGE_LU (0x01B7, 0x01B8) /* LATIN CAPITAL LETTER EZH - <---> - LATIN CAPITAL LETTER EZH REVERSED */ - -LIT_UNICODE_RANGE_LU (0x01BC, 0x01BC) /* LATIN CAPITAL LETTER TONE FIVE */ - -LIT_UNICODE_RANGE_LU (0x01C4, 0x01C4) /* LATIN CAPITAL LETTER DZ WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x01C7, 0x01C7) /* LATIN CAPITAL LETTER LJ */ - -LIT_UNICODE_RANGE_LU (0x01CA, 0x01CA) /* LATIN CAPITAL LETTER NJ */ - -LIT_UNICODE_RANGE_LU (0x01CD, 0x01CD) /* LATIN CAPITAL 
LETTER A WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x01CF, 0x01CF) /* LATIN CAPITAL LETTER I WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x01D1, 0x01D1) /* LATIN CAPITAL LETTER O WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x01D3, 0x01D3) /* LATIN CAPITAL LETTER U WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x01D5, 0x01D5) /* LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON */ - -LIT_UNICODE_RANGE_LU (0x01D7, 0x01D7) /* LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x01D9, 0x01D9) /* LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON */ - -LIT_UNICODE_RANGE_LU (0x01DB, 0x01DB) /* LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE */ - -LIT_UNICODE_RANGE_LU (0x01DE, 0x01DE) /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */ - -LIT_UNICODE_RANGE_LU (0x01E0, 0x01E0) /* LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON */ - -LIT_UNICODE_RANGE_LU (0x01E2, 0x01E2) /* LATIN CAPITAL LETTER AE WITH MACRON */ - -LIT_UNICODE_RANGE_LU (0x01E4, 0x01E4) /* LATIN CAPITAL LETTER G WITH STROKE */ - -LIT_UNICODE_RANGE_LU (0x01E6, 0x01E6) /* LATIN CAPITAL LETTER G WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x01E8, 0x01E8) /* LATIN CAPITAL LETTER K WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x01EA, 0x01EA) /* LATIN CAPITAL LETTER O WITH OGONEK */ - -LIT_UNICODE_RANGE_LU (0x01EC, 0x01EC) /* LATIN CAPITAL LETTER O WITH OGONEK AND MACRON */ - -LIT_UNICODE_RANGE_LU (0x01EE, 0x01EE) /* LATIN CAPITAL LETTER EZH WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x01F1, 0x01F1) /* LATIN CAPITAL LETTER DZ */ - -LIT_UNICODE_RANGE_LU (0x01F4, 0x01F4) /* LATIN CAPITAL LETTER G WITH ACUTE */ - -LIT_UNICODE_RANGE_LU (0x01F6, 0x01F8) /* LATIN CAPITAL LETTER HWAIR - <---> - LATIN CAPITAL LETTER N WITH GRAVE */ - -LIT_UNICODE_RANGE_LU (0x01FA, 0x01FA) /* LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x01FC, 0x01FC) /* LATIN CAPITAL LETTER AE WITH ACUTE */ - -LIT_UNICODE_RANGE_LU (0x01FE, 0x01FE) /* LATIN CAPITAL LETTER O WITH STROKE AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x0200, 
0x0200) /* LATIN CAPITAL LETTER A WITH DOUBLE GRAVE */ - -LIT_UNICODE_RANGE_LU (0x0202, 0x0202) /* LATIN CAPITAL LETTER A WITH INVERTED BREVE */ - -LIT_UNICODE_RANGE_LU (0x0204, 0x0204) /* LATIN CAPITAL LETTER E WITH DOUBLE GRAVE */ - -LIT_UNICODE_RANGE_LU (0x0206, 0x0206) /* LATIN CAPITAL LETTER E WITH INVERTED BREVE */ - -LIT_UNICODE_RANGE_LU (0x0208, 0x0208) /* LATIN CAPITAL LETTER I WITH DOUBLE GRAVE */ - -LIT_UNICODE_RANGE_LU (0x020A, 0x020A) /* LATIN CAPITAL LETTER I WITH INVERTED BREVE */ - -LIT_UNICODE_RANGE_LU (0x020C, 0x020C) /* LATIN CAPITAL LETTER O WITH DOUBLE GRAVE */ - -LIT_UNICODE_RANGE_LU (0x020E, 0x020E) /* LATIN CAPITAL LETTER O WITH INVERTED BREVE */ - -LIT_UNICODE_RANGE_LU (0x0210, 0x0210) /* LATIN CAPITAL LETTER R WITH DOUBLE GRAVE */ - -LIT_UNICODE_RANGE_LU (0x0212, 0x0212) /* LATIN CAPITAL LETTER R WITH INVERTED BREVE */ - -LIT_UNICODE_RANGE_LU (0x0214, 0x0214) /* LATIN CAPITAL LETTER U WITH DOUBLE GRAVE */ - -LIT_UNICODE_RANGE_LU (0x0216, 0x0216) /* LATIN CAPITAL LETTER U WITH INVERTED BREVE */ - -LIT_UNICODE_RANGE_LU (0x0218, 0x0218) /* LATIN CAPITAL LETTER S WITH COMMA BELOW */ - -LIT_UNICODE_RANGE_LU (0x021A, 0x021A) /* LATIN CAPITAL LETTER T WITH COMMA BELOW */ - -LIT_UNICODE_RANGE_LU (0x021C, 0x021C) /* LATIN CAPITAL LETTER YOGH */ - -LIT_UNICODE_RANGE_LU (0x021E, 0x021E) /* LATIN CAPITAL LETTER H WITH CARON */ - -LIT_UNICODE_RANGE_LU (0x0222, 0x0222) /* LATIN CAPITAL LETTER OU */ - -LIT_UNICODE_RANGE_LU (0x0224, 0x0224) /* LATIN CAPITAL LETTER Z WITH HOOK */ - -LIT_UNICODE_RANGE_LU (0x0226, 0x0226) /* LATIN CAPITAL LETTER A WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x0228, 0x0228) /* LATIN CAPITAL LETTER E WITH CEDILLA */ - -LIT_UNICODE_RANGE_LU (0x022A, 0x022A) /* LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON */ - -LIT_UNICODE_RANGE_LU (0x022C, 0x022C) /* LATIN CAPITAL LETTER O WITH TILDE AND MACRON */ - -LIT_UNICODE_RANGE_LU (0x022E, 0x022E) /* LATIN CAPITAL LETTER O WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x0230, 0x0230) /* 
LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON */ - -LIT_UNICODE_RANGE_LU (0x0232, 0x0232) /* LATIN CAPITAL LETTER Y WITH MACRON */ - -LIT_UNICODE_RANGE_LU (0x0386, 0x0386) /* GREEK CAPITAL LETTER ALPHA WITH TONOS */ - -LIT_UNICODE_RANGE_LU (0x0388, 0x038A) /* GREEK CAPITAL LETTER EPSILON WITH TONOS - <---> - GREEK CAPITAL LETTER IOTA WITH TONOS */ - -LIT_UNICODE_RANGE_LU (0x038C, 0x038C) /* GREEK CAPITAL LETTER OMICRON WITH TONOS */ - -LIT_UNICODE_RANGE_LU (0x038E, 0x038F) /* GREEK CAPITAL LETTER UPSILON WITH TONOS - <---> - GREEK CAPITAL LETTER OMEGA WITH TONOS */ - -LIT_UNICODE_RANGE_LU (0x0391, 0x03A1) /* GREEK CAPITAL LETTER ALPHA - <---> - GREEK CAPITAL LETTER RHO */ - -LIT_UNICODE_RANGE_LU (0x03A3, 0x03AB) /* GREEK CAPITAL LETTER SIGMA - <---> - GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA */ - -LIT_UNICODE_RANGE_LU (0x03D2, 0x03D4) /* GREEK UPSILON WITH HOOK SYMBOL - <---> - GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL */ - -LIT_UNICODE_RANGE_LU (0x03DA, 0x03DA) /* GREEK LETTER STIGMA */ - -LIT_UNICODE_RANGE_LU (0x03DC, 0x03DC) /* GREEK LETTER DIGAMMA */ - -LIT_UNICODE_RANGE_LU (0x03DE, 0x03DE) /* GREEK LETTER KOPPA */ - -LIT_UNICODE_RANGE_LU (0x03E0, 0x03E0) /* GREEK LETTER SAMPI */ - -LIT_UNICODE_RANGE_LU (0x03E2, 0x03E2) /* COPTIC CAPITAL LETTER SHEI */ - -LIT_UNICODE_RANGE_LU (0x03E4, 0x03E4) /* COPTIC CAPITAL LETTER FEI */ - -LIT_UNICODE_RANGE_LU (0x03E6, 0x03E6) /* COPTIC CAPITAL LETTER KHEI */ - -LIT_UNICODE_RANGE_LU (0x03E8, 0x03E8) /* COPTIC CAPITAL LETTER HORI */ - -LIT_UNICODE_RANGE_LU (0x03EA, 0x03EA) /* COPTIC CAPITAL LETTER GANGIA */ - -LIT_UNICODE_RANGE_LU (0x03EC, 0x03EC) /* COPTIC CAPITAL LETTER SHIMA */ - -LIT_UNICODE_RANGE_LU (0x03EE, 0x03EE) /* COPTIC CAPITAL LETTER DEI */ - -LIT_UNICODE_RANGE_LU (0x0400, 0x042F) /* CYRILLIC CAPITAL LETTER IE WITH GRAVE - <---> - CYRILLIC CAPITAL LETTER YA */ - -LIT_UNICODE_RANGE_LU (0x0460, 0x0460) /* CYRILLIC CAPITAL LETTER OMEGA */ - -LIT_UNICODE_RANGE_LU (0x0462, 0x0462) /* CYRILLIC CAPITAL LETTER 
YAT */ - -LIT_UNICODE_RANGE_LU (0x0464, 0x0464) /* CYRILLIC CAPITAL LETTER IOTIFIED E */ - -LIT_UNICODE_RANGE_LU (0x0466, 0x0466) /* CYRILLIC CAPITAL LETTER LITTLE YUS */ - -LIT_UNICODE_RANGE_LU (0x0468, 0x0468) /* CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS */ - -LIT_UNICODE_RANGE_LU (0x046A, 0x046A) /* CYRILLIC CAPITAL LETTER BIG YUS */ - -LIT_UNICODE_RANGE_LU (0x046C, 0x046C) /* CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS */ - -LIT_UNICODE_RANGE_LU (0x046E, 0x046E) /* CYRILLIC CAPITAL LETTER KSI */ - -LIT_UNICODE_RANGE_LU (0x0470, 0x0470) /* CYRILLIC CAPITAL LETTER PSI */ - -LIT_UNICODE_RANGE_LU (0x0472, 0x0472) /* CYRILLIC CAPITAL LETTER FITA */ - -LIT_UNICODE_RANGE_LU (0x0474, 0x0474) /* CYRILLIC CAPITAL LETTER IZHITSA */ - -LIT_UNICODE_RANGE_LU (0x0476, 0x0476) /* CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT */ - -LIT_UNICODE_RANGE_LU (0x0478, 0x0478) /* CYRILLIC CAPITAL LETTER UK */ - -LIT_UNICODE_RANGE_LU (0x047A, 0x047A) /* CYRILLIC CAPITAL LETTER ROUND OMEGA */ - -LIT_UNICODE_RANGE_LU (0x047C, 0x047C) /* CYRILLIC CAPITAL LETTER OMEGA WITH TITLO */ - -LIT_UNICODE_RANGE_LU (0x047E, 0x047E) /* CYRILLIC CAPITAL LETTER OT */ - -LIT_UNICODE_RANGE_LU (0x0480, 0x0480) /* CYRILLIC CAPITAL LETTER KOPPA */ - -LIT_UNICODE_RANGE_LU (0x048C, 0x048C) /* CYRILLIC CAPITAL LETTER SEMISOFT SIGN */ - -LIT_UNICODE_RANGE_LU (0x048E, 0x048E) /* CYRILLIC CAPITAL LETTER ER WITH TICK */ - -LIT_UNICODE_RANGE_LU (0x0490, 0x0490) /* CYRILLIC CAPITAL LETTER GHE WITH UPTURN */ - -LIT_UNICODE_RANGE_LU (0x0492, 0x0492) /* CYRILLIC CAPITAL LETTER GHE WITH STROKE */ - -LIT_UNICODE_RANGE_LU (0x0494, 0x0494) /* CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK */ - -LIT_UNICODE_RANGE_LU (0x0496, 0x0496) /* CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER */ - -LIT_UNICODE_RANGE_LU (0x0498, 0x0498) /* CYRILLIC CAPITAL LETTER ZE WITH DESCENDER */ - -LIT_UNICODE_RANGE_LU (0x049A, 0x049A) /* CYRILLIC CAPITAL LETTER KA WITH DESCENDER */ - -LIT_UNICODE_RANGE_LU (0x049C, 0x049C) /* CYRILLIC CAPITAL 
LETTER KA WITH VERTICAL STROKE */ - -LIT_UNICODE_RANGE_LU (0x049E, 0x049E) /* CYRILLIC CAPITAL LETTER KA WITH STROKE */ - -LIT_UNICODE_RANGE_LU (0x04A0, 0x04A0) /* CYRILLIC CAPITAL LETTER BASHKIR KA */ - -LIT_UNICODE_RANGE_LU (0x04A2, 0x04A2) /* CYRILLIC CAPITAL LETTER EN WITH DESCENDER */ - -LIT_UNICODE_RANGE_LU (0x04A4, 0x04A4) /* CYRILLIC CAPITAL LIGATURE EN GHE */ - -LIT_UNICODE_RANGE_LU (0x04A6, 0x04A6) /* CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK */ - -LIT_UNICODE_RANGE_LU (0x04A8, 0x04A8) /* CYRILLIC CAPITAL LETTER ABKHASIAN HA */ - -LIT_UNICODE_RANGE_LU (0x04AA, 0x04AA) /* CYRILLIC CAPITAL LETTER ES WITH DESCENDER */ - -LIT_UNICODE_RANGE_LU (0x04AC, 0x04AC) /* CYRILLIC CAPITAL LETTER TE WITH DESCENDER */ - -LIT_UNICODE_RANGE_LU (0x04AE, 0x04AE) /* CYRILLIC CAPITAL LETTER STRAIGHT U */ - -LIT_UNICODE_RANGE_LU (0x04B0, 0x04B0) /* CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE */ - -LIT_UNICODE_RANGE_LU (0x04B2, 0x04B2) /* CYRILLIC CAPITAL LETTER HA WITH DESCENDER */ - -LIT_UNICODE_RANGE_LU (0x04B4, 0x04B4) /* CYRILLIC CAPITAL LIGATURE TE TSE */ - -LIT_UNICODE_RANGE_LU (0x04B6, 0x04B6) /* CYRILLIC CAPITAL LETTER CHE WITH DESCENDER */ - -LIT_UNICODE_RANGE_LU (0x04B8, 0x04B8) /* CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE */ - -LIT_UNICODE_RANGE_LU (0x04BA, 0x04BA) /* CYRILLIC CAPITAL LETTER SHHA */ - -LIT_UNICODE_RANGE_LU (0x04BC, 0x04BC) /* CYRILLIC CAPITAL LETTER ABKHASIAN CHE */ - -LIT_UNICODE_RANGE_LU (0x04BE, 0x04BE) /* CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER */ - -LIT_UNICODE_RANGE_LU (0x04C0, 0x04C1) /* CYRILLIC LETTER PALOCHKA - <---> - CYRILLIC CAPITAL LETTER ZHE WITH BREVE */ - -LIT_UNICODE_RANGE_LU (0x04C3, 0x04C3) /* CYRILLIC CAPITAL LETTER KA WITH HOOK */ - -LIT_UNICODE_RANGE_LU (0x04C7, 0x04C7) /* CYRILLIC CAPITAL LETTER EN WITH HOOK */ - -LIT_UNICODE_RANGE_LU (0x04CB, 0x04CB) /* CYRILLIC CAPITAL LETTER KHAKASSIAN CHE */ - -LIT_UNICODE_RANGE_LU (0x04D0, 0x04D0) /* CYRILLIC CAPITAL LETTER A WITH BREVE */ - -LIT_UNICODE_RANGE_LU 
(0x04D2, 0x04D2) /* CYRILLIC CAPITAL LETTER A WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x04D4, 0x04D4) /* CYRILLIC CAPITAL LIGATURE A IE */ - -LIT_UNICODE_RANGE_LU (0x04D6, 0x04D6) /* CYRILLIC CAPITAL LETTER IE WITH BREVE */ - -LIT_UNICODE_RANGE_LU (0x04D8, 0x04D8) /* CYRILLIC CAPITAL LETTER SCHWA */ - -LIT_UNICODE_RANGE_LU (0x04DA, 0x04DA) /* CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x04DC, 0x04DC) /* CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x04DE, 0x04DE) /* CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x04E0, 0x04E0) /* CYRILLIC CAPITAL LETTER ABKHASIAN DZE */ - -LIT_UNICODE_RANGE_LU (0x04E2, 0x04E2) /* CYRILLIC CAPITAL LETTER I WITH MACRON */ - -LIT_UNICODE_RANGE_LU (0x04E4, 0x04E4) /* CYRILLIC CAPITAL LETTER I WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x04E6, 0x04E6) /* CYRILLIC CAPITAL LETTER O WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x04E8, 0x04E8) /* CYRILLIC CAPITAL LETTER BARRED O */ - -LIT_UNICODE_RANGE_LU (0x04EA, 0x04EA) /* CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x04EC, 0x04EC) /* CYRILLIC CAPITAL LETTER E WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x04EE, 0x04EE) /* CYRILLIC CAPITAL LETTER U WITH MACRON */ - -LIT_UNICODE_RANGE_LU (0x04F0, 0x04F0) /* CYRILLIC CAPITAL LETTER U WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x04F2, 0x04F2) /* CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE */ - -LIT_UNICODE_RANGE_LU (0x04F4, 0x04F4) /* CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x04F8, 0x04F8) /* CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x0531, 0x0556) /* ARMENIAN CAPITAL LETTER AYB - <---> - ARMENIAN CAPITAL LETTER FEH */ - -LIT_UNICODE_RANGE_LU (0x10A0, 0x10C5) /* GEORGIAN CAPITAL LETTER AN - <---> - GEORGIAN CAPITAL LETTER HOE */ - -LIT_UNICODE_RANGE_LU (0x1E00, 0x1E00) /* LATIN CAPITAL LETTER A WITH RING BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E02, 0x1E02) /* LATIN CAPITAL 
LETTER B WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E04, 0x1E04) /* LATIN CAPITAL LETTER B WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E06, 0x1E06) /* LATIN CAPITAL LETTER B WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E08, 0x1E08) /* LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1E0A, 0x1E0A) /* LATIN CAPITAL LETTER D WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E0C, 0x1E0C) /* LATIN CAPITAL LETTER D WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E0E, 0x1E0E) /* LATIN CAPITAL LETTER D WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E10, 0x1E10) /* LATIN CAPITAL LETTER D WITH CEDILLA */ - -LIT_UNICODE_RANGE_LU (0x1E12, 0x1E12) /* LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E14, 0x1E14) /* LATIN CAPITAL LETTER E WITH MACRON AND GRAVE */ - -LIT_UNICODE_RANGE_LU (0x1E16, 0x1E16) /* LATIN CAPITAL LETTER E WITH MACRON AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1E18, 0x1E18) /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E1A, 0x1E1A) /* LATIN CAPITAL LETTER E WITH TILDE BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E1C, 0x1E1C) /* LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE */ - -LIT_UNICODE_RANGE_LU (0x1E1E, 0x1E1E) /* LATIN CAPITAL LETTER F WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E20, 0x1E20) /* LATIN CAPITAL LETTER G WITH MACRON */ - -LIT_UNICODE_RANGE_LU (0x1E22, 0x1E22) /* LATIN CAPITAL LETTER H WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E24, 0x1E24) /* LATIN CAPITAL LETTER H WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E26, 0x1E26) /* LATIN CAPITAL LETTER H WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x1E28, 0x1E28) /* LATIN CAPITAL LETTER H WITH CEDILLA */ - -LIT_UNICODE_RANGE_LU (0x1E2A, 0x1E2A) /* LATIN CAPITAL LETTER H WITH BREVE BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E2C, 0x1E2C) /* LATIN CAPITAL LETTER I WITH TILDE BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E2E, 0x1E2E) /* LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1E30, 0x1E30) /* 
LATIN CAPITAL LETTER K WITH ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1E32, 0x1E32) /* LATIN CAPITAL LETTER K WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E34, 0x1E34) /* LATIN CAPITAL LETTER K WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E36, 0x1E36) /* LATIN CAPITAL LETTER L WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E38, 0x1E38) /* LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON */ - -LIT_UNICODE_RANGE_LU (0x1E3A, 0x1E3A) /* LATIN CAPITAL LETTER L WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E3C, 0x1E3C) /* LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E3E, 0x1E3E) /* LATIN CAPITAL LETTER M WITH ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1E40, 0x1E40) /* LATIN CAPITAL LETTER M WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E42, 0x1E42) /* LATIN CAPITAL LETTER M WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E44, 0x1E44) /* LATIN CAPITAL LETTER N WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E46, 0x1E46) /* LATIN CAPITAL LETTER N WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E48, 0x1E48) /* LATIN CAPITAL LETTER N WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E4A, 0x1E4A) /* LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E4C, 0x1E4C) /* LATIN CAPITAL LETTER O WITH TILDE AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1E4E, 0x1E4E) /* LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x1E50, 0x1E50) /* LATIN CAPITAL LETTER O WITH MACRON AND GRAVE */ - -LIT_UNICODE_RANGE_LU (0x1E52, 0x1E52) /* LATIN CAPITAL LETTER O WITH MACRON AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1E54, 0x1E54) /* LATIN CAPITAL LETTER P WITH ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1E56, 0x1E56) /* LATIN CAPITAL LETTER P WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E58, 0x1E58) /* LATIN CAPITAL LETTER R WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E5A, 0x1E5A) /* LATIN CAPITAL LETTER R WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E5C, 0x1E5C) /* LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON */ - -LIT_UNICODE_RANGE_LU 
(0x1E5E, 0x1E5E) /* LATIN CAPITAL LETTER R WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E60, 0x1E60) /* LATIN CAPITAL LETTER S WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E62, 0x1E62) /* LATIN CAPITAL LETTER S WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E64, 0x1E64) /* LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E66, 0x1E66) /* LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E68, 0x1E68) /* LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E6A, 0x1E6A) /* LATIN CAPITAL LETTER T WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E6C, 0x1E6C) /* LATIN CAPITAL LETTER T WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E6E, 0x1E6E) /* LATIN CAPITAL LETTER T WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E70, 0x1E70) /* LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E72, 0x1E72) /* LATIN CAPITAL LETTER U WITH DIAERESIS BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E74, 0x1E74) /* LATIN CAPITAL LETTER U WITH TILDE BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E76, 0x1E76) /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E78, 0x1E78) /* LATIN CAPITAL LETTER U WITH TILDE AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1E7A, 0x1E7A) /* LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x1E7C, 0x1E7C) /* LATIN CAPITAL LETTER V WITH TILDE */ - -LIT_UNICODE_RANGE_LU (0x1E7E, 0x1E7E) /* LATIN CAPITAL LETTER V WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E80, 0x1E80) /* LATIN CAPITAL LETTER W WITH GRAVE */ - -LIT_UNICODE_RANGE_LU (0x1E82, 0x1E82) /* LATIN CAPITAL LETTER W WITH ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1E84, 0x1E84) /* LATIN CAPITAL LETTER W WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x1E86, 0x1E86) /* LATIN CAPITAL LETTER W WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E88, 0x1E88) /* LATIN CAPITAL LETTER W WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E8A, 0x1E8A) /* LATIN CAPITAL LETTER X WITH DOT ABOVE */ - 
-LIT_UNICODE_RANGE_LU (0x1E8C, 0x1E8C) /* LATIN CAPITAL LETTER X WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LU (0x1E8E, 0x1E8E) /* LATIN CAPITAL LETTER Y WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1E90, 0x1E90) /* LATIN CAPITAL LETTER Z WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LU (0x1E92, 0x1E92) /* LATIN CAPITAL LETTER Z WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1E94, 0x1E94) /* LATIN CAPITAL LETTER Z WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LU (0x1EA0, 0x1EA0) /* LATIN CAPITAL LETTER A WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1EA2, 0x1EA2) /* LATIN CAPITAL LETTER A WITH HOOK ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1EA4, 0x1EA4) /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1EA6, 0x1EA6) /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE */ - -LIT_UNICODE_RANGE_LU (0x1EA8, 0x1EA8) /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1EAA, 0x1EAA) /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE */ - -LIT_UNICODE_RANGE_LU (0x1EAC, 0x1EAC) /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1EAE, 0x1EAE) /* LATIN CAPITAL LETTER A WITH BREVE AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1EB0, 0x1EB0) /* LATIN CAPITAL LETTER A WITH BREVE AND GRAVE */ - -LIT_UNICODE_RANGE_LU (0x1EB2, 0x1EB2) /* LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1EB4, 0x1EB4) /* LATIN CAPITAL LETTER A WITH BREVE AND TILDE */ - -LIT_UNICODE_RANGE_LU (0x1EB6, 0x1EB6) /* LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1EB8, 0x1EB8) /* LATIN CAPITAL LETTER E WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1EBA, 0x1EBA) /* LATIN CAPITAL LETTER E WITH HOOK ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1EBC, 0x1EBC) /* LATIN CAPITAL LETTER E WITH TILDE */ - -LIT_UNICODE_RANGE_LU (0x1EBE, 0x1EBE) /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1EC0, 0x1EC0) /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE */ - 
-LIT_UNICODE_RANGE_LU (0x1EC2, 0x1EC2) /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1EC4, 0x1EC4) /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE */ - -LIT_UNICODE_RANGE_LU (0x1EC6, 0x1EC6) /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1EC8, 0x1EC8) /* LATIN CAPITAL LETTER I WITH HOOK ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1ECA, 0x1ECA) /* LATIN CAPITAL LETTER I WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1ECC, 0x1ECC) /* LATIN CAPITAL LETTER O WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1ECE, 0x1ECE) /* LATIN CAPITAL LETTER O WITH HOOK ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1ED0, 0x1ED0) /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1ED2, 0x1ED2) /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE */ - -LIT_UNICODE_RANGE_LU (0x1ED4, 0x1ED4) /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1ED6, 0x1ED6) /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE */ - -LIT_UNICODE_RANGE_LU (0x1ED8, 0x1ED8) /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1EDA, 0x1EDA) /* LATIN CAPITAL LETTER O WITH HORN AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1EDC, 0x1EDC) /* LATIN CAPITAL LETTER O WITH HORN AND GRAVE */ - -LIT_UNICODE_RANGE_LU (0x1EDE, 0x1EDE) /* LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1EE0, 0x1EE0) /* LATIN CAPITAL LETTER O WITH HORN AND TILDE */ - -LIT_UNICODE_RANGE_LU (0x1EE2, 0x1EE2) /* LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1EE4, 0x1EE4) /* LATIN CAPITAL LETTER U WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1EE6, 0x1EE6) /* LATIN CAPITAL LETTER U WITH HOOK ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1EE8, 0x1EE8) /* LATIN CAPITAL LETTER U WITH HORN AND ACUTE */ - -LIT_UNICODE_RANGE_LU (0x1EEA, 0x1EEA) /* LATIN CAPITAL LETTER U WITH HORN AND GRAVE */ - -LIT_UNICODE_RANGE_LU (0x1EEC, 0x1EEC) /* LATIN CAPITAL 
LETTER U WITH HORN AND HOOK ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1EEE, 0x1EEE) /* LATIN CAPITAL LETTER U WITH HORN AND TILDE */ - -LIT_UNICODE_RANGE_LU (0x1EF0, 0x1EF0) /* LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1EF2, 0x1EF2) /* LATIN CAPITAL LETTER Y WITH GRAVE */ - -LIT_UNICODE_RANGE_LU (0x1EF4, 0x1EF4) /* LATIN CAPITAL LETTER Y WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LU (0x1EF6, 0x1EF6) /* LATIN CAPITAL LETTER Y WITH HOOK ABOVE */ - -LIT_UNICODE_RANGE_LU (0x1EF8, 0x1EF8) /* LATIN CAPITAL LETTER Y WITH TILDE */ - -LIT_UNICODE_RANGE_LU (0x1F08, 0x1F0F) /* GREEK CAPITAL LETTER ALPHA WITH PSILI - <---> - GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI */ - -LIT_UNICODE_RANGE_LU (0x1F18, 0x1F1D) /* GREEK CAPITAL LETTER EPSILON WITH PSILI - <---> - GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA */ - -LIT_UNICODE_RANGE_LU (0x1F28, 0x1F2F) /* GREEK CAPITAL LETTER ETA WITH PSILI - <---> - GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI */ - -LIT_UNICODE_RANGE_LU (0x1F38, 0x1F3F) /* GREEK CAPITAL LETTER IOTA WITH PSILI - <---> - GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI */ - -LIT_UNICODE_RANGE_LU (0x1F48, 0x1F4D) /* GREEK CAPITAL LETTER OMICRON WITH PSILI - <---> - GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA */ - -LIT_UNICODE_RANGE_LU (0x1F59, 0x1F59) /* GREEK CAPITAL LETTER UPSILON WITH DASIA */ - -LIT_UNICODE_RANGE_LU (0x1F5B, 0x1F5B) /* GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA */ - -LIT_UNICODE_RANGE_LU (0x1F5D, 0x1F5D) /* GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA */ - -LIT_UNICODE_RANGE_LU (0x1F5F, 0x1F5F) /* GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI */ - -LIT_UNICODE_RANGE_LU (0x1F68, 0x1F6F) /* GREEK CAPITAL LETTER OMEGA WITH PSILI - <---> - GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI */ - -LIT_UNICODE_RANGE_LU (0x1FB8, 0x1FBB) /* GREEK CAPITAL LETTER ALPHA WITH VRACHY - <---> - GREEK CAPITAL LETTER ALPHA WITH OXIA */ - -LIT_UNICODE_RANGE_LU (0x1FC8, 0x1FCB) /* 
GREEK CAPITAL LETTER EPSILON WITH VARIA - <---> - GREEK CAPITAL LETTER ETA WITH OXIA */ - -LIT_UNICODE_RANGE_LU (0x1FD8, 0x1FDB) /* GREEK CAPITAL LETTER IOTA WITH VRACHY - <---> - GREEK CAPITAL LETTER IOTA WITH OXIA */ - -LIT_UNICODE_RANGE_LU (0x1FE8, 0x1FEC) /* GREEK CAPITAL LETTER UPSILON WITH VRACHY - <---> - GREEK CAPITAL LETTER RHO WITH DASIA */ - -LIT_UNICODE_RANGE_LU (0x1FF8, 0x1FFB) /* GREEK CAPITAL LETTER OMICRON WITH VARIA - <---> - GREEK CAPITAL LETTER OMEGA WITH OXIA */ - -LIT_UNICODE_RANGE_LU (0x2102, 0x2102) /* DOUBLE-STRUCK CAPITAL C */ - -LIT_UNICODE_RANGE_LU (0x2107, 0x2107) /* EULER CONSTANT */ - -LIT_UNICODE_RANGE_LU (0x210B, 0x210D) /* SCRIPT CAPITAL H - <---> - DOUBLE-STRUCK CAPITAL H */ - -LIT_UNICODE_RANGE_LU (0x2110, 0x2112) /* SCRIPT CAPITAL I - <---> - SCRIPT CAPITAL L */ - -LIT_UNICODE_RANGE_LU (0x2115, 0x2115) /* DOUBLE-STRUCK CAPITAL N */ - -LIT_UNICODE_RANGE_LU (0x2119, 0x211D) /* DOUBLE-STRUCK CAPITAL P - <---> - DOUBLE-STRUCK CAPITAL R */ - -LIT_UNICODE_RANGE_LU (0x2124, 0x2124) /* DOUBLE-STRUCK CAPITAL Z */ - -LIT_UNICODE_RANGE_LU (0x2126, 0x2126) /* OHM SIGN */ - -LIT_UNICODE_RANGE_LU (0x2128, 0x2128) /* BLACK-LETTER CAPITAL Z */ - -LIT_UNICODE_RANGE_LU (0x212A, 0x212D) /* KELVIN SIGN - <---> - BLACK-LETTER CAPITAL C */ - -LIT_UNICODE_RANGE_LU (0x2130, 0x2131) /* SCRIPT CAPITAL E - <---> - SCRIPT CAPITAL F */ - -LIT_UNICODE_RANGE_LU (0x2133, 0x2133) /* SCRIPT CAPITAL M */ - -LIT_UNICODE_RANGE_LU (0xFF21, 0xFF3A) /* FULLWIDTH LATIN CAPITAL LETTER A - <---> - FULLWIDTH LATIN CAPITAL LETTER Z */ +static const uint16_t unicode_letter_chars[] = +{ + 0x00AA, 0x00B5, 0x00BA, 0x02EE, 0x037A, 0x0386, 0x038C, 0x0559, 0x06D5, 0x0710, + 0x093D, 0x0950, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AD0, 0x0AE0, 0x0B3D, 0x0B9C, + 0x0CDE, 0x0DBD, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EBD, 0x0EC6, 0x0F00, + 0x1248, 0x1258, 0x1288, 0x12B0, 0x12C0, 0x1310, 0x1F59, 0x1F5B, 0x1F5D, 0x1FBE, + 0x207F, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x3400, 
0x4DB5, 0x4E00, + 0x9FA5, 0xAC00, 0xD7A3, 0xFB1D, 0xFB3E, 0xFE74 +}; + +/** + * Character interval starting points for non-letter characters + * that can be used as a non-first character of an identifier. + * + * The characters covered by these intervals are from + * the following Unicode categories: Nd, Mn, Mc, Pc + */ +static const uint16_t unicode_non_letter_ident_part_interv_sps[] = +{ +/* + * decimal digits: handled separately + * 0x0030, len: 9 + */ + 0x0300, 0x0360, 0x0483, 0x0591, 0x05A3, 0x05BB, 0x05C1, 0x064B, 0x0660, + 0x06D6, 0x06DF, 0x06E7, 0x06EA, 0x06F0, 0x0730, 0x07A6, 0x0901, 0x093E, 0x0951, + 0x0962, 0x0966, 0x0981, 0x09BE, 0x09C7, 0x09CB, 0x09E2, 0x09E6, 0x0A3E, 0x0A47, + 0x0A4B, 0x0A66, 0x0A81, 0x0ABE, 0x0AC7, 0x0ACB, 0x0AE6, 0x0B01, 0x0B3E, 0x0B47, + 0x0B4B, 0x0B56, 0x0B66, 0x0B82, 0x0BBE, 0x0BC6, 0x0BCA, 0x0BE7, 0x0C01, 0x0C3E, + 0x0C46, 0x0C4A, 0x0C55, 0x0C66, 0x0C82, 0x0CBE, 0x0CC6, 0x0CCA, 0x0CD5, 0x0CE6, + 0x0D02, 0x0D3E, 0x0D46, 0x0D4A, 0x0D66, 0x0D82, 0x0DCF, 0x0DD8, 0x0DF2, 0x0E34, + 0x0E47, 0x0E50, 0x0EB4, 0x0EBB, 0x0EC8, 0x0ED0, 0x0F18, 0x0F20, 0x0F3E, 0x0F71, + 0x0F86, 0x0F90, 0x0F99, 0x102C, 0x1036, 0x1040, 0x1056, 0x1369, 0x17B4, 0x17E0, + 0x1810, 0x203F, 0x20D0, 0x302A, 0x3099, 0xFE20, 0xFE33, 0xFE4D, 0xFF10 +}; + +/** + * Character interval lengths for non-letter characters + * that can be used as a non-first character of an identifier.
+ * + * The characters covered by these intervals are from + * the following Unicode categories: Nd, Mn, Mc, Pc + */ +static const uint8_t unicode_non_letter_ident_part_interv_lens[] = +{ + 78, 2, 3, 16, 22, 2, 1, 10, 9, + 6, 5, 1, 3, 9, 26, 10, 2, 15, 3, + 1, 9, 2, 6, 1, 2, 1, 9, 4, 1, + 2, 11, 2, 7, 2, 2, 9, 2, 5, 1, + 2, 1, 9, 1, 4, 2, 3, 8, 2, 6, + 2, 3, 1, 9, 1, 6, 2, 3, 1, 9, + 1, 5, 2, 3, 9, 1, 5, 7, 1, 6, + 7, 9, 5, 1, 5, 9, 1, 9, 1, 19, + 1, 7, 35, 6, 3, 9, 3, 8, 31, 9, + 9, 1, 12, 5, 1, 3, 1, 2, 9 +}; + +/** + * Those non-letter characters that can be used as a non-first + * character of an identifier and not included in any of the intervals + * specified in the unicode_non_letter_ident_part_interv_sps and + * unicode_non_letter_ident_part_interv_lens arrays. + * + * The characters are from the following Unicode categories: + * Nd, Mn, Mc, Pc + */ +static const uint16_t unicode_non_letter_ident_part_chars[] = +{ + 0x005F, 0x05BF, 0x05C4, 0x0670, 0x0711, 0x093C, 0x09BC, 0x09D7, 0x0A02, 0x0A3C, + 0x0ABC, 0x0B3C, 0x0BD7, 0x0D57, 0x0DCA, 0x0DD6, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, + 0x0F39, 0x0FC6, 0x18A9, 0x20E1, 0x30FB, 0xFB1E, 0xFF3F, 0xFF65 +}; /** - * "Letter, Lowercase" category + * Unicode separator character interval starting points from Unicode category: Zs */ -#ifndef LIT_UNICODE_RANGE_LL -# define LIT_UNICODE_RANGE_LL(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_LL */ -LIT_UNICODE_RANGE_LL (0x0061, 0x007A) /* LATIN SMALL LETTER A - <---> - LATIN SMALL LETTER Z */ - -LIT_UNICODE_RANGE_LL (0x00AA, 0x00AA) /* FEMININE ORDINAL INDICATOR */ - -LIT_UNICODE_RANGE_LL (0x00B5, 0x00B5) /* MICRO SIGN */ - -LIT_UNICODE_RANGE_LL (0x00BA, 0x00BA) /* MASCULINE ORDINAL INDICATOR */ - -LIT_UNICODE_RANGE_LL (0x00DF, 0x00F6) /* LATIN SMALL LETTER SHARP S - <---> - LATIN SMALL LETTER O WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x00F8, 0x00FF) /* LATIN SMALL LETTER O WITH STROKE - <---> - LATIN SMALL LETTER Y WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x0101, 0x0101) /* LATIN SMALL LETTER A WITH MACRON */ -
-LIT_UNICODE_RANGE_LL (0x0103, 0x0103) /* LATIN SMALL LETTER A WITH BREVE */ - -LIT_UNICODE_RANGE_LL (0x0105, 0x0105) /* LATIN SMALL LETTER A WITH OGONEK */ - -LIT_UNICODE_RANGE_LL (0x0107, 0x0107) /* LATIN SMALL LETTER C WITH ACUTE */ - -LIT_UNICODE_RANGE_LL (0x0109, 0x0109) /* LATIN SMALL LETTER C WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LL (0x010B, 0x010B) /* LATIN SMALL LETTER C WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x010D, 0x010D) /* LATIN SMALL LETTER C WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x010F, 0x010F) /* LATIN SMALL LETTER D WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x0111, 0x0111) /* LATIN SMALL LETTER D WITH STROKE */ - -LIT_UNICODE_RANGE_LL (0x0113, 0x0113) /* LATIN SMALL LETTER E WITH MACRON */ - -LIT_UNICODE_RANGE_LL (0x0115, 0x0115) /* LATIN SMALL LETTER E WITH BREVE */ - -LIT_UNICODE_RANGE_LL (0x0117, 0x0117) /* LATIN SMALL LETTER E WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x0119, 0x0119) /* LATIN SMALL LETTER E WITH OGONEK */ - -LIT_UNICODE_RANGE_LL (0x011B, 0x011B) /* LATIN SMALL LETTER E WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x011D, 0x011D) /* LATIN SMALL LETTER G WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LL (0x011F, 0x011F) /* LATIN SMALL LETTER G WITH BREVE */ - -LIT_UNICODE_RANGE_LL (0x0121, 0x0121) /* LATIN SMALL LETTER G WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x0123, 0x0123) /* LATIN SMALL LETTER G WITH CEDILLA */ - -LIT_UNICODE_RANGE_LL (0x0125, 0x0125) /* LATIN SMALL LETTER H WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LL (0x0127, 0x0127) /* LATIN SMALL LETTER H WITH STROKE */ - -LIT_UNICODE_RANGE_LL (0x0129, 0x0129) /* LATIN SMALL LETTER I WITH TILDE */ - -LIT_UNICODE_RANGE_LL (0x012B, 0x012B) /* LATIN SMALL LETTER I WITH MACRON */ - -LIT_UNICODE_RANGE_LL (0x012D, 0x012D) /* LATIN SMALL LETTER I WITH BREVE */ - -LIT_UNICODE_RANGE_LL (0x012F, 0x012F) /* LATIN SMALL LETTER I WITH OGONEK */ - -LIT_UNICODE_RANGE_LL (0x0131, 0x0131) /* LATIN SMALL LETTER DOTLESS I */ - -LIT_UNICODE_RANGE_LL (0x0133, 0x0133) /* LATIN SMALL LIGATURE IJ */ - 
-LIT_UNICODE_RANGE_LL (0x0135, 0x0135) /* LATIN SMALL LETTER J WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LL (0x0137, 0x0138) /* LATIN SMALL LETTER K WITH CEDILLA - <---> - LATIN SMALL LETTER KRA */ - -LIT_UNICODE_RANGE_LL (0x013A, 0x013A) /* LATIN SMALL LETTER L WITH ACUTE */ - -LIT_UNICODE_RANGE_LL (0x013C, 0x013C) /* LATIN SMALL LETTER L WITH CEDILLA */ - -LIT_UNICODE_RANGE_LL (0x013E, 0x013E) /* LATIN SMALL LETTER L WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x0140, 0x0140) /* LATIN SMALL LETTER L WITH MIDDLE DOT */ - -LIT_UNICODE_RANGE_LL (0x0142, 0x0142) /* LATIN SMALL LETTER L WITH STROKE */ - -LIT_UNICODE_RANGE_LL (0x0144, 0x0144) /* LATIN SMALL LETTER N WITH ACUTE */ - -LIT_UNICODE_RANGE_LL (0x0146, 0x0146) /* LATIN SMALL LETTER N WITH CEDILLA */ - -LIT_UNICODE_RANGE_LL (0x0148, 0x0149) /* LATIN SMALL LETTER N WITH CARON - <---> - LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ - -LIT_UNICODE_RANGE_LL (0x014B, 0x014B) /* LATIN SMALL LETTER ENG */ - -LIT_UNICODE_RANGE_LL (0x014D, 0x014D) /* LATIN SMALL LETTER O WITH MACRON */ - -LIT_UNICODE_RANGE_LL (0x014F, 0x014F) /* LATIN SMALL LETTER O WITH BREVE */ - -LIT_UNICODE_RANGE_LL (0x0151, 0x0151) /* LATIN SMALL LETTER O WITH DOUBLE ACUTE */ - -LIT_UNICODE_RANGE_LL (0x0153, 0x0153) /* LATIN SMALL LIGATURE OE */ - -LIT_UNICODE_RANGE_LL (0x0155, 0x0155) /* LATIN SMALL LETTER R WITH ACUTE */ - -LIT_UNICODE_RANGE_LL (0x0157, 0x0157) /* LATIN SMALL LETTER R WITH CEDILLA */ - -LIT_UNICODE_RANGE_LL (0x0159, 0x0159) /* LATIN SMALL LETTER R WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x015B, 0x015B) /* LATIN SMALL LETTER S WITH ACUTE */ - -LIT_UNICODE_RANGE_LL (0x015D, 0x015D) /* LATIN SMALL LETTER S WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LL (0x015F, 0x015F) /* LATIN SMALL LETTER S WITH CEDILLA */ - -LIT_UNICODE_RANGE_LL (0x0161, 0x0161) /* LATIN SMALL LETTER S WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x0163, 0x0163) /* LATIN SMALL LETTER T WITH CEDILLA */ - -LIT_UNICODE_RANGE_LL (0x0165, 0x0165) /* LATIN SMALL LETTER T WITH CARON */ 
- -LIT_UNICODE_RANGE_LL (0x0167, 0x0167) /* LATIN SMALL LETTER T WITH STROKE */ - -LIT_UNICODE_RANGE_LL (0x0169, 0x0169) /* LATIN SMALL LETTER U WITH TILDE */ - -LIT_UNICODE_RANGE_LL (0x016B, 0x016B) /* LATIN SMALL LETTER U WITH MACRON */ - -LIT_UNICODE_RANGE_LL (0x016D, 0x016D) /* LATIN SMALL LETTER U WITH BREVE */ - -LIT_UNICODE_RANGE_LL (0x016F, 0x016F) /* LATIN SMALL LETTER U WITH RING ABOVE */ - -LIT_UNICODE_RANGE_LL (0x0171, 0x0171) /* LATIN SMALL LETTER U WITH DOUBLE ACUTE */ - -LIT_UNICODE_RANGE_LL (0x0173, 0x0173) /* LATIN SMALL LETTER U WITH OGONEK */ - -LIT_UNICODE_RANGE_LL (0x0175, 0x0175) /* LATIN SMALL LETTER W WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LL (0x0177, 0x0177) /* LATIN SMALL LETTER Y WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LL (0x017A, 0x017A) /* LATIN SMALL LETTER Z WITH ACUTE */ - -LIT_UNICODE_RANGE_LL (0x017C, 0x017C) /* LATIN SMALL LETTER Z WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x017E, 0x0180) /* LATIN SMALL LETTER Z WITH CARON - <---> - LATIN SMALL LETTER B WITH STROKE */ - -LIT_UNICODE_RANGE_LL (0x0183, 0x0183) /* LATIN SMALL LETTER B WITH TOPBAR */ - -LIT_UNICODE_RANGE_LL (0x0185, 0x0185) /* LATIN SMALL LETTER TONE SIX */ - -LIT_UNICODE_RANGE_LL (0x0188, 0x0188) /* LATIN SMALL LETTER C WITH HOOK */ - -LIT_UNICODE_RANGE_LL (0x018C, 0x018D) /* LATIN SMALL LETTER D WITH TOPBAR - <---> - LATIN SMALL LETTER TURNED DELTA */ - -LIT_UNICODE_RANGE_LL (0x0192, 0x0192) /* LATIN SMALL LETTER F WITH HOOK */ - -LIT_UNICODE_RANGE_LL (0x0195, 0x0195) /* LATIN SMALL LETTER HV */ - -LIT_UNICODE_RANGE_LL (0x0199, 0x019B) /* LATIN SMALL LETTER K WITH HOOK - <---> - LATIN SMALL LETTER LAMBDA WITH STROKE */ - -LIT_UNICODE_RANGE_LL (0x019E, 0x019E) /* LATIN SMALL LETTER N WITH LONG RIGHT LEG */ - -LIT_UNICODE_RANGE_LL (0x01A1, 0x01A1) /* LATIN SMALL LETTER O WITH HORN */ - -LIT_UNICODE_RANGE_LL (0x01A3, 0x01A3) /* LATIN SMALL LETTER OI */ - -LIT_UNICODE_RANGE_LL (0x01A5, 0x01A5) /* LATIN SMALL LETTER P WITH HOOK */ - -LIT_UNICODE_RANGE_LL (0x01A8, 
0x01A8) /* LATIN SMALL LETTER TONE TWO */ - -LIT_UNICODE_RANGE_LL (0x01AA, 0x01AB) /* LATIN LETTER REVERSED ESH LOOP - <---> - LATIN SMALL LETTER T WITH PALATAL HOOK */ - -LIT_UNICODE_RANGE_LL (0x01AD, 0x01AD) /* LATIN SMALL LETTER T WITH HOOK */ - -LIT_UNICODE_RANGE_LL (0x01B0, 0x01B0) /* LATIN SMALL LETTER U WITH HORN */ - -LIT_UNICODE_RANGE_LL (0x01B4, 0x01B4) /* LATIN SMALL LETTER Y WITH HOOK */ - -LIT_UNICODE_RANGE_LL (0x01B6, 0x01B6) /* LATIN SMALL LETTER Z WITH STROKE */ - -LIT_UNICODE_RANGE_LL (0x01B9, 0x01BA) /* LATIN SMALL LETTER EZH REVERSED - <---> - LATIN SMALL LETTER EZH WITH TAIL */ - -LIT_UNICODE_RANGE_LL (0x01BD, 0x01BF) /* LATIN SMALL LETTER TONE FIVE - <---> - LATIN LETTER WYNN */ - -LIT_UNICODE_RANGE_LL (0x01C6, 0x01C6) /* LATIN SMALL LETTER DZ WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x01C9, 0x01C9) /* LATIN SMALL LETTER LJ */ - -LIT_UNICODE_RANGE_LL (0x01CC, 0x01CC) /* LATIN SMALL LETTER NJ */ - -LIT_UNICODE_RANGE_LL (0x01CE, 0x01CE) /* LATIN SMALL LETTER A WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x01D0, 0x01D0) /* LATIN SMALL LETTER I WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x01D2, 0x01D2) /* LATIN SMALL LETTER O WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x01D4, 0x01D4) /* LATIN SMALL LETTER U WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x01D6, 0x01D6) /* LATIN SMALL LETTER U WITH DIAERESIS AND MACRON */ - -LIT_UNICODE_RANGE_LL (0x01D8, 0x01D8) /* LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x01DA, 0x01DA) /* LATIN SMALL LETTER U WITH DIAERESIS AND CARON */ - -LIT_UNICODE_RANGE_LL (0x01DC, 0x01DD) /* LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE - <---> - LATIN SMALL LETTER TURNED E */ - -LIT_UNICODE_RANGE_LL (0x01DF, 0x01DF) /* LATIN SMALL LETTER A WITH DIAERESIS AND MACRON */ - -LIT_UNICODE_RANGE_LL (0x01E1, 0x01E1) /* LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON */ - -LIT_UNICODE_RANGE_LL (0x01E3, 0x01E3) /* LATIN SMALL LETTER AE WITH MACRON */ - -LIT_UNICODE_RANGE_LL (0x01E5, 0x01E5) /* LATIN SMALL LETTER G WITH STROKE 
*/ - -LIT_UNICODE_RANGE_LL (0x01E7, 0x01E7) /* LATIN SMALL LETTER G WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x01E9, 0x01E9) /* LATIN SMALL LETTER K WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x01EB, 0x01EB) /* LATIN SMALL LETTER O WITH OGONEK */ - -LIT_UNICODE_RANGE_LL (0x01ED, 0x01ED) /* LATIN SMALL LETTER O WITH OGONEK AND MACRON */ - -LIT_UNICODE_RANGE_LL (0x01EF, 0x01F0) /* LATIN SMALL LETTER EZH WITH CARON - <---> - LATIN SMALL LETTER J WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x01F3, 0x01F3) /* LATIN SMALL LETTER DZ */ - -LIT_UNICODE_RANGE_LL (0x01F5, 0x01F5) /* LATIN SMALL LETTER G WITH ACUTE */ - -LIT_UNICODE_RANGE_LL (0x01F9, 0x01F9) /* LATIN SMALL LETTER N WITH GRAVE */ - -LIT_UNICODE_RANGE_LL (0x01FB, 0x01FB) /* LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x01FD, 0x01FD) /* LATIN SMALL LETTER AE WITH ACUTE */ - -LIT_UNICODE_RANGE_LL (0x01FF, 0x01FF) /* LATIN SMALL LETTER O WITH STROKE AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x0201, 0x0201) /* LATIN SMALL LETTER A WITH DOUBLE GRAVE */ - -LIT_UNICODE_RANGE_LL (0x0203, 0x0203) /* LATIN SMALL LETTER A WITH INVERTED BREVE */ - -LIT_UNICODE_RANGE_LL (0x0205, 0x0205) /* LATIN SMALL LETTER E WITH DOUBLE GRAVE */ - -LIT_UNICODE_RANGE_LL (0x0207, 0x0207) /* LATIN SMALL LETTER E WITH INVERTED BREVE */ - -LIT_UNICODE_RANGE_LL (0x0209, 0x0209) /* LATIN SMALL LETTER I WITH DOUBLE GRAVE */ - -LIT_UNICODE_RANGE_LL (0x020B, 0x020B) /* LATIN SMALL LETTER I WITH INVERTED BREVE */ - -LIT_UNICODE_RANGE_LL (0x020D, 0x020D) /* LATIN SMALL LETTER O WITH DOUBLE GRAVE */ - -LIT_UNICODE_RANGE_LL (0x020F, 0x020F) /* LATIN SMALL LETTER O WITH INVERTED BREVE */ - -LIT_UNICODE_RANGE_LL (0x0211, 0x0211) /* LATIN SMALL LETTER R WITH DOUBLE GRAVE */ - -LIT_UNICODE_RANGE_LL (0x0213, 0x0213) /* LATIN SMALL LETTER R WITH INVERTED BREVE */ - -LIT_UNICODE_RANGE_LL (0x0215, 0x0215) /* LATIN SMALL LETTER U WITH DOUBLE GRAVE */ - -LIT_UNICODE_RANGE_LL (0x0217, 0x0217) /* LATIN SMALL LETTER U WITH INVERTED BREVE */ - 
-LIT_UNICODE_RANGE_LL (0x0219, 0x0219) /* LATIN SMALL LETTER S WITH COMMA BELOW */ - -LIT_UNICODE_RANGE_LL (0x021B, 0x021B) /* LATIN SMALL LETTER T WITH COMMA BELOW */ - -LIT_UNICODE_RANGE_LL (0x021D, 0x021D) /* LATIN SMALL LETTER YOGH */ - -LIT_UNICODE_RANGE_LL (0x021F, 0x021F) /* LATIN SMALL LETTER H WITH CARON */ - -LIT_UNICODE_RANGE_LL (0x0223, 0x0223) /* LATIN SMALL LETTER OU */ - -LIT_UNICODE_RANGE_LL (0x0225, 0x0225) /* LATIN SMALL LETTER Z WITH HOOK */ - -LIT_UNICODE_RANGE_LL (0x0227, 0x0227) /* LATIN SMALL LETTER A WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x0229, 0x0229) /* LATIN SMALL LETTER E WITH CEDILLA */ - -LIT_UNICODE_RANGE_LL (0x022B, 0x022B) /* LATIN SMALL LETTER O WITH DIAERESIS AND MACRON */ - -LIT_UNICODE_RANGE_LL (0x022D, 0x022D) /* LATIN SMALL LETTER O WITH TILDE AND MACRON */ - -LIT_UNICODE_RANGE_LL (0x022F, 0x022F) /* LATIN SMALL LETTER O WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x0231, 0x0231) /* LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON */ - -LIT_UNICODE_RANGE_LL (0x0233, 0x0233) /* LATIN SMALL LETTER Y WITH MACRON */ - -LIT_UNICODE_RANGE_LL (0x0250, 0x02AD) /* LATIN SMALL LETTER TURNED A - <---> - LATIN LETTER BIDENTAL PERCUSSIVE */ - -LIT_UNICODE_RANGE_LL (0x0390, 0x0390) /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */ - -LIT_UNICODE_RANGE_LL (0x03AC, 0x03CE) /* GREEK SMALL LETTER ALPHA WITH TONOS - <---> - GREEK SMALL LETTER OMEGA WITH TONOS */ - -LIT_UNICODE_RANGE_LL (0x03D0, 0x03D1) /* GREEK BETA SYMBOL - <---> - GREEK THETA SYMBOL */ - -LIT_UNICODE_RANGE_LL (0x03D5, 0x03D7) /* GREEK PHI SYMBOL - <---> - GREEK KAI SYMBOL */ - -LIT_UNICODE_RANGE_LL (0x03DB, 0x03DB) /* GREEK SMALL LETTER STIGMA */ - -LIT_UNICODE_RANGE_LL (0x03DD, 0x03DD) /* GREEK SMALL LETTER DIGAMMA */ - -LIT_UNICODE_RANGE_LL (0x03DF, 0x03DF) /* GREEK SMALL LETTER KOPPA */ - -LIT_UNICODE_RANGE_LL (0x03E1, 0x03E1) /* GREEK SMALL LETTER SAMPI */ - -LIT_UNICODE_RANGE_LL (0x03E3, 0x03E3) /* COPTIC SMALL LETTER SHEI */ - -LIT_UNICODE_RANGE_LL (0x03E5, 
0x03E5) /* COPTIC SMALL LETTER FEI */ - -LIT_UNICODE_RANGE_LL (0x03E7, 0x03E7) /* COPTIC SMALL LETTER KHEI */ - -LIT_UNICODE_RANGE_LL (0x03E9, 0x03E9) /* COPTIC SMALL LETTER HORI */ - -LIT_UNICODE_RANGE_LL (0x03EB, 0x03EB) /* COPTIC SMALL LETTER GANGIA */ - -LIT_UNICODE_RANGE_LL (0x03ED, 0x03ED) /* COPTIC SMALL LETTER SHIMA */ - -LIT_UNICODE_RANGE_LL (0x03EF, 0x03F3) /* COPTIC SMALL LETTER DEI - <---> - GREEK LETTER YOT */ - -LIT_UNICODE_RANGE_LL (0x0430, 0x045F) /* CYRILLIC SMALL LETTER A - <---> - CYRILLIC SMALL LETTER DZHE */ - -LIT_UNICODE_RANGE_LL (0x0461, 0x0461) /* CYRILLIC SMALL LETTER OMEGA */ - -LIT_UNICODE_RANGE_LL (0x0463, 0x0463) /* CYRILLIC SMALL LETTER YAT */ - -LIT_UNICODE_RANGE_LL (0x0465, 0x0465) /* CYRILLIC SMALL LETTER IOTIFIED E */ - -LIT_UNICODE_RANGE_LL (0x0467, 0x0467) /* CYRILLIC SMALL LETTER LITTLE YUS */ - -LIT_UNICODE_RANGE_LL (0x0469, 0x0469) /* CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS */ - -LIT_UNICODE_RANGE_LL (0x046B, 0x046B) /* CYRILLIC SMALL LETTER BIG YUS */ - -LIT_UNICODE_RANGE_LL (0x046D, 0x046D) /* CYRILLIC SMALL LETTER IOTIFIED BIG YUS */ - -LIT_UNICODE_RANGE_LL (0x046F, 0x046F) /* CYRILLIC SMALL LETTER KSI */ - -LIT_UNICODE_RANGE_LL (0x0471, 0x0471) /* CYRILLIC SMALL LETTER PSI */ - -LIT_UNICODE_RANGE_LL (0x0473, 0x0473) /* CYRILLIC SMALL LETTER FITA */ - -LIT_UNICODE_RANGE_LL (0x0475, 0x0475) /* CYRILLIC SMALL LETTER IZHITSA */ - -LIT_UNICODE_RANGE_LL (0x0477, 0x0477) /* CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT */ - -LIT_UNICODE_RANGE_LL (0x0479, 0x0479) /* CYRILLIC SMALL LETTER UK */ - -LIT_UNICODE_RANGE_LL (0x047B, 0x047B) /* CYRILLIC SMALL LETTER ROUND OMEGA */ - -LIT_UNICODE_RANGE_LL (0x047D, 0x047D) /* CYRILLIC SMALL LETTER OMEGA WITH TITLO */ - -LIT_UNICODE_RANGE_LL (0x047F, 0x047F) /* CYRILLIC SMALL LETTER OT */ - -LIT_UNICODE_RANGE_LL (0x0481, 0x0481) /* CYRILLIC SMALL LETTER KOPPA */ - -LIT_UNICODE_RANGE_LL (0x048D, 0x048D) /* CYRILLIC SMALL LETTER SEMISOFT SIGN */ - -LIT_UNICODE_RANGE_LL (0x048F, 
0x048F) /* CYRILLIC SMALL LETTER ER WITH TICK */ - -LIT_UNICODE_RANGE_LL (0x0491, 0x0491) /* CYRILLIC SMALL LETTER GHE WITH UPTURN */ - -LIT_UNICODE_RANGE_LL (0x0493, 0x0493) /* CYRILLIC SMALL LETTER GHE WITH STROKE */ - -LIT_UNICODE_RANGE_LL (0x0495, 0x0495) /* CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK */ - -LIT_UNICODE_RANGE_LL (0x0497, 0x0497) /* CYRILLIC SMALL LETTER ZHE WITH DESCENDER */ - -LIT_UNICODE_RANGE_LL (0x0499, 0x0499) /* CYRILLIC SMALL LETTER ZE WITH DESCENDER */ - -LIT_UNICODE_RANGE_LL (0x049B, 0x049B) /* CYRILLIC SMALL LETTER KA WITH DESCENDER */ - -LIT_UNICODE_RANGE_LL (0x049D, 0x049D) /* CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE */ - -LIT_UNICODE_RANGE_LL (0x049F, 0x049F) /* CYRILLIC SMALL LETTER KA WITH STROKE */ - -LIT_UNICODE_RANGE_LL (0x04A1, 0x04A1) /* CYRILLIC SMALL LETTER BASHKIR KA */ - -LIT_UNICODE_RANGE_LL (0x04A3, 0x04A3) /* CYRILLIC SMALL LETTER EN WITH DESCENDER */ - -LIT_UNICODE_RANGE_LL (0x04A5, 0x04A5) /* CYRILLIC SMALL LIGATURE EN GHE */ - -LIT_UNICODE_RANGE_LL (0x04A7, 0x04A7) /* CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK */ - -LIT_UNICODE_RANGE_LL (0x04A9, 0x04A9) /* CYRILLIC SMALL LETTER ABKHASIAN HA */ - -LIT_UNICODE_RANGE_LL (0x04AB, 0x04AB) /* CYRILLIC SMALL LETTER ES WITH DESCENDER */ - -LIT_UNICODE_RANGE_LL (0x04AD, 0x04AD) /* CYRILLIC SMALL LETTER TE WITH DESCENDER */ - -LIT_UNICODE_RANGE_LL (0x04AF, 0x04AF) /* CYRILLIC SMALL LETTER STRAIGHT U */ - -LIT_UNICODE_RANGE_LL (0x04B1, 0x04B1) /* CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE */ - -LIT_UNICODE_RANGE_LL (0x04B3, 0x04B3) /* CYRILLIC SMALL LETTER HA WITH DESCENDER */ - -LIT_UNICODE_RANGE_LL (0x04B5, 0x04B5) /* CYRILLIC SMALL LIGATURE TE TSE */ - -LIT_UNICODE_RANGE_LL (0x04B7, 0x04B7) /* CYRILLIC SMALL LETTER CHE WITH DESCENDER */ - -LIT_UNICODE_RANGE_LL (0x04B9, 0x04B9) /* CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE */ - -LIT_UNICODE_RANGE_LL (0x04BB, 0x04BB) /* CYRILLIC SMALL LETTER SHHA */ - -LIT_UNICODE_RANGE_LL (0x04BD, 0x04BD) /* CYRILLIC SMALL LETTER 
ABKHASIAN CHE */ - -LIT_UNICODE_RANGE_LL (0x04BF, 0x04BF) /* CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER */ - -LIT_UNICODE_RANGE_LL (0x04C2, 0x04C2) /* CYRILLIC SMALL LETTER ZHE WITH BREVE */ - -LIT_UNICODE_RANGE_LL (0x04C4, 0x04C4) /* CYRILLIC SMALL LETTER KA WITH HOOK */ - -LIT_UNICODE_RANGE_LL (0x04C8, 0x04C8) /* CYRILLIC SMALL LETTER EN WITH HOOK */ - -LIT_UNICODE_RANGE_LL (0x04CC, 0x04CC) /* CYRILLIC SMALL LETTER KHAKASSIAN CHE */ - -LIT_UNICODE_RANGE_LL (0x04D1, 0x04D1) /* CYRILLIC SMALL LETTER A WITH BREVE */ - -LIT_UNICODE_RANGE_LL (0x04D3, 0x04D3) /* CYRILLIC SMALL LETTER A WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x04D5, 0x04D5) /* CYRILLIC SMALL LIGATURE A IE */ - -LIT_UNICODE_RANGE_LL (0x04D7, 0x04D7) /* CYRILLIC SMALL LETTER IE WITH BREVE */ - -LIT_UNICODE_RANGE_LL (0x04D9, 0x04D9) /* CYRILLIC SMALL LETTER SCHWA */ - -LIT_UNICODE_RANGE_LL (0x04DB, 0x04DB) /* CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x04DD, 0x04DD) /* CYRILLIC SMALL LETTER ZHE WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x04DF, 0x04DF) /* CYRILLIC SMALL LETTER ZE WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x04E1, 0x04E1) /* CYRILLIC SMALL LETTER ABKHASIAN DZE */ - -LIT_UNICODE_RANGE_LL (0x04E3, 0x04E3) /* CYRILLIC SMALL LETTER I WITH MACRON */ - -LIT_UNICODE_RANGE_LL (0x04E5, 0x04E5) /* CYRILLIC SMALL LETTER I WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x04E7, 0x04E7) /* CYRILLIC SMALL LETTER O WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x04E9, 0x04E9) /* CYRILLIC SMALL LETTER BARRED O */ - -LIT_UNICODE_RANGE_LL (0x04EB, 0x04EB) /* CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x04ED, 0x04ED) /* CYRILLIC SMALL LETTER E WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x04EF, 0x04EF) /* CYRILLIC SMALL LETTER U WITH MACRON */ - -LIT_UNICODE_RANGE_LL (0x04F1, 0x04F1) /* CYRILLIC SMALL LETTER U WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x04F3, 0x04F3) /* CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE */ - -LIT_UNICODE_RANGE_LL (0x04F5, 
0x04F5) /* CYRILLIC SMALL LETTER CHE WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x04F9, 0x04F9) /* CYRILLIC SMALL LETTER YERU WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x0561, 0x0587) /* ARMENIAN SMALL LETTER AYB - <---> - ARMENIAN SMALL LIGATURE ECH YIWN */ - -LIT_UNICODE_RANGE_LL (0x1E01, 0x1E01) /* LATIN SMALL LETTER A WITH RING BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E03, 0x1E03) /* LATIN SMALL LETTER B WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E05, 0x1E05) /* LATIN SMALL LETTER B WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E07, 0x1E07) /* LATIN SMALL LETTER B WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E09, 0x1E09) /* LATIN SMALL LETTER C WITH CEDILLA AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1E0B, 0x1E0B) /* LATIN SMALL LETTER D WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E0D, 0x1E0D) /* LATIN SMALL LETTER D WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E0F, 0x1E0F) /* LATIN SMALL LETTER D WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E11, 0x1E11) /* LATIN SMALL LETTER D WITH CEDILLA */ - -LIT_UNICODE_RANGE_LL (0x1E13, 0x1E13) /* LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E15, 0x1E15) /* LATIN SMALL LETTER E WITH MACRON AND GRAVE */ - -LIT_UNICODE_RANGE_LL (0x1E17, 0x1E17) /* LATIN SMALL LETTER E WITH MACRON AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1E19, 0x1E19) /* LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E1B, 0x1E1B) /* LATIN SMALL LETTER E WITH TILDE BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E1D, 0x1E1D) /* LATIN SMALL LETTER E WITH CEDILLA AND BREVE */ - -LIT_UNICODE_RANGE_LL (0x1E1F, 0x1E1F) /* LATIN SMALL LETTER F WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E21, 0x1E21) /* LATIN SMALL LETTER G WITH MACRON */ - -LIT_UNICODE_RANGE_LL (0x1E23, 0x1E23) /* LATIN SMALL LETTER H WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E25, 0x1E25) /* LATIN SMALL LETTER H WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E27, 0x1E27) /* LATIN SMALL LETTER H WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x1E29, 
0x1E29) /* LATIN SMALL LETTER H WITH CEDILLA */ - -LIT_UNICODE_RANGE_LL (0x1E2B, 0x1E2B) /* LATIN SMALL LETTER H WITH BREVE BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E2D, 0x1E2D) /* LATIN SMALL LETTER I WITH TILDE BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E2F, 0x1E2F) /* LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1E31, 0x1E31) /* LATIN SMALL LETTER K WITH ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1E33, 0x1E33) /* LATIN SMALL LETTER K WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E35, 0x1E35) /* LATIN SMALL LETTER K WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E37, 0x1E37) /* LATIN SMALL LETTER L WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E39, 0x1E39) /* LATIN SMALL LETTER L WITH DOT BELOW AND MACRON */ - -LIT_UNICODE_RANGE_LL (0x1E3B, 0x1E3B) /* LATIN SMALL LETTER L WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E3D, 0x1E3D) /* LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E3F, 0x1E3F) /* LATIN SMALL LETTER M WITH ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1E41, 0x1E41) /* LATIN SMALL LETTER M WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E43, 0x1E43) /* LATIN SMALL LETTER M WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E45, 0x1E45) /* LATIN SMALL LETTER N WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E47, 0x1E47) /* LATIN SMALL LETTER N WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E49, 0x1E49) /* LATIN SMALL LETTER N WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E4B, 0x1E4B) /* LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E4D, 0x1E4D) /* LATIN SMALL LETTER O WITH TILDE AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1E4F, 0x1E4F) /* LATIN SMALL LETTER O WITH TILDE AND DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x1E51, 0x1E51) /* LATIN SMALL LETTER O WITH MACRON AND GRAVE */ - -LIT_UNICODE_RANGE_LL (0x1E53, 0x1E53) /* LATIN SMALL LETTER O WITH MACRON AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1E55, 0x1E55) /* LATIN SMALL LETTER P WITH ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1E57, 0x1E57) /* LATIN SMALL LETTER P 
WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E59, 0x1E59) /* LATIN SMALL LETTER R WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E5B, 0x1E5B) /* LATIN SMALL LETTER R WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E5D, 0x1E5D) /* LATIN SMALL LETTER R WITH DOT BELOW AND MACRON */ - -LIT_UNICODE_RANGE_LL (0x1E5F, 0x1E5F) /* LATIN SMALL LETTER R WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E61, 0x1E61) /* LATIN SMALL LETTER S WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E63, 0x1E63) /* LATIN SMALL LETTER S WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E65, 0x1E65) /* LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E67, 0x1E67) /* LATIN SMALL LETTER S WITH CARON AND DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E69, 0x1E69) /* LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E6B, 0x1E6B) /* LATIN SMALL LETTER T WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E6D, 0x1E6D) /* LATIN SMALL LETTER T WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E6F, 0x1E6F) /* LATIN SMALL LETTER T WITH LINE BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E71, 0x1E71) /* LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E73, 0x1E73) /* LATIN SMALL LETTER U WITH DIAERESIS BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E75, 0x1E75) /* LATIN SMALL LETTER U WITH TILDE BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E77, 0x1E77) /* LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E79, 0x1E79) /* LATIN SMALL LETTER U WITH TILDE AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1E7B, 0x1E7B) /* LATIN SMALL LETTER U WITH MACRON AND DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x1E7D, 0x1E7D) /* LATIN SMALL LETTER V WITH TILDE */ - -LIT_UNICODE_RANGE_LL (0x1E7F, 0x1E7F) /* LATIN SMALL LETTER V WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E81, 0x1E81) /* LATIN SMALL LETTER W WITH GRAVE */ - -LIT_UNICODE_RANGE_LL (0x1E83, 0x1E83) /* LATIN SMALL LETTER W WITH ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1E85, 0x1E85) /* LATIN SMALL LETTER W WITH DIAERESIS 
*/ - -LIT_UNICODE_RANGE_LL (0x1E87, 0x1E87) /* LATIN SMALL LETTER W WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E89, 0x1E89) /* LATIN SMALL LETTER W WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E8B, 0x1E8B) /* LATIN SMALL LETTER X WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E8D, 0x1E8D) /* LATIN SMALL LETTER X WITH DIAERESIS */ - -LIT_UNICODE_RANGE_LL (0x1E8F, 0x1E8F) /* LATIN SMALL LETTER Y WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1E91, 0x1E91) /* LATIN SMALL LETTER Z WITH CIRCUMFLEX */ - -LIT_UNICODE_RANGE_LL (0x1E93, 0x1E93) /* LATIN SMALL LETTER Z WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1E95, 0x1E9B) /* LATIN SMALL LETTER Z WITH LINE BELOW - <---> - LATIN SMALL LETTER LONG S WITH DOT ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1EA1, 0x1EA1) /* LATIN SMALL LETTER A WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1EA3, 0x1EA3) /* LATIN SMALL LETTER A WITH HOOK ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1EA5, 0x1EA5) /* LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1EA7, 0x1EA7) /* LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE */ - -LIT_UNICODE_RANGE_LL (0x1EA9, 0x1EA9) /* LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1EAB, 0x1EAB) /* LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE */ - -LIT_UNICODE_RANGE_LL (0x1EAD, 0x1EAD) /* LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1EAF, 0x1EAF) /* LATIN SMALL LETTER A WITH BREVE AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1EB1, 0x1EB1) /* LATIN SMALL LETTER A WITH BREVE AND GRAVE */ - -LIT_UNICODE_RANGE_LL (0x1EB3, 0x1EB3) /* LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1EB5, 0x1EB5) /* LATIN SMALL LETTER A WITH BREVE AND TILDE */ - -LIT_UNICODE_RANGE_LL (0x1EB7, 0x1EB7) /* LATIN SMALL LETTER A WITH BREVE AND DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1EB9, 0x1EB9) /* LATIN SMALL LETTER E WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1EBB, 0x1EBB) /* LATIN SMALL LETTER E WITH HOOK ABOVE */ - 
-LIT_UNICODE_RANGE_LL (0x1EBD, 0x1EBD) /* LATIN SMALL LETTER E WITH TILDE */ - -LIT_UNICODE_RANGE_LL (0x1EBF, 0x1EBF) /* LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1EC1, 0x1EC1) /* LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE */ - -LIT_UNICODE_RANGE_LL (0x1EC3, 0x1EC3) /* LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1EC5, 0x1EC5) /* LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE */ - -LIT_UNICODE_RANGE_LL (0x1EC7, 0x1EC7) /* LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1EC9, 0x1EC9) /* LATIN SMALL LETTER I WITH HOOK ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1ECB, 0x1ECB) /* LATIN SMALL LETTER I WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1ECD, 0x1ECD) /* LATIN SMALL LETTER O WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1ECF, 0x1ECF) /* LATIN SMALL LETTER O WITH HOOK ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1ED1, 0x1ED1) /* LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1ED3, 0x1ED3) /* LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE */ - -LIT_UNICODE_RANGE_LL (0x1ED5, 0x1ED5) /* LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1ED7, 0x1ED7) /* LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE */ - -LIT_UNICODE_RANGE_LL (0x1ED9, 0x1ED9) /* LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1EDB, 0x1EDB) /* LATIN SMALL LETTER O WITH HORN AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1EDD, 0x1EDD) /* LATIN SMALL LETTER O WITH HORN AND GRAVE */ - -LIT_UNICODE_RANGE_LL (0x1EDF, 0x1EDF) /* LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1EE1, 0x1EE1) /* LATIN SMALL LETTER O WITH HORN AND TILDE */ - -LIT_UNICODE_RANGE_LL (0x1EE3, 0x1EE3) /* LATIN SMALL LETTER O WITH HORN AND DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1EE5, 0x1EE5) /* LATIN SMALL LETTER U WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1EE7, 0x1EE7) /* LATIN SMALL LETTER U WITH HOOK ABOVE */ - 
-LIT_UNICODE_RANGE_LL (0x1EE9, 0x1EE9) /* LATIN SMALL LETTER U WITH HORN AND ACUTE */ - -LIT_UNICODE_RANGE_LL (0x1EEB, 0x1EEB) /* LATIN SMALL LETTER U WITH HORN AND GRAVE */ - -LIT_UNICODE_RANGE_LL (0x1EED, 0x1EED) /* LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1EEF, 0x1EEF) /* LATIN SMALL LETTER U WITH HORN AND TILDE */ - -LIT_UNICODE_RANGE_LL (0x1EF1, 0x1EF1) /* LATIN SMALL LETTER U WITH HORN AND DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1EF3, 0x1EF3) /* LATIN SMALL LETTER Y WITH GRAVE */ - -LIT_UNICODE_RANGE_LL (0x1EF5, 0x1EF5) /* LATIN SMALL LETTER Y WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LL (0x1EF7, 0x1EF7) /* LATIN SMALL LETTER Y WITH HOOK ABOVE */ - -LIT_UNICODE_RANGE_LL (0x1EF9, 0x1EF9) /* LATIN SMALL LETTER Y WITH TILDE */ - -LIT_UNICODE_RANGE_LL (0x1F00, 0x1F07) /* GREEK SMALL LETTER ALPHA WITH PSILI - <---> - GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI */ - -LIT_UNICODE_RANGE_LL (0x1F10, 0x1F15) /* GREEK SMALL LETTER EPSILON WITH PSILI - <---> - GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA */ - -LIT_UNICODE_RANGE_LL (0x1F20, 0x1F27) /* GREEK SMALL LETTER ETA WITH PSILI - <---> - GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI */ - -LIT_UNICODE_RANGE_LL (0x1F30, 0x1F37) /* GREEK SMALL LETTER IOTA WITH PSILI - <---> - GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI */ - -LIT_UNICODE_RANGE_LL (0x1F40, 0x1F45) /* GREEK SMALL LETTER OMICRON WITH PSILI - <---> - GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA */ - -LIT_UNICODE_RANGE_LL (0x1F50, 0x1F57) /* GREEK SMALL LETTER UPSILON WITH PSILI - <---> - GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI */ - -LIT_UNICODE_RANGE_LL (0x1F60, 0x1F67) /* GREEK SMALL LETTER OMEGA WITH PSILI - <---> - GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI */ - -LIT_UNICODE_RANGE_LL (0x1F70, 0x1F7D) /* GREEK SMALL LETTER ALPHA WITH VARIA - <---> - GREEK SMALL LETTER OMEGA WITH OXIA */ - -LIT_UNICODE_RANGE_LL (0x1F80, 0x1F87) /* GREEK SMALL LETTER ALPHA WITH PSILI AND 
YPOGEGRAMMENI - <---> - GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI */ - -LIT_UNICODE_RANGE_LL (0x1F90, 0x1F97) /* GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI - <---> - GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI */ - -LIT_UNICODE_RANGE_LL (0x1FA0, 0x1FA7) /* GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI - <---> - GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI */ - -LIT_UNICODE_RANGE_LL (0x1FB0, 0x1FB4) /* GREEK SMALL LETTER ALPHA WITH VRACHY - <---> - GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI */ - -LIT_UNICODE_RANGE_LL (0x1FB6, 0x1FB7) /* GREEK SMALL LETTER ALPHA WITH PERISPOMENI - <---> - GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI */ - -LIT_UNICODE_RANGE_LL (0x1FBE, 0x1FBE) /* GREEK PROSGEGRAMMENI */ - -LIT_UNICODE_RANGE_LL (0x1FC2, 0x1FC4) /* GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI - <---> - GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI */ - -LIT_UNICODE_RANGE_LL (0x1FC6, 0x1FC7) /* GREEK SMALL LETTER ETA WITH PERISPOMENI - <---> - GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI */ - -LIT_UNICODE_RANGE_LL (0x1FD0, 0x1FD3) /* GREEK SMALL LETTER IOTA WITH VRACHY - <---> - GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA */ - -LIT_UNICODE_RANGE_LL (0x1FD6, 0x1FD7) /* GREEK SMALL LETTER IOTA WITH PERISPOMENI - <---> - GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI */ - -LIT_UNICODE_RANGE_LL (0x1FE0, 0x1FE7) /* GREEK SMALL LETTER UPSILON WITH VRACHY - <---> - GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI */ - -LIT_UNICODE_RANGE_LL (0x1FF2, 0x1FF4) /* GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI - <---> - GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI */ - -LIT_UNICODE_RANGE_LL (0x1FF6, 0x1FF7) /* GREEK SMALL LETTER OMEGA WITH PERISPOMENI - <---> - GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI */ - -LIT_UNICODE_RANGE_LL (0x207F, 0x207F) /* SUPERSCRIPT LATIN SMALL LETTER N */ - 
-LIT_UNICODE_RANGE_LL (0x210A, 0x210A) /* SCRIPT SMALL G */ - -LIT_UNICODE_RANGE_LL (0x210E, 0x210F) /* PLANCK CONSTANT - <---> - PLANCK CONSTANT OVER TWO PI */ - -LIT_UNICODE_RANGE_LL (0x2113, 0x2113) /* SCRIPT SMALL L */ - -LIT_UNICODE_RANGE_LL (0x212F, 0x212F) /* SCRIPT SMALL E */ - -LIT_UNICODE_RANGE_LL (0x2134, 0x2134) /* SCRIPT SMALL O */ - -LIT_UNICODE_RANGE_LL (0x2139, 0x2139) /* INFORMATION SOURCE */ - -LIT_UNICODE_RANGE_LL (0xFB00, 0xFB06) /* LATIN SMALL LIGATURE FF - <---> - LATIN SMALL LIGATURE ST */ - -LIT_UNICODE_RANGE_LL (0xFB13, 0xFB17) /* ARMENIAN SMALL LIGATURE MEN NOW - <---> - ARMENIAN SMALL LIGATURE MEN XEH */ - -LIT_UNICODE_RANGE_LL (0xFF41, 0xFF5A) /* FULLWIDTH LATIN SMALL LETTER A - <---> - FULLWIDTH LATIN SMALL LETTER Z */ - +static const uint16_t unicode_separator_char_interv_sps[] = +{ + 0x2000 +}; /** - * "Letter, Titlecase" category + * Unicode separator character interval lengths from Unicode category: Zs */ -#ifndef LIT_UNICODE_RANGE_LT -# define LIT_UNICODE_RANGE_LT(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_LT */ -LIT_UNICODE_RANGE_LT (0x01C5, 0x01C5) /* LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON */ - -LIT_UNICODE_RANGE_LT (0x01C8, 0x01C8) /* LATIN CAPITAL LETTER L WITH SMALL LETTER J */ - -LIT_UNICODE_RANGE_LT (0x01CB, 0x01CB) /* LATIN CAPITAL LETTER N WITH SMALL LETTER J */ - -LIT_UNICODE_RANGE_LT (0x01F2, 0x01F2) /* LATIN CAPITAL LETTER D WITH SMALL LETTER Z */ - -LIT_UNICODE_RANGE_LT (0x1F88, 0x1F8F) /* GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI - <---> - GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI */ - -LIT_UNICODE_RANGE_LT (0x1F98, 0x1F9F) /* GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI - <---> - GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI */ - -LIT_UNICODE_RANGE_LT (0x1FA8, 0x1FAF) /* GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI - <---> - GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI */ 
- -LIT_UNICODE_RANGE_LT (0x1FBC, 0x1FBC) /* GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI */ - -LIT_UNICODE_RANGE_LT (0x1FCC, 0x1FCC) /* GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI */ - -LIT_UNICODE_RANGE_LT (0x1FFC, 0x1FFC) /* GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI */ - +static const uint8_t unicode_separator_char_interv_lens[] = +{ + 11 +}; /** - * "Mark, Non-Spacing" category + * Unicode separator characters that are not in the + * unicode_separator_char_intervals array. + * + * Unicode category: Zs */ -#ifndef LIT_UNICODE_RANGE_MN -# define LIT_UNICODE_RANGE_MN(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_MN */ -LIT_UNICODE_RANGE_MN (0x0300, 0x034E) /* COMBINING GRAVE ACCENT - <---> - COMBINING UPWARDS ARROW BELOW */ - -LIT_UNICODE_RANGE_MN (0x0360, 0x0362) /* COMBINING DOUBLE TILDE - <---> - COMBINING DOUBLE RIGHTWARDS ARROW BELOW */ - -LIT_UNICODE_RANGE_MN (0x0483, 0x0486) /* COMBINING CYRILLIC TITLO - <---> - COMBINING CYRILLIC PSILI PNEUMATA */ - -LIT_UNICODE_RANGE_MN (0x0591, 0x05A1) /* HEBREW ACCENT ETNAHTA - <---> - HEBREW ACCENT PAZER */ - -LIT_UNICODE_RANGE_MN (0x05A3, 0x05B9) /* HEBREW ACCENT MUNAH - <---> - HEBREW POINT HOLAM */ - -LIT_UNICODE_RANGE_MN (0x05BB, 0x05BD) /* HEBREW POINT QUBUTS - <---> - HEBREW POINT METEG */ - -LIT_UNICODE_RANGE_MN (0x05BF, 0x05BF) /* HEBREW POINT RAFE */ - -LIT_UNICODE_RANGE_MN (0x05C1, 0x05C2) /* HEBREW POINT SHIN DOT - <---> - HEBREW POINT SIN DOT */ - -LIT_UNICODE_RANGE_MN (0x05C4, 0x05C4) /* HEBREW MARK UPPER DOT */ - -LIT_UNICODE_RANGE_MN (0x064B, 0x0655) /* ARABIC FATHATAN - <---> - ARABIC HAMZA BELOW */ - -LIT_UNICODE_RANGE_MN (0x0670, 0x0670) /* ARABIC LETTER SUPERSCRIPT ALEF */ - -LIT_UNICODE_RANGE_MN (0x06D6, 0x06DC) /* ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA - <---> - ARABIC SMALL HIGH SEEN */ - -LIT_UNICODE_RANGE_MN (0x06DF, 0x06E4) /* ARABIC SMALL HIGH ROUNDED ZERO - <---> - ARABIC SMALL HIGH MADDA */ - -LIT_UNICODE_RANGE_MN (0x06E7, 0x06E8) /* ARABIC SMALL HIGH YEH - 
<---> - ARABIC SMALL HIGH NOON */ - -LIT_UNICODE_RANGE_MN (0x06EA, 0x06ED) /* ARABIC EMPTY CENTRE LOW STOP - <---> - ARABIC SMALL LOW MEEM */ - -LIT_UNICODE_RANGE_MN (0x0711, 0x0711) /* SYRIAC LETTER SUPERSCRIPT ALAPH */ - -LIT_UNICODE_RANGE_MN (0x0730, 0x074A) /* SYRIAC PTHAHA ABOVE - <---> - SYRIAC BARREKH */ - -LIT_UNICODE_RANGE_MN (0x07A6, 0x07B0) /* THAANA ABAFILI - <---> - THAANA SUKUN */ - -LIT_UNICODE_RANGE_MN (0x0901, 0x0902) /* DEVANAGARI SIGN CANDRABINDU - <---> - DEVANAGARI SIGN ANUSVARA */ - -LIT_UNICODE_RANGE_MN (0x093C, 0x093C) /* DEVANAGARI SIGN NUKTA */ - -LIT_UNICODE_RANGE_MN (0x0941, 0x0948) /* DEVANAGARI VOWEL SIGN U - <---> - DEVANAGARI VOWEL SIGN AI */ - -LIT_UNICODE_RANGE_MN (0x094D, 0x094D) /* DEVANAGARI SIGN VIRAMA */ - -LIT_UNICODE_RANGE_MN (0x0951, 0x0954) /* DEVANAGARI STRESS SIGN UDATTA - <---> - DEVANAGARI ACUTE ACCENT */ - -LIT_UNICODE_RANGE_MN (0x0962, 0x0963) /* DEVANAGARI VOWEL SIGN VOCALIC L - <---> - DEVANAGARI VOWEL SIGN VOCALIC LL */ - -LIT_UNICODE_RANGE_MN (0x0981, 0x0981) /* BENGALI SIGN CANDRABINDU */ - -LIT_UNICODE_RANGE_MN (0x09BC, 0x09BC) /* BENGALI SIGN NUKTA */ - -LIT_UNICODE_RANGE_MN (0x09C1, 0x09C4) /* BENGALI VOWEL SIGN U - <---> - BENGALI VOWEL SIGN VOCALIC RR */ - -LIT_UNICODE_RANGE_MN (0x09CD, 0x09CD) /* BENGALI SIGN VIRAMA */ - -LIT_UNICODE_RANGE_MN (0x09E2, 0x09E3) /* BENGALI VOWEL SIGN VOCALIC L - <---> - BENGALI VOWEL SIGN VOCALIC LL */ - -LIT_UNICODE_RANGE_MN (0x0A02, 0x0A02) /* GURMUKHI SIGN BINDI */ - -LIT_UNICODE_RANGE_MN (0x0A3C, 0x0A3C) /* GURMUKHI SIGN NUKTA */ - -LIT_UNICODE_RANGE_MN (0x0A41, 0x0A42) /* GURMUKHI VOWEL SIGN U - <---> - GURMUKHI VOWEL SIGN UU */ - -LIT_UNICODE_RANGE_MN (0x0A47, 0x0A48) /* GURMUKHI VOWEL SIGN EE - <---> - GURMUKHI VOWEL SIGN AI */ - -LIT_UNICODE_RANGE_MN (0x0A4B, 0x0A4D) /* GURMUKHI VOWEL SIGN OO - <---> - GURMUKHI SIGN VIRAMA */ - -LIT_UNICODE_RANGE_MN (0x0A70, 0x0A71) /* GURMUKHI TIPPI - <---> - GURMUKHI ADDAK */ - -LIT_UNICODE_RANGE_MN (0x0A81, 0x0A82) /* GUJARATI SIGN 
CANDRABINDU - <---> - GUJARATI SIGN ANUSVARA */ - -LIT_UNICODE_RANGE_MN (0x0ABC, 0x0ABC) /* GUJARATI SIGN NUKTA */ - -LIT_UNICODE_RANGE_MN (0x0AC1, 0x0AC5) /* GUJARATI VOWEL SIGN U - <---> - GUJARATI VOWEL SIGN CANDRA E */ - -LIT_UNICODE_RANGE_MN (0x0AC7, 0x0AC8) /* GUJARATI VOWEL SIGN E - <---> - GUJARATI VOWEL SIGN AI */ - -LIT_UNICODE_RANGE_MN (0x0ACD, 0x0ACD) /* GUJARATI SIGN VIRAMA */ - -LIT_UNICODE_RANGE_MN (0x0B01, 0x0B01) /* ORIYA SIGN CANDRABINDU */ - -LIT_UNICODE_RANGE_MN (0x0B3C, 0x0B3C) /* ORIYA SIGN NUKTA */ - -LIT_UNICODE_RANGE_MN (0x0B3F, 0x0B3F) /* ORIYA VOWEL SIGN I */ - -LIT_UNICODE_RANGE_MN (0x0B41, 0x0B43) /* ORIYA VOWEL SIGN U - <---> - ORIYA VOWEL SIGN VOCALIC R */ - -LIT_UNICODE_RANGE_MN (0x0B4D, 0x0B4D) /* ORIYA SIGN VIRAMA */ - -LIT_UNICODE_RANGE_MN (0x0B56, 0x0B56) /* ORIYA AI LENGTH MARK */ - -LIT_UNICODE_RANGE_MN (0x0B82, 0x0B82) /* TAMIL SIGN ANUSVARA */ - -LIT_UNICODE_RANGE_MN (0x0BC0, 0x0BC0) /* TAMIL VOWEL SIGN II */ - -LIT_UNICODE_RANGE_MN (0x0BCD, 0x0BCD) /* TAMIL SIGN VIRAMA */ - -LIT_UNICODE_RANGE_MN (0x0C3E, 0x0C40) /* TELUGU VOWEL SIGN AA - <---> - TELUGU VOWEL SIGN II */ - -LIT_UNICODE_RANGE_MN (0x0C46, 0x0C48) /* TELUGU VOWEL SIGN E - <---> - TELUGU VOWEL SIGN AI */ - -LIT_UNICODE_RANGE_MN (0x0C4A, 0x0C4D) /* TELUGU VOWEL SIGN O - <---> - TELUGU SIGN VIRAMA */ - -LIT_UNICODE_RANGE_MN (0x0C55, 0x0C56) /* TELUGU LENGTH MARK - <---> - TELUGU AI LENGTH MARK */ - -LIT_UNICODE_RANGE_MN (0x0CBF, 0x0CBF) /* KANNADA VOWEL SIGN I */ - -LIT_UNICODE_RANGE_MN (0x0CC6, 0x0CC6) /* KANNADA VOWEL SIGN E */ - -LIT_UNICODE_RANGE_MN (0x0CCC, 0x0CCD) /* KANNADA VOWEL SIGN AU - <---> - KANNADA SIGN VIRAMA */ - -LIT_UNICODE_RANGE_MN (0x0D41, 0x0D43) /* MALAYALAM VOWEL SIGN U - <---> - MALAYALAM VOWEL SIGN VOCALIC R */ - -LIT_UNICODE_RANGE_MN (0x0D4D, 0x0D4D) /* MALAYALAM SIGN VIRAMA */ - -LIT_UNICODE_RANGE_MN (0x0DCA, 0x0DCA) /* SINHALA SIGN AL-LAKUNA */ - -LIT_UNICODE_RANGE_MN (0x0DD2, 0x0DD4) /* SINHALA VOWEL SIGN KETTI IS-PILLA - <---> - SINHALA 
VOWEL SIGN KETTI PAA-PILLA */ - -LIT_UNICODE_RANGE_MN (0x0DD6, 0x0DD6) /* SINHALA VOWEL SIGN DIGA PAA-PILLA */ - -LIT_UNICODE_RANGE_MN (0x0E31, 0x0E31) /* THAI CHARACTER MAI HAN-AKAT */ - -LIT_UNICODE_RANGE_MN (0x0E34, 0x0E3A) /* THAI CHARACTER SARA I - <---> - THAI CHARACTER PHINTHU */ - -LIT_UNICODE_RANGE_MN (0x0E47, 0x0E4E) /* THAI CHARACTER MAITAIKHU - <---> - THAI CHARACTER YAMAKKAN */ - -LIT_UNICODE_RANGE_MN (0x0EB1, 0x0EB1) /* LAO VOWEL SIGN MAI KAN */ - -LIT_UNICODE_RANGE_MN (0x0EB4, 0x0EB9) /* LAO VOWEL SIGN I - <---> - LAO VOWEL SIGN UU */ - -LIT_UNICODE_RANGE_MN (0x0EBB, 0x0EBC) /* LAO VOWEL SIGN MAI KON - <---> - LAO SEMIVOWEL SIGN LO */ - -LIT_UNICODE_RANGE_MN (0x0EC8, 0x0ECD) /* LAO TONE MAI EK - <---> - LAO NIGGAHITA */ - -LIT_UNICODE_RANGE_MN (0x0F18, 0x0F19) /* TIBETAN ASTROLOGICAL SIGN -KHYUD PA - <---> - TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS */ - -LIT_UNICODE_RANGE_MN (0x0F35, 0x0F35) /* TIBETAN MARK NGAS BZUNG NYI ZLA */ - -LIT_UNICODE_RANGE_MN (0x0F37, 0x0F37) /* TIBETAN MARK NGAS BZUNG SGOR RTAGS */ - -LIT_UNICODE_RANGE_MN (0x0F39, 0x0F39) /* TIBETAN MARK TSA -PHRU */ - -LIT_UNICODE_RANGE_MN (0x0F71, 0x0F7E) /* TIBETAN VOWEL SIGN AA - <---> - TIBETAN SIGN RJES SU NGA RO */ - -LIT_UNICODE_RANGE_MN (0x0F80, 0x0F84) /* TIBETAN VOWEL SIGN REVERSED I - <---> - TIBETAN MARK HALANTA */ - -LIT_UNICODE_RANGE_MN (0x0F86, 0x0F87) /* TIBETAN SIGN LCI RTAGS - <---> - TIBETAN SIGN YANG RTAGS */ - -LIT_UNICODE_RANGE_MN (0x0F90, 0x0F97) /* TIBETAN SUBJOINED LETTER KA - <---> - TIBETAN SUBJOINED LETTER JA */ - -LIT_UNICODE_RANGE_MN (0x0F99, 0x0FBC) /* TIBETAN SUBJOINED LETTER NYA - <---> - TIBETAN SUBJOINED LETTER FIXED-FORM RA */ - -LIT_UNICODE_RANGE_MN (0x0FC6, 0x0FC6) /* TIBETAN SYMBOL PADMA GDAN */ - -LIT_UNICODE_RANGE_MN (0x102D, 0x1030) /* MYANMAR VOWEL SIGN I - <---> - MYANMAR VOWEL SIGN UU */ - -LIT_UNICODE_RANGE_MN (0x1032, 0x1032) /* MYANMAR VOWEL SIGN AI */ - -LIT_UNICODE_RANGE_MN (0x1036, 0x1037) /* MYANMAR SIGN ANUSVARA - <---> - MYANMAR SIGN DOT 
BELOW */ - -LIT_UNICODE_RANGE_MN (0x1039, 0x1039) /* MYANMAR SIGN VIRAMA */ - -LIT_UNICODE_RANGE_MN (0x1058, 0x1059) /* MYANMAR VOWEL SIGN VOCALIC L - <---> - MYANMAR VOWEL SIGN VOCALIC LL */ - -LIT_UNICODE_RANGE_MN (0x17B7, 0x17BD) /* KHMER VOWEL SIGN I - <---> - KHMER VOWEL SIGN UA */ - -LIT_UNICODE_RANGE_MN (0x17C6, 0x17C6) /* KHMER SIGN NIKAHIT */ - -LIT_UNICODE_RANGE_MN (0x17C9, 0x17D3) /* KHMER SIGN MUUSIKATOAN - <---> - KHMER SIGN BATHAMASAT */ - -LIT_UNICODE_RANGE_MN (0x18A9, 0x18A9) /* MONGOLIAN LETTER ALI GALI DAGALGA */ - -LIT_UNICODE_RANGE_MN (0x20D0, 0x20DC) /* COMBINING LEFT HARPOON ABOVE - <---> - COMBINING FOUR DOTS ABOVE */ - -LIT_UNICODE_RANGE_MN (0x20E1, 0x20E1) /* COMBINING LEFT RIGHT ARROW ABOVE */ - -LIT_UNICODE_RANGE_MN (0x302A, 0x302F) /* IDEOGRAPHIC LEVEL TONE MARK - <---> - HANGUL DOUBLE DOT TONE MARK */ - -LIT_UNICODE_RANGE_MN (0x3099, 0x309A) /* COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK - <---> - COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */ - -LIT_UNICODE_RANGE_MN (0xFB1E, 0xFB1E) /* HEBREW POINT JUDEO-SPANISH VARIKA */ - -LIT_UNICODE_RANGE_MN (0xFE20, 0xFE23) /* COMBINING LIGATURE LEFT HALF - <---> - COMBINING DOUBLE TILDE RIGHT HALF */ - - -/** - * "Mark, Spacing Combining" category - */ -#ifndef LIT_UNICODE_RANGE_MC -# define LIT_UNICODE_RANGE_MC(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_MC */ -LIT_UNICODE_RANGE_MC (0x0903, 0x0903) /* DEVANAGARI SIGN VISARGA */ - -LIT_UNICODE_RANGE_MC (0x093E, 0x0940) /* DEVANAGARI VOWEL SIGN AA - <---> - DEVANAGARI VOWEL SIGN II */ - -LIT_UNICODE_RANGE_MC (0x0949, 0x094C) /* DEVANAGARI VOWEL SIGN CANDRA O - <---> - DEVANAGARI VOWEL SIGN AU */ - -LIT_UNICODE_RANGE_MC (0x0982, 0x0983) /* BENGALI SIGN ANUSVARA - <---> - BENGALI SIGN VISARGA */ - -LIT_UNICODE_RANGE_MC (0x09BE, 0x09C0) /* BENGALI VOWEL SIGN AA - <---> - BENGALI VOWEL SIGN II */ - -LIT_UNICODE_RANGE_MC (0x09C7, 0x09C8) /* BENGALI VOWEL SIGN E - <---> - BENGALI VOWEL SIGN AI */ - -LIT_UNICODE_RANGE_MC (0x09CB, 
0x09CC) /* BENGALI VOWEL SIGN O - <---> - BENGALI VOWEL SIGN AU */ - -LIT_UNICODE_RANGE_MC (0x09D7, 0x09D7) /* BENGALI AU LENGTH MARK */ - -LIT_UNICODE_RANGE_MC (0x0A3E, 0x0A40) /* GURMUKHI VOWEL SIGN AA - <---> - GURMUKHI VOWEL SIGN II */ - -LIT_UNICODE_RANGE_MC (0x0A83, 0x0A83) /* GUJARATI SIGN VISARGA */ - -LIT_UNICODE_RANGE_MC (0x0ABE, 0x0AC0) /* GUJARATI VOWEL SIGN AA - <---> - GUJARATI VOWEL SIGN II */ - -LIT_UNICODE_RANGE_MC (0x0AC9, 0x0AC9) /* GUJARATI VOWEL SIGN CANDRA O */ - -LIT_UNICODE_RANGE_MC (0x0ACB, 0x0ACC) /* GUJARATI VOWEL SIGN O - <---> - GUJARATI VOWEL SIGN AU */ - -LIT_UNICODE_RANGE_MC (0x0B02, 0x0B03) /* ORIYA SIGN ANUSVARA - <---> - ORIYA SIGN VISARGA */ - -LIT_UNICODE_RANGE_MC (0x0B3E, 0x0B3E) /* ORIYA VOWEL SIGN AA */ - -LIT_UNICODE_RANGE_MC (0x0B40, 0x0B40) /* ORIYA VOWEL SIGN II */ - -LIT_UNICODE_RANGE_MC (0x0B47, 0x0B48) /* ORIYA VOWEL SIGN E - <---> - ORIYA VOWEL SIGN AI */ - -LIT_UNICODE_RANGE_MC (0x0B4B, 0x0B4C) /* ORIYA VOWEL SIGN O - <---> - ORIYA VOWEL SIGN AU */ - -LIT_UNICODE_RANGE_MC (0x0B57, 0x0B57) /* ORIYA AU LENGTH MARK */ - -LIT_UNICODE_RANGE_MC (0x0B83, 0x0B83) /* TAMIL SIGN VISARGA */ - -LIT_UNICODE_RANGE_MC (0x0BBE, 0x0BBF) /* TAMIL VOWEL SIGN AA - <---> - TAMIL VOWEL SIGN I */ - -LIT_UNICODE_RANGE_MC (0x0BC1, 0x0BC2) /* TAMIL VOWEL SIGN U - <---> - TAMIL VOWEL SIGN UU */ - -LIT_UNICODE_RANGE_MC (0x0BC6, 0x0BC8) /* TAMIL VOWEL SIGN E - <---> - TAMIL VOWEL SIGN AI */ - -LIT_UNICODE_RANGE_MC (0x0BCA, 0x0BCC) /* TAMIL VOWEL SIGN O - <---> - TAMIL VOWEL SIGN AU */ - -LIT_UNICODE_RANGE_MC (0x0BD7, 0x0BD7) /* TAMIL AU LENGTH MARK */ - -LIT_UNICODE_RANGE_MC (0x0C01, 0x0C03) /* TELUGU SIGN CANDRABINDU - <---> - TELUGU SIGN VISARGA */ - -LIT_UNICODE_RANGE_MC (0x0C41, 0x0C44) /* TELUGU VOWEL SIGN U - <---> - TELUGU VOWEL SIGN VOCALIC RR */ - -LIT_UNICODE_RANGE_MC (0x0C82, 0x0C83) /* KANNADA SIGN ANUSVARA - <---> - KANNADA SIGN VISARGA */ - -LIT_UNICODE_RANGE_MC (0x0CBE, 0x0CBE) /* KANNADA VOWEL SIGN AA */ - -LIT_UNICODE_RANGE_MC 
(0x0CC0, 0x0CC4) /* KANNADA VOWEL SIGN II - <---> - KANNADA VOWEL SIGN VOCALIC RR */ - -LIT_UNICODE_RANGE_MC (0x0CC7, 0x0CC8) /* KANNADA VOWEL SIGN EE - <---> - KANNADA VOWEL SIGN AI */ - -LIT_UNICODE_RANGE_MC (0x0CCA, 0x0CCB) /* KANNADA VOWEL SIGN O - <---> - KANNADA VOWEL SIGN OO */ - -LIT_UNICODE_RANGE_MC (0x0CD5, 0x0CD6) /* KANNADA LENGTH MARK - <---> - KANNADA AI LENGTH MARK */ - -LIT_UNICODE_RANGE_MC (0x0D02, 0x0D03) /* MALAYALAM SIGN ANUSVARA - <---> - MALAYALAM SIGN VISARGA */ - -LIT_UNICODE_RANGE_MC (0x0D3E, 0x0D40) /* MALAYALAM VOWEL SIGN AA - <---> - MALAYALAM VOWEL SIGN II */ - -LIT_UNICODE_RANGE_MC (0x0D46, 0x0D48) /* MALAYALAM VOWEL SIGN E - <---> - MALAYALAM VOWEL SIGN AI */ - -LIT_UNICODE_RANGE_MC (0x0D4A, 0x0D4C) /* MALAYALAM VOWEL SIGN O - <---> - MALAYALAM VOWEL SIGN AU */ - -LIT_UNICODE_RANGE_MC (0x0D57, 0x0D57) /* MALAYALAM AU LENGTH MARK */ - -LIT_UNICODE_RANGE_MC (0x0D82, 0x0D83) /* SINHALA SIGN ANUSVARAYA - <---> - SINHALA SIGN VISARGAYA */ - -LIT_UNICODE_RANGE_MC (0x0DCF, 0x0DD1) /* SINHALA VOWEL SIGN AELA-PILLA - <---> - SINHALA VOWEL SIGN DIGA AEDA-PILLA */ - -LIT_UNICODE_RANGE_MC (0x0DD8, 0x0DDF) /* SINHALA VOWEL SIGN GAETTA-PILLA - <---> - SINHALA VOWEL SIGN GAYANUKITTA */ - -LIT_UNICODE_RANGE_MC (0x0DF2, 0x0DF3) /* SINHALA VOWEL SIGN DIGA GAETTA-PILLA - <---> - SINHALA VOWEL SIGN DIGA GAYANUKITTA */ - -LIT_UNICODE_RANGE_MC (0x0F3E, 0x0F3F) /* TIBETAN SIGN YAR TSHES - <---> - TIBETAN SIGN MAR TSHES */ - -LIT_UNICODE_RANGE_MC (0x0F7F, 0x0F7F) /* TIBETAN SIGN RNAM BCAD */ - -LIT_UNICODE_RANGE_MC (0x102C, 0x102C) /* MYANMAR VOWEL SIGN AA */ - -LIT_UNICODE_RANGE_MC (0x1031, 0x1031) /* MYANMAR VOWEL SIGN E */ - -LIT_UNICODE_RANGE_MC (0x1038, 0x1038) /* MYANMAR SIGN VISARGA */ - -LIT_UNICODE_RANGE_MC (0x1056, 0x1057) /* MYANMAR VOWEL SIGN VOCALIC R - <---> - MYANMAR VOWEL SIGN VOCALIC RR */ - -LIT_UNICODE_RANGE_MC (0x17B4, 0x17B6) /* KHMER VOWEL INHERENT AQ - <---> - KHMER VOWEL SIGN AA */ - -LIT_UNICODE_RANGE_MC (0x17BE, 0x17C5) /* KHMER 
VOWEL SIGN OE - <---> - KHMER VOWEL SIGN AU */ - -LIT_UNICODE_RANGE_MC (0x17C7, 0x17C8) /* KHMER SIGN REAHMUK - <---> - KHMER SIGN YUUKALEAPINTU */ - - -/** - * "Mark, Enclosing" category - */ -#ifndef LIT_UNICODE_RANGE_ME -# define LIT_UNICODE_RANGE_ME(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_ME */ -LIT_UNICODE_RANGE_ME (0x0488, 0x0489) /* COMBINING CYRILLIC HUNDRED THOUSANDS SIGN - <---> - COMBINING CYRILLIC MILLIONS SIGN */ - -LIT_UNICODE_RANGE_ME (0x06DD, 0x06DE) /* ARABIC END OF AYAH - <---> - ARABIC START OF RUB EL HIZB */ - -LIT_UNICODE_RANGE_ME (0x20DD, 0x20E0) /* COMBINING ENCLOSING CIRCLE - <---> - COMBINING ENCLOSING CIRCLE BACKSLASH */ - -LIT_UNICODE_RANGE_ME (0x20E2, 0x20E3) /* COMBINING ENCLOSING SCREEN - <---> - COMBINING ENCLOSING KEYCAP */ - - -/** - * "Number, Decimal Digit" category - */ -#ifndef LIT_UNICODE_RANGE_ND -# define LIT_UNICODE_RANGE_ND(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_ND */ -LIT_UNICODE_RANGE_ND (0x0030, 0x0039) /* DIGIT ZERO - <---> - DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x0660, 0x0669) /* ARABIC-INDIC DIGIT ZERO - <---> - ARABIC-INDIC DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x06F0, 0x06F9) /* EXTENDED ARABIC-INDIC DIGIT ZERO - <---> - EXTENDED ARABIC-INDIC DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x0966, 0x096F) /* DEVANAGARI DIGIT ZERO - <---> - DEVANAGARI DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x09E6, 0x09EF) /* BENGALI DIGIT ZERO - <---> - BENGALI DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x0A66, 0x0A6F) /* GURMUKHI DIGIT ZERO - <---> - GURMUKHI DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x0AE6, 0x0AEF) /* GUJARATI DIGIT ZERO - <---> - GUJARATI DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x0B66, 0x0B6F) /* ORIYA DIGIT ZERO - <---> - ORIYA DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x0BE7, 0x0BEF) /* TAMIL DIGIT ONE - <---> - TAMIL DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x0C66, 0x0C6F) /* TELUGU DIGIT ZERO - <---> - TELUGU DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x0CE6, 0x0CEF) /* KANNADA DIGIT ZERO - <---> - 
KANNADA DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x0D66, 0x0D6F) /* MALAYALAM DIGIT ZERO - <---> - MALAYALAM DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x0E50, 0x0E59) /* THAI DIGIT ZERO - <---> - THAI DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x0ED0, 0x0ED9) /* LAO DIGIT ZERO - <---> - LAO DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x0F20, 0x0F29) /* TIBETAN DIGIT ZERO - <---> - TIBETAN DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x1040, 0x1049) /* MYANMAR DIGIT ZERO - <---> - MYANMAR DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x1369, 0x1371) /* ETHIOPIC DIGIT ONE - <---> - ETHIOPIC DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x17E0, 0x17E9) /* KHMER DIGIT ZERO - <---> - KHMER DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0x1810, 0x1819) /* MONGOLIAN DIGIT ZERO - <---> - MONGOLIAN DIGIT NINE */ - -LIT_UNICODE_RANGE_ND (0xFF10, 0xFF19) /* FULLWIDTH DIGIT ZERO - <---> - FULLWIDTH DIGIT NINE */ - - -/** - * "Number, Letter" category - */ -#ifndef LIT_UNICODE_RANGE_NL -# define LIT_UNICODE_RANGE_NL(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_NL */ -LIT_UNICODE_RANGE_NL (0x2160, 0x2183) /* ROMAN NUMERAL ONE - <---> - ROMAN NUMERAL REVERSED ONE HUNDRED */ - -LIT_UNICODE_RANGE_NL (0x3007, 0x3007) /* IDEOGRAPHIC NUMBER ZERO */ - -LIT_UNICODE_RANGE_NL (0x3021, 0x3029) /* HANGZHOU NUMERAL ONE - <---> - HANGZHOU NUMERAL NINE */ - -LIT_UNICODE_RANGE_NL (0x3038, 0x303A) /* HANGZHOU NUMERAL TEN - <---> - HANGZHOU NUMERAL THIRTY */ - - -/** - * "Number, Other" category - */ -#ifndef LIT_UNICODE_RANGE_NO -# define LIT_UNICODE_RANGE_NO(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_NO */ -LIT_UNICODE_RANGE_NO (0x00B2, 0x00B3) /* SUPERSCRIPT TWO - <---> - SUPERSCRIPT THREE */ - -LIT_UNICODE_RANGE_NO (0x00B9, 0x00B9) /* SUPERSCRIPT ONE */ - -LIT_UNICODE_RANGE_NO (0x00BC, 0x00BE) /* VULGAR FRACTION ONE QUARTER - <---> - VULGAR FRACTION THREE QUARTERS */ - -LIT_UNICODE_RANGE_NO (0x09F4, 0x09F9) /* BENGALI CURRENCY NUMERATOR ONE - <---> - BENGALI CURRENCY DENOMINATOR SIXTEEN */ - -LIT_UNICODE_RANGE_NO 
(0x0BF0, 0x0BF2) /* TAMIL NUMBER TEN - <---> - TAMIL NUMBER ONE THOUSAND */ - -LIT_UNICODE_RANGE_NO (0x0F2A, 0x0F33) /* TIBETAN DIGIT HALF ONE - <---> - TIBETAN DIGIT HALF ZERO */ - -LIT_UNICODE_RANGE_NO (0x1372, 0x137C) /* ETHIOPIC NUMBER TEN - <---> - ETHIOPIC NUMBER TEN THOUSAND */ - -LIT_UNICODE_RANGE_NO (0x16EE, 0x16F0) /* RUNIC ARLAUG SYMBOL - <---> - RUNIC BELGTHOR SYMBOL */ - -LIT_UNICODE_RANGE_NO (0x2070, 0x2070) /* SUPERSCRIPT ZERO */ - -LIT_UNICODE_RANGE_NO (0x2074, 0x2079) /* SUPERSCRIPT FOUR - <---> - SUPERSCRIPT NINE */ - -LIT_UNICODE_RANGE_NO (0x2080, 0x2089) /* SUBSCRIPT ZERO - <---> - SUBSCRIPT NINE */ - -LIT_UNICODE_RANGE_NO (0x2153, 0x215F) /* VULGAR FRACTION ONE THIRD - <---> - FRACTION NUMERATOR ONE */ - -LIT_UNICODE_RANGE_NO (0x2460, 0x249B) /* CIRCLED DIGIT ONE - <---> - NUMBER TWENTY FULL STOP */ - -LIT_UNICODE_RANGE_NO (0x24EA, 0x24EA) /* CIRCLED DIGIT ZERO */ - -LIT_UNICODE_RANGE_NO (0x2776, 0x2793) /* DINGBAT NEGATIVE CIRCLED DIGIT ONE - <---> - DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN */ - -LIT_UNICODE_RANGE_NO (0x3192, 0x3195) /* IDEOGRAPHIC ANNOTATION ONE MARK - <---> - IDEOGRAPHIC ANNOTATION FOUR MARK */ - -LIT_UNICODE_RANGE_NO (0x3220, 0x3229) /* PARENTHESIZED IDEOGRAPH ONE - <---> - PARENTHESIZED IDEOGRAPH TEN */ - -LIT_UNICODE_RANGE_NO (0x3280, 0x3289) /* CIRCLED IDEOGRAPH ONE - <---> - CIRCLED IDEOGRAPH TEN */ - - -/** - * "Separator, Space" category - */ -#ifndef LIT_UNICODE_RANGE_ZS -# define LIT_UNICODE_RANGE_ZS(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_ZS */ - -LIT_UNICODE_RANGE_ZS (0x180E, 0x180E) /* MONGOLIAN VOWEL SEPARATOR (manually added) - * This character doesn't belong to Zs category according - * UnicodeData-3.0.0.txt, but it should be supported according to - * ch09/9.3/9.3.1/S9.3.1_A2.js form test262 suite. 
*/ - -LIT_UNICODE_RANGE_ZS (0x205F, 0x205F) /* MEDIUM MATHEMATICAL SPACE (manually added) - * This character doesn't belong to Zs category according - * UnicodeData-3.0.0.txt, but it should be supported according to - * ch09/9.3/9.3.1/S9.3.1_A2.js form test262 suite. */ - -LIT_UNICODE_RANGE_ZS (0x0020, 0x0020) /* SPACE */ - -LIT_UNICODE_RANGE_ZS (0x00A0, 0x00A0) /* NO-BREAK SPACE */ - -LIT_UNICODE_RANGE_ZS (0x1680, 0x1680) /* OGHAM SPACE MARK */ - -LIT_UNICODE_RANGE_ZS (0x2000, 0x200B) /* EN QUAD - <---> - ZERO WIDTH SPACE */ - -LIT_UNICODE_RANGE_ZS (0x202F, 0x202F) /* NARROW NO-BREAK SPACE */ - -LIT_UNICODE_RANGE_ZS (0x3000, 0x3000) /* IDEOGRAPHIC SPACE */ - - -/** - * "Separator, Line" category - */ -#ifndef LIT_UNICODE_RANGE_ZL -# define LIT_UNICODE_RANGE_ZL(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_ZL */ -LIT_UNICODE_RANGE_ZL (0x2028, 0x2028) /* LINE SEPARATOR */ - - -/** - * "Separator, Paragraph" category - */ -#ifndef LIT_UNICODE_RANGE_ZP -# define LIT_UNICODE_RANGE_ZP(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_ZP */ -LIT_UNICODE_RANGE_ZP (0x2029, 0x2029) /* PARAGRAPH SEPARATOR */ - - -/** - * "Other, Control" category - */ -#ifndef LIT_UNICODE_RANGE_CC -# define LIT_UNICODE_RANGE_CC(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_CC */ -LIT_UNICODE_RANGE_CC (0x0000, 0x001F) /* - <---> - */ - -LIT_UNICODE_RANGE_CC (0x007F, 0x009F) /* - <---> - */ - - -/** - * "Other, Format" category - */ -#ifndef LIT_UNICODE_RANGE_CF -# define LIT_UNICODE_RANGE_CF(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_CF */ -LIT_UNICODE_RANGE_CF (0x070F, 0x070F) /* SYRIAC ABBREVIATION MARK */ - -LIT_UNICODE_RANGE_CF (0x180B, 0x180E) /* MONGOLIAN FREE VARIATION SELECTOR ONE - <---> - MONGOLIAN VOWEL SEPARATOR */ - -LIT_UNICODE_RANGE_CF (0x200C, 0x200F) /* ZERO WIDTH NON-JOINER - <---> - RIGHT-TO-LEFT MARK */ - -LIT_UNICODE_RANGE_CF (0x202A, 0x202E) /* LEFT-TO-RIGHT EMBEDDING - <---> - RIGHT-TO-LEFT OVERRIDE */ - -LIT_UNICODE_RANGE_CF (0x206A, 
0x206F) /* INHIBIT SYMMETRIC SWAPPING - <---> - NOMINAL DIGIT SHAPES */ - -LIT_UNICODE_RANGE_CF (0xFEFF, 0xFEFF) /* ZERO WIDTH NO-BREAK SPACE */ - -LIT_UNICODE_RANGE_CF (0xFFF9, 0xFFFB) /* INTERLINEAR ANNOTATION ANCHOR - <---> - INTERLINEAR ANNOTATION TERMINATOR */ - - -/** - * "Other, Surrogate" category - */ -#ifndef LIT_UNICODE_RANGE_CS -# define LIT_UNICODE_RANGE_CS(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_CS */ -LIT_UNICODE_RANGE_CS (0xD800, 0xD800) /* */ - -LIT_UNICODE_RANGE_CS (0xDB7F, 0xDB80) /* - <---> - */ - -LIT_UNICODE_RANGE_CS (0xDBFF, 0xDC00) /* - <---> - */ - -LIT_UNICODE_RANGE_CS (0xDFFF, 0xDFFF) /* */ - - -/** - * "Other, Private Use" category - */ -#ifndef LIT_UNICODE_RANGE_CO -# define LIT_UNICODE_RANGE_CO(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_CO */ -LIT_UNICODE_RANGE_CO (0xE000, 0xE000) /* */ - -LIT_UNICODE_RANGE_CO (0xF8FF, 0xF8FF) /* */ - - -/** - * "Letter, Modifier" category - */ -#ifndef LIT_UNICODE_RANGE_LM -# define LIT_UNICODE_RANGE_LM(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_LM */ -LIT_UNICODE_RANGE_LM (0x02B0, 0x02B8) /* MODIFIER LETTER SMALL H - <---> - MODIFIER LETTER SMALL Y */ - -LIT_UNICODE_RANGE_LM (0x02BB, 0x02C1) /* MODIFIER LETTER TURNED COMMA - <---> - MODIFIER LETTER REVERSED GLOTTAL STOP */ - -LIT_UNICODE_RANGE_LM (0x02D0, 0x02D1) /* MODIFIER LETTER TRIANGULAR COLON - <---> - MODIFIER LETTER HALF TRIANGULAR COLON */ - -LIT_UNICODE_RANGE_LM (0x02E0, 0x02E4) /* MODIFIER LETTER SMALL GAMMA - <---> - MODIFIER LETTER SMALL REVERSED GLOTTAL STOP */ - -LIT_UNICODE_RANGE_LM (0x02EE, 0x02EE) /* MODIFIER LETTER DOUBLE APOSTROPHE */ - -LIT_UNICODE_RANGE_LM (0x037A, 0x037A) /* GREEK YPOGEGRAMMENI */ - -LIT_UNICODE_RANGE_LM (0x0559, 0x0559) /* ARMENIAN MODIFIER LETTER LEFT HALF RING */ - -LIT_UNICODE_RANGE_LM (0x0640, 0x0640) /* ARABIC TATWEEL */ - -LIT_UNICODE_RANGE_LM (0x06E5, 0x06E6) /* ARABIC SMALL WAW - <---> - ARABIC SMALL YEH */ - -LIT_UNICODE_RANGE_LM (0x0E46, 0x0E46) /* THAI CHARACTER 
MAIYAMOK */ - -LIT_UNICODE_RANGE_LM (0x0EC6, 0x0EC6) /* LAO KO LA */ - -LIT_UNICODE_RANGE_LM (0x1843, 0x1843) /* MONGOLIAN LETTER TODO LONG VOWEL SIGN */ - -LIT_UNICODE_RANGE_LM (0x3005, 0x3005) /* IDEOGRAPHIC ITERATION MARK */ - -LIT_UNICODE_RANGE_LM (0x3031, 0x3035) /* VERTICAL KANA REPEAT MARK - <---> - VERTICAL KANA REPEAT MARK LOWER HALF */ - -LIT_UNICODE_RANGE_LM (0x309D, 0x309E) /* HIRAGANA ITERATION MARK - <---> - HIRAGANA VOICED ITERATION MARK */ - -LIT_UNICODE_RANGE_LM (0x30FC, 0x30FE) /* KATAKANA-HIRAGANA PROLONGED SOUND MARK - <---> - KATAKANA VOICED ITERATION MARK */ - -LIT_UNICODE_RANGE_LM (0xFF70, 0xFF70) /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */ - -LIT_UNICODE_RANGE_LM (0xFF9E, 0xFF9F) /* HALFWIDTH KATAKANA VOICED SOUND MARK - <---> - HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */ - - -/** - * "Letter, Other" category - */ -#ifndef LIT_UNICODE_RANGE_LO -# define LIT_UNICODE_RANGE_LO(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_LO */ -LIT_UNICODE_RANGE_LO (0x01BB, 0x01BB) /* LATIN LETTER TWO WITH STROKE */ - -LIT_UNICODE_RANGE_LO (0x01C0, 0x01C3) /* LATIN LETTER DENTAL CLICK - <---> - LATIN LETTER RETROFLEX CLICK */ - -LIT_UNICODE_RANGE_LO (0x05D0, 0x05EA) /* HEBREW LETTER ALEF - <---> - HEBREW LETTER TAV */ - -LIT_UNICODE_RANGE_LO (0x05F0, 0x05F2) /* HEBREW LIGATURE YIDDISH DOUBLE VAV - <---> - HEBREW LIGATURE YIDDISH DOUBLE YOD */ - -LIT_UNICODE_RANGE_LO (0x0621, 0x063A) /* ARABIC LETTER HAMZA - <---> - ARABIC LETTER GHAIN */ - -LIT_UNICODE_RANGE_LO (0x0641, 0x064A) /* ARABIC LETTER FEH - <---> - ARABIC LETTER YEH */ - -LIT_UNICODE_RANGE_LO (0x0671, 0x06D3) /* ARABIC LETTER ALEF WASLA - <---> - ARABIC LETTER YEH BARREE WITH HAMZA ABOVE */ - -LIT_UNICODE_RANGE_LO (0x06D5, 0x06D5) /* ARABIC LETTER AE */ - -LIT_UNICODE_RANGE_LO (0x06FA, 0x06FC) /* ARABIC LETTER SHEEN WITH DOT BELOW - <---> - ARABIC LETTER GHAIN WITH DOT BELOW */ - -LIT_UNICODE_RANGE_LO (0x0710, 0x0710) /* SYRIAC LETTER ALAPH */ - -LIT_UNICODE_RANGE_LO (0x0712, 
0x072C) /* SYRIAC LETTER BETH - <---> - SYRIAC LETTER TAW */ - -LIT_UNICODE_RANGE_LO (0x0780, 0x07A5) /* THAANA LETTER HAA - <---> - THAANA LETTER WAAVU */ - -LIT_UNICODE_RANGE_LO (0x0905, 0x0939) /* DEVANAGARI LETTER A - <---> - DEVANAGARI LETTER HA */ - -LIT_UNICODE_RANGE_LO (0x093D, 0x093D) /* DEVANAGARI SIGN AVAGRAHA */ - -LIT_UNICODE_RANGE_LO (0x0950, 0x0950) /* DEVANAGARI OM */ - -LIT_UNICODE_RANGE_LO (0x0958, 0x0961) /* DEVANAGARI LETTER QA - <---> - DEVANAGARI LETTER VOCALIC LL */ - -LIT_UNICODE_RANGE_LO (0x0985, 0x098C) /* BENGALI LETTER A - <---> - BENGALI LETTER VOCALIC L */ - -LIT_UNICODE_RANGE_LO (0x098F, 0x0990) /* BENGALI LETTER E - <---> - BENGALI LETTER AI */ - -LIT_UNICODE_RANGE_LO (0x0993, 0x09A8) /* BENGALI LETTER O - <---> - BENGALI LETTER NA */ - -LIT_UNICODE_RANGE_LO (0x09AA, 0x09B0) /* BENGALI LETTER PA - <---> - BENGALI LETTER RA */ - -LIT_UNICODE_RANGE_LO (0x09B2, 0x09B2) /* BENGALI LETTER LA */ - -LIT_UNICODE_RANGE_LO (0x09B6, 0x09B9) /* BENGALI LETTER SHA - <---> - BENGALI LETTER HA */ - -LIT_UNICODE_RANGE_LO (0x09DC, 0x09DD) /* BENGALI LETTER RRA - <---> - BENGALI LETTER RHA */ - -LIT_UNICODE_RANGE_LO (0x09DF, 0x09E1) /* BENGALI LETTER YYA - <---> - BENGALI LETTER VOCALIC LL */ - -LIT_UNICODE_RANGE_LO (0x09F0, 0x09F1) /* BENGALI LETTER RA WITH MIDDLE DIAGONAL - <---> - BENGALI LETTER RA WITH LOWER DIAGONAL */ - -LIT_UNICODE_RANGE_LO (0x0A05, 0x0A0A) /* GURMUKHI LETTER A - <---> - GURMUKHI LETTER UU */ - -LIT_UNICODE_RANGE_LO (0x0A0F, 0x0A10) /* GURMUKHI LETTER EE - <---> - GURMUKHI LETTER AI */ - -LIT_UNICODE_RANGE_LO (0x0A13, 0x0A28) /* GURMUKHI LETTER OO - <---> - GURMUKHI LETTER NA */ - -LIT_UNICODE_RANGE_LO (0x0A2A, 0x0A30) /* GURMUKHI LETTER PA - <---> - GURMUKHI LETTER RA */ - -LIT_UNICODE_RANGE_LO (0x0A32, 0x0A33) /* GURMUKHI LETTER LA - <---> - GURMUKHI LETTER LLA */ - -LIT_UNICODE_RANGE_LO (0x0A35, 0x0A36) /* GURMUKHI LETTER VA - <---> - GURMUKHI LETTER SHA */ - -LIT_UNICODE_RANGE_LO (0x0A38, 0x0A39) /* GURMUKHI LETTER SA - 
<---> - GURMUKHI LETTER HA */ - -LIT_UNICODE_RANGE_LO (0x0A59, 0x0A5C) /* GURMUKHI LETTER KHHA - <---> - GURMUKHI LETTER RRA */ - -LIT_UNICODE_RANGE_LO (0x0A5E, 0x0A5E) /* GURMUKHI LETTER FA */ - -LIT_UNICODE_RANGE_LO (0x0A72, 0x0A74) /* GURMUKHI IRI - <---> - GURMUKHI EK ONKAR */ - -LIT_UNICODE_RANGE_LO (0x0A85, 0x0A8B) /* GUJARATI LETTER A - <---> - GUJARATI LETTER VOCALIC R */ - -LIT_UNICODE_RANGE_LO (0x0A8D, 0x0A8D) /* GUJARATI VOWEL CANDRA E */ - -LIT_UNICODE_RANGE_LO (0x0A8F, 0x0A91) /* GUJARATI LETTER E - <---> - GUJARATI VOWEL CANDRA O */ - -LIT_UNICODE_RANGE_LO (0x0A93, 0x0AA8) /* GUJARATI LETTER O - <---> - GUJARATI LETTER NA */ - -LIT_UNICODE_RANGE_LO (0x0AAA, 0x0AB0) /* GUJARATI LETTER PA - <---> - GUJARATI LETTER RA */ - -LIT_UNICODE_RANGE_LO (0x0AB2, 0x0AB3) /* GUJARATI LETTER LA - <---> - GUJARATI LETTER LLA */ - -LIT_UNICODE_RANGE_LO (0x0AB5, 0x0AB9) /* GUJARATI LETTER VA - <---> - GUJARATI LETTER HA */ - -LIT_UNICODE_RANGE_LO (0x0ABD, 0x0ABD) /* GUJARATI SIGN AVAGRAHA */ - -LIT_UNICODE_RANGE_LO (0x0AD0, 0x0AD0) /* GUJARATI OM */ - -LIT_UNICODE_RANGE_LO (0x0AE0, 0x0AE0) /* GUJARATI LETTER VOCALIC RR */ - -LIT_UNICODE_RANGE_LO (0x0B05, 0x0B0C) /* ORIYA LETTER A - <---> - ORIYA LETTER VOCALIC L */ - -LIT_UNICODE_RANGE_LO (0x0B0F, 0x0B10) /* ORIYA LETTER E - <---> - ORIYA LETTER AI */ - -LIT_UNICODE_RANGE_LO (0x0B13, 0x0B28) /* ORIYA LETTER O - <---> - ORIYA LETTER NA */ - -LIT_UNICODE_RANGE_LO (0x0B2A, 0x0B30) /* ORIYA LETTER PA - <---> - ORIYA LETTER RA */ - -LIT_UNICODE_RANGE_LO (0x0B32, 0x0B33) /* ORIYA LETTER LA - <---> - ORIYA LETTER LLA */ - -LIT_UNICODE_RANGE_LO (0x0B36, 0x0B39) /* ORIYA LETTER SHA - <---> - ORIYA LETTER HA */ - -LIT_UNICODE_RANGE_LO (0x0B3D, 0x0B3D) /* ORIYA SIGN AVAGRAHA */ - -LIT_UNICODE_RANGE_LO (0x0B5C, 0x0B5D) /* ORIYA LETTER RRA - <---> - ORIYA LETTER RHA */ - -LIT_UNICODE_RANGE_LO (0x0B5F, 0x0B61) /* ORIYA LETTER YYA - <---> - ORIYA LETTER VOCALIC LL */ - -LIT_UNICODE_RANGE_LO (0x0B85, 0x0B8A) /* TAMIL LETTER A - <---> 
- TAMIL LETTER UU */ - -LIT_UNICODE_RANGE_LO (0x0B8E, 0x0B90) /* TAMIL LETTER E - <---> - TAMIL LETTER AI */ - -LIT_UNICODE_RANGE_LO (0x0B92, 0x0B95) /* TAMIL LETTER O - <---> - TAMIL LETTER KA */ - -LIT_UNICODE_RANGE_LO (0x0B99, 0x0B9A) /* TAMIL LETTER NGA - <---> - TAMIL LETTER CA */ - -LIT_UNICODE_RANGE_LO (0x0B9C, 0x0B9C) /* TAMIL LETTER JA */ - -LIT_UNICODE_RANGE_LO (0x0B9E, 0x0B9F) /* TAMIL LETTER NYA - <---> - TAMIL LETTER TTA */ - -LIT_UNICODE_RANGE_LO (0x0BA3, 0x0BA4) /* TAMIL LETTER NNA - <---> - TAMIL LETTER TA */ - -LIT_UNICODE_RANGE_LO (0x0BA8, 0x0BAA) /* TAMIL LETTER NA - <---> - TAMIL LETTER PA */ - -LIT_UNICODE_RANGE_LO (0x0BAE, 0x0BB5) /* TAMIL LETTER MA - <---> - TAMIL LETTER VA */ - -LIT_UNICODE_RANGE_LO (0x0BB7, 0x0BB9) /* TAMIL LETTER SSA - <---> - TAMIL LETTER HA */ - -LIT_UNICODE_RANGE_LO (0x0C05, 0x0C0C) /* TELUGU LETTER A - <---> - TELUGU LETTER VOCALIC L */ - -LIT_UNICODE_RANGE_LO (0x0C0E, 0x0C10) /* TELUGU LETTER E - <---> - TELUGU LETTER AI */ - -LIT_UNICODE_RANGE_LO (0x0C12, 0x0C28) /* TELUGU LETTER O - <---> - TELUGU LETTER NA */ - -LIT_UNICODE_RANGE_LO (0x0C2A, 0x0C33) /* TELUGU LETTER PA - <---> - TELUGU LETTER LLA */ - -LIT_UNICODE_RANGE_LO (0x0C35, 0x0C39) /* TELUGU LETTER VA - <---> - TELUGU LETTER HA */ - -LIT_UNICODE_RANGE_LO (0x0C60, 0x0C61) /* TELUGU LETTER VOCALIC RR - <---> - TELUGU LETTER VOCALIC LL */ - -LIT_UNICODE_RANGE_LO (0x0C85, 0x0C8C) /* KANNADA LETTER A - <---> - KANNADA LETTER VOCALIC L */ - -LIT_UNICODE_RANGE_LO (0x0C8E, 0x0C90) /* KANNADA LETTER E - <---> - KANNADA LETTER AI */ - -LIT_UNICODE_RANGE_LO (0x0C92, 0x0CA8) /* KANNADA LETTER O - <---> - KANNADA LETTER NA */ - -LIT_UNICODE_RANGE_LO (0x0CAA, 0x0CB3) /* KANNADA LETTER PA - <---> - KANNADA LETTER LLA */ - -LIT_UNICODE_RANGE_LO (0x0CB5, 0x0CB9) /* KANNADA LETTER VA - <---> - KANNADA LETTER HA */ - -LIT_UNICODE_RANGE_LO (0x0CDE, 0x0CDE) /* KANNADA LETTER FA */ - -LIT_UNICODE_RANGE_LO (0x0CE0, 0x0CE1) /* KANNADA LETTER VOCALIC RR - <---> - KANNADA LETTER 
VOCALIC LL */ - -LIT_UNICODE_RANGE_LO (0x0D05, 0x0D0C) /* MALAYALAM LETTER A - <---> - MALAYALAM LETTER VOCALIC L */ - -LIT_UNICODE_RANGE_LO (0x0D0E, 0x0D10) /* MALAYALAM LETTER E - <---> - MALAYALAM LETTER AI */ - -LIT_UNICODE_RANGE_LO (0x0D12, 0x0D28) /* MALAYALAM LETTER O - <---> - MALAYALAM LETTER NA */ - -LIT_UNICODE_RANGE_LO (0x0D2A, 0x0D39) /* MALAYALAM LETTER PA - <---> - MALAYALAM LETTER HA */ - -LIT_UNICODE_RANGE_LO (0x0D60, 0x0D61) /* MALAYALAM LETTER VOCALIC RR - <---> - MALAYALAM LETTER VOCALIC LL */ - -LIT_UNICODE_RANGE_LO (0x0D85, 0x0D96) /* SINHALA LETTER AYANNA - <---> - SINHALA LETTER AUYANNA */ - -LIT_UNICODE_RANGE_LO (0x0D9A, 0x0DB1) /* SINHALA LETTER ALPAPRAANA KAYANNA - <---> - SINHALA LETTER DANTAJA NAYANNA */ - -LIT_UNICODE_RANGE_LO (0x0DB3, 0x0DBB) /* SINHALA LETTER SANYAKA DAYANNA - <---> - SINHALA LETTER RAYANNA */ - -LIT_UNICODE_RANGE_LO (0x0DBD, 0x0DBD) /* SINHALA LETTER DANTAJA LAYANNA */ - -LIT_UNICODE_RANGE_LO (0x0DC0, 0x0DC6) /* SINHALA LETTER VAYANNA - <---> - SINHALA LETTER FAYANNA */ - -LIT_UNICODE_RANGE_LO (0x0E01, 0x0E30) /* THAI CHARACTER KO KAI - <---> - THAI CHARACTER SARA A */ - -LIT_UNICODE_RANGE_LO (0x0E32, 0x0E33) /* THAI CHARACTER SARA AA - <---> - THAI CHARACTER SARA AM */ - -LIT_UNICODE_RANGE_LO (0x0E40, 0x0E45) /* THAI CHARACTER SARA E - <---> - THAI CHARACTER LAKKHANGYAO */ - -LIT_UNICODE_RANGE_LO (0x0E81, 0x0E82) /* LAO LETTER KO - <---> - LAO LETTER KHO SUNG */ - -LIT_UNICODE_RANGE_LO (0x0E84, 0x0E84) /* LAO LETTER KHO TAM */ - -LIT_UNICODE_RANGE_LO (0x0E87, 0x0E88) /* LAO LETTER NGO - <---> - LAO LETTER CO */ - -LIT_UNICODE_RANGE_LO (0x0E8A, 0x0E8A) /* LAO LETTER SO TAM */ - -LIT_UNICODE_RANGE_LO (0x0E8D, 0x0E8D) /* LAO LETTER NYO */ - -LIT_UNICODE_RANGE_LO (0x0E94, 0x0E97) /* LAO LETTER DO - <---> - LAO LETTER THO TAM */ - -LIT_UNICODE_RANGE_LO (0x0E99, 0x0E9F) /* LAO LETTER NO - <---> - LAO LETTER FO SUNG */ - -LIT_UNICODE_RANGE_LO (0x0EA1, 0x0EA3) /* LAO LETTER MO - <---> - LAO LETTER LO LING */ - 
-LIT_UNICODE_RANGE_LO (0x0EA5, 0x0EA5) /* LAO LETTER LO LOOT */ - -LIT_UNICODE_RANGE_LO (0x0EA7, 0x0EA7) /* LAO LETTER WO */ - -LIT_UNICODE_RANGE_LO (0x0EAA, 0x0EAB) /* LAO LETTER SO SUNG - <---> - LAO LETTER HO SUNG */ - -LIT_UNICODE_RANGE_LO (0x0EAD, 0x0EB0) /* LAO LETTER O - <---> - LAO VOWEL SIGN A */ - -LIT_UNICODE_RANGE_LO (0x0EB2, 0x0EB3) /* LAO VOWEL SIGN AA - <---> - LAO VOWEL SIGN AM */ - -LIT_UNICODE_RANGE_LO (0x0EBD, 0x0EBD) /* LAO SEMIVOWEL SIGN NYO */ - -LIT_UNICODE_RANGE_LO (0x0EC0, 0x0EC4) /* LAO VOWEL SIGN E - <---> - LAO VOWEL SIGN AI */ - -LIT_UNICODE_RANGE_LO (0x0EDC, 0x0EDD) /* LAO HO NO - <---> - LAO HO MO */ - -LIT_UNICODE_RANGE_LO (0x0F00, 0x0F00) /* TIBETAN SYLLABLE OM */ - -LIT_UNICODE_RANGE_LO (0x0F40, 0x0F47) /* TIBETAN LETTER KA - <---> - TIBETAN LETTER JA */ - -LIT_UNICODE_RANGE_LO (0x0F49, 0x0F6A) /* TIBETAN LETTER NYA - <---> - TIBETAN LETTER FIXED-FORM RA */ - -LIT_UNICODE_RANGE_LO (0x0F88, 0x0F8B) /* TIBETAN SIGN LCE TSA CAN - <---> - TIBETAN SIGN GRU MED RGYINGS */ - -LIT_UNICODE_RANGE_LO (0x1000, 0x1021) /* MYANMAR LETTER KA - <---> - MYANMAR LETTER A */ - -LIT_UNICODE_RANGE_LO (0x1023, 0x1027) /* MYANMAR LETTER I - <---> - MYANMAR LETTER E */ - -LIT_UNICODE_RANGE_LO (0x1029, 0x102A) /* MYANMAR LETTER O - <---> - MYANMAR LETTER AU */ - -LIT_UNICODE_RANGE_LO (0x1050, 0x1055) /* MYANMAR LETTER SHA - <---> - MYANMAR LETTER VOCALIC LL */ - -LIT_UNICODE_RANGE_LO (0x10D0, 0x10F6) /* GEORGIAN LETTER AN - <---> - GEORGIAN LETTER FI */ - -LIT_UNICODE_RANGE_LO (0x1100, 0x1159) /* HANGUL CHOSEONG KIYEOK - <---> - HANGUL CHOSEONG YEORINHIEUH */ - -LIT_UNICODE_RANGE_LO (0x115F, 0x11A2) /* HANGUL CHOSEONG FILLER - <---> - HANGUL JUNGSEONG SSANGARAEA */ - -LIT_UNICODE_RANGE_LO (0x11A8, 0x11F9) /* HANGUL JONGSEONG KIYEOK - <---> - HANGUL JONGSEONG YEORINHIEUH */ - -LIT_UNICODE_RANGE_LO (0x1200, 0x1206) /* ETHIOPIC SYLLABLE HA - <---> - ETHIOPIC SYLLABLE HO */ - -LIT_UNICODE_RANGE_LO (0x1208, 0x1246) /* ETHIOPIC SYLLABLE LA - <---> - ETHIOPIC 
SYLLABLE QO */ - -LIT_UNICODE_RANGE_LO (0x1248, 0x1248) /* ETHIOPIC SYLLABLE QWA */ - -LIT_UNICODE_RANGE_LO (0x124A, 0x124D) /* ETHIOPIC SYLLABLE QWI - <---> - ETHIOPIC SYLLABLE QWE */ - -LIT_UNICODE_RANGE_LO (0x1250, 0x1256) /* ETHIOPIC SYLLABLE QHA - <---> - ETHIOPIC SYLLABLE QHO */ - -LIT_UNICODE_RANGE_LO (0x1258, 0x1258) /* ETHIOPIC SYLLABLE QHWA */ - -LIT_UNICODE_RANGE_LO (0x125A, 0x125D) /* ETHIOPIC SYLLABLE QHWI - <---> - ETHIOPIC SYLLABLE QHWE */ - -LIT_UNICODE_RANGE_LO (0x1260, 0x1286) /* ETHIOPIC SYLLABLE BA - <---> - ETHIOPIC SYLLABLE XO */ - -LIT_UNICODE_RANGE_LO (0x1288, 0x1288) /* ETHIOPIC SYLLABLE XWA */ - -LIT_UNICODE_RANGE_LO (0x128A, 0x128D) /* ETHIOPIC SYLLABLE XWI - <---> - ETHIOPIC SYLLABLE XWE */ - -LIT_UNICODE_RANGE_LO (0x1290, 0x12AE) /* ETHIOPIC SYLLABLE NA - <---> - ETHIOPIC SYLLABLE KO */ - -LIT_UNICODE_RANGE_LO (0x12B0, 0x12B0) /* ETHIOPIC SYLLABLE KWA */ - -LIT_UNICODE_RANGE_LO (0x12B2, 0x12B5) /* ETHIOPIC SYLLABLE KWI - <---> - ETHIOPIC SYLLABLE KWE */ - -LIT_UNICODE_RANGE_LO (0x12B8, 0x12BE) /* ETHIOPIC SYLLABLE KXA - <---> - ETHIOPIC SYLLABLE KXO */ - -LIT_UNICODE_RANGE_LO (0x12C0, 0x12C0) /* ETHIOPIC SYLLABLE KXWA */ - -LIT_UNICODE_RANGE_LO (0x12C2, 0x12C5) /* ETHIOPIC SYLLABLE KXWI - <---> - ETHIOPIC SYLLABLE KXWE */ - -LIT_UNICODE_RANGE_LO (0x12C8, 0x12CE) /* ETHIOPIC SYLLABLE WA - <---> - ETHIOPIC SYLLABLE WO */ - -LIT_UNICODE_RANGE_LO (0x12D0, 0x12D6) /* ETHIOPIC SYLLABLE PHARYNGEAL A - <---> - ETHIOPIC SYLLABLE PHARYNGEAL O */ - -LIT_UNICODE_RANGE_LO (0x12D8, 0x12EE) /* ETHIOPIC SYLLABLE ZA - <---> - ETHIOPIC SYLLABLE YO */ - -LIT_UNICODE_RANGE_LO (0x12F0, 0x130E) /* ETHIOPIC SYLLABLE DA - <---> - ETHIOPIC SYLLABLE GO */ - -LIT_UNICODE_RANGE_LO (0x1310, 0x1310) /* ETHIOPIC SYLLABLE GWA */ - -LIT_UNICODE_RANGE_LO (0x1312, 0x1315) /* ETHIOPIC SYLLABLE GWI - <---> - ETHIOPIC SYLLABLE GWE */ - -LIT_UNICODE_RANGE_LO (0x1318, 0x131E) /* ETHIOPIC SYLLABLE GGA - <---> - ETHIOPIC SYLLABLE GGO */ - -LIT_UNICODE_RANGE_LO (0x1320, 0x1346) 
/* ETHIOPIC SYLLABLE THA - <---> - ETHIOPIC SYLLABLE TZO */ - -LIT_UNICODE_RANGE_LO (0x1348, 0x135A) /* ETHIOPIC SYLLABLE FA - <---> - ETHIOPIC SYLLABLE FYA */ - -LIT_UNICODE_RANGE_LO (0x13A0, 0x13F4) /* CHEROKEE LETTER A - <---> - CHEROKEE LETTER YV */ - -LIT_UNICODE_RANGE_LO (0x1401, 0x166C) /* CANADIAN SYLLABICS E - <---> - CANADIAN SYLLABICS CARRIER TTSA */ - -LIT_UNICODE_RANGE_LO (0x166F, 0x1676) /* CANADIAN SYLLABICS QAI - <---> - CANADIAN SYLLABICS NNGAA */ - -LIT_UNICODE_RANGE_LO (0x1681, 0x169A) /* OGHAM LETTER BEITH - <---> - OGHAM LETTER PEITH */ - -LIT_UNICODE_RANGE_LO (0x16A0, 0x16EA) /* RUNIC LETTER FEHU FEOH FE F - <---> - RUNIC LETTER X */ - -LIT_UNICODE_RANGE_LO (0x1780, 0x17B3) /* KHMER LETTER KA - <---> - KHMER INDEPENDENT VOWEL QAU */ - -LIT_UNICODE_RANGE_LO (0x1820, 0x1842) /* MONGOLIAN LETTER A - <---> - MONGOLIAN LETTER CHI */ - -LIT_UNICODE_RANGE_LO (0x1844, 0x1877) /* MONGOLIAN LETTER TODO E - <---> - MONGOLIAN LETTER MANCHU ZHA */ - -LIT_UNICODE_RANGE_LO (0x1880, 0x18A8) /* MONGOLIAN LETTER ALI GALI ANUSVARA ONE - <---> - MONGOLIAN LETTER MANCHU ALI GALI BHA */ - -LIT_UNICODE_RANGE_LO (0x2135, 0x2138) /* ALEF SYMBOL - <---> - DALET SYMBOL */ - -LIT_UNICODE_RANGE_LO (0x3006, 0x3006) /* IDEOGRAPHIC CLOSING MARK */ - -LIT_UNICODE_RANGE_LO (0x3041, 0x3094) /* HIRAGANA LETTER SMALL A - <---> - HIRAGANA LETTER VU */ - -LIT_UNICODE_RANGE_LO (0x30A1, 0x30FA) /* KATAKANA LETTER SMALL A - <---> - KATAKANA LETTER VO */ - -LIT_UNICODE_RANGE_LO (0x3105, 0x312C) /* BOPOMOFO LETTER B - <---> - BOPOMOFO LETTER GN */ - -LIT_UNICODE_RANGE_LO (0x3131, 0x318E) /* HANGUL LETTER KIYEOK - <---> - HANGUL LETTER ARAEAE */ - -LIT_UNICODE_RANGE_LO (0x31A0, 0x31B7) /* BOPOMOFO LETTER BU - <---> - BOPOMOFO FINAL LETTER H */ - -LIT_UNICODE_RANGE_LO (0x3400, 0x3400) /* */ - -LIT_UNICODE_RANGE_LO (0x4DB5, 0x4DB5) /* */ - -LIT_UNICODE_RANGE_LO (0x4E00, 0x4E00) /* */ - -LIT_UNICODE_RANGE_LO (0x9FA5, 0x9FA5) /* */ - -LIT_UNICODE_RANGE_LO (0xA000, 0xA48C) /* YI SYLLABLE IT - 
<---> - YI SYLLABLE YYR */ - -LIT_UNICODE_RANGE_LO (0xAC00, 0xAC00) /* */ - -LIT_UNICODE_RANGE_LO (0xD7A3, 0xD7A3) /* */ - -LIT_UNICODE_RANGE_LO (0xF900, 0xFA2D) /* CJK COMPATIBILITY IDEOGRAPH-F900 - <---> - CJK COMPATIBILITY IDEOGRAPH-FA2D */ - -LIT_UNICODE_RANGE_LO (0xFB1D, 0xFB1D) /* HEBREW LETTER YOD WITH HIRIQ */ - -LIT_UNICODE_RANGE_LO (0xFB1F, 0xFB28) /* HEBREW LIGATURE YIDDISH YOD YOD PATAH - <---> - HEBREW LETTER WIDE TAV */ - -LIT_UNICODE_RANGE_LO (0xFB2A, 0xFB36) /* HEBREW LETTER SHIN WITH SHIN DOT - <---> - HEBREW LETTER ZAYIN WITH DAGESH */ - -LIT_UNICODE_RANGE_LO (0xFB38, 0xFB3C) /* HEBREW LETTER TET WITH DAGESH - <---> - HEBREW LETTER LAMED WITH DAGESH */ - -LIT_UNICODE_RANGE_LO (0xFB3E, 0xFB3E) /* HEBREW LETTER MEM WITH DAGESH */ - -LIT_UNICODE_RANGE_LO (0xFB40, 0xFB41) /* HEBREW LETTER NUN WITH DAGESH - <---> - HEBREW LETTER SAMEKH WITH DAGESH */ - -LIT_UNICODE_RANGE_LO (0xFB43, 0xFB44) /* HEBREW LETTER FINAL PE WITH DAGESH - <---> - HEBREW LETTER PE WITH DAGESH */ - -LIT_UNICODE_RANGE_LO (0xFB46, 0xFBB1) /* HEBREW LETTER TSADI WITH DAGESH - <---> - ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM */ - -LIT_UNICODE_RANGE_LO (0xFBD3, 0xFD3D) /* ARABIC LETTER NG ISOLATED FORM - <---> - ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM */ - -LIT_UNICODE_RANGE_LO (0xFD50, 0xFD8F) /* ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM - <---> - ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM */ - -LIT_UNICODE_RANGE_LO (0xFD92, 0xFDC7) /* ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM - <---> - ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM */ - -LIT_UNICODE_RANGE_LO (0xFDF0, 0xFDFB) /* ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM - <---> - ARABIC LIGATURE JALLAJALALOUHOU */ - -LIT_UNICODE_RANGE_LO (0xFE70, 0xFE72) /* ARABIC FATHATAN ISOLATED FORM - <---> - ARABIC DAMMATAN ISOLATED FORM */ - -LIT_UNICODE_RANGE_LO (0xFE74, 0xFE74) /* ARABIC KASRATAN ISOLATED FORM */ - -LIT_UNICODE_RANGE_LO (0xFE76, 0xFEFC) /* ARABIC 
FATHA ISOLATED FORM - <---> - ARABIC LIGATURE LAM WITH ALEF FINAL FORM */ - -LIT_UNICODE_RANGE_LO (0xFF66, 0xFF6F) /* HALFWIDTH KATAKANA LETTER WO - <---> - HALFWIDTH KATAKANA LETTER SMALL TU */ - -LIT_UNICODE_RANGE_LO (0xFF71, 0xFF9D) /* HALFWIDTH KATAKANA LETTER A - <---> - HALFWIDTH KATAKANA LETTER N */ - -LIT_UNICODE_RANGE_LO (0xFFA0, 0xFFBE) /* HALFWIDTH HANGUL FILLER - <---> - HALFWIDTH HANGUL LETTER HIEUH */ - -LIT_UNICODE_RANGE_LO (0xFFC2, 0xFFC7) /* HALFWIDTH HANGUL LETTER A - <---> - HALFWIDTH HANGUL LETTER E */ - -LIT_UNICODE_RANGE_LO (0xFFCA, 0xFFCF) /* HALFWIDTH HANGUL LETTER YEO - <---> - HALFWIDTH HANGUL LETTER OE */ - -LIT_UNICODE_RANGE_LO (0xFFD2, 0xFFD7) /* HALFWIDTH HANGUL LETTER YO - <---> - HALFWIDTH HANGUL LETTER YU */ - -LIT_UNICODE_RANGE_LO (0xFFDA, 0xFFDC) /* HALFWIDTH HANGUL LETTER EU - <---> - HALFWIDTH HANGUL LETTER I */ - - -/** - * "Punctuation, Connector" category - */ -#ifndef LIT_UNICODE_RANGE_PC -# define LIT_UNICODE_RANGE_PC(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_PC */ -LIT_UNICODE_RANGE_PC (0x005F, 0x005F) /* LOW LINE */ - -LIT_UNICODE_RANGE_PC (0x203F, 0x2040) /* UNDERTIE - <---> - CHARACTER TIE */ - -LIT_UNICODE_RANGE_PC (0x30FB, 0x30FB) /* KATAKANA MIDDLE DOT */ - -LIT_UNICODE_RANGE_PC (0xFE33, 0xFE34) /* PRESENTATION FORM FOR VERTICAL LOW LINE - <---> - PRESENTATION FORM FOR VERTICAL WAVY LOW LINE */ - -LIT_UNICODE_RANGE_PC (0xFE4D, 0xFE4F) /* DASHED LOW LINE - <---> - WAVY LOW LINE */ - -LIT_UNICODE_RANGE_PC (0xFF3F, 0xFF3F) /* FULLWIDTH LOW LINE */ - -LIT_UNICODE_RANGE_PC (0xFF65, 0xFF65) /* HALFWIDTH KATAKANA MIDDLE DOT */ - - -/** - * "Punctuation, Dash" category - */ -#ifndef LIT_UNICODE_RANGE_PD -# define LIT_UNICODE_RANGE_PD(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_PD */ -LIT_UNICODE_RANGE_PD (0x002D, 0x002D) /* HYPHEN-MINUS */ - -LIT_UNICODE_RANGE_PD (0x00AD, 0x00AD) /* SOFT HYPHEN */ - -LIT_UNICODE_RANGE_PD (0x058A, 0x058A) /* ARMENIAN HYPHEN */ - -LIT_UNICODE_RANGE_PD (0x1806, 0x1806) /* 
MONGOLIAN TODO SOFT HYPHEN */ - -LIT_UNICODE_RANGE_PD (0x2010, 0x2015) /* HYPHEN - <---> - HORIZONTAL BAR */ - -LIT_UNICODE_RANGE_PD (0x301C, 0x301C) /* WAVE DASH */ - -LIT_UNICODE_RANGE_PD (0x3030, 0x3030) /* WAVY DASH */ - -LIT_UNICODE_RANGE_PD (0xFE31, 0xFE32) /* PRESENTATION FORM FOR VERTICAL EM DASH - <---> - PRESENTATION FORM FOR VERTICAL EN DASH */ - -LIT_UNICODE_RANGE_PD (0xFE58, 0xFE58) /* SMALL EM DASH */ - -LIT_UNICODE_RANGE_PD (0xFE63, 0xFE63) /* SMALL HYPHEN-MINUS */ - -LIT_UNICODE_RANGE_PD (0xFF0D, 0xFF0D) /* FULLWIDTH HYPHEN-MINUS */ - - -/** - * "Punctuation, Open" category - */ -#ifndef LIT_UNICODE_RANGE_PS -# define LIT_UNICODE_RANGE_PS(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_PS */ -LIT_UNICODE_RANGE_PS (0x0028, 0x0028) /* LEFT PARENTHESIS */ - -LIT_UNICODE_RANGE_PS (0x005B, 0x005B) /* LEFT SQUARE BRACKET */ - -LIT_UNICODE_RANGE_PS (0x007B, 0x007B) /* LEFT CURLY BRACKET */ - -LIT_UNICODE_RANGE_PS (0x0F3A, 0x0F3A) /* TIBETAN MARK GUG RTAGS GYON */ - -LIT_UNICODE_RANGE_PS (0x0F3C, 0x0F3C) /* TIBETAN MARK ANG KHANG GYON */ - -LIT_UNICODE_RANGE_PS (0x169B, 0x169B) /* OGHAM FEATHER MARK */ - -LIT_UNICODE_RANGE_PS (0x201A, 0x201A) /* SINGLE LOW-9 QUOTATION MARK */ - -LIT_UNICODE_RANGE_PS (0x201E, 0x201E) /* DOUBLE LOW-9 QUOTATION MARK */ - -LIT_UNICODE_RANGE_PS (0x2045, 0x2045) /* LEFT SQUARE BRACKET WITH QUILL */ - -LIT_UNICODE_RANGE_PS (0x207D, 0x207D) /* SUPERSCRIPT LEFT PARENTHESIS */ - -LIT_UNICODE_RANGE_PS (0x208D, 0x208D) /* SUBSCRIPT LEFT PARENTHESIS */ - -LIT_UNICODE_RANGE_PS (0x2329, 0x2329) /* LEFT-POINTING ANGLE BRACKET */ - -LIT_UNICODE_RANGE_PS (0x3008, 0x3008) /* LEFT ANGLE BRACKET */ - -LIT_UNICODE_RANGE_PS (0x300A, 0x300A) /* LEFT DOUBLE ANGLE BRACKET */ - -LIT_UNICODE_RANGE_PS (0x300C, 0x300C) /* LEFT CORNER BRACKET */ - -LIT_UNICODE_RANGE_PS (0x300E, 0x300E) /* LEFT WHITE CORNER BRACKET */ - -LIT_UNICODE_RANGE_PS (0x3010, 0x3010) /* LEFT BLACK LENTICULAR BRACKET */ - -LIT_UNICODE_RANGE_PS (0x3014, 0x3014) /* LEFT 
TORTOISE SHELL BRACKET */ - -LIT_UNICODE_RANGE_PS (0x3016, 0x3016) /* LEFT WHITE LENTICULAR BRACKET */ - -LIT_UNICODE_RANGE_PS (0x3018, 0x3018) /* LEFT WHITE TORTOISE SHELL BRACKET */ - -LIT_UNICODE_RANGE_PS (0x301A, 0x301A) /* LEFT WHITE SQUARE BRACKET */ - -LIT_UNICODE_RANGE_PS (0x301D, 0x301D) /* REVERSED DOUBLE PRIME QUOTATION MARK */ - -LIT_UNICODE_RANGE_PS (0xFD3E, 0xFD3E) /* ORNATE LEFT PARENTHESIS */ - -LIT_UNICODE_RANGE_PS (0xFE35, 0xFE35) /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */ - -LIT_UNICODE_RANGE_PS (0xFE37, 0xFE37) /* PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET */ - -LIT_UNICODE_RANGE_PS (0xFE39, 0xFE39) /* PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET */ - -LIT_UNICODE_RANGE_PS (0xFE3B, 0xFE3B) /* PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET */ - -LIT_UNICODE_RANGE_PS (0xFE3D, 0xFE3D) /* PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET */ - -LIT_UNICODE_RANGE_PS (0xFE3F, 0xFE3F) /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */ - -LIT_UNICODE_RANGE_PS (0xFE41, 0xFE41) /* PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET */ - -LIT_UNICODE_RANGE_PS (0xFE43, 0xFE43) /* PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET */ - -LIT_UNICODE_RANGE_PS (0xFE59, 0xFE59) /* SMALL LEFT PARENTHESIS */ - -LIT_UNICODE_RANGE_PS (0xFE5B, 0xFE5B) /* SMALL LEFT CURLY BRACKET */ - -LIT_UNICODE_RANGE_PS (0xFE5D, 0xFE5D) /* SMALL LEFT TORTOISE SHELL BRACKET */ - -LIT_UNICODE_RANGE_PS (0xFF08, 0xFF08) /* FULLWIDTH LEFT PARENTHESIS */ - -LIT_UNICODE_RANGE_PS (0xFF3B, 0xFF3B) /* FULLWIDTH LEFT SQUARE BRACKET */ - -LIT_UNICODE_RANGE_PS (0xFF5B, 0xFF5B) /* FULLWIDTH LEFT CURLY BRACKET */ - -LIT_UNICODE_RANGE_PS (0xFF62, 0xFF62) /* HALFWIDTH LEFT CORNER BRACKET */ - - -/** - * "Punctuation, Close" category - */ -#ifndef LIT_UNICODE_RANGE_PE -# define LIT_UNICODE_RANGE_PE(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_PE */ -LIT_UNICODE_RANGE_PE (0x0029, 0x0029) /* RIGHT PARENTHESIS */ - 
-LIT_UNICODE_RANGE_PE (0x005D, 0x005D) /* RIGHT SQUARE BRACKET */ - -LIT_UNICODE_RANGE_PE (0x007D, 0x007D) /* RIGHT CURLY BRACKET */ - -LIT_UNICODE_RANGE_PE (0x0F3B, 0x0F3B) /* TIBETAN MARK GUG RTAGS GYAS */ - -LIT_UNICODE_RANGE_PE (0x0F3D, 0x0F3D) /* TIBETAN MARK ANG KHANG GYAS */ - -LIT_UNICODE_RANGE_PE (0x169C, 0x169C) /* OGHAM REVERSED FEATHER MARK */ - -LIT_UNICODE_RANGE_PE (0x2046, 0x2046) /* RIGHT SQUARE BRACKET WITH QUILL */ - -LIT_UNICODE_RANGE_PE (0x207E, 0x207E) /* SUPERSCRIPT RIGHT PARENTHESIS */ - -LIT_UNICODE_RANGE_PE (0x208E, 0x208E) /* SUBSCRIPT RIGHT PARENTHESIS */ - -LIT_UNICODE_RANGE_PE (0x232A, 0x232A) /* RIGHT-POINTING ANGLE BRACKET */ - -LIT_UNICODE_RANGE_PE (0x3009, 0x3009) /* RIGHT ANGLE BRACKET */ - -LIT_UNICODE_RANGE_PE (0x300B, 0x300B) /* RIGHT DOUBLE ANGLE BRACKET */ - -LIT_UNICODE_RANGE_PE (0x300D, 0x300D) /* RIGHT CORNER BRACKET */ - -LIT_UNICODE_RANGE_PE (0x300F, 0x300F) /* RIGHT WHITE CORNER BRACKET */ - -LIT_UNICODE_RANGE_PE (0x3011, 0x3011) /* RIGHT BLACK LENTICULAR BRACKET */ - -LIT_UNICODE_RANGE_PE (0x3015, 0x3015) /* RIGHT TORTOISE SHELL BRACKET */ - -LIT_UNICODE_RANGE_PE (0x3017, 0x3017) /* RIGHT WHITE LENTICULAR BRACKET */ - -LIT_UNICODE_RANGE_PE (0x3019, 0x3019) /* RIGHT WHITE TORTOISE SHELL BRACKET */ - -LIT_UNICODE_RANGE_PE (0x301B, 0x301B) /* RIGHT WHITE SQUARE BRACKET */ - -LIT_UNICODE_RANGE_PE (0x301E, 0x301F) /* DOUBLE PRIME QUOTATION MARK - <---> - LOW DOUBLE PRIME QUOTATION MARK */ - -LIT_UNICODE_RANGE_PE (0xFD3F, 0xFD3F) /* ORNATE RIGHT PARENTHESIS */ - -LIT_UNICODE_RANGE_PE (0xFE36, 0xFE36) /* PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS */ - -LIT_UNICODE_RANGE_PE (0xFE38, 0xFE38) /* PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET */ - -LIT_UNICODE_RANGE_PE (0xFE3A, 0xFE3A) /* PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET */ - -LIT_UNICODE_RANGE_PE (0xFE3C, 0xFE3C) /* PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET */ - -LIT_UNICODE_RANGE_PE (0xFE3E, 0xFE3E) /* PRESENTATION FORM 
FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET */ - -LIT_UNICODE_RANGE_PE (0xFE40, 0xFE40) /* PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET */ - -LIT_UNICODE_RANGE_PE (0xFE42, 0xFE42) /* PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET */ - -LIT_UNICODE_RANGE_PE (0xFE44, 0xFE44) /* PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET */ - -LIT_UNICODE_RANGE_PE (0xFE5A, 0xFE5A) /* SMALL RIGHT PARENTHESIS */ - -LIT_UNICODE_RANGE_PE (0xFE5C, 0xFE5C) /* SMALL RIGHT CURLY BRACKET */ - -LIT_UNICODE_RANGE_PE (0xFE5E, 0xFE5E) /* SMALL RIGHT TORTOISE SHELL BRACKET */ - -LIT_UNICODE_RANGE_PE (0xFF09, 0xFF09) /* FULLWIDTH RIGHT PARENTHESIS */ - -LIT_UNICODE_RANGE_PE (0xFF3D, 0xFF3D) /* FULLWIDTH RIGHT SQUARE BRACKET */ - -LIT_UNICODE_RANGE_PE (0xFF5D, 0xFF5D) /* FULLWIDTH RIGHT CURLY BRACKET */ - -LIT_UNICODE_RANGE_PE (0xFF63, 0xFF63) /* HALFWIDTH RIGHT CORNER BRACKET */ - - -/** - * "Punctuation, Initial quote" category - */ -#ifndef LIT_UNICODE_RANGE_PI -# define LIT_UNICODE_RANGE_PI(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_PI */ -LIT_UNICODE_RANGE_PI (0x00AB, 0x00AB) /* LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */ - -LIT_UNICODE_RANGE_PI (0x2018, 0x2018) /* LEFT SINGLE QUOTATION MARK */ - -LIT_UNICODE_RANGE_PI (0x201B, 0x201C) /* SINGLE HIGH-REVERSED-9 QUOTATION MARK - <---> - LEFT DOUBLE QUOTATION MARK */ - -LIT_UNICODE_RANGE_PI (0x201F, 0x201F) /* DOUBLE HIGH-REVERSED-9 QUOTATION MARK */ - -LIT_UNICODE_RANGE_PI (0x2039, 0x2039) /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ - - -/** - * "Punctuation, Final quote" category - */ -#ifndef LIT_UNICODE_RANGE_PF -# define LIT_UNICODE_RANGE_PF(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_PF */ -LIT_UNICODE_RANGE_PF (0x00BB, 0x00BB) /* RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */ - -LIT_UNICODE_RANGE_PF (0x2019, 0x2019) /* RIGHT SINGLE QUOTATION MARK */ - -LIT_UNICODE_RANGE_PF (0x201D, 0x201D) /* RIGHT DOUBLE QUOTATION MARK */ - -LIT_UNICODE_RANGE_PF (0x203A, 0x203A) /* SINGLE RIGHT-POINTING ANGLE 
QUOTATION MARK */ - - -/** - * "Punctuation, Other" category - */ -#ifndef LIT_UNICODE_RANGE_PO -# define LIT_UNICODE_RANGE_PO(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_PO */ -LIT_UNICODE_RANGE_PO (0x0021, 0x0023) /* EXCLAMATION MARK - <---> - NUMBER SIGN */ - -LIT_UNICODE_RANGE_PO (0x0025, 0x0027) /* PERCENT SIGN - <---> - APOSTROPHE */ - -LIT_UNICODE_RANGE_PO (0x002A, 0x002A) /* ASTERISK */ - -LIT_UNICODE_RANGE_PO (0x002C, 0x002C) /* COMMA */ - -LIT_UNICODE_RANGE_PO (0x002E, 0x002F) /* FULL STOP - <---> - SOLIDUS */ - -LIT_UNICODE_RANGE_PO (0x003A, 0x003B) /* COLON - <---> - SEMICOLON */ - -LIT_UNICODE_RANGE_PO (0x003F, 0x0040) /* QUESTION MARK - <---> - COMMERCIAL AT */ - -LIT_UNICODE_RANGE_PO (0x005C, 0x005C) /* REVERSE SOLIDUS */ - -LIT_UNICODE_RANGE_PO (0x00A1, 0x00A1) /* INVERTED EXCLAMATION MARK */ - -LIT_UNICODE_RANGE_PO (0x00B7, 0x00B7) /* MIDDLE DOT */ - -LIT_UNICODE_RANGE_PO (0x00BF, 0x00BF) /* INVERTED QUESTION MARK */ - -LIT_UNICODE_RANGE_PO (0x037E, 0x037E) /* GREEK QUESTION MARK */ - -LIT_UNICODE_RANGE_PO (0x0387, 0x0387) /* GREEK ANO TELEIA */ - -LIT_UNICODE_RANGE_PO (0x055A, 0x055F) /* ARMENIAN APOSTROPHE - <---> - ARMENIAN ABBREVIATION MARK */ - -LIT_UNICODE_RANGE_PO (0x0589, 0x0589) /* ARMENIAN FULL STOP */ - -LIT_UNICODE_RANGE_PO (0x05BE, 0x05BE) /* HEBREW PUNCTUATION MAQAF */ - -LIT_UNICODE_RANGE_PO (0x05C0, 0x05C0) /* HEBREW PUNCTUATION PASEQ */ - -LIT_UNICODE_RANGE_PO (0x05C3, 0x05C3) /* HEBREW PUNCTUATION SOF PASUQ */ - -LIT_UNICODE_RANGE_PO (0x05F3, 0x05F4) /* HEBREW PUNCTUATION GERESH - <---> - HEBREW PUNCTUATION GERSHAYIM */ - -LIT_UNICODE_RANGE_PO (0x060C, 0x060C) /* ARABIC COMMA */ - -LIT_UNICODE_RANGE_PO (0x061B, 0x061B) /* ARABIC SEMICOLON */ - -LIT_UNICODE_RANGE_PO (0x061F, 0x061F) /* ARABIC QUESTION MARK */ - -LIT_UNICODE_RANGE_PO (0x066A, 0x066D) /* ARABIC PERCENT SIGN - <---> - ARABIC FIVE POINTED STAR */ - -LIT_UNICODE_RANGE_PO (0x06D4, 0x06D4) /* ARABIC FULL STOP */ - -LIT_UNICODE_RANGE_PO (0x0700, 0x070D) /* SYRIAC 
END OF PARAGRAPH - <---> - SYRIAC HARKLEAN ASTERISCUS */ - -LIT_UNICODE_RANGE_PO (0x0964, 0x0965) /* DEVANAGARI DANDA - <---> - DEVANAGARI DOUBLE DANDA */ - -LIT_UNICODE_RANGE_PO (0x0970, 0x0970) /* DEVANAGARI ABBREVIATION SIGN */ - -LIT_UNICODE_RANGE_PO (0x0DF4, 0x0DF4) /* SINHALA PUNCTUATION KUNDDALIYA */ - -LIT_UNICODE_RANGE_PO (0x0E4F, 0x0E4F) /* THAI CHARACTER FONGMAN */ - -LIT_UNICODE_RANGE_PO (0x0E5A, 0x0E5B) /* THAI CHARACTER ANGKHANKHU - <---> - THAI CHARACTER KHOMUT */ - -LIT_UNICODE_RANGE_PO (0x0F04, 0x0F12) /* TIBETAN MARK INITIAL YIG MGO MDUN MA - <---> - TIBETAN MARK RGYA GRAM SHAD */ - -LIT_UNICODE_RANGE_PO (0x0F85, 0x0F85) /* TIBETAN MARK PALUTA */ - -LIT_UNICODE_RANGE_PO (0x104A, 0x104F) /* MYANMAR SIGN LITTLE SECTION - <---> - MYANMAR SYMBOL GENITIVE */ - -LIT_UNICODE_RANGE_PO (0x10FB, 0x10FB) /* GEORGIAN PARAGRAPH SEPARATOR */ - -LIT_UNICODE_RANGE_PO (0x1361, 0x1368) /* ETHIOPIC WORDSPACE - <---> - ETHIOPIC PARAGRAPH SEPARATOR */ - -LIT_UNICODE_RANGE_PO (0x166D, 0x166E) /* CANADIAN SYLLABICS CHI SIGN - <---> - CANADIAN SYLLABICS FULL STOP */ - -LIT_UNICODE_RANGE_PO (0x16EB, 0x16ED) /* RUNIC SINGLE PUNCTUATION - <---> - RUNIC CROSS PUNCTUATION */ - -LIT_UNICODE_RANGE_PO (0x17D4, 0x17DA) /* KHMER SIGN KHAN - <---> - KHMER SIGN KOOMUUT */ - -LIT_UNICODE_RANGE_PO (0x17DC, 0x17DC) /* KHMER SIGN AVAKRAHASANYA */ - -LIT_UNICODE_RANGE_PO (0x1800, 0x1805) /* MONGOLIAN BIRGA - <---> - MONGOLIAN FOUR DOTS */ - -LIT_UNICODE_RANGE_PO (0x1807, 0x180A) /* MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER - <---> - MONGOLIAN NIRUGU */ - -LIT_UNICODE_RANGE_PO (0x2016, 0x2017) /* DOUBLE VERTICAL LINE - <---> - DOUBLE LOW LINE */ - -LIT_UNICODE_RANGE_PO (0x2020, 0x2027) /* DAGGER - <---> - HYPHENATION POINT */ - -LIT_UNICODE_RANGE_PO (0x2030, 0x2038) /* PER MILLE SIGN - <---> - CARET */ - -LIT_UNICODE_RANGE_PO (0x203B, 0x203E) /* REFERENCE MARK - <---> - OVERLINE */ - -LIT_UNICODE_RANGE_PO (0x2041, 0x2043) /* CARET INSERTION POINT - <---> - HYPHEN BULLET */ - 
-LIT_UNICODE_RANGE_PO (0x2048, 0x204D) /* QUESTION EXCLAMATION MARK - <---> - BLACK RIGHTWARDS BULLET */ - -LIT_UNICODE_RANGE_PO (0x3001, 0x3003) /* IDEOGRAPHIC COMMA - <---> - DITTO MARK */ - -LIT_UNICODE_RANGE_PO (0xFE30, 0xFE30) /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER */ - -LIT_UNICODE_RANGE_PO (0xFE49, 0xFE4C) /* DASHED OVERLINE - <---> - DOUBLE WAVY OVERLINE */ - -LIT_UNICODE_RANGE_PO (0xFE50, 0xFE52) /* SMALL COMMA - <---> - SMALL FULL STOP */ - -LIT_UNICODE_RANGE_PO (0xFE54, 0xFE57) /* SMALL SEMICOLON - <---> - SMALL EXCLAMATION MARK */ - -LIT_UNICODE_RANGE_PO (0xFE5F, 0xFE61) /* SMALL NUMBER SIGN - <---> - SMALL ASTERISK */ - -LIT_UNICODE_RANGE_PO (0xFE68, 0xFE68) /* SMALL REVERSE SOLIDUS */ - -LIT_UNICODE_RANGE_PO (0xFE6A, 0xFE6B) /* SMALL PERCENT SIGN - <---> - SMALL COMMERCIAL AT */ - -LIT_UNICODE_RANGE_PO (0xFF01, 0xFF03) /* FULLWIDTH EXCLAMATION MARK - <---> - FULLWIDTH NUMBER SIGN */ - -LIT_UNICODE_RANGE_PO (0xFF05, 0xFF07) /* FULLWIDTH PERCENT SIGN - <---> - FULLWIDTH APOSTROPHE */ - -LIT_UNICODE_RANGE_PO (0xFF0A, 0xFF0A) /* FULLWIDTH ASTERISK */ - -LIT_UNICODE_RANGE_PO (0xFF0C, 0xFF0C) /* FULLWIDTH COMMA */ - -LIT_UNICODE_RANGE_PO (0xFF0E, 0xFF0F) /* FULLWIDTH FULL STOP - <---> - FULLWIDTH SOLIDUS */ - -LIT_UNICODE_RANGE_PO (0xFF1A, 0xFF1B) /* FULLWIDTH COLON - <---> - FULLWIDTH SEMICOLON */ - -LIT_UNICODE_RANGE_PO (0xFF1F, 0xFF20) /* FULLWIDTH QUESTION MARK - <---> - FULLWIDTH COMMERCIAL AT */ - -LIT_UNICODE_RANGE_PO (0xFF3C, 0xFF3C) /* FULLWIDTH REVERSE SOLIDUS */ - -LIT_UNICODE_RANGE_PO (0xFF61, 0xFF61) /* HALFWIDTH IDEOGRAPHIC FULL STOP */ - -LIT_UNICODE_RANGE_PO (0xFF64, 0xFF64) /* HALFWIDTH IDEOGRAPHIC COMMA */ - - -/** - * "Symbol, Math" category - */ -#ifndef LIT_UNICODE_RANGE_SM -# define LIT_UNICODE_RANGE_SM(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_SM */ -LIT_UNICODE_RANGE_SM (0x002B, 0x002B) /* PLUS SIGN */ - -LIT_UNICODE_RANGE_SM (0x003C, 0x003E) /* LESS-THAN SIGN - <---> - GREATER-THAN SIGN */ - 
-LIT_UNICODE_RANGE_SM (0x007C, 0x007C) /* VERTICAL LINE */ - -LIT_UNICODE_RANGE_SM (0x007E, 0x007E) /* TILDE */ - -LIT_UNICODE_RANGE_SM (0x00AC, 0x00AC) /* NOT SIGN */ - -LIT_UNICODE_RANGE_SM (0x00B1, 0x00B1) /* PLUS-MINUS SIGN */ - -LIT_UNICODE_RANGE_SM (0x00D7, 0x00D7) /* MULTIPLICATION SIGN */ - -LIT_UNICODE_RANGE_SM (0x00F7, 0x00F7) /* DIVISION SIGN */ - -LIT_UNICODE_RANGE_SM (0x2044, 0x2044) /* FRACTION SLASH */ - -LIT_UNICODE_RANGE_SM (0x207A, 0x207C) /* SUPERSCRIPT PLUS SIGN - <---> - SUPERSCRIPT EQUALS SIGN */ - -LIT_UNICODE_RANGE_SM (0x208A, 0x208C) /* SUBSCRIPT PLUS SIGN - <---> - SUBSCRIPT EQUALS SIGN */ - -LIT_UNICODE_RANGE_SM (0x2190, 0x2194) /* LEFTWARDS ARROW - <---> - LEFT RIGHT ARROW */ - -LIT_UNICODE_RANGE_SM (0x219A, 0x219B) /* LEFTWARDS ARROW WITH STROKE - <---> - RIGHTWARDS ARROW WITH STROKE */ - -LIT_UNICODE_RANGE_SM (0x21A0, 0x21A0) /* RIGHTWARDS TWO HEADED ARROW */ - -LIT_UNICODE_RANGE_SM (0x21A3, 0x21A3) /* RIGHTWARDS ARROW WITH TAIL */ - -LIT_UNICODE_RANGE_SM (0x21A6, 0x21A6) /* RIGHTWARDS ARROW FROM BAR */ - -LIT_UNICODE_RANGE_SM (0x21AE, 0x21AE) /* LEFT RIGHT ARROW WITH STROKE */ - -LIT_UNICODE_RANGE_SM (0x21CE, 0x21CF) /* LEFT RIGHT DOUBLE ARROW WITH STROKE - <---> - RIGHTWARDS DOUBLE ARROW WITH STROKE */ - -LIT_UNICODE_RANGE_SM (0x21D2, 0x21D2) /* RIGHTWARDS DOUBLE ARROW */ - -LIT_UNICODE_RANGE_SM (0x21D4, 0x21D4) /* LEFT RIGHT DOUBLE ARROW */ - -LIT_UNICODE_RANGE_SM (0x2200, 0x22F1) /* FOR ALL - <---> - DOWN RIGHT DIAGONAL ELLIPSIS */ - -LIT_UNICODE_RANGE_SM (0x2308, 0x230B) /* LEFT CEILING - <---> - RIGHT FLOOR */ - -LIT_UNICODE_RANGE_SM (0x2320, 0x2321) /* TOP HALF INTEGRAL - <---> - BOTTOM HALF INTEGRAL */ - -LIT_UNICODE_RANGE_SM (0x25B7, 0x25B7) /* WHITE RIGHT-POINTING TRIANGLE */ - -LIT_UNICODE_RANGE_SM (0x25C1, 0x25C1) /* WHITE LEFT-POINTING TRIANGLE */ - -LIT_UNICODE_RANGE_SM (0x266F, 0x266F) /* MUSIC SHARP SIGN */ - -LIT_UNICODE_RANGE_SM (0xFB29, 0xFB29) /* HEBREW LETTER ALTERNATIVE PLUS SIGN */ - -LIT_UNICODE_RANGE_SM 
(0xFE62, 0xFE62) /* SMALL PLUS SIGN */ - -LIT_UNICODE_RANGE_SM (0xFE64, 0xFE66) /* SMALL LESS-THAN SIGN - <---> - SMALL EQUALS SIGN */ - -LIT_UNICODE_RANGE_SM (0xFF0B, 0xFF0B) /* FULLWIDTH PLUS SIGN */ - -LIT_UNICODE_RANGE_SM (0xFF1C, 0xFF1E) /* FULLWIDTH LESS-THAN SIGN - <---> - FULLWIDTH GREATER-THAN SIGN */ - -LIT_UNICODE_RANGE_SM (0xFF5C, 0xFF5C) /* FULLWIDTH VERTICAL LINE */ - -LIT_UNICODE_RANGE_SM (0xFF5E, 0xFF5E) /* FULLWIDTH TILDE */ - -LIT_UNICODE_RANGE_SM (0xFFE2, 0xFFE2) /* FULLWIDTH NOT SIGN */ - -LIT_UNICODE_RANGE_SM (0xFFE9, 0xFFEC) /* HALFWIDTH LEFTWARDS ARROW - <---> - HALFWIDTH DOWNWARDS ARROW */ - - -/** - * "Symbol, Currency" category - */ -#ifndef LIT_UNICODE_RANGE_SC -# define LIT_UNICODE_RANGE_SC(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_SC */ -LIT_UNICODE_RANGE_SC (0x0024, 0x0024) /* DOLLAR SIGN */ - -LIT_UNICODE_RANGE_SC (0x00A2, 0x00A5) /* CENT SIGN - <---> - YEN SIGN */ - -LIT_UNICODE_RANGE_SC (0x09F2, 0x09F3) /* BENGALI RUPEE MARK - <---> - BENGALI RUPEE SIGN */ - -LIT_UNICODE_RANGE_SC (0x0E3F, 0x0E3F) /* THAI CURRENCY SYMBOL BAHT */ - -LIT_UNICODE_RANGE_SC (0x17DB, 0x17DB) /* KHMER CURRENCY SYMBOL RIEL */ - -LIT_UNICODE_RANGE_SC (0x20A0, 0x20AF) /* EURO-CURRENCY SIGN - <---> - DRACHMA SIGN */ - -LIT_UNICODE_RANGE_SC (0xFE69, 0xFE69) /* SMALL DOLLAR SIGN */ - -LIT_UNICODE_RANGE_SC (0xFF04, 0xFF04) /* FULLWIDTH DOLLAR SIGN */ - -LIT_UNICODE_RANGE_SC (0xFFE0, 0xFFE1) /* FULLWIDTH CENT SIGN - <---> - FULLWIDTH POUND SIGN */ - -LIT_UNICODE_RANGE_SC (0xFFE5, 0xFFE6) /* FULLWIDTH YEN SIGN - <---> - FULLWIDTH WON SIGN */ - - -/** - * "Symbol, Modifier" category - */ -#ifndef LIT_UNICODE_RANGE_SK -# define LIT_UNICODE_RANGE_SK(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_SK */ -LIT_UNICODE_RANGE_SK (0x005E, 0x005E) /* CIRCUMFLEX ACCENT */ - -LIT_UNICODE_RANGE_SK (0x0060, 0x0060) /* GRAVE ACCENT */ - -LIT_UNICODE_RANGE_SK (0x00A8, 0x00A8) /* DIAERESIS */ - -LIT_UNICODE_RANGE_SK (0x00AF, 0x00AF) /* MACRON */ - 
-LIT_UNICODE_RANGE_SK (0x00B4, 0x00B4) /* ACUTE ACCENT */ - -LIT_UNICODE_RANGE_SK (0x00B8, 0x00B8) /* CEDILLA */ - -LIT_UNICODE_RANGE_SK (0x02B9, 0x02BA) /* MODIFIER LETTER PRIME - <---> - MODIFIER LETTER DOUBLE PRIME */ - -LIT_UNICODE_RANGE_SK (0x02C2, 0x02CF) /* MODIFIER LETTER LEFT ARROWHEAD - <---> - MODIFIER LETTER LOW ACUTE ACCENT */ - -LIT_UNICODE_RANGE_SK (0x02D2, 0x02DF) /* MODIFIER LETTER CENTRED RIGHT HALF RING - <---> - MODIFIER LETTER CROSS ACCENT */ - -LIT_UNICODE_RANGE_SK (0x02E5, 0x02ED) /* MODIFIER LETTER EXTRA-HIGH TONE BAR - <---> - MODIFIER LETTER UNASPIRATED */ - -LIT_UNICODE_RANGE_SK (0x0374, 0x0375) /* GREEK NUMERAL SIGN - <---> - GREEK LOWER NUMERAL SIGN */ - -LIT_UNICODE_RANGE_SK (0x0384, 0x0385) /* GREEK TONOS - <---> - GREEK DIALYTIKA TONOS */ - -LIT_UNICODE_RANGE_SK (0x1FBD, 0x1FBD) /* GREEK KORONIS */ - -LIT_UNICODE_RANGE_SK (0x1FBF, 0x1FC1) /* GREEK PSILI - <---> - GREEK DIALYTIKA AND PERISPOMENI */ - -LIT_UNICODE_RANGE_SK (0x1FCD, 0x1FCF) /* GREEK PSILI AND VARIA - <---> - GREEK PSILI AND PERISPOMENI */ - -LIT_UNICODE_RANGE_SK (0x1FDD, 0x1FDF) /* GREEK DASIA AND VARIA - <---> - GREEK DASIA AND PERISPOMENI */ - -LIT_UNICODE_RANGE_SK (0x1FED, 0x1FEF) /* GREEK DIALYTIKA AND VARIA - <---> - GREEK VARIA */ - -LIT_UNICODE_RANGE_SK (0x1FFD, 0x1FFE) /* GREEK OXIA - <---> - GREEK DASIA */ - -LIT_UNICODE_RANGE_SK (0x309B, 0x309C) /* KATAKANA-HIRAGANA VOICED SOUND MARK - <---> - KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */ - -LIT_UNICODE_RANGE_SK (0xFF3E, 0xFF3E) /* FULLWIDTH CIRCUMFLEX ACCENT */ - -LIT_UNICODE_RANGE_SK (0xFF40, 0xFF40) /* FULLWIDTH GRAVE ACCENT */ - -LIT_UNICODE_RANGE_SK (0xFFE3, 0xFFE3) /* FULLWIDTH MACRON */ - - -/** - * "Symbol, Other" category - */ -#ifndef LIT_UNICODE_RANGE_SO -# define LIT_UNICODE_RANGE_SO(range_begin, range_end) -#endif /* !LIT_UNICODE_RANGE_SO */ -LIT_UNICODE_RANGE_SO (0x00A6, 0x00A7) /* BROKEN BAR - <---> - SECTION SIGN */ - -LIT_UNICODE_RANGE_SO (0x00A9, 0x00A9) /* COPYRIGHT SIGN */ - 
-LIT_UNICODE_RANGE_SO (0x00AE, 0x00AE) /* REGISTERED SIGN */ - -LIT_UNICODE_RANGE_SO (0x00B0, 0x00B0) /* DEGREE SIGN */ - -LIT_UNICODE_RANGE_SO (0x00B6, 0x00B6) /* PILCROW SIGN */ - -LIT_UNICODE_RANGE_SO (0x0482, 0x0482) /* CYRILLIC THOUSANDS SIGN */ - -LIT_UNICODE_RANGE_SO (0x06E9, 0x06E9) /* ARABIC PLACE OF SAJDAH */ - -LIT_UNICODE_RANGE_SO (0x06FD, 0x06FE) /* ARABIC SIGN SINDHI AMPERSAND - <---> - ARABIC SIGN SINDHI POSTPOSITION MEN */ - -LIT_UNICODE_RANGE_SO (0x09FA, 0x09FA) /* BENGALI ISSHAR */ - -LIT_UNICODE_RANGE_SO (0x0B70, 0x0B70) /* ORIYA ISSHAR */ - -LIT_UNICODE_RANGE_SO (0x0F01, 0x0F03) /* TIBETAN MARK GTER YIG MGO TRUNCATED A - <---> - TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA */ - -LIT_UNICODE_RANGE_SO (0x0F13, 0x0F17) /* TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN - <---> - TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS */ - -LIT_UNICODE_RANGE_SO (0x0F1A, 0x0F1F) /* TIBETAN SIGN RDEL DKAR GCIG - <---> - TIBETAN SIGN RDEL DKAR RDEL NAG */ - -LIT_UNICODE_RANGE_SO (0x0F34, 0x0F34) /* TIBETAN MARK BSDUS RTAGS */ - -LIT_UNICODE_RANGE_SO (0x0F36, 0x0F36) /* TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN */ - -LIT_UNICODE_RANGE_SO (0x0F38, 0x0F38) /* TIBETAN MARK CHE MGO */ - -LIT_UNICODE_RANGE_SO (0x0FBE, 0x0FC5) /* TIBETAN KU RU KHA - <---> - TIBETAN SYMBOL RDO RJE */ - -LIT_UNICODE_RANGE_SO (0x0FC7, 0x0FCC) /* TIBETAN SYMBOL RDO RJE RGYA GRAM - <---> - TIBETAN SYMBOL NOR BU BZHI -KHYIL */ - -LIT_UNICODE_RANGE_SO (0x0FCF, 0x0FCF) /* TIBETAN SIGN RDEL NAG GSUM */ - -LIT_UNICODE_RANGE_SO (0x2100, 0x2101) /* ACCOUNT OF - <---> - ADDRESSED TO THE SUBJECT */ - -LIT_UNICODE_RANGE_SO (0x2103, 0x2106) /* DEGREE CELSIUS - <---> - CADA UNA */ - -LIT_UNICODE_RANGE_SO (0x2108, 0x2109) /* SCRUPLE - <---> - DEGREE FAHRENHEIT */ - -LIT_UNICODE_RANGE_SO (0x2114, 0x2114) /* L B BAR SYMBOL */ - -LIT_UNICODE_RANGE_SO (0x2116, 0x2118) /* NUMERO SIGN - <---> - SCRIPT CAPITAL P */ - -LIT_UNICODE_RANGE_SO (0x211E, 0x2123) /* PRESCRIPTION TAKE - <---> - VERSICLE */ - 
-LIT_UNICODE_RANGE_SO (0x2125, 0x2125) /* OUNCE SIGN */ - -LIT_UNICODE_RANGE_SO (0x2127, 0x2127) /* INVERTED OHM SIGN */ - -LIT_UNICODE_RANGE_SO (0x2129, 0x2129) /* TURNED GREEK SMALL LETTER IOTA */ - -LIT_UNICODE_RANGE_SO (0x212E, 0x212E) /* ESTIMATED SYMBOL */ - -LIT_UNICODE_RANGE_SO (0x2132, 0x2132) /* TURNED CAPITAL F */ - -LIT_UNICODE_RANGE_SO (0x213A, 0x213A) /* ROTATED CAPITAL Q */ - -LIT_UNICODE_RANGE_SO (0x2195, 0x2199) /* UP DOWN ARROW - <---> - SOUTH WEST ARROW */ - -LIT_UNICODE_RANGE_SO (0x219C, 0x219F) /* LEFTWARDS WAVE ARROW - <---> - UPWARDS TWO HEADED ARROW */ - -LIT_UNICODE_RANGE_SO (0x21A1, 0x21A2) /* DOWNWARDS TWO HEADED ARROW - <---> - LEFTWARDS ARROW WITH TAIL */ - -LIT_UNICODE_RANGE_SO (0x21A4, 0x21A5) /* LEFTWARDS ARROW FROM BAR - <---> - UPWARDS ARROW FROM BAR */ - -LIT_UNICODE_RANGE_SO (0x21A7, 0x21AD) /* DOWNWARDS ARROW FROM BAR - <---> - LEFT RIGHT WAVE ARROW */ - -LIT_UNICODE_RANGE_SO (0x21AF, 0x21CD) /* DOWNWARDS ZIGZAG ARROW - <---> - LEFTWARDS DOUBLE ARROW WITH STROKE */ - -LIT_UNICODE_RANGE_SO (0x21D0, 0x21D1) /* LEFTWARDS DOUBLE ARROW - <---> - UPWARDS DOUBLE ARROW */ - -LIT_UNICODE_RANGE_SO (0x21D3, 0x21D3) /* DOWNWARDS DOUBLE ARROW */ - -LIT_UNICODE_RANGE_SO (0x21D5, 0x21F3) /* UP DOWN DOUBLE ARROW - <---> - UP DOWN WHITE ARROW */ - -LIT_UNICODE_RANGE_SO (0x2300, 0x2307) /* DIAMETER SIGN - <---> - WAVY LINE */ - -LIT_UNICODE_RANGE_SO (0x230C, 0x231F) /* BOTTOM RIGHT CROP - <---> - BOTTOM RIGHT CORNER */ - -LIT_UNICODE_RANGE_SO (0x2322, 0x2328) /* FROWN - <---> - KEYBOARD */ - -LIT_UNICODE_RANGE_SO (0x232B, 0x237B) /* ERASE TO THE LEFT - <---> - NOT CHECK MARK */ - -LIT_UNICODE_RANGE_SO (0x237D, 0x239A) /* SHOULDERED OPEN BOX - <---> - CLEAR SCREEN SYMBOL */ - -LIT_UNICODE_RANGE_SO (0x2400, 0x2426) /* SYMBOL FOR NULL - <---> - SYMBOL FOR SUBSTITUTE FORM TWO */ - -LIT_UNICODE_RANGE_SO (0x2440, 0x244A) /* OCR HOOK - <---> - OCR DOUBLE BACKSLASH */ - -LIT_UNICODE_RANGE_SO (0x249C, 0x24E9) /* PARENTHESIZED LATIN SMALL LETTER A - <---> 
- CIRCLED LATIN SMALL LETTER Z */ - -LIT_UNICODE_RANGE_SO (0x2500, 0x2595) /* BOX DRAWINGS LIGHT HORIZONTAL - <---> - RIGHT ONE EIGHTH BLOCK */ - -LIT_UNICODE_RANGE_SO (0x25A0, 0x25B6) /* BLACK SQUARE - <---> - BLACK RIGHT-POINTING TRIANGLE */ - -LIT_UNICODE_RANGE_SO (0x25B8, 0x25C0) /* BLACK RIGHT-POINTING SMALL TRIANGLE - <---> - BLACK LEFT-POINTING TRIANGLE */ - -LIT_UNICODE_RANGE_SO (0x25C2, 0x25F7) /* BLACK LEFT-POINTING SMALL TRIANGLE - <---> - WHITE CIRCLE WITH UPPER RIGHT QUADRANT */ - -LIT_UNICODE_RANGE_SO (0x2600, 0x2613) /* BLACK SUN WITH RAYS - <---> - SALTIRE */ - -LIT_UNICODE_RANGE_SO (0x2619, 0x266E) /* REVERSED ROTATED FLORAL HEART BULLET - <---> - MUSIC NATURAL SIGN */ - -LIT_UNICODE_RANGE_SO (0x2670, 0x2671) /* WEST SYRIAC CROSS - <---> - EAST SYRIAC CROSS */ - -LIT_UNICODE_RANGE_SO (0x2701, 0x2704) /* UPPER BLADE SCISSORS - <---> - WHITE SCISSORS */ - -LIT_UNICODE_RANGE_SO (0x2706, 0x2709) /* TELEPHONE LOCATION SIGN - <---> - ENVELOPE */ - -LIT_UNICODE_RANGE_SO (0x270C, 0x2727) /* VICTORY HAND - <---> - WHITE FOUR POINTED STAR */ - -LIT_UNICODE_RANGE_SO (0x2729, 0x274B) /* STRESS OUTLINED WHITE STAR - <---> - HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK */ - -LIT_UNICODE_RANGE_SO (0x274D, 0x274D) /* SHADOWED WHITE CIRCLE */ - -LIT_UNICODE_RANGE_SO (0x274F, 0x2752) /* LOWER RIGHT DROP-SHADOWED WHITE SQUARE - <---> - UPPER RIGHT SHADOWED WHITE SQUARE */ - -LIT_UNICODE_RANGE_SO (0x2756, 0x2756) /* BLACK DIAMOND MINUS WHITE X */ - -LIT_UNICODE_RANGE_SO (0x2758, 0x275E) /* LIGHT VERTICAL BAR - <---> - HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT */ - -LIT_UNICODE_RANGE_SO (0x2761, 0x2767) /* CURVED STEM PARAGRAPH SIGN ORNAMENT - <---> - ROTATED FLORAL HEART BULLET */ - -LIT_UNICODE_RANGE_SO (0x2794, 0x2794) /* HEAVY WIDE-HEADED RIGHTWARDS ARROW */ - -LIT_UNICODE_RANGE_SO (0x2798, 0x27AF) /* HEAVY SOUTH EAST ARROW - <---> - NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW */ - -LIT_UNICODE_RANGE_SO (0x27B1, 0x27BE) /* NOTCHED UPPER RIGHT-SHADOWED 
WHITE RIGHTWARDS ARROW - <---> - OPEN-OUTLINED RIGHTWARDS ARROW */ - -LIT_UNICODE_RANGE_SO (0x2800, 0x28FF) /* BRAILLE PATTERN BLANK - <---> - BRAILLE PATTERN DOTS-12345678 */ - -LIT_UNICODE_RANGE_SO (0x2E80, 0x2E99) /* CJK RADICAL REPEAT - <---> - CJK RADICAL RAP */ - -LIT_UNICODE_RANGE_SO (0x2E9B, 0x2EF3) /* CJK RADICAL CHOKE - <---> - CJK RADICAL C-SIMPLIFIED TURTLE */ - -LIT_UNICODE_RANGE_SO (0x2F00, 0x2FD5) /* KANGXI RADICAL ONE - <---> - KANGXI RADICAL FLUTE */ - -LIT_UNICODE_RANGE_SO (0x2FF0, 0x2FFB) /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT - <---> - IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID */ - -LIT_UNICODE_RANGE_SO (0x3004, 0x3004) /* JAPANESE INDUSTRIAL STANDARD SYMBOL */ - -LIT_UNICODE_RANGE_SO (0x3012, 0x3013) /* POSTAL MARK - <---> - GETA MARK */ - -LIT_UNICODE_RANGE_SO (0x3020, 0x3020) /* POSTAL MARK FACE */ - -LIT_UNICODE_RANGE_SO (0x3036, 0x3037) /* CIRCLED POSTAL MARK - <---> - IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL */ - -LIT_UNICODE_RANGE_SO (0x303E, 0x303F) /* IDEOGRAPHIC VARIATION INDICATOR - <---> - IDEOGRAPHIC HALF FILL SPACE */ - -LIT_UNICODE_RANGE_SO (0x3190, 0x3191) /* IDEOGRAPHIC ANNOTATION LINKING MARK - <---> - IDEOGRAPHIC ANNOTATION REVERSE MARK */ - -LIT_UNICODE_RANGE_SO (0x3196, 0x319F) /* IDEOGRAPHIC ANNOTATION TOP MARK - <---> - IDEOGRAPHIC ANNOTATION MAN MARK */ - -LIT_UNICODE_RANGE_SO (0x3200, 0x321C) /* PARENTHESIZED HANGUL KIYEOK - <---> - PARENTHESIZED HANGUL CIEUC U */ - -LIT_UNICODE_RANGE_SO (0x322A, 0x3243) /* PARENTHESIZED IDEOGRAPH MOON - <---> - PARENTHESIZED IDEOGRAPH REACH */ - -LIT_UNICODE_RANGE_SO (0x3260, 0x327B) /* CIRCLED HANGUL KIYEOK - <---> - CIRCLED HANGUL HIEUH A */ - -LIT_UNICODE_RANGE_SO (0x327F, 0x327F) /* KOREAN STANDARD SYMBOL */ - -LIT_UNICODE_RANGE_SO (0x328A, 0x32B0) /* CIRCLED IDEOGRAPH MOON - <---> - CIRCLED IDEOGRAPH NIGHT */ - -LIT_UNICODE_RANGE_SO (0x32C0, 0x32CB) /* IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY - <---> - IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER */ - 
-LIT_UNICODE_RANGE_SO (0x32D0, 0x32FE) /* CIRCLED KATAKANA A - <---> - CIRCLED KATAKANA WO */ - -LIT_UNICODE_RANGE_SO (0x3300, 0x3376) /* SQUARE APAATO - <---> - SQUARE PC */ - -LIT_UNICODE_RANGE_SO (0x337B, 0x33DD) /* SQUARE ERA NAME HEISEI - <---> - SQUARE WB */ - -LIT_UNICODE_RANGE_SO (0x33E0, 0x33FE) /* IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE - <---> - IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE */ - -LIT_UNICODE_RANGE_SO (0xA490, 0xA4A1) /* YI RADICAL QOT - <---> - YI RADICAL GA */ - -LIT_UNICODE_RANGE_SO (0xA4A4, 0xA4B3) /* YI RADICAL DDUR - <---> - YI RADICAL JO */ - -LIT_UNICODE_RANGE_SO (0xA4B5, 0xA4C0) /* YI RADICAL JJY - <---> - YI RADICAL SHAT */ - -LIT_UNICODE_RANGE_SO (0xA4C2, 0xA4C4) /* YI RADICAL SHOP - <---> - YI RADICAL ZZIET */ - -LIT_UNICODE_RANGE_SO (0xA4C6, 0xA4C6) /* YI RADICAL KE */ - -LIT_UNICODE_RANGE_SO (0xFFE4, 0xFFE4) /* FULLWIDTH BROKEN BAR */ - -LIT_UNICODE_RANGE_SO (0xFFE8, 0xFFE8) /* HALFWIDTH FORMS LIGHT VERTICAL */ - -LIT_UNICODE_RANGE_SO (0xFFED, 0xFFEE) /* HALFWIDTH BLACK SQUARE - <---> - HALFWIDTH WHITE CIRCLE */ - -LIT_UNICODE_RANGE_SO (0xFFFC, 0xFFFD) /* OBJECT REPLACEMENT CHARACTER - <---> - REPLACEMENT CHARACTER */ - - -#undef LIT_UNICODE_RANGE_LU -#undef LIT_UNICODE_RANGE_LL -#undef LIT_UNICODE_RANGE_LT -#undef LIT_UNICODE_RANGE_MN -#undef LIT_UNICODE_RANGE_MC -#undef LIT_UNICODE_RANGE_ME -#undef LIT_UNICODE_RANGE_ND -#undef LIT_UNICODE_RANGE_NL -#undef LIT_UNICODE_RANGE_NO -#undef LIT_UNICODE_RANGE_ZS -#undef LIT_UNICODE_RANGE_ZL -#undef LIT_UNICODE_RANGE_ZP -#undef LIT_UNICODE_RANGE_CC -#undef LIT_UNICODE_RANGE_CF -#undef LIT_UNICODE_RANGE_CS -#undef LIT_UNICODE_RANGE_CO -#undef LIT_UNICODE_RANGE_LM -#undef LIT_UNICODE_RANGE_LO -#undef LIT_UNICODE_RANGE_PC -#undef LIT_UNICODE_RANGE_PD -#undef LIT_UNICODE_RANGE_PS -#undef LIT_UNICODE_RANGE_PE -#undef LIT_UNICODE_RANGE_PI -#undef LIT_UNICODE_RANGE_PF -#undef LIT_UNICODE_RANGE_PO -#undef LIT_UNICODE_RANGE_SM -#undef LIT_UNICODE_RANGE_SC -#undef LIT_UNICODE_RANGE_SK 
-#undef LIT_UNICODE_RANGE_SO +static const uint16_t unicode_separator_chars[] = +{ + /* + * these two chars are handled separately (NOTE(review): lit_char_is_space_separator, formerly referenced here, no longer exists; presumably the white-space check in lit-char-helpers.c handles them, TODO confirm) + * 0x0020, space + * 0x00A0, non-breaking space + */ + 0x1680, \ + 0x180E, /* manually added */ \ + 0x202F, /* manually added */ \ + 0x205F, \ + 0x3000 +}; + +#endif diff --git a/jerry-core/parser/js/common.c b/jerry-core/parser/js/common.c index 1931bac09..e17e995db 100644 --- a/jerry-core/parser/js/common.c +++ b/jerry-core/parser/js/common.c @@ -96,9 +96,7 @@ util_is_identifier_part_character (uint16_t chr) /**< EcmaScript character */ } return (lit_char_is_unicode_letter (chr) - || lit_char_is_unicode_combining_mark (chr) - || lit_char_is_unicode_digit (chr) - || lit_char_is_unicode_connector_punctuation (chr)); + || lit_char_is_unicode_non_letter_ident_part (chr)); } /* util_is_identifier_part_character */ /** diff --git a/tools/print-unicode-ranges.sh b/tools/print-unicode-ranges.sh index f304dc9d0..1e891cbf5 100755 --- a/tools/print-unicode-ranges.sh +++ b/tools/print-unicode-ranges.sh @@ -17,124 +17,170 @@ # # http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt # + +# unicode categories: Lu Ll Lt Mn Mc Me Nd Nl No Zs Zl Zp Cc Cf Cs Co Lm Lo Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So +# letter: Lu Ll Lt Lm Lo Nl +# non-letter-ident-part (selected with -cat non-let-indent-parts): +# digit: Nd +# combining mark: Mn Mc +# connector punctuation: Pc +# separators: Zs + +if [ $# -le 4 ]; then + echo "useage: print-unicode-ranges.sh <-i y sp|y len|n> <-cat letters|non-let-indent-parts|separators>" + echo " -i: y sp - print interval starting points" + echo " y len - print interval lengths" + echo " n - print individual characters" + echo " -cat: whether print letters|non-let-indent-parts|separators category" + exit 1 +fi + +STARTING_POINT="len" + UNICODE_DATA_PATH="$1" +shift -# -# One of unicode character category names (Lu, Ll, Nl, etc.)
-# -UNICODE_CHAR_CATEGORY="$2" +while [ $# -gt 0 ]; do # parse the "-i y sp|y len|n" and "-cat CATEGORY" options + if [ "$1" = "-i" ]; then + shift + PRINT_INTERVALS="$1" + if [ "$PRINT_INTERVALS" = "y" ]; then + shift + STARTING_POINT="$1" + echo "$STARTING_POINT" >&2 # diagnostic only: keep it off stdout, which carries the generated list + fi + elif [ "$1" = "-cat" ]; then + shift + CATEGORY="$1" + echo "$CATEGORY" >&2 # diagnostic only, see above + fi + shift +done -UNICODE_CHAR_CATEGORY_UPPER_CASE=`echo $UNICODE_CHAR_CATEGORY | tr '[:lower:]' '[:upper:]'` - -# -# 1. Print character codes, categories, and names -# 2. Filter by category -# 3. Print character codes and names without categories -# 4. Sort -# 5. Add '0x' to each line -# 6. Combine hexadecimal numbers into named ranges -# 7. Print ranges in format "LIT_UNICODE_RANGE_$UNICODE_CHAR_CATEGORY_UPPER_CASE (range_begin, range_end) /* range name */" -# - -cut -d ';' "$UNICODE_DATA_PATH" -f 1,2,3 \ - | grep ";$UNICODE_CHAR_CATEGORY\$" \ - | cut -d ';' -f 1,2 \ - | sort \ - | awk 'BEGIN { FS=";"; OFS=";" } { print "0x"$1, $2; }' \ - | awk --non-decimal-data \ - 'BEGIN \ - { \ - FS=";"; \ - OFS=";"; \ - is_in_range=0; \ - } \ - \ - function output_next_range () \ - { \ - if (range_begin == range_prev) \ - { \ - print range_begin, range_prev, range_begin_name; \ - } \ - else \ - { \ - print range_begin, range_prev, range_begin_name, range_prev_name; \ - } \ - } \ - \ - { \ - if (is_in_range == 0) \ - { \ - is_in_range=1; \ - range_begin=$1; \ - range_prev=$1; \ - range_begin_name=$2; \ - range_prev_name=$2; \ - } \ - else \ - { \ - if (range_prev + 1 == $1) \ - { \ - range_prev=$1; \ - range_prev_name=$2 - } \ - else \ - { \ - output_next_range(); \ - range_begin=$1; \ - range_prev=$1; \ - range_begin_name=$2; \ - range_prev_name=$2; \ - } \ - } \ - } \ - \ - END \ - { \ - output_next_range(); \ - }' \ - | awk \ - 'BEGIN \ - { \ - FS = ";" \ - } \ - { \ - range_string = sprintf ("LIT_UNICODE_RANGE_'$UNICODE_CHAR_CATEGORY_UPPER_CASE' (%s, %s)", $1, $2); \ - range_string_length = length (range_string); \ - \ - range_begin_name=$3; \ - range_end_name=$4; \ - \ - range_begin_name_length
= length (range_begin_name); \ - range_end_name_length = length (range_end_name); \ - \ - printf "%s", range_string; \ - if (range_end_name_length == 0) \ - { \ - printf " /* %s */\n", range_begin_name; \ - } \ - else \ - { \ - if (range_begin_name_length > range_end_name_length) \ - { \ - indent1 = 0; \ - indent2 = range_string_length + range_begin_name_length / 2; - indent3 = range_string_length + (range_begin_name_length - range_end_name_length) / 2; \ - } \ - else \ - { \ - indent1 = (range_end_name_length - range_begin_name_length) / 2; \ - indent2 = range_string_length + range_end_name_length / 2; - indent3 = range_string_length; \ - } \ - indent3 = indent3 + 3; \ - fmt1 = sprintf (" /* %%%ds%%s\n", indent1); \ - fmt2 = sprintf (" %%%ds<--->\n", indent2); \ - fmt3 = sprintf (" %%%ds%%s */\n", indent3); \ - \ - printf fmt1, "", $3; \ - printf fmt2, ""; \ - printf fmt3, "", $4; \ - } \ - \ - printf "\n"; \ - }' +awk -v desired_category="$CATEGORY" \ +'BEGIN \ + { \ + FS=";"; OFS=";" \ + } \ + { \ + cat=$3; \ + if (desired_category == "letters" && (cat == "Lu" || cat == "Ll" || cat == "Lt" || cat == "Lm" || cat == "Lo" || cat == "Nl")) \ + { \ + print "0x"$1, $2, $3; \ + } \ + else if (desired_category == "non-let-indent-parts" && (cat == "Nd" || cat == "Mn" || cat == "Mc" || cat == "Pc")) \ + { \ + print "0x"$1, $2, $3; \ + } \ + else if (desired_category == "separators" && cat == "Zs") \ + { \ + print "0x"$1, $2, $3; \ + } \ + }' "$UNICODE_DATA_PATH" \ +| gawk --non-decimal-data -v print_intervals="$PRINT_INTERVALS" -v sp="$STARTING_POINT" \ +'BEGIN \ + { \ + FS = ";"; \ + OFS = ";"; \ + is_in_range = 0; \ + print_count = 0; \ + } \ + \ + function print_Nl() \ + { \ + ++print_count; \ + if (print_count == 10) \ + { \ + printf "\n"; \ + print_count = 0; \ + } \ + } \ + \ + function output_next_range () \ + { \ + if (range_begin != range_prev && print_intervals=="y") \ + { \ + i1 = strtonum(range_begin); \ + i2 = strtonum(range_prev); \ + len = i2 - i1; \ + # if
the length of an interval is > 255 it has to be split, because lengths are stored as uint8_t: every full subinterval covers 256 characters (length 255) + if (len > 255) \ + { \ + numOfSubintervals = int(len / 256); # number of full subintervals, each covering 256 characters \ + for (i = 1; i <= numOfSubintervals; ++i) \ + { \ + if (sp == "sp") \ + { \ + printf "0X%X, ", i1; \ + print_Nl(); \ + } + else \ + { \ + printf "%d, ", 255; \ + print_Nl(); \ + } \ + i1 = i1 + 256; # next interval begins on the ending of the previous + 1 \ + } \ + if (sp == "sp") \ + { \ + printf "0X%X, ", i1; \ + print_Nl(); \ + } \ + else \ + { \ + printf "%d, ", len - 256 * numOfSubintervals; # length of the last, partial subinterval (0..255) \ + print_Nl(); \ + } \ + } \ + else \ + { \ + if (sp == "sp") \ + { \ + printf "%s, ", range_begin; \ + print_Nl(); \ + } \ + else \ + { \ + printf "%d, ", len; \ + print_Nl(); \ + } \ + } \ + } \ + else if (range_begin == range_prev && print_intervals != "y")\ + { \ + printf "%s, ", range_begin; \ + print_Nl(); \ + } \ + } \ + \ + { \ + if (is_in_range == 0) \ + { \ + is_in_range = 1; \ + range_begin = $1; \ + range_prev = $1; \ + range_begin_name = $2; \ + range_prev_name = $2; \ + } \ + else \ + { \ + if (range_prev + 1 == $1) \ + { \ + range_prev = $1; \ + range_prev_name = $2 + } \ + else \ + { \ + output_next_range(); \ + range_begin = $1; \ + range_prev=$1; \ + range_begin_name = $2; \ + range_prev_name = $2; \ + } \ + } \ + } \ + \ +END \ + { \ + output_next_range(); \ + }'