New unicode character handling using array data structure.
JerryScript-DCO-1.0-Signed-off-by: István Kádár ikadar@inf.u-szeged.hu
This commit is contained in:
+119
-156
@@ -14,9 +14,89 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "lit-char-helpers.h"
|
#include "lit-char-helpers.h"
|
||||||
|
#include "lit/lit-unicode-ranges.inc.h"
|
||||||
#include "lit-strings.h"
|
#include "lit-strings.h"
|
||||||
|
|
||||||
|
#define NUM_OF_ELEMENTS(array) (sizeof (array) / sizeof ((array)[0]))
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Binary search algorithm that searches the a
|
||||||
|
* character in the given char array.
|
||||||
|
*
|
||||||
|
* @return true - if the character is in the given array
|
||||||
|
* false - otherwise
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
search_char_in_char_array (ecma_char_t c, /**< code unit */
|
||||||
|
const ecma_char_t *array, /**< array */
|
||||||
|
int size_of_array) /**< length of the array */
|
||||||
|
{
|
||||||
|
int bottom = 0;
|
||||||
|
int top = size_of_array - 1;
|
||||||
|
|
||||||
|
while (bottom <= top)
|
||||||
|
{
|
||||||
|
int middle = (bottom + top) / 2;
|
||||||
|
ecma_char_t current = array[middle];
|
||||||
|
|
||||||
|
if (current == c)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c < current)
|
||||||
|
{
|
||||||
|
top = middle - 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
bottom = middle + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
} /* search_char_in_char_array */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Binary search algorithm that searches a character in the given intervals.
|
||||||
|
* Intervals specifed by two arrays. The first one contains the starting points
|
||||||
|
* of the intervals, the second one contains the length of them.
|
||||||
|
*
|
||||||
|
* @return true - if the the character is included (inclusively) in one of the intervals in the given array
|
||||||
|
* false - otherwise
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
search_char_in_interval_array (ecma_char_t c, /**< code unit */
|
||||||
|
const ecma_char_t *array_sp, /**< array of interval starting points */
|
||||||
|
const uint8_t *lengths, /**< array of interval lengths */
|
||||||
|
int size_of_array) /**< length of the array */
|
||||||
|
{
|
||||||
|
int bottom = 0;
|
||||||
|
int top = size_of_array - 1;
|
||||||
|
|
||||||
|
while (bottom <= top)
|
||||||
|
{
|
||||||
|
int middle = (bottom + top) / 2;
|
||||||
|
ecma_char_t current_sp = array_sp[middle];
|
||||||
|
|
||||||
|
if (current_sp <= c && c <= current_sp + lengths[middle])
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c > current_sp)
|
||||||
|
{
|
||||||
|
bottom = middle + 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
top = middle - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
} /* search_char_in_interval_array */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if specified character is one of the Format-Control characters
|
* Check if specified character is one of the Format-Control characters
|
||||||
*
|
*
|
||||||
@@ -32,30 +112,8 @@ lit_char_is_format_control (ecma_char_t c) /**< code unit */
|
|||||||
} /* lit_char_is_format_control */
|
} /* lit_char_is_format_control */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if specified character is the Space Separator character
|
* Check if specified character is one of the Whitespace characters including those
|
||||||
*
|
* that fall into "Space, Separator" ("Zs") Unicode character category.
|
||||||
* See also:
|
|
||||||
* ECMA-262 v5, Table 2
|
|
||||||
*
|
|
||||||
* @return true - if the character falls into "Space, Separator" ("Zs") character category,
|
|
||||||
* false - otherwise.
|
|
||||||
*/
|
|
||||||
bool
|
|
||||||
lit_char_is_space_separator (ecma_char_t c) /**< code unit */
|
|
||||||
{
|
|
||||||
/* Zs */
|
|
||||||
#define LIT_UNICODE_RANGE_ZS(range_begin, range_end) \
|
|
||||||
if (c >= (range_begin) && c <= (range_end)) \
|
|
||||||
{ \
|
|
||||||
return true; \
|
|
||||||
}
|
|
||||||
#include "lit-unicode-ranges.inc.h"
|
|
||||||
|
|
||||||
return false;
|
|
||||||
} /* lit_char_is_space_separator */
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if specified character is one of the Whitespace characters
|
|
||||||
*
|
*
|
||||||
* @return true - if the character is one of characters, listed in ECMA-262 v5, Table 2,
|
* @return true - if the character is one of characters, listed in ECMA-262 v5, Table 2,
|
||||||
* false - otherwise.
|
* false - otherwise.
|
||||||
@@ -63,13 +121,21 @@ lit_char_is_space_separator (ecma_char_t c) /**< code unit */
|
|||||||
bool
|
bool
|
||||||
lit_char_is_white_space (ecma_char_t c) /**< code unit */
|
lit_char_is_white_space (ecma_char_t c) /**< code unit */
|
||||||
{
|
{
|
||||||
return (c == LIT_CHAR_TAB
|
if (c <= 127)
|
||||||
|| c == LIT_CHAR_VTAB
|
{
|
||||||
|| c == LIT_CHAR_FF
|
return (c == LIT_CHAR_TAB
|
||||||
|| c == LIT_CHAR_SP
|
|| c == LIT_CHAR_VTAB
|
||||||
|| c == LIT_CHAR_NBSP
|
|| c == LIT_CHAR_FF
|
||||||
|| c == LIT_CHAR_BOM
|
|| c == LIT_CHAR_SP);
|
||||||
|| lit_char_is_space_separator (c));
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return (c == LIT_CHAR_NBSP
|
||||||
|
|| c == LIT_CHAR_BOM
|
||||||
|
|| (c >= unicode_separator_char_interv_sps[0]
|
||||||
|
&& c <= unicode_separator_char_interv_sps[0] + unicode_separator_char_interv_lens[0])
|
||||||
|
|| search_char_in_char_array (c, unicode_separator_chars, NUM_OF_ELEMENTS (unicode_separator_chars)));
|
||||||
|
}
|
||||||
} /* lit_char_is_white_space */
|
} /* lit_char_is_white_space */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -119,60 +185,18 @@ lit_char_is_unicode_letter (ecma_char_t c) /**< code unit */
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Lu */
|
return (search_char_in_interval_array (c, unicode_letter_interv_sps, unicode_letter_interv_lens,
|
||||||
#define LIT_UNICODE_RANGE_LU(range_begin, range_end) \
|
NUM_OF_ELEMENTS (unicode_letter_interv_sps))
|
||||||
if (c >= (range_begin) && c <= (range_end)) \
|
|| search_char_in_char_array (c, unicode_letter_chars, NUM_OF_ELEMENTS (unicode_letter_chars)));
|
||||||
{ \
|
|
||||||
return true; \
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Ll */
|
|
||||||
#define LIT_UNICODE_RANGE_LL(range_begin, range_end) \
|
|
||||||
if (c >= (range_begin) && c <= (range_end)) \
|
|
||||||
{ \
|
|
||||||
return true; \
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Lt */
|
|
||||||
#define LIT_UNICODE_RANGE_LT(range_begin, range_end) \
|
|
||||||
if (c >= (range_begin) && c <= (range_end)) \
|
|
||||||
{ \
|
|
||||||
return true; \
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Lm */
|
|
||||||
#define LIT_UNICODE_RANGE_LM(range_begin, range_end) \
|
|
||||||
if (c >= (range_begin) && c <= (range_end)) \
|
|
||||||
{ \
|
|
||||||
return true; \
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Lo */
|
|
||||||
#define LIT_UNICODE_RANGE_LO(range_begin, range_end) \
|
|
||||||
if (c >= (range_begin) && c <= (range_end)) \
|
|
||||||
{ \
|
|
||||||
return true; \
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Nl */
|
|
||||||
#define LIT_UNICODE_RANGE_NL(range_begin, range_end) \
|
|
||||||
if (c >= (range_begin) && c <= (range_end)) \
|
|
||||||
{ \
|
|
||||||
return true; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#include "lit-unicode-ranges.inc.h"
|
|
||||||
|
|
||||||
return false;
|
|
||||||
} /* lit_char_is_unicode_letter */
|
} /* lit_char_is_unicode_letter */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if specified character is a unicode combining mark
|
* Check if specified character is a non-letter character and can be used as a
|
||||||
*
|
* non-first character of an identifier.
|
||||||
* Note:
|
* These characters are covered by the following Unicode categories:
|
||||||
* Unicode combining mark is a character, included into one of the following categories:
|
* - digit (Nd)
|
||||||
* - Non-spacing mark (Mn);
|
* - combining mark: non-spacing mark (Mn), combining spacing mark (Mc)
|
||||||
* - Combining spacing mark (Mc).
|
* - connector punctuation (Pc)
|
||||||
*
|
*
|
||||||
* See also:
|
* See also:
|
||||||
* ECMA-262 v5, 7.6
|
* ECMA-262 v5, 7.6
|
||||||
@@ -181,82 +205,21 @@ lit_char_is_unicode_letter (ecma_char_t c) /**< code unit */
|
|||||||
* false - otherwise.
|
* false - otherwise.
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
lit_char_is_unicode_combining_mark (ecma_char_t c) /**< code unit */
|
lit_char_is_unicode_non_letter_ident_part (ecma_char_t c) /**< code unit */
|
||||||
{
|
{
|
||||||
/* Mn */
|
if (c <= 127)
|
||||||
#define LIT_UNICODE_RANGE_MN(range_begin, range_end) \
|
{
|
||||||
if (c >= (range_begin) && c <= (range_end)) \
|
return LIT_CHAR_ASCII_DIGITS_BEGIN <= c && c <= LIT_CHAR_ASCII_DIGITS_END;
|
||||||
{ \
|
|
||||||
return true; \
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
/* Mc */
|
{
|
||||||
#define LIT_UNICODE_RANGE_MC(range_begin, range_end) \
|
return (search_char_in_interval_array (c, unicode_non_letter_ident_part_interv_sps,
|
||||||
if (c >= (range_begin) && c <= (range_end)) \
|
unicode_non_letter_ident_part_interv_lens,
|
||||||
{ \
|
NUM_OF_ELEMENTS (unicode_non_letter_ident_part_interv_sps))
|
||||||
return true; \
|
|| search_char_in_char_array (c, unicode_non_letter_ident_part_chars,
|
||||||
|
NUM_OF_ELEMENTS (unicode_non_letter_ident_part_chars)));
|
||||||
}
|
}
|
||||||
|
} /* lit_char_is_unicode_non_letter_ident_part */
|
||||||
#include "lit-unicode-ranges.inc.h"
|
|
||||||
|
|
||||||
return false;
|
|
||||||
} /* lit_char_is_unicode_combining_mark */
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if specified character is a unicode digit
|
|
||||||
*
|
|
||||||
* Note:
|
|
||||||
* Unicode digit is a character, included into the following category:
|
|
||||||
* - Decimal number (Nd).
|
|
||||||
*
|
|
||||||
* See also:
|
|
||||||
* ECMA-262 v5, 7.6
|
|
||||||
*
|
|
||||||
* @return true - if specified character falls into the specified category,
|
|
||||||
* false - otherwise.
|
|
||||||
*/
|
|
||||||
bool
|
|
||||||
lit_char_is_unicode_digit (ecma_char_t c) /**< code unit */
|
|
||||||
{
|
|
||||||
/* Nd */
|
|
||||||
#define LIT_UNICODE_RANGE_ND(range_begin, range_end) \
|
|
||||||
if (c >= (range_begin) && c <= (range_end)) \
|
|
||||||
{ \
|
|
||||||
return true; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#include "lit-unicode-ranges.inc.h"
|
|
||||||
|
|
||||||
return false;
|
|
||||||
} /* lit_char_is_unicode_digit */
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if specified character is a unicode connector punctuation
|
|
||||||
*
|
|
||||||
* Note:
|
|
||||||
* Unicode connector punctuation is a character, included into the following category:
|
|
||||||
* - Connector punctuation (Pc).
|
|
||||||
*
|
|
||||||
* See also:
|
|
||||||
* ECMA-262 v5, 7.6
|
|
||||||
*
|
|
||||||
* @return true - if specified character falls into the specified category,
|
|
||||||
* false - otherwise.
|
|
||||||
*/
|
|
||||||
bool
|
|
||||||
lit_char_is_unicode_connector_punctuation (ecma_char_t c) /**< code unit */
|
|
||||||
{
|
|
||||||
/* Pc */
|
|
||||||
#define LIT_UNICODE_RANGE_PC(range_begin, range_end) \
|
|
||||||
if (c >= (range_begin) && c <= (range_end)) \
|
|
||||||
{ \
|
|
||||||
return true; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#include "lit-unicode-ranges.inc.h"
|
|
||||||
|
|
||||||
return false;
|
|
||||||
} /* lit_char_is_unicode_connector_punctuation */
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if specified character is one of OctalDigit characters (ECMA-262 v5, B.1.2)
|
* Check if specified character is one of OctalDigit characters (ECMA-262 v5, B.1.2)
|
||||||
|
|||||||
@@ -37,7 +37,6 @@ extern bool lit_char_is_format_control (ecma_char_t);
|
|||||||
#define LIT_CHAR_NBSP ((ecma_char_t) 0x00A0) /* no-break space */
|
#define LIT_CHAR_NBSP ((ecma_char_t) 0x00A0) /* no-break space */
|
||||||
/* LIT_CHAR_BOM is defined above */
|
/* LIT_CHAR_BOM is defined above */
|
||||||
|
|
||||||
extern bool lit_char_is_space_separator (ecma_char_t);
|
|
||||||
extern bool lit_char_is_white_space (ecma_char_t);
|
extern bool lit_char_is_white_space (ecma_char_t);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -77,9 +76,7 @@ extern bool lit_char_is_line_terminator (ecma_char_t);
|
|||||||
/* LIT_CHAR_BACKSLASH defined above */
|
/* LIT_CHAR_BACKSLASH defined above */
|
||||||
|
|
||||||
extern bool lit_char_is_unicode_letter (ecma_char_t);
|
extern bool lit_char_is_unicode_letter (ecma_char_t);
|
||||||
extern bool lit_char_is_unicode_combining_mark (ecma_char_t);
|
extern bool lit_char_is_unicode_non_letter_ident_part (ecma_char_t);
|
||||||
extern bool lit_char_is_unicode_digit (ecma_char_t);
|
|
||||||
extern bool lit_char_is_unicode_connector_punctuation (ecma_char_t);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Punctuator characters (ECMA-262 v5, 7.7)
|
* Punctuator characters (ECMA-262 v5, 7.7)
|
||||||
|
|||||||
+179
-4321
File diff suppressed because it is too large
Load Diff
@@ -96,9 +96,7 @@ util_is_identifier_part_character (uint16_t chr) /**< EcmaScript character */
|
|||||||
}
|
}
|
||||||
|
|
||||||
return (lit_char_is_unicode_letter (chr)
|
return (lit_char_is_unicode_letter (chr)
|
||||||
|| lit_char_is_unicode_combining_mark (chr)
|
|| lit_char_is_unicode_non_letter_ident_part (chr));
|
||||||
|| lit_char_is_unicode_digit (chr)
|
|
||||||
|| lit_char_is_unicode_connector_punctuation (chr));
|
|
||||||
} /* util_is_identifier_part_character */
|
} /* util_is_identifier_part_character */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
+164
-118
@@ -17,124 +17,170 @@
|
|||||||
#
|
#
|
||||||
# http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt
|
# http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt
|
||||||
#
|
#
|
||||||
|
|
||||||
|
# unicode categories: Lu Ll Lt Mn Mc Me Nd Nl No Zs Zl Zp Cc Cf Cs Co Lm Lo Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So
|
||||||
|
# letter: Lu Ll Lt Lm Lo Nl
|
||||||
|
# non-letter-indent-part:
|
||||||
|
# digit: Nd
|
||||||
|
# punctuation mark: Mn Mc
|
||||||
|
# connector punctuation: Pc
|
||||||
|
# separators: Zs
|
||||||
|
|
||||||
|
if [ $# -le 4 ]; then
|
||||||
|
echo "useage: print-unicode-ranges.sh <unicode-data-path> <-i y sp|y len|n> <-cat letters|non-let-indent-parts|separators>"
|
||||||
|
echo " -i: y sp - print interval starting points"
|
||||||
|
echo " y len - print interval lengths"
|
||||||
|
echo " n - print individual characters"
|
||||||
|
echo " -cat: whether print letters|non-let-indent-parts|separators category"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
STARTING_POINT="len"
|
||||||
|
|
||||||
UNICODE_DATA_PATH="$1"
|
UNICODE_DATA_PATH="$1"
|
||||||
|
shift
|
||||||
|
|
||||||
#
|
while [ $# -gt 0 ]; do
|
||||||
# One of unicode character category names (Lu, Ll, Nl, etc.)
|
if [ $1 == "-i" ]; then
|
||||||
#
|
shift
|
||||||
UNICODE_CHAR_CATEGORY="$2"
|
PRINT_INTERVALS="$1"
|
||||||
|
if [ $PRINT_INTERVALS == "y" ]; then
|
||||||
|
shift
|
||||||
|
STARTING_POINT="$1"
|
||||||
|
echo $STARTING_POINT
|
||||||
|
fi
|
||||||
|
elif [ $1 == "-cat" ]; then
|
||||||
|
shift
|
||||||
|
CATEGORY="$1"
|
||||||
|
echo $CATEGORY
|
||||||
|
fi
|
||||||
|
shift
|
||||||
|
done
|
||||||
|
|
||||||
UNICODE_CHAR_CATEGORY_UPPER_CASE=`echo $UNICODE_CHAR_CATEGORY | tr '[:lower:]' '[:upper:]'`
|
awk -v desired_category="$CATEGORY" \
|
||||||
|
'BEGIN \
|
||||||
#
|
{ \
|
||||||
# 1. Print character codes, categories, and names
|
FS=";"; OFS=";" \
|
||||||
# 2. Filter by category
|
} \
|
||||||
# 3. Print character codes and names without categories
|
{ \
|
||||||
# 4. Sort
|
cat=$3; \
|
||||||
# 5. Add '0x' to each line
|
if (desired_category == "letters" && (cat == "Lu" || cat == "Ll" || cat == "Lt" || cat == "Lm" || cat == "Lo" || cat == "Nl")) \
|
||||||
# 6. Combine hexadecimal numbers into named ranges
|
{ \
|
||||||
# 7. Print ranges in format "LIT_UNICODE_RANGE_$UNICODE_CHAR_CATEGORY_UPPER_CASE (range_begin, range_end) /* range name */"
|
print "0x"$1, $2, $3; \
|
||||||
#
|
} \
|
||||||
|
else if (desired_category == "non-let-indent-parts" && (cat == "Nd" || cat == "Mn" || cat == "Mc" || cat == "Pc")) \
|
||||||
cut -d ';' "$UNICODE_DATA_PATH" -f 1,2,3 \
|
{ \
|
||||||
| grep ";$UNICODE_CHAR_CATEGORY\$" \
|
print "0x"$1, $2, $3; \
|
||||||
| cut -d ';' -f 1,2 \
|
} \
|
||||||
| sort \
|
else if (desired_category == "separators" && cat == "Zs") \
|
||||||
| awk 'BEGIN { FS=";"; OFS=";" } { print "0x"$1, $2; }' \
|
{ \
|
||||||
| awk --non-decimal-data \
|
print "0x"$1, $2, $3; \
|
||||||
'BEGIN \
|
} \
|
||||||
{ \
|
}' $UNICODE_DATA_PATH \
|
||||||
FS=";"; \
|
| gawk --non-decimal-data -v print_intervals="$PRINT_INTERVALS" -v sp="$STARTING_POINT" \
|
||||||
OFS=";"; \
|
'BEGIN \
|
||||||
is_in_range=0; \
|
{ \
|
||||||
} \
|
FS = ";"; \
|
||||||
\
|
OFS = ";"; \
|
||||||
function output_next_range () \
|
is_in_range = 0; \
|
||||||
{ \
|
print_count = 0; \
|
||||||
if (range_begin == range_prev) \
|
} \
|
||||||
{ \
|
\
|
||||||
print range_begin, range_prev, range_begin_name; \
|
function print_Nl() \
|
||||||
} \
|
{ \
|
||||||
else \
|
++print_count; \
|
||||||
{ \
|
if (print_count == 10) \
|
||||||
print range_begin, range_prev, range_begin_name, range_prev_name; \
|
{ \
|
||||||
} \
|
printf "\n"; \
|
||||||
} \
|
print_count = 0; \
|
||||||
\
|
} \
|
||||||
{ \
|
} \
|
||||||
if (is_in_range == 0) \
|
\
|
||||||
{ \
|
function output_next_range () \
|
||||||
is_in_range=1; \
|
{ \
|
||||||
range_begin=$1; \
|
if (range_begin != range_prev && print_intervals=="y") \
|
||||||
range_prev=$1; \
|
{ \
|
||||||
range_begin_name=$2; \
|
i1 = strtonum(range_begin); \
|
||||||
range_prev_name=$2; \
|
i2 = strtonum(range_prev); \
|
||||||
} \
|
len = i2 - i1; \
|
||||||
else \
|
# if the length of an interval is > 255 have to spilt it into 255-lenth ones
|
||||||
{ \
|
if (len > 255) \
|
||||||
if (range_prev + 1 == $1) \
|
{ \
|
||||||
{ \
|
numOfSubintervals = (len / 255); # more precisely number of subintervals - 1 \
|
||||||
range_prev=$1; \
|
for (i = 1; i <= numOfSubintervals; ++i) \
|
||||||
range_prev_name=$2
|
{ \
|
||||||
} \
|
if (sp == "sp") \
|
||||||
else \
|
{ \
|
||||||
{ \
|
printf "0X%X, ", i1; \
|
||||||
output_next_range(); \
|
print_Nl(); \
|
||||||
range_begin=$1; \
|
}
|
||||||
range_prev=$1; \
|
else \
|
||||||
range_begin_name=$2; \
|
{ \
|
||||||
range_prev_name=$2; \
|
printf "%d, ", 255; \
|
||||||
} \
|
print_Nl(); \
|
||||||
} \
|
} \
|
||||||
} \
|
i1 = i1 + 256; # next interval begins on the ending of the previous + 1 \
|
||||||
\
|
} \
|
||||||
END \
|
if (sp == "sp") \
|
||||||
{ \
|
{ \
|
||||||
output_next_range(); \
|
printf "0X%X, ", i1; \
|
||||||
}' \
|
print_Nl(); \
|
||||||
| awk \
|
} \
|
||||||
'BEGIN \
|
else \
|
||||||
{ \
|
{ \
|
||||||
FS = ";" \
|
printf "%d, ", len % 255 - (i-1); \
|
||||||
} \
|
print_Nl(); \
|
||||||
{ \
|
} \
|
||||||
range_string = sprintf ("LIT_UNICODE_RANGE_'$UNICODE_CHAR_CATEGORY_UPPER_CASE' (%s, %s)", $1, $2); \
|
} \
|
||||||
range_string_length = length (range_string); \
|
else \
|
||||||
\
|
{ \
|
||||||
range_begin_name=$3; \
|
if (sp == "sp") \
|
||||||
range_end_name=$4; \
|
{ \
|
||||||
\
|
printf "%s, ", range_begin; \
|
||||||
range_begin_name_length = length (range_begin_name); \
|
print_Nl(); \
|
||||||
range_end_name_length = length (range_end_name); \
|
} \
|
||||||
\
|
else \
|
||||||
printf "%s", range_string; \
|
{ \
|
||||||
if (range_end_name_length == 0) \
|
printf "%d, ", len; \
|
||||||
{ \
|
print_Nl(); \
|
||||||
printf " /* %s */\n", range_begin_name; \
|
} \
|
||||||
} \
|
} \
|
||||||
else \
|
} \
|
||||||
{ \
|
else if (range_begin == range_prev && print_intervals != "y")\
|
||||||
if (range_begin_name_length > range_end_name_length) \
|
{ \
|
||||||
{ \
|
printf "%s, ", range_begin; \
|
||||||
indent1 = 0; \
|
print_Nl(); \
|
||||||
indent2 = range_string_length + range_begin_name_length / 2;
|
} \
|
||||||
indent3 = range_string_length + (range_begin_name_length - range_end_name_length) / 2; \
|
} \
|
||||||
} \
|
\
|
||||||
else \
|
{ \
|
||||||
{ \
|
if (is_in_range == 0) \
|
||||||
indent1 = (range_end_name_length - range_begin_name_length) / 2; \
|
{ \
|
||||||
indent2 = range_string_length + range_end_name_length / 2;
|
is_in_range = 1; \
|
||||||
indent3 = range_string_length; \
|
range_begin = $1; \
|
||||||
} \
|
range_prev = $1; \
|
||||||
indent3 = indent3 + 3; \
|
range_begin_name = $2; \
|
||||||
fmt1 = sprintf (" /* %%%ds%%s\n", indent1); \
|
range_prev_name = $2; \
|
||||||
fmt2 = sprintf (" %%%ds<--->\n", indent2); \
|
} \
|
||||||
fmt3 = sprintf (" %%%ds%%s */\n", indent3); \
|
else \
|
||||||
\
|
{ \
|
||||||
printf fmt1, "", $3; \
|
if (range_prev + 1 == $1) \
|
||||||
printf fmt2, ""; \
|
{ \
|
||||||
printf fmt3, "", $4; \
|
range_prev = $1; \
|
||||||
} \
|
range_prev_name = $2
|
||||||
\
|
} \
|
||||||
printf "\n"; \
|
else \
|
||||||
}'
|
{ \
|
||||||
|
output_next_range(); \
|
||||||
|
range_begin = $1; \
|
||||||
|
range_prev=$1; \
|
||||||
|
range_begin_name = $2; \
|
||||||
|
range_prev_name = $2; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
END \
|
||||||
|
{ \
|
||||||
|
output_next_range(); \
|
||||||
|
}'
|
||||||
|
|||||||
Reference in New Issue
Block a user