Add preprocessor definitions for various character codes, referenced in ECMA-262, and helpers for checking if a character falls into an ECMA-defined character category; remove lit_char_is_carriage_return and lit_char_is_new_line helpers.
JerryScript-DCO-1.0-Signed-off-by: Ruben Ayrapetyan r.ayrapetyan@samsung.com
This commit is contained in:
@@ -16,44 +16,242 @@
|
||||
#include "lit-char-helpers.h"
|
||||
|
||||
/**
|
||||
* Check if specified character is the newline character
|
||||
* Check if specified character is one of the Format-Control characters
|
||||
*
|
||||
* @return true - if the character is "<LF>" character according to ECMA-262 v5, Table 3,
|
||||
* @return true - if the character is one of characters, listed in ECMA-262 v5, Table 1,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_new_line (ecma_char_t c) /**< code unit */
|
||||
lit_char_is_format_control (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
return (c == '\x0A');
|
||||
} /* lit_char_is_new_line */
|
||||
return (c == LIT_CHAR_ZWNJ
|
||||
|| c == LIT_CHAR_ZWJ
|
||||
|| c == LIT_CHAR_BOM);
|
||||
} /* lit_char_is_format_control */
|
||||
|
||||
/**
|
||||
* Check if specified character the carriage return character
|
||||
* Check if specified character is the Space Separator character
|
||||
*
|
||||
* @return true - if the character is "<CR>" character according to ECMA-262 v5, Table 3,
|
||||
* See also:
|
||||
* ECMA-262 v5, Table 2
|
||||
*
|
||||
* @return true - if the character falls into "Space, Separator" ("Zs") character category,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_carriage_return (ecma_char_t c) /**< code unit */
|
||||
lit_char_is_space_separator (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
return (c == '\x0D');
|
||||
} /* lit_char_is_carriage_return */
|
||||
/* Zs */
|
||||
#define LIT_UNICODE_RANGE_ZS(range_begin, range_end) \
|
||||
if (c >= (range_begin) && c <= (range_end)) \
|
||||
{ \
|
||||
return true; \
|
||||
}
|
||||
#include "lit-unicode-ranges.inc.h"
|
||||
|
||||
return false;
|
||||
} /* lit_char_is_space_separator */
|
||||
|
||||
/**
|
||||
* Check if specified character is one of LineTerminator (ECMA-262 v5, Table 3) characters
|
||||
* Check if specified character is one of the Whitespace characters
|
||||
*
|
||||
* @return true - if the character is one of LineTerminator characters,
|
||||
* @return true - if the character is one of characters, listed in ECMA-262 v5, Table 2,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_white_space (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
return (c == LIT_CHAR_TAB
|
||||
|| c == LIT_CHAR_VTAB
|
||||
|| c == LIT_CHAR_FF
|
||||
|| c == LIT_CHAR_SP
|
||||
|| c == LIT_CHAR_NBSP
|
||||
|| c == LIT_CHAR_BOM
|
||||
|| lit_char_is_space_separator (c));
|
||||
} /* lit_char_is_white_space */
|
||||
|
||||
/**
|
||||
* Check if specified character is one of LineTerminator characters
|
||||
*
|
||||
* @return true - if the character is one of characters, listed in ECMA-262 v5, Table 3,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_line_terminator (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
/* FIXME: Handle <LS> and <PS> (ECMA-262 v5, 7.3, Table 3) when Unicode would be supported */
|
||||
|
||||
return (lit_char_is_carriage_return (c)
|
||||
|| lit_char_is_new_line (c));
|
||||
return (c == LIT_CHAR_LF
|
||||
|| c == LIT_CHAR_CR
|
||||
|| c == LIT_CHAR_LS
|
||||
|| c == LIT_CHAR_PS);
|
||||
} /* lit_char_is_line_terminator */
|
||||
|
||||
/**
|
||||
* Check if specified character is a unicode letter
|
||||
*
|
||||
* Note:
|
||||
* Unicode letter is a character, included into one of the following categories:
|
||||
* - Uppercase letter (Lu);
|
||||
* - Lowercase letter (Ll);
|
||||
* - Titlecase letter (Lt);
|
||||
* - Modifier letter (Lm);
|
||||
* - Other letter (Lo);
|
||||
* - Letter number (Nl).
|
||||
*
|
||||
* See also:
|
||||
* ECMA-262 v5, 7.6
|
||||
*
|
||||
* @return true - if specified character falls into one of the listed categories,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_unicode_letter (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
/* Fast path for ASCII-defined letters */
|
||||
if ((c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)
|
||||
|| (c >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Lu */
|
||||
#define LIT_UNICODE_RANGE_LU(range_begin, range_end) \
|
||||
if (c >= (range_begin) && c <= (range_end)) \
|
||||
{ \
|
||||
return true; \
|
||||
}
|
||||
|
||||
/* Ll */
|
||||
#define LIT_UNICODE_RANGE_LL(range_begin, range_end) \
|
||||
if (c >= (range_begin) && c <= (range_end)) \
|
||||
{ \
|
||||
return true; \
|
||||
}
|
||||
|
||||
/* Lt */
|
||||
#define LIT_UNICODE_RANGE_LT(range_begin, range_end) \
|
||||
if (c >= (range_begin) && c <= (range_end)) \
|
||||
{ \
|
||||
return true; \
|
||||
}
|
||||
|
||||
/* Lm */
|
||||
#define LIT_UNICODE_RANGE_LM(range_begin, range_end) \
|
||||
if (c >= (range_begin) && c <= (range_end)) \
|
||||
{ \
|
||||
return true; \
|
||||
}
|
||||
|
||||
/* Lo */
|
||||
#define LIT_UNICODE_RANGE_LO(range_begin, range_end) \
|
||||
if (c >= (range_begin) && c <= (range_end)) \
|
||||
{ \
|
||||
return true; \
|
||||
}
|
||||
|
||||
/* Nl */
|
||||
#define LIT_UNICODE_RANGE_NL(range_begin, range_end) \
|
||||
if (c >= (range_begin) && c <= (range_end)) \
|
||||
{ \
|
||||
return true; \
|
||||
}
|
||||
|
||||
#include "lit-unicode-ranges.inc.h"
|
||||
|
||||
return false;
|
||||
} /* lit_char_is_unicode_letter */
|
||||
|
||||
/**
|
||||
* Check if specified character is a unicode combining mark
|
||||
*
|
||||
* Note:
|
||||
* Unicode combining mark is a character, included into one of the following categories:
|
||||
* - Non-spacing mark (Mn);
|
||||
* - Combining spacing mark (Mc).
|
||||
*
|
||||
* See also:
|
||||
* ECMA-262 v5, 7.6
|
||||
*
|
||||
* @return true - if specified character falls into one of the listed categories,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_unicode_combining_mark (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
/* Mn */
|
||||
#define LIT_UNICODE_RANGE_MN(range_begin, range_end) \
|
||||
if (c >= (range_begin) && c <= (range_end)) \
|
||||
{ \
|
||||
return true; \
|
||||
}
|
||||
|
||||
/* Mc */
|
||||
#define LIT_UNICODE_RANGE_MC(range_begin, range_end) \
|
||||
if (c >= (range_begin) && c <= (range_end)) \
|
||||
{ \
|
||||
return true; \
|
||||
}
|
||||
|
||||
#include "lit-unicode-ranges.inc.h"
|
||||
|
||||
return false;
|
||||
} /* lit_char_is_unicode_combining_mark */
|
||||
|
||||
/**
|
||||
* Check if specified character is a unicode digit
|
||||
*
|
||||
* Note:
|
||||
* Unicode digit is a character, included into the following category:
|
||||
* - Decimal number (Nd).
|
||||
*
|
||||
* See also:
|
||||
* ECMA-262 v5, 7.6
|
||||
*
|
||||
* @return true - if specified character falls into the specified category,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_unicode_digit (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
/* Nd */
|
||||
#define LIT_UNICODE_RANGE_ND(range_begin, range_end) \
|
||||
if (c >= (range_begin) && c <= (range_end)) \
|
||||
{ \
|
||||
return true; \
|
||||
}
|
||||
|
||||
#include "lit-unicode-ranges.inc.h"
|
||||
|
||||
return false;
|
||||
} /* lit_char_is_unicode_digit */
|
||||
|
||||
/**
|
||||
* Check if specified character is a unicode connector punctuation
|
||||
*
|
||||
* Note:
|
||||
* Unicode connector punctuation is a character, included into the following category:
|
||||
* - Connector punctuation (Pc).
|
||||
*
|
||||
* See also:
|
||||
* ECMA-262 v5, 7.6
|
||||
*
|
||||
* @return true - if specified character falls into the specified category,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_unicode_connector_punctuation (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
/* Pc */
|
||||
#define LIT_UNICODE_RANGE_PC(range_begin, range_end) \
|
||||
if (c >= (range_begin) && c <= (range_end)) \
|
||||
{ \
|
||||
return true; \
|
||||
}
|
||||
|
||||
#include "lit-unicode-ranges.inc.h"
|
||||
|
||||
return false;
|
||||
} /* lit_char_is_unicode_connector_punctuation */
|
||||
|
||||
/**
|
||||
* Check if specified character is a word character (part of IsWordChar abstract operation)
|
||||
*
|
||||
@@ -65,10 +263,10 @@ lit_char_is_line_terminator (ecma_char_t c) /**< code unit */
|
||||
bool
|
||||
lit_char_is_word_char (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
if ((c >= 'a' && c <= 'z')
|
||||
|| (c >= 'A' && c <= 'Z')
|
||||
|| (c >= '0' && c <= '9')
|
||||
|| c == '_')
|
||||
if ((c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)
|
||||
|| (c >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
|
||||
|| (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
|
||||
|| c == LIT_CHAR_UNDERSCORE)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
@@ -85,30 +283,18 @@ uint32_t
|
||||
lit_char_hex_to_int (ecma_char_t c) /**< code unit, corresponding to
|
||||
* one of [0-9A-Fa-f] characters */
|
||||
{
|
||||
switch (c)
|
||||
if (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
|
||||
{
|
||||
case '0': return 0x0;
|
||||
case '1': return 0x1;
|
||||
case '2': return 0x2;
|
||||
case '3': return 0x3;
|
||||
case '4': return 0x4;
|
||||
case '5': return 0x5;
|
||||
case '6': return 0x6;
|
||||
case '7': return 0x7;
|
||||
case '8': return 0x8;
|
||||
case '9': return 0x9;
|
||||
case 'a':
|
||||
case 'A': return 0xA;
|
||||
case 'b':
|
||||
case 'B': return 0xB;
|
||||
case 'c':
|
||||
case 'C': return 0xC;
|
||||
case 'd':
|
||||
case 'D': return 0xD;
|
||||
case 'e':
|
||||
case 'E': return 0xE;
|
||||
case 'f':
|
||||
case 'F': return 0xF;
|
||||
default: JERRY_UNREACHABLE ();
|
||||
return (uint32_t) (c - LIT_CHAR_ASCII_DIGITS_BEGIN);
|
||||
}
|
||||
else if (c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)
|
||||
{
|
||||
return (uint32_t) (c - LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN + 10);
|
||||
}
|
||||
else
|
||||
{
|
||||
JERRY_ASSERT (c >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN && c <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END);
|
||||
|
||||
return (uint32_t) (c - LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN + 10);
|
||||
}
|
||||
} /* lit_char_hex_to_int */
|
||||
|
||||
Reference in New Issue
Block a user