Add core unicode functionality.
Add utf-8 processing routines. Change ecma_char_t from char/uint16_t to uint16_t. Apply all utf-8 processing routines. Change char to jerry_api_char in API functions' declarations. JerryScript-DCO-1.0-Signed-off-by: Andrey Shitov a.shitov@samsung.com
This commit is contained in:
@@ -18,25 +18,63 @@
|
||||
|
||||
#include "jrt.h"
|
||||
|
||||
#if CONFIG_ECMA_CHAR_ENCODING == CONFIG_ECMA_CHAR_ASCII
|
||||
/**
|
||||
* Description of an ecma-character
|
||||
* The ECMAScript standard defines the terms "code unit" and "character" as a 16-bit unsigned value
|
||||
* used to represent a 16-bit unit of text; this is the same as a code unit in UTF-16 (see ECMA-262 5.1, Chapter 6).
|
||||
*
|
||||
* The term "code point" or "Unicode character" is used to refer to a single Unicode scalar value (may be longer
|
||||
* than 16 bits: 0x0 - 0x10FFFF). One code point could be represented with one or two 16-bit code units.
|
||||
*
|
||||
* According to the standard all strings and source text are assumed to be a sequence of code units.
|
||||
* The length of a string equals the number of code units in the string, which is not the same as the number of Unicode
|
||||
* characters in a string.
|
||||
*
|
||||
* Internally JerryScript engine uses UTF-8 representation of strings to reduce memory overhead. Unicode character
|
||||
* occupies from one to four bytes in UTF-8 representation.
|
||||
*
|
||||
* Unicode scalar value | Bytes in UTF-8 | Bytes in UTF-16
|
||||
* | (internal representation) |
|
||||
* ----------------------------------------------------------------------
|
||||
* 0x0 - 0x7F | 1 byte | 2 bytes
|
||||
* 0x80 - 0x7FF | 2 bytes | 2 bytes
|
||||
* 0x800 - 0xFFFF | 3 bytes | 2 bytes
|
||||
* 0x10000 - 0x10FFFF | 4 bytes | 4 bytes
|
||||
*
|
||||
* Scalar values from 0xD800 to 0xDFFF are permanently reserved by Unicode standard to encode high and low
|
||||
* surrogates in UTF-16 (Code points 0x10000 - 0x10FFFF are encoded via pair of surrogates in UTF-16).
|
||||
* Although the official Unicode standard says that no UTF form can encode these code points, we allow
|
||||
* them to be encoded inside strings. The reason for that is compatibility with ECMA standard.
|
||||
*
|
||||
* For example, assume a string which consists of one Unicode character: 0x1D700 (Mathematical Italic Small Epsilon).
|
||||
* It has the following representation in UTF-16: 0xD835 0xDF00.
|
||||
*
|
||||
* ECMA standard allows extracting a substring from this string:
|
||||
* > var str = String.fromCharCode (0xD835, 0xDF00); // Create a string containing one character: 0x1D700
|
||||
* > str.length; // 2
|
||||
* > var str1 = str.substring (0, 1);
|
||||
* > str1.length; // 1
|
||||
* > str1.charCodeAt (0); // 55349 (this equals to 0xD835)
|
||||
*
|
||||
* Internally original string would be represented in UTF-8 as the following byte sequence: 0xF0 0x9D 0x9C 0x80.
|
||||
* After substring extraction high surrogate 0xD835 should be encoded via UTF-8: 0xED 0xA0 0xB5.
|
||||
*
|
||||
* Pair of low and high surrogates encoded separately should never occur in internal string representation,
|
||||
* it should be encoded as any code point and occupy 4 bytes. So, when constructing a string from two surrogates,
|
||||
* it should be processed gracefully:
|
||||
* > var str1 = String.fromCharCode (0xD835); // 0xED 0xA0 0xB5 - internal representation
|
||||
* > var str2 = String.fromCharCode (0xDF00); // 0xED 0xBC 0x80 - internal representation
|
||||
* > var str = str1 + str2; // 0xF0 0x9D 0x9C 0x80 - internal representation,
|
||||
* // !!! not 0xED 0xA0 0xB5 0xED 0xBC 0x80
|
||||
*/
|
||||
typedef uint8_t ecma_char_t;
|
||||
#elif CONFIG_ECMA_CHAR_ENCODING == CONFIG_ECMA_CHAR_UTF16
|
||||
|
||||
/**
|
||||
* Description of an ecma-character
|
||||
* Description of an ecma-character, which represents 16-bit code unit,
|
||||
* which is equal to UTF-16 character (see Chapter 6 from ECMA-262 5.1)
|
||||
*/
|
||||
typedef uint16_t ecma_char_t;
|
||||
#endif /* CONFIG_ECMA_CHAR_ENCODING == CONFIG_ECMA_CHAR_UTF16 */
|
||||
|
||||
/**
|
||||
* Description of an ecma-character pointer
|
||||
*/
|
||||
typedef ecma_char_t *ecma_char_ptr_t;
|
||||
|
||||
/**
|
||||
* Null character (zt-string end marker)
|
||||
* Null character
|
||||
*/
|
||||
#define ECMA_CHAR_NULL ((ecma_char_t) '\0')
|
||||
|
||||
@@ -45,13 +83,38 @@ typedef ecma_char_t *ecma_char_ptr_t;
|
||||
*/
|
||||
typedef uint32_t ecma_length_t;
|
||||
|
||||
/**
|
||||
* Description of an ecma-character pointer
|
||||
*/
|
||||
typedef ecma_char_t *ecma_char_ptr_t;
|
||||
|
||||
/**
|
||||
* Max bytes needed to represent a code unit (utf-16 char) via utf-8 encoding
|
||||
*/
|
||||
#define LIT_UTF8_MAX_BYTES_IN_CODE_UNIT (3)
|
||||
|
||||
/**
|
||||
* A byte of utf-8 string
|
||||
*/
|
||||
typedef uint8_t lit_utf8_byte_t;
|
||||
|
||||
/**
|
||||
* Size of a utf-8 string in bytes
|
||||
*/
|
||||
typedef uint32_t lit_utf8_size_t;
|
||||
|
||||
/**
|
||||
* Unicode code point
|
||||
*/
|
||||
typedef uint32_t lit_code_point_t;
|
||||
|
||||
/**
|
||||
* ECMA string hash
|
||||
*/
|
||||
typedef uint8_t lit_string_hash_t;
|
||||
|
||||
/**
|
||||
* Length of string hash, in bits
|
||||
* ECMA string hash value length, in bits
|
||||
*/
|
||||
#define LIT_STRING_HASH_BITS (sizeof (lit_string_hash_t) * JERRY_BITSINBYTE)
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "lit-literal-storage.h"
|
||||
#include "ecma-helpers.h"
|
||||
#include "lit-literal.h"
|
||||
#include "lit-magic-strings.h"
|
||||
|
||||
/**
|
||||
* Literal storage
|
||||
@@ -57,18 +58,18 @@ lit_charset_record_t::set_prev (rcs_record_t *prev_rec_p) /**< pointer to the re
|
||||
* Set the charset of the record
|
||||
*/
|
||||
void
|
||||
lit_charset_record_t::set_charset (const ecma_char_t *str, /**< buffer containing characters to set */
|
||||
size_t size) /**< size of the buffer in bytes */
|
||||
lit_charset_record_t::set_charset (const lit_utf8_byte_t *str, /**< buffer containing characters to set */
|
||||
lit_utf8_size_t size) /**< size of the buffer in bytes */
|
||||
{
|
||||
JERRY_ASSERT (header_size () + size == get_size () - get_alignment_bytes_count ());
|
||||
|
||||
rcs_record_iterator_t it ((rcs_recordset_t *)&lit_storage, (rcs_record_t *)this);
|
||||
it.skip (header_size ());
|
||||
|
||||
for (size_t i = 0; i < get_length (); ++i)
|
||||
for (lit_utf8_size_t i = 0; i < get_length (); ++i)
|
||||
{
|
||||
it.write<ecma_char_t> (str[i]);
|
||||
it.skip<ecma_char_t> ();
|
||||
it.write<lit_utf8_byte_t> (str[i]);
|
||||
it.skip<lit_utf8_byte_t> ();
|
||||
}
|
||||
} /* lit_charset_record_t::set_charset */
|
||||
|
||||
@@ -77,38 +78,39 @@ lit_charset_record_t::set_charset (const ecma_char_t *str, /**< buffer containin
|
||||
*
|
||||
* @return number of bytes written to the buffer
|
||||
*/
|
||||
ecma_length_t
|
||||
lit_charset_record_t::get_charset (ecma_char_t *buff, /**< output buffer */
|
||||
lit_utf8_size_t
|
||||
lit_charset_record_t::get_charset (lit_utf8_byte_t *buff, /**< output buffer */
|
||||
size_t size) /**< size of the output buffer in bytes */
|
||||
{
|
||||
JERRY_ASSERT (buff && size >= sizeof (ecma_char_t));
|
||||
JERRY_ASSERT (buff && size >= sizeof (lit_utf8_byte_t));
|
||||
|
||||
rcs_record_iterator_t it ((rcs_recordset_t *)&lit_storage, (rcs_record_t *)this);
|
||||
it.skip (header_size ());
|
||||
ecma_length_t len = get_length ();
|
||||
size_t i;
|
||||
lit_utf8_size_t len = get_length ();
|
||||
lit_utf8_size_t i;
|
||||
|
||||
for (i = 0; i < len && size > sizeof (ecma_char_t); ++i)
|
||||
for (i = 0; i < len && size > 0; ++i)
|
||||
{
|
||||
buff[i] = it.read<ecma_char_t> ();
|
||||
it.skip<ecma_char_t> ();
|
||||
size -= sizeof (ecma_char_t);
|
||||
buff[i] = it.read<lit_utf8_byte_t> ();
|
||||
it.skip<lit_utf8_byte_t> ();
|
||||
size -= sizeof (lit_utf8_byte_t);
|
||||
}
|
||||
|
||||
return (ecma_length_t) i;
|
||||
return i;
|
||||
} /* lit_charset_record_t::get_charset */
|
||||
|
||||
/**
|
||||
* Compares characters from the record to the string
|
||||
*
|
||||
* @return 0 if strings are equal
|
||||
* -1 if str2 is greater
|
||||
* 1 if str2 is less
|
||||
* -1 if str_to_compare_with is greater
|
||||
* 1 if str_to_compare_with is less
|
||||
*/
|
||||
int
|
||||
lit_charset_record_t::compare_zt (const ecma_char_t *str_to_compare_with, /**< buffer with string to compare */
|
||||
size_t length) /**< length of the string in buffer str2 */
|
||||
lit_charset_record_t::compare_utf8 (const lit_utf8_byte_t *str_to_compare_with, /**< buffer with string to compare */
|
||||
lit_utf8_size_t str_size) /**< size of the string */
|
||||
{
|
||||
TODO ("Support utf-8 in comparison.");
|
||||
size_t i;
|
||||
|
||||
if (get_length () == 0)
|
||||
@@ -132,9 +134,9 @@ lit_charset_record_t::compare_zt (const ecma_char_t *str_to_compare_with, /**< b
|
||||
|
||||
it_this.skip (header_size ());
|
||||
|
||||
for (i = 0; i < get_length () && i < length; i++)
|
||||
for (i = 0; i < get_length () && i < str_size; i++)
|
||||
{
|
||||
ecma_char_t chr = it_this.read<ecma_char_t> ();
|
||||
lit_utf8_byte_t chr = it_this.read<lit_utf8_byte_t> ();
|
||||
|
||||
if (chr > str_to_compare_with[i])
|
||||
{
|
||||
@@ -145,10 +147,10 @@ lit_charset_record_t::compare_zt (const ecma_char_t *str_to_compare_with, /**< b
|
||||
return -1;
|
||||
}
|
||||
|
||||
it_this.skip<ecma_char_t> ();
|
||||
it_this.skip<lit_utf8_byte_t> ();
|
||||
}
|
||||
|
||||
if (i < length)
|
||||
if (i < str_size)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
@@ -163,7 +165,7 @@ lit_charset_record_t::compare_zt (const ecma_char_t *str_to_compare_with, /**< b
|
||||
* false otherwise
|
||||
*/
|
||||
bool
|
||||
lit_charset_record_t::equal (lit_charset_record_t *rec) /**< charset record to compare with */
|
||||
lit_charset_record_t::is_equal (lit_charset_record_t *rec) /**< charset record to compare with */
|
||||
{
|
||||
if (get_length () != rec->get_length ())
|
||||
{
|
||||
@@ -176,31 +178,19 @@ lit_charset_record_t::equal (lit_charset_record_t *rec) /**< charset record to c
|
||||
it_this.skip (header_size ());
|
||||
it_record.skip (rec->header_size ());
|
||||
|
||||
for (ecma_length_t i = 0; i < get_length (); i++)
|
||||
for (lit_utf8_size_t i = 0; i < get_length (); i++)
|
||||
{
|
||||
if (it_this.read<ecma_char_t> () != it_record.read<ecma_char_t> ())
|
||||
if (it_this.read<lit_utf8_byte_t> () != it_record.read<lit_utf8_byte_t> ())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
it_this.skip<ecma_char_t> ();
|
||||
it_record.skip<ecma_char_t> ();
|
||||
it_this.skip<lit_utf8_byte_t> ();
|
||||
it_record.skip<lit_utf8_byte_t> ();
|
||||
}
|
||||
|
||||
return true;
|
||||
} /* lit_charset_record_t::equal */
|
||||
|
||||
/**
|
||||
* Compares this lit_charset_record_t records with zero-terminated string for equality
|
||||
*
|
||||
* @return true if compared instances are equal
|
||||
* false otherwise
|
||||
*/
|
||||
bool
|
||||
lit_charset_record_t::equal_zt (const ecma_char_t *str) /**< zero-terminated string */
|
||||
{
|
||||
return equal_non_zt (str, ecma_zt_string_length (str));
|
||||
} /* lit_charset_record_t::equal_zt */
|
||||
} /* lit_charset_record_t::is_equal */
|
||||
|
||||
/**
|
||||
* Compare this lit_charset_record_t record with string (which could contain '\0' characters) for equality
|
||||
@@ -209,24 +199,24 @@ lit_charset_record_t::equal_zt (const ecma_char_t *str) /**< zero-terminated str
|
||||
* false otherwise
|
||||
*/
|
||||
bool
|
||||
lit_charset_record_t::equal_non_zt (const ecma_char_t *str, /**< string to compare with */
|
||||
ecma_length_t len) /**< length of the string */
|
||||
lit_charset_record_t::is_equal_utf8_string (const lit_utf8_byte_t *str, /**< string to compare with */
|
||||
lit_utf8_size_t str_size) /**< length of the string */
|
||||
{
|
||||
rcs_record_iterator_t it_this (&lit_storage, this);
|
||||
|
||||
it_this.skip (header_size ());
|
||||
|
||||
for (ecma_length_t i = 0; i < get_length () && i < len; i++)
|
||||
for (lit_utf8_size_t i = 0; i < get_length () && i < str_size; i++)
|
||||
{
|
||||
if (it_this.read<ecma_char_t> () != str[i])
|
||||
if (it_this.read<lit_utf8_byte_t> () != str[i])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
it_this.skip<ecma_char_t> ();
|
||||
it_this.skip<lit_utf8_byte_t> ();
|
||||
}
|
||||
|
||||
return get_length () == len;
|
||||
return get_length () == str_size;
|
||||
} /* lit_charset_record_t::equal_non_zt */
|
||||
|
||||
/**
|
||||
@@ -235,9 +225,9 @@ lit_charset_record_t::equal_non_zt (const ecma_char_t *str, /**< string to compa
|
||||
* @return pointer to the created record
|
||||
*/
|
||||
lit_charset_record_t *
|
||||
lit_literal_storage_t::create_charset_record (const ecma_char_t *str, /**< string to be placed in the record */
|
||||
size_t buf_size) /**< size in bytes of the buffer which holds the
|
||||
* string */
|
||||
lit_literal_storage_t::create_charset_record (const lit_utf8_byte_t *str, /**< string to be placed in the record */
|
||||
lit_utf8_size_t buf_size) /**< size in bytes of the buffer which holds the
|
||||
* string */
|
||||
{
|
||||
const size_t alignment = lit_charset_record_t::size (buf_size) - (lit_charset_record_t::header_size () + buf_size);
|
||||
|
||||
@@ -245,7 +235,7 @@ lit_literal_storage_t::create_charset_record (const ecma_char_t *str, /**< strin
|
||||
|
||||
ret->set_alignment_bytes_count (alignment);
|
||||
ret->set_charset (str, buf_size);
|
||||
ret->set_hash (ecma_chars_buffer_calc_hash_last_chars (str, ret->get_length ()));
|
||||
ret->set_hash (lit_utf8_string_calc_hash_last_bytes (str, ret->get_length ()));
|
||||
|
||||
return ret;
|
||||
} /* lit_literal_storage_t::create_charset_record */
|
||||
@@ -319,8 +309,9 @@ lit_literal_storage_t::dump ()
|
||||
|
||||
for (size_t i = 0; i < lit_p->get_length (); ++i)
|
||||
{
|
||||
printf ("%c", it_this.read<ecma_char_t> ());
|
||||
it_this.skip<ecma_char_t> ();
|
||||
FIXME ("Support proper printing of characters which occupy more than one byte.")
|
||||
printf ("%c", it_this.read<lit_utf8_byte_t> ());
|
||||
it_this.skip<lit_utf8_byte_t> ();
|
||||
}
|
||||
|
||||
printf (" : STRING");
|
||||
@@ -330,7 +321,7 @@ lit_literal_storage_t::dump ()
|
||||
case LIT_MAGIC_STR:
|
||||
{
|
||||
lit_magic_string_id_t id = lit_magic_record_get_magic_str_id (rec_p);
|
||||
printf ("%s : MAGIC STRING", lit_get_magic_string_zt (id));
|
||||
printf ("%s : MAGIC STRING", lit_get_magic_string_utf8 (id));
|
||||
printf (" [id=%d] ", id);
|
||||
|
||||
break;
|
||||
@@ -338,7 +329,7 @@ lit_literal_storage_t::dump ()
|
||||
case LIT_MAGIC_STR_EX:
|
||||
{
|
||||
lit_magic_string_ex_id_t id = lit_magic_record_ex_get_magic_str_id (rec_p);
|
||||
printf ("%s : EXT MAGIC STRING", lit_get_magic_string_ex_zt (id));
|
||||
printf ("%s : EXT MAGIC STRING", lit_get_magic_string_ex_utf8 (id));
|
||||
printf (" [id=%d] ", id);
|
||||
|
||||
break;
|
||||
@@ -353,8 +344,8 @@ lit_literal_storage_t::dump ()
|
||||
}
|
||||
else
|
||||
{
|
||||
ecma_char_t buff[ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER];
|
||||
ecma_number_to_zt_string (lit_p->get_number (), buff, ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER);
|
||||
lit_utf8_byte_t buff[ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER];
|
||||
ecma_number_to_utf8_string (lit_p->get_number (), buff, sizeof (buff));
|
||||
printf ("%s : NUMBER", buff);
|
||||
}
|
||||
|
||||
@@ -465,12 +456,12 @@ lit_literal_storage_t::get_record_size (rcs_record_t* rec_p) /**< pointer to a r
|
||||
}
|
||||
} /* lit_literal_storage_t::get_record_size */
|
||||
|
||||
template void rcs_record_iterator_t::skip<ecma_char_t> ();
|
||||
template void rcs_record_iterator_t::skip<uint8_t> ();
|
||||
template void rcs_record_iterator_t::skip<uint16_t> ();
|
||||
template void rcs_record_iterator_t::skip<uint32_t> ();
|
||||
|
||||
template void rcs_record_iterator_t::write<ecma_char_t> (ecma_char_t);
|
||||
template ecma_char_t rcs_record_iterator_t::read<ecma_char_t> ();
|
||||
template void rcs_record_iterator_t::write<uint8_t> (uint8_t);
|
||||
template uint8_t rcs_record_iterator_t::read<uint8_t> ();
|
||||
|
||||
template void rcs_record_iterator_t::write<ecma_number_t> (ecma_number_t);
|
||||
template ecma_number_t rcs_record_iterator_t::read<ecma_number_t> ();
|
||||
|
||||
@@ -106,12 +106,12 @@ public:
|
||||
/**
|
||||
* Get the length of the string, which is contained inside the record
|
||||
*
|
||||
* @return length of the string (count of the ecma_char_t characters inside the charset)
|
||||
* @return length of the string (bytes count)
|
||||
*/
|
||||
ecma_length_t
|
||||
lit_utf8_size_t
|
||||
get_length () const
|
||||
{
|
||||
return (ecma_length_t) ((get_size () - header_size () - get_alignment_bytes_count ()) / sizeof (ecma_char_t));
|
||||
return (lit_utf8_size_t) (get_size () - header_size () - get_alignment_bytes_count ());
|
||||
} /* get_length */
|
||||
|
||||
/**
|
||||
@@ -127,12 +127,11 @@ public:
|
||||
|
||||
rcs_record_t *get_prev () const;
|
||||
|
||||
ecma_length_t get_charset (ecma_char_t *buff, size_t size);
|
||||
lit_utf8_size_t get_charset (lit_utf8_byte_t *, size_t);
|
||||
|
||||
int compare_zt (const ecma_char_t *, size_t);
|
||||
bool equal (lit_charset_record_t *);
|
||||
bool equal_zt (const ecma_char_t *);
|
||||
bool equal_non_zt (const ecma_char_t *, ecma_length_t);
|
||||
int compare_utf8 (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
bool is_equal (lit_charset_record_t *);
|
||||
bool is_equal_utf8_string (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
|
||||
private:
|
||||
/**
|
||||
@@ -157,7 +156,7 @@ private:
|
||||
|
||||
void set_prev (rcs_record_t *);
|
||||
|
||||
void set_charset (const ecma_char_t *, size_t);
|
||||
void set_charset (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
|
||||
/**
|
||||
* Offset and length of 'alignment' field, in bits
|
||||
@@ -242,7 +241,6 @@ public:
|
||||
magic_string_id_t get_magic_str_id () const
|
||||
{
|
||||
uint32_t id = get_field (magic_field_pos, magic_field_width);
|
||||
// JERRY_ASSERT (id < LIT_MAGIC_STRING__COUNT);
|
||||
return (magic_string_id_t) id;
|
||||
} /* get_magic_str_id */
|
||||
|
||||
@@ -303,9 +301,10 @@ private:
|
||||
* Layout:
|
||||
* ------- header -----------------------
|
||||
* type (4 bits)
|
||||
* magic string id (12 bits)
|
||||
* padding (12 bits)
|
||||
* pointer to prev (16 bits)
|
||||
* --------------------------------------
|
||||
* ecma_number_t
|
||||
*/
|
||||
class lit_number_record_t : public rcs_record_t
|
||||
{
|
||||
@@ -417,7 +416,7 @@ public:
|
||||
LIT_NUMBER
|
||||
};
|
||||
|
||||
lit_charset_record_t *create_charset_record (const ecma_char_t *, size_t);
|
||||
lit_charset_record_t *create_charset_record (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
lit_magic_record_t *create_magic_record (lit_magic_string_id_t);
|
||||
lit_magic_record_t *create_magic_record_ex (lit_magic_string_ex_id_t);
|
||||
lit_number_record_t *create_number_record (ecma_number_t);
|
||||
|
||||
+137
-82
@@ -14,7 +14,9 @@
|
||||
*/
|
||||
|
||||
#include "lit-literal.h"
|
||||
|
||||
#include "ecma-helpers.h"
|
||||
#include "lit-magic-strings.h"
|
||||
|
||||
/**
|
||||
* Initialize literal storage
|
||||
@@ -54,43 +56,43 @@ lit_dump_literals ()
|
||||
* @return pointer to created record
|
||||
*/
|
||||
literal_t
|
||||
lit_create_literal_from_charset (const ecma_char_t *str, /**< string to initialize the record,
|
||||
* could be non-zero-terminated */
|
||||
ecma_length_t len) /**< length of the string */
|
||||
lit_create_literal_from_utf8_string (const lit_utf8_byte_t *str_p, /**< string to initialize the record,
|
||||
* could be non-zero-terminated */
|
||||
lit_utf8_size_t str_size) /**< length of the string */
|
||||
{
|
||||
JERRY_ASSERT (str || !len);
|
||||
JERRY_ASSERT (str_p || !str_size);
|
||||
for (lit_magic_string_id_t msi = (lit_magic_string_id_t) 0;
|
||||
msi < LIT_MAGIC_STRING__COUNT;
|
||||
msi = (lit_magic_string_id_t) (msi + 1))
|
||||
{
|
||||
if (ecma_zt_string_length (lit_get_magic_string_zt (msi)) != len)
|
||||
if (lit_get_magic_string_size (msi) != str_size)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strncmp ((const char *) str, (const char *) lit_get_magic_string_zt (msi), len))
|
||||
if (!strncmp ((const char *) str_p, (const char *) lit_get_magic_string_utf8 (msi), str_size))
|
||||
{
|
||||
return lit_storage.create_magic_record (msi);
|
||||
}
|
||||
}
|
||||
|
||||
for (lit_magic_string_ex_id_t msi = (lit_magic_string_ex_id_t) 0;
|
||||
msi < ecma_get_magic_string_ex_count ();
|
||||
msi < lit_get_magic_string_ex_count ();
|
||||
msi = (lit_magic_string_ex_id_t) (msi + 1))
|
||||
{
|
||||
if (ecma_zt_string_length (lit_get_magic_string_ex_zt (msi)) != len)
|
||||
if (lit_get_magic_string_ex_size (msi) != str_size)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strncmp ((const char *) str, (const char *) lit_get_magic_string_ex_zt (msi), len))
|
||||
if (!strncmp ((const char *) str_p, (const char *) lit_get_magic_string_ex_utf8 (msi), str_size))
|
||||
{
|
||||
return lit_storage.create_magic_record_ex (msi);
|
||||
}
|
||||
}
|
||||
|
||||
return lit_storage.create_charset_record (str, len * sizeof (ecma_char_t));
|
||||
} /* lit_create_literal_from_charset */
|
||||
return lit_storage.create_charset_record (str_p, str_size);
|
||||
} /* lit_create_literal_from_utf8_string */
|
||||
|
||||
/**
|
||||
* Find a literal in literal storage.
|
||||
@@ -99,22 +101,22 @@ lit_create_literal_from_charset (const ecma_char_t *str, /**< string to initiali
|
||||
* @return pointer to a literal or NULL if no corresponding literal exists
|
||||
*/
|
||||
literal_t
|
||||
lit_find_literal_by_charset (const ecma_char_t *str, /**< a string to search for */
|
||||
ecma_length_t len) /**< length of the string */
|
||||
lit_find_literal_by_utf8_string (const lit_utf8_byte_t *str_p, /**< a string to search for */
|
||||
lit_utf8_size_t str_size) /**< length of the string */
|
||||
{
|
||||
JERRY_ASSERT (str || !len);
|
||||
JERRY_ASSERT (str_p || !str_size);
|
||||
for (literal_t lit = lit_storage.get_first (); lit != NULL; lit = lit_storage.get_next (lit))
|
||||
{
|
||||
rcs_record_t::type_t type = lit->get_type ();
|
||||
|
||||
if (type == LIT_STR_T)
|
||||
{
|
||||
if (static_cast<lit_charset_record_t *>(lit)->get_length () != len)
|
||||
if (static_cast<lit_charset_record_t *>(lit)->get_length () != str_size)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!static_cast<lit_charset_record_t *>(lit)->compare_zt (str, len))
|
||||
if (!static_cast<lit_charset_record_t *>(lit)->compare_utf8 (str_p, str_size))
|
||||
{
|
||||
return lit;
|
||||
}
|
||||
@@ -122,14 +124,14 @@ lit_find_literal_by_charset (const ecma_char_t *str, /**< a string to search for
|
||||
else if (type == LIT_MAGIC_STR_T)
|
||||
{
|
||||
lit_magic_string_id_t magic_id = lit_magic_record_get_magic_str_id (lit);
|
||||
const char *magic_str = (const char *) lit_get_magic_string_zt (magic_id);
|
||||
const lit_utf8_byte_t *magic_str_p = lit_get_magic_string_utf8 (magic_id);
|
||||
|
||||
if (strlen (magic_str) != len)
|
||||
if (lit_zt_utf8_string_size (magic_str_p) != str_size)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strncmp (magic_str, (const char *) str, strlen (magic_str)))
|
||||
if (!strncmp ((const char *) magic_str_p, (const char *) str_p, str_size))
|
||||
{
|
||||
return lit;
|
||||
}
|
||||
@@ -137,14 +139,14 @@ lit_find_literal_by_charset (const ecma_char_t *str, /**< a string to search for
|
||||
else if (type == LIT_MAGIC_STR_EX_T)
|
||||
{
|
||||
lit_magic_string_ex_id_t magic_id = lit_magic_record_ex_get_magic_str_id (lit);
|
||||
const char *magic_str = (const char *) lit_get_magic_string_ex_zt (magic_id);
|
||||
const lit_utf8_byte_t *magic_str_p = lit_get_magic_string_ex_utf8 (magic_id);
|
||||
|
||||
if (strlen (magic_str) != len)
|
||||
if (lit_zt_utf8_string_size (magic_str_p) != str_size)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strncmp (magic_str, (const char *) str, strlen (magic_str)))
|
||||
if (!strncmp ((const char *) magic_str_p, (const char *) str_p, str_size))
|
||||
{
|
||||
return lit;
|
||||
}
|
||||
@@ -152,7 +154,7 @@ lit_find_literal_by_charset (const ecma_char_t *str, /**< a string to search for
|
||||
}
|
||||
|
||||
return NULL;
|
||||
} /* lit_find_literal_by_charset */
|
||||
} /* lit_find_literal_by_utf8_string */
|
||||
|
||||
/**
|
||||
* Check if a literal which holds the passed string exists.
|
||||
@@ -161,18 +163,18 @@ lit_find_literal_by_charset (const ecma_char_t *str, /**< a string to search for
|
||||
* @return pointer to existing or newly created record
|
||||
*/
|
||||
literal_t
|
||||
lit_find_or_create_literal_from_charset (const ecma_char_t *str, /**< string, could be non-zero-terminated */
|
||||
ecma_length_t len) /**< length of the string */
|
||||
lit_find_or_create_literal_from_utf8_string (const lit_utf8_byte_t *str_p, /**< string, could be non-zero-terminated */
|
||||
lit_utf8_size_t str_size) /**< length of the string */
|
||||
{
|
||||
literal_t lit = lit_find_literal_by_charset (str, len);
|
||||
literal_t lit = lit_find_literal_by_utf8_string (str_p, str_size);
|
||||
|
||||
if (lit == NULL)
|
||||
{
|
||||
lit = lit_create_literal_from_charset (str, len);
|
||||
lit = lit_create_literal_from_utf8_string (str_p, str_size);
|
||||
}
|
||||
|
||||
return lit;
|
||||
} /* lit_find_or_create_literal_from_s */
|
||||
} /* lit_find_or_create_literal_from_utf8_string */
|
||||
|
||||
|
||||
/**
|
||||
@@ -235,7 +237,7 @@ lit_find_literal_by_num (ecma_number_t num) /**< a number to search for */
|
||||
/**
|
||||
* Check if literal equals to charset record
|
||||
*
|
||||
* @return true if equal
|
||||
* @return true if equal
|
||||
* false otherwise
|
||||
*/
|
||||
static bool
|
||||
@@ -246,24 +248,28 @@ lit_literal_equal_charset_rec (literal_t lit, /**< literal to com
|
||||
{
|
||||
case LIT_STR_T:
|
||||
{
|
||||
return static_cast<lit_charset_record_t *>(lit)->equal (record);
|
||||
return static_cast<lit_charset_record_t *>(lit)->is_equal (record);
|
||||
}
|
||||
case LIT_MAGIC_STR_T:
|
||||
{
|
||||
return record->equal_zt (lit_get_magic_string_zt (lit_magic_record_get_magic_str_id (lit)));
|
||||
lit_magic_string_id_t magic_string_id = lit_magic_record_get_magic_str_id (lit);
|
||||
return record->is_equal_utf8_string (lit_get_magic_string_utf8 (magic_string_id),
|
||||
lit_get_magic_string_size (magic_string_id));
|
||||
}
|
||||
case LIT_MAGIC_STR_EX_T:
|
||||
{
|
||||
return record->equal_zt (lit_get_magic_string_ex_zt (lit_magic_record_ex_get_magic_str_id (lit)));
|
||||
lit_magic_string_ex_id_t magic_string_id = lit_magic_record_ex_get_magic_str_id (lit);
|
||||
return record->is_equal_utf8_string (lit_get_magic_string_ex_utf8 (magic_string_id),
|
||||
lit_get_magic_string_ex_size (magic_string_id));
|
||||
}
|
||||
case LIT_NUMBER_T:
|
||||
{
|
||||
ecma_char_t buff[ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER];
|
||||
ecma_number_to_zt_string (static_cast<lit_number_record_t *>(lit)->get_number (),
|
||||
buff,
|
||||
ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER);
|
||||
lit_utf8_byte_t buff[ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER];
|
||||
lit_utf8_size_t copied = ecma_number_to_utf8_string (static_cast<lit_number_record_t *>(lit)->get_number (),
|
||||
buff,
|
||||
sizeof (buff));
|
||||
|
||||
return record->equal_zt (buff);
|
||||
return record->is_equal_utf8_string (buff, copied);
|
||||
}
|
||||
default:
|
||||
{
|
||||
@@ -273,46 +279,47 @@ lit_literal_equal_charset_rec (literal_t lit, /**< literal to com
|
||||
} /* lit_literal_equal_charset_rec */
|
||||
|
||||
/**
|
||||
* Check if literal equals to zero-terminated string
|
||||
* Check if literal equals to utf-8 string
|
||||
*
|
||||
* @return true if equal
|
||||
* false otherwise
|
||||
*/
|
||||
bool
|
||||
lit_literal_equal_zt (literal_t lit, /**< literal to compare */
|
||||
const ecma_char_t *str) /**< zero-terminated string to compare */
|
||||
lit_literal_equal_utf8 (literal_t lit, /**< literal to compare */
|
||||
const lit_utf8_byte_t *str_p, /**< utf-8 string to compare */
|
||||
lit_utf8_size_t str_size) /**< string size in bytes */
|
||||
{
|
||||
switch (lit->get_type ())
|
||||
{
|
||||
case LIT_STR_T:
|
||||
{
|
||||
return static_cast<lit_charset_record_t *>(lit)->equal_zt (str);
|
||||
return static_cast<lit_charset_record_t *>(lit)->is_equal_utf8_string (str_p, str_size);
|
||||
}
|
||||
case LIT_MAGIC_STR_T:
|
||||
{
|
||||
lit_magic_string_id_t magic_id = lit_magic_record_get_magic_str_id (lit);
|
||||
return ecma_compare_zt_strings (str, lit_get_magic_string_zt (magic_id));
|
||||
return lit_compare_utf8_string_and_magic_string (str_p, str_size, magic_id);
|
||||
}
|
||||
case LIT_MAGIC_STR_EX_T:
|
||||
{
|
||||
lit_magic_string_ex_id_t magic_id = lit_magic_record_ex_get_magic_str_id (lit);
|
||||
return ecma_compare_zt_strings (str, lit_get_magic_string_ex_zt (magic_id));
|
||||
return lit_compare_utf8_string_and_magic_string_ex (str_p, str_size, magic_id);
|
||||
}
|
||||
case LIT_NUMBER_T:
|
||||
{
|
||||
ecma_char_t buff[ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER];
|
||||
ecma_number_to_zt_string (static_cast<lit_number_record_t *>(lit)->get_number (),
|
||||
buff,
|
||||
ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER);
|
||||
lit_utf8_byte_t num_buf[ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER];
|
||||
lit_utf8_size_t num_size = ecma_number_to_utf8_string (static_cast<lit_number_record_t *>(lit)->get_number (),
|
||||
num_buf,
|
||||
sizeof (num_buf));
|
||||
|
||||
return ecma_compare_zt_strings (str, buff);
|
||||
return lit_compare_utf8_strings (str_p, str_size, num_buf, num_size);
|
||||
}
|
||||
default:
|
||||
{
|
||||
JERRY_UNREACHABLE ();
|
||||
}
|
||||
}
|
||||
} /* lit_literal_equal_zt */
|
||||
} /* lit_literal_equal_utf8 */
|
||||
|
||||
/**
|
||||
* Check if literal contains the string equal to the passed number
|
||||
@@ -324,10 +331,10 @@ bool
|
||||
lit_literal_equal_num (literal_t lit, /**< literal to check */
|
||||
ecma_number_t num) /**< number to compare with */
|
||||
{
|
||||
ecma_char_t buff[ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER];
|
||||
ecma_number_to_zt_string (num, buff, ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER);
|
||||
lit_utf8_byte_t buff[ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER];
|
||||
lit_utf8_size_t copied = ecma_number_to_utf8_string (num, buff, sizeof (buff));
|
||||
|
||||
return lit_literal_equal_zt (lit, buff);
|
||||
return lit_literal_equal_utf8 (lit, buff, copied);
|
||||
} /* lit_literal_equal_num */
|
||||
|
||||
/**
|
||||
@@ -348,11 +355,17 @@ lit_literal_equal (literal_t lit1, /**< first literal */
|
||||
}
|
||||
case lit_literal_storage_t::LIT_MAGIC_STR:
|
||||
{
|
||||
return lit_literal_equal_zt (lit1, lit_get_magic_string_zt (lit_magic_record_get_magic_str_id (lit2)));
|
||||
lit_magic_string_id_t magic_str_id = lit_magic_record_get_magic_str_id (lit2);
|
||||
return lit_literal_equal_utf8 (lit1,
|
||||
lit_get_magic_string_utf8 (magic_str_id),
|
||||
lit_get_magic_string_size (magic_str_id));
|
||||
}
|
||||
case lit_literal_storage_t::LIT_MAGIC_STR_EX:
|
||||
{
|
||||
return lit_literal_equal_zt (lit1, lit_get_magic_string_ex_zt (lit_magic_record_ex_get_magic_str_id (lit2)));
|
||||
lit_magic_string_ex_id_t magic_str_ex_id = lit_magic_record_ex_get_magic_str_id (lit2);
|
||||
return lit_literal_equal_utf8 (lit1,
|
||||
lit_get_magic_string_ex_utf8 (magic_str_ex_id),
|
||||
lit_get_magic_string_ex_size (magic_str_ex_id));
|
||||
}
|
||||
case lit_literal_storage_t::LIT_NUMBER:
|
||||
{
|
||||
@@ -366,15 +379,16 @@ lit_literal_equal (literal_t lit1, /**< first literal */
|
||||
} /* lit_literal_equal */
|
||||
|
||||
/**
|
||||
* Check if literal equals to zero-terminated string.
|
||||
* Check if literal equals to utf-8 string.
|
||||
* Check that literal is a string literal before performing detailed comparison.
|
||||
*
|
||||
* @return true if equal
|
||||
* false otherwise
|
||||
*/
|
||||
bool
|
||||
lit_literal_equal_type_zt (literal_t lit, /**< literal to compare */
|
||||
const ecma_char_t *str) /**< zero-terminated string */
|
||||
lit_literal_equal_type_utf8 (literal_t lit, /**< literal to compare */
|
||||
const lit_utf8_byte_t *str_p, /**< utf-8 string */
|
||||
lit_utf8_size_t str_size) /**< string size */
|
||||
{
|
||||
if (lit->get_type () != LIT_STR_T
|
||||
&& lit->get_type () != LIT_MAGIC_STR_T
|
||||
@@ -383,8 +397,22 @@ lit_literal_equal_type_zt (literal_t lit, /**< literal to compare */
|
||||
return false;
|
||||
}
|
||||
|
||||
return lit_literal_equal_zt (lit, str);
|
||||
} /* lit_literal_equal_type_zt */
|
||||
return lit_literal_equal_utf8 (lit, str_p, str_size);
|
||||
} /* lit_literal_equal_type_utf8 */
|
||||
|
||||
/**
|
||||
* Check if literal equals to C string.
|
||||
* Check that literal is a string literal before performing detailed comparison.
|
||||
*
|
||||
* @return true if equal
|
||||
* false otherwise
|
||||
*/
|
||||
bool
|
||||
lit_literal_equal_type_cstr (literal_t lit, /**< literal to compare */
|
||||
const char *c_str_p) /**< zero-terminated C-string */
|
||||
{
|
||||
return lit_literal_equal_type_utf8 (lit, (const lit_utf8_byte_t *) c_str_p, (lit_utf8_size_t) strlen (c_str_p));
|
||||
} /* lit_literal_equal_type_cstr */
|
||||
|
||||
/**
|
||||
* Check if literal contains the string equal to the passed number.
|
||||
@@ -432,12 +460,12 @@ lit_literal_equal_type (literal_t lit1, /**< first literal */
|
||||
*
|
||||
* @return pointer to the zero-terminated string.
|
||||
*/
|
||||
const ecma_char_t *
|
||||
lit_literal_to_charset (literal_t lit, /**< literal to be processed */
|
||||
ecma_char_t *buff, /**< buffer to use as a string storage */
|
||||
size_t size) /**< size of the buffer */
|
||||
const lit_utf8_byte_t *
|
||||
lit_literal_to_utf8_string (literal_t lit, /**< literal to be processed */
|
||||
lit_utf8_byte_t *buff_p, /**< buffer to use as a string storage */
|
||||
size_t size) /**< size of the buffer */
|
||||
{
|
||||
JERRY_ASSERT (buff != NULL && size > sizeof (ecma_char_t));
|
||||
JERRY_ASSERT (buff_p != NULL && size > 0);
|
||||
rcs_record_t::type_t type = lit->get_type ();
|
||||
|
||||
switch (type)
|
||||
@@ -445,35 +473,28 @@ lit_literal_to_charset (literal_t lit, /**< literal to be processed */
|
||||
case LIT_STR_T:
|
||||
{
|
||||
lit_charset_record_t *ch_rec_p = static_cast<lit_charset_record_t *> (lit);
|
||||
ecma_length_t index = ch_rec_p->get_charset (buff, size);
|
||||
|
||||
if (index != 0 && ((size_t)index + 1) * sizeof (ecma_char_t) > size)
|
||||
{
|
||||
index--;
|
||||
}
|
||||
buff[index] = '\0';
|
||||
|
||||
return buff;
|
||||
ch_rec_p->get_charset (buff_p, size);
|
||||
return buff_p;
|
||||
}
|
||||
case LIT_MAGIC_STR_T:
|
||||
{
|
||||
return lit_get_magic_string_zt (lit_magic_record_get_magic_str_id (lit));
|
||||
return lit_get_magic_string_utf8 (lit_magic_record_get_magic_str_id (lit));
|
||||
}
|
||||
case LIT_MAGIC_STR_EX_T:
|
||||
{
|
||||
return lit_get_magic_string_ex_zt (lit_magic_record_ex_get_magic_str_id (lit));
|
||||
return lit_get_magic_string_ex_utf8 (lit_magic_record_ex_get_magic_str_id (lit));
|
||||
}
|
||||
case LIT_NUMBER_T:
|
||||
{
|
||||
ecma_number_to_zt_string (static_cast<lit_number_record_t *> (lit)->get_number (), buff, (ssize_t)size);
|
||||
ecma_number_to_utf8_string (static_cast<lit_number_record_t *> (lit)->get_number (), buff_p, (ssize_t)size);
|
||||
|
||||
return buff;
|
||||
return buff_p;
|
||||
}
|
||||
default: JERRY_UNREACHABLE ();
|
||||
}
|
||||
|
||||
JERRY_UNREACHABLE ();
|
||||
} /* lit_literal_to_charset */
|
||||
} /* lit_literal_to_utf8_string */
|
||||
|
||||
/**
|
||||
* Get the contents of the literal as a C string.
|
||||
@@ -484,10 +505,10 @@ lit_literal_to_charset (literal_t lit, /**< literal to be processed */
|
||||
const char *
|
||||
lit_literal_to_str_internal_buf (literal_t lit) /**< literal */
|
||||
{
|
||||
const ecma_length_t buff_size = ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER;
|
||||
static ecma_char_t buff[buff_size];
|
||||
static lit_utf8_byte_t buff[ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER + 1];
|
||||
memset (buff, 0, sizeof (buff));
|
||||
|
||||
return (const char *)lit_literal_to_charset (lit, buff, buff_size);
|
||||
return (const char *) lit_literal_to_utf8_string (lit, buff, sizeof (buff) - 1);
|
||||
} /* lit_literal_to_str_internal_buf */
|
||||
|
||||
|
||||
@@ -544,10 +565,44 @@ lit_magic_record_ex_get_magic_str_id (literal_t lit) /**< literal */
|
||||
return static_cast<lit_magic_record_t *> (lit)->get_magic_str_id<lit_magic_string_ex_id_t> ();
|
||||
} /* lit_magic_record_ex_get_magic_str_id */
|
||||
|
||||
/**
 * Get size of the charset literal
 *
 * @return value of the charset record's get_length ()
 *         (the same value is used as the utf-8 byte count in lit_charset_record_get_length)
 */
lit_utf8_size_t
lit_charset_record_get_size (literal_t lit) /**< literal */
{
  lit_charset_record_t *charset_record_p = static_cast<lit_charset_record_t *> (lit);

  return charset_record_p->get_length ();
} /* lit_charset_record_get_size */
|
||||
|
||||
/**
|
||||
* Get length of the literal
|
||||
*
|
||||
* @return code units count
|
||||
*/
|
||||
ecma_length_t
|
||||
lit_charset_record_get_length (literal_t lit) /**< literal */
|
||||
{
|
||||
return static_cast<lit_charset_record_t *> (lit)->get_length ();;
|
||||
TODO ("Add special case for literals which doesn't contain long characters");
|
||||
|
||||
lit_charset_record_t *charset_record_p = static_cast<lit_charset_record_t *> (lit);
|
||||
rcs_record_iterator_t lit_iter (&lit_storage, lit);
|
||||
lit_iter.skip (lit_charset_record_t::header_size ());
|
||||
|
||||
lit_utf8_size_t lit_utf8_str_size = charset_record_p->get_length ();
|
||||
ecma_length_t length = 0;
|
||||
for (lit_utf8_size_t i = 0; i < lit_utf8_str_size;)
|
||||
{
|
||||
lit_utf8_byte_t byte = lit_iter.read <lit_utf8_byte_t> ();
|
||||
lit_utf8_size_t bytes_to_skip = lit_get_unicode_char_size_by_utf8_first_byte (byte);
|
||||
lit_iter.skip (bytes_to_skip);
|
||||
i += bytes_to_skip;
|
||||
|
||||
length++;
|
||||
}
|
||||
|
||||
#ifndef JERRY_NDEBUG
|
||||
lit_iter.skip (charset_record_p->get_alignment_bytes_count ());
|
||||
JERRY_ASSERT (lit_iter.finished ());
|
||||
#endif
|
||||
|
||||
return length;
|
||||
} /* lit_charset_record_get_length */
|
||||
|
||||
ecma_number_t
|
||||
|
||||
@@ -16,8 +16,9 @@
|
||||
#ifndef LIT_LITERAL_H
|
||||
#define LIT_LITERAL_H
|
||||
|
||||
#include "ecma-globals.h"
|
||||
#include "lit-globals.h"
|
||||
#include "lit-literal-storage.h"
|
||||
#include "lit-magic-strings.h"
|
||||
|
||||
#define LITERAL_TO_REWRITE (INVALID_VALUE - 1)
|
||||
|
||||
@@ -25,30 +26,31 @@ void lit_init ();
|
||||
void lit_finalize ();
|
||||
void lit_dump_literals ();
|
||||
|
||||
literal_t lit_create_literal_from_charset (const ecma_char_t *, ecma_length_t);
|
||||
literal_t lit_find_literal_by_charset (const ecma_char_t *, ecma_length_t);
|
||||
literal_t lit_find_or_create_literal_from_charset (const ecma_char_t *, ecma_length_t);
|
||||
literal_t lit_create_literal_from_utf8_string (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
literal_t lit_find_literal_by_utf8_string (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
literal_t lit_find_or_create_literal_from_utf8_string (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
|
||||
literal_t lit_create_literal_from_num (ecma_number_t);
|
||||
literal_t lit_find_literal_by_num (ecma_number_t);
|
||||
literal_t lit_find_or_create_literal_from_num (ecma_number_t);
|
||||
|
||||
bool lit_literal_equal_zt (literal_t, const ecma_char_t *);
|
||||
bool lit_literal_equal_utf8 (literal_t, const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
bool lit_literal_equal_num (literal_t, ecma_number_t);
|
||||
bool lit_literal_equal (literal_t, literal_t);
|
||||
|
||||
bool lit_literal_equal_type_zt (literal_t, const ecma_char_t *);
|
||||
bool lit_literal_equal_type_utf8 (literal_t, const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
bool lit_literal_equal_type_cstr (literal_t, const char *);
|
||||
bool lit_literal_equal_type_num (literal_t, ecma_number_t);
|
||||
bool lit_literal_equal_type (literal_t, literal_t);
|
||||
|
||||
const ecma_char_t *lit_literal_to_charset (literal_t, ecma_char_t *, size_t);
|
||||
|
||||
const lit_utf8_byte_t *lit_literal_to_utf8_string (literal_t, lit_utf8_byte_t *, size_t);
|
||||
const char *lit_literal_to_str_internal_buf (literal_t);
|
||||
|
||||
literal_t lit_get_literal_by_cp (lit_cpointer_t);
|
||||
|
||||
lit_string_hash_t lit_charset_literal_get_hash (literal_t);
|
||||
ecma_number_t lit_charset_literal_get_number (literal_t);
|
||||
lit_utf8_size_t lit_charset_record_get_size (literal_t);
|
||||
ecma_length_t lit_charset_record_get_length (literal_t);
|
||||
|
||||
lit_magic_string_id_t lit_magic_record_get_magic_str_id (literal_t);
|
||||
|
||||
@@ -15,26 +15,26 @@
|
||||
|
||||
#include "lit-magic-strings.h"
|
||||
|
||||
#include "ecma-helpers.h"
|
||||
#include "lit-strings.h"
|
||||
|
||||
/**
|
||||
* Lengths of magic strings
|
||||
*/
|
||||
static ecma_length_t lit_magic_string_lengths[LIT_MAGIC_STRING__COUNT];
|
||||
static lit_utf8_size_t lit_magic_string_sizes[LIT_MAGIC_STRING__COUNT];
|
||||
|
||||
/**
|
||||
* External magic strings data array, count and sizes (in bytes)
|
||||
*/
|
||||
static const ecma_char_ptr_t *lit_magic_string_ex_array = NULL;
|
||||
static const lit_utf8_byte_t **lit_magic_string_ex_array = NULL;
|
||||
static uint32_t lit_magic_string_ex_count = 0;
|
||||
static const ecma_length_t *lit_magic_string_ex_lengths = NULL;
|
||||
static const lit_utf8_size_t *lit_magic_string_ex_sizes = NULL;
|
||||
|
||||
#ifndef JERRY_NDEBUG
|
||||
/**
|
||||
* Maximum length among lengths of magic strings
|
||||
*/
|
||||
static ecma_length_t lit_magic_string_max_length;
|
||||
#endif /* !JERRY_NDEBUG */
|
||||
static ecma_length_t ecma_magic_string_max_length;
|
||||
#endif /* !JERRY_NDEBUG */
|
||||
|
||||
/**
|
||||
* Initialize data for string helpers
|
||||
@@ -45,22 +45,22 @@ lit_magic_strings_init (void)
|
||||
/* Initializing magic strings information */
|
||||
|
||||
#ifndef JERRY_NDEBUG
|
||||
lit_magic_string_max_length = 0;
|
||||
ecma_magic_string_max_length = 0;
|
||||
#endif /* !JERRY_NDEBUG */
|
||||
|
||||
for (lit_magic_string_id_t id = (lit_magic_string_id_t) 0;
|
||||
id < LIT_MAGIC_STRING__COUNT;
|
||||
id = (lit_magic_string_id_t) (id + 1))
|
||||
{
|
||||
lit_magic_string_lengths[id] = ecma_zt_string_length (lit_get_magic_string_zt (id));
|
||||
lit_magic_string_sizes[id] = lit_zt_utf8_string_size (lit_get_magic_string_utf8 (id));
|
||||
|
||||
#ifndef JERRY_NDEBUG
|
||||
lit_magic_string_max_length = JERRY_MAX (lit_magic_string_max_length, lit_magic_string_lengths[id]);
|
||||
ecma_magic_string_max_length = JERRY_MAX (ecma_magic_string_max_length, lit_magic_string_sizes[id]);
|
||||
|
||||
JERRY_ASSERT (lit_magic_string_max_length <= LIT_MAGIC_STRING_LENGTH_LIMIT);
|
||||
JERRY_ASSERT (ecma_magic_string_max_length <= LIT_MAGIC_STRING_LENGTH_LIMIT);
|
||||
#endif /* !JERRY_NDEBUG */
|
||||
}
|
||||
} /* ecma_strings_init */
|
||||
} /* lit_magic_strings_init */
|
||||
|
||||
/**
|
||||
* Initialize external magic strings
|
||||
@@ -70,44 +70,8 @@ lit_magic_strings_ex_init (void)
|
||||
{
|
||||
lit_magic_string_ex_array = NULL;
|
||||
lit_magic_string_ex_count = 0;
|
||||
lit_magic_string_ex_lengths = NULL;
|
||||
} /* ecma_strings_ex_init */
|
||||
|
||||
/**
|
||||
* Register external magic strings
|
||||
*/
|
||||
void
|
||||
lit_magic_strings_ex_set (const ecma_char_ptr_t* ex_str_items, /**< character arrays, representing
|
||||
* external magic strings' contents */
|
||||
uint32_t count, /**< number of the strings */
|
||||
const ecma_length_t* ex_str_lengths) /**< lengths of the strings */
|
||||
{
|
||||
JERRY_ASSERT (ex_str_items != NULL);
|
||||
JERRY_ASSERT (count > 0);
|
||||
JERRY_ASSERT (ex_str_lengths != NULL);
|
||||
|
||||
JERRY_ASSERT (lit_magic_string_ex_array == NULL);
|
||||
JERRY_ASSERT (lit_magic_string_ex_count == 0);
|
||||
JERRY_ASSERT (lit_magic_string_ex_lengths == NULL);
|
||||
|
||||
/* Set external magic strings information */
|
||||
lit_magic_string_ex_array = ex_str_items;
|
||||
lit_magic_string_ex_count = count;
|
||||
lit_magic_string_ex_lengths = ex_str_lengths;
|
||||
|
||||
#ifndef JERRY_NDEBUG
|
||||
for (lit_magic_string_ex_id_t id = (lit_magic_string_ex_id_t) 0;
|
||||
id < lit_magic_string_ex_count;
|
||||
id = (lit_magic_string_ex_id_t) (id + 1))
|
||||
{
|
||||
JERRY_ASSERT (lit_magic_string_ex_lengths[id] == ecma_zt_string_length (lit_get_magic_string_ex_zt (id)));
|
||||
|
||||
lit_magic_string_max_length = JERRY_MAX (lit_magic_string_max_length, lit_magic_string_ex_lengths[id]);
|
||||
|
||||
JERRY_ASSERT (lit_magic_string_max_length <= LIT_MAGIC_STRING_LENGTH_LIMIT);
|
||||
}
|
||||
#endif /* !JERRY_NDEBUG */
|
||||
} /* ecma_strings_ex_init */
|
||||
lit_magic_string_ex_sizes = NULL;
|
||||
} /* lit_magic_strings_ex_init */
|
||||
|
||||
/**
|
||||
* Get number of external magic strings
|
||||
@@ -116,25 +80,23 @@ lit_magic_strings_ex_set (const ecma_char_ptr_t* ex_str_items, /**< character ar
|
||||
* zero - otherwise.
|
||||
*/
|
||||
uint32_t
|
||||
ecma_get_magic_string_ex_count (void)
|
||||
lit_get_magic_string_ex_count (void)
|
||||
{
|
||||
return lit_magic_string_ex_count;
|
||||
} /* ecma_get_magic_string_ex_count */
|
||||
} /* lit_get_magic_string_ex_count */
|
||||
|
||||
/**
|
||||
* Get specified magic string as zero-terminated string
|
||||
*
|
||||
* @return pointer to zero-terminated magic string
|
||||
*/
|
||||
const ecma_char_t *
|
||||
lit_get_magic_string_zt (lit_magic_string_id_t id) /**< magic string id */
|
||||
const lit_utf8_byte_t *
|
||||
lit_get_magic_string_utf8 (lit_magic_string_id_t id) /**< magic string id */
|
||||
{
|
||||
TODO (Support UTF-16);
|
||||
|
||||
switch (id)
|
||||
{
|
||||
#define LIT_MAGIC_STRING_DEF(id, ascii_zt_string) \
|
||||
case id: return (ecma_char_t*) ascii_zt_string;
|
||||
#define LIT_MAGIC_STRING_DEF(id, utf8_string) \
|
||||
case id: return (lit_utf8_byte_t*) utf8_string;
|
||||
#include "lit-magic-strings.inc.h"
|
||||
#undef LIT_MAGIC_STRING_DEF
|
||||
|
||||
@@ -142,58 +104,94 @@ lit_get_magic_string_zt (lit_magic_string_id_t id) /**< magic string id */
|
||||
}
|
||||
|
||||
JERRY_UNREACHABLE ();
|
||||
} /* lit_get_magic_string_zt */
|
||||
} /* lit_get_magic_string_utf8 */
|
||||
|
||||
/**
|
||||
* Get length of specified magic string
|
||||
* Get size of specified magic string
|
||||
*
|
||||
* @return length
|
||||
* @return size in bytes
|
||||
*/
|
||||
ecma_length_t
|
||||
lit_get_magic_string_length (lit_magic_string_id_t id) /**< magic string id */
|
||||
lit_utf8_size_t
|
||||
lit_get_magic_string_size (lit_magic_string_id_t id) /**< magic string id */
|
||||
{
|
||||
return lit_magic_string_lengths[id];
|
||||
} /* ecma_get_magic_string_size */
|
||||
return lit_magic_string_sizes[id];
|
||||
} /* lit_get_magic_string_size */
|
||||
|
||||
/**
|
||||
* Get specified magic string as zero-terminated string from external table
|
||||
*
|
||||
* @return pointer to zero-terminated magic string
|
||||
*/
|
||||
const ecma_char_t*
|
||||
lit_get_magic_string_ex_zt (lit_magic_string_ex_id_t id) /**< extern magic string id */
|
||||
const lit_utf8_byte_t *
|
||||
lit_get_magic_string_ex_utf8 (lit_magic_string_ex_id_t id) /**< extern magic string id */
|
||||
{
|
||||
TODO (Support UTF-16);
|
||||
|
||||
if (lit_magic_string_ex_array && id < lit_magic_string_ex_count)
|
||||
{
|
||||
return lit_magic_string_ex_array[id];
|
||||
}
|
||||
|
||||
JERRY_UNREACHABLE ();
|
||||
} /* lit_get_magic_string_ex_zt */
|
||||
} /* lit_get_magic_string_ex_utf8 */
|
||||
|
||||
/**
|
||||
* Get length of specified external magic string
|
||||
* Get size of specified external magic string
|
||||
*
|
||||
* @return length
|
||||
* @return size in bytes
|
||||
*/
|
||||
ecma_length_t
|
||||
lit_get_magic_string_ex_length (lit_magic_string_ex_id_t id) /**< external magic string id */
|
||||
lit_utf8_size_t
|
||||
lit_get_magic_string_ex_size (lit_magic_string_ex_id_t id) /**< external magic string id */
|
||||
{
|
||||
return lit_magic_string_ex_lengths[id];
|
||||
} /* lit_get_magic_string_ex_length */
|
||||
return lit_magic_string_ex_sizes[id];
|
||||
} /* lit_get_magic_string_ex_size */
|
||||
|
||||
/**
|
||||
* Check if passed zt-string equals to one of magic strings
|
||||
* Register external magic strings
|
||||
*/
|
||||
void
|
||||
lit_magic_strings_ex_set (const lit_utf8_byte_t **ex_str_items, /**< character arrays, representing
|
||||
* external magic strings' contents */
|
||||
uint32_t count, /**< number of the strings */
|
||||
const lit_utf8_size_t *ex_str_sizes) /**< sizes of the strings */
|
||||
{
|
||||
JERRY_ASSERT (ex_str_items != NULL);
|
||||
JERRY_ASSERT (count > 0);
|
||||
JERRY_ASSERT (ex_str_sizes != NULL);
|
||||
|
||||
JERRY_ASSERT (lit_magic_string_ex_array == NULL);
|
||||
JERRY_ASSERT (lit_magic_string_ex_count == 0);
|
||||
JERRY_ASSERT (lit_magic_string_ex_sizes == NULL);
|
||||
|
||||
/* Set external magic strings information */
|
||||
lit_magic_string_ex_array = ex_str_items;
|
||||
lit_magic_string_ex_count = count;
|
||||
lit_magic_string_ex_sizes = ex_str_sizes;
|
||||
|
||||
#ifndef JERRY_NDEBUG
|
||||
for (lit_magic_string_ex_id_t id = (lit_magic_string_ex_id_t) 0;
|
||||
id < lit_magic_string_ex_count;
|
||||
id = (lit_magic_string_ex_id_t) (id + 1))
|
||||
{
|
||||
JERRY_ASSERT (lit_magic_string_ex_sizes[id] == lit_zt_utf8_string_size (lit_get_magic_string_ex_utf8 (id)));
|
||||
|
||||
ecma_magic_string_max_length = JERRY_MAX (ecma_magic_string_max_length, lit_magic_string_ex_sizes[id]);
|
||||
|
||||
JERRY_ASSERT (ecma_magic_string_max_length <= LIT_MAGIC_STRING_LENGTH_LIMIT);
|
||||
}
|
||||
#endif /* !JERRY_NDEBUG */
|
||||
} /* lit_magic_strings_ex_set */
|
||||
|
||||
|
||||
/**
|
||||
* Check if passed utf-8 string equals to one of magic strings
|
||||
* and if equal magic string was found, return it's id in 'out_id_p' argument.
|
||||
*
|
||||
* @return true - if magic string equal to passed string was found,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_is_zt_string_magic (const ecma_char_t *zt_string_p, /**< zero-terminated string */
|
||||
lit_magic_string_id_t *out_id_p) /**< out: magic string's id */
|
||||
lit_is_utf8_string_magic (const lit_utf8_byte_t *string_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string_size, /**< string size in bytes */
|
||||
lit_magic_string_id_t *out_id_p) /**< out: magic string's id */
|
||||
{
|
||||
TODO (Improve performance of search);
|
||||
|
||||
@@ -201,7 +199,7 @@ lit_is_zt_string_magic (const ecma_char_t *zt_string_p, /**< zero-terminated str
|
||||
id < LIT_MAGIC_STRING__COUNT;
|
||||
id = (lit_magic_string_id_t) (id + 1))
|
||||
{
|
||||
if (ecma_compare_zt_strings (zt_string_p, lit_get_magic_string_zt (id)))
|
||||
if (lit_compare_utf8_string_and_magic_string (string_p, string_size, id))
|
||||
{
|
||||
*out_id_p = id;
|
||||
|
||||
@@ -212,18 +210,18 @@ lit_is_zt_string_magic (const ecma_char_t *zt_string_p, /**< zero-terminated str
|
||||
*out_id_p = LIT_MAGIC_STRING__COUNT;
|
||||
|
||||
return false;
|
||||
} /* lit_is_zt_string_magic */
|
||||
} /* lit_is_utf8_string_magic */
|
||||
|
||||
/**
|
||||
* Check if passed zt-string equals to one of external magic strings
|
||||
* Check if passed utf-8 string equals to one of external magic strings
|
||||
* and if equal magic string was found, return it's id in 'out_id_p' argument.
|
||||
*
|
||||
* @return true - if external magic string equal to passed string was found,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_is_zt_ex_string_magic (const ecma_char_t *zt_string_p, /**< zero-terminated string */
|
||||
lit_magic_string_ex_id_t *out_id_p) /**< out: external magic string's id */
|
||||
bool lit_is_ex_utf8_string_magic (const lit_utf8_byte_t *string_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string_size, /**< string size in bytes */
|
||||
lit_magic_string_ex_id_t *out_id_p) /**< out: magic string's id */
|
||||
{
|
||||
TODO (Improve performance of search);
|
||||
|
||||
@@ -231,7 +229,7 @@ lit_is_zt_ex_string_magic (const ecma_char_t *zt_string_p, /**< zero-terminated
|
||||
id < lit_magic_string_ex_count;
|
||||
id = (lit_magic_string_ex_id_t) (id + 1))
|
||||
{
|
||||
if (ecma_compare_zt_strings (zt_string_p, lit_get_magic_string_ex_zt (id)))
|
||||
if (lit_compare_utf8_string_and_magic_string_ex (string_p, string_size, id))
|
||||
{
|
||||
*out_id_p = id;
|
||||
|
||||
@@ -242,4 +240,70 @@ lit_is_zt_ex_string_magic (const ecma_char_t *zt_string_p, /**< zero-terminated
|
||||
*out_id_p = lit_magic_string_ex_count;
|
||||
|
||||
return false;
|
||||
} /* lit_is_zt_ex_string_magic */
|
||||
} /* lit_is_ex_utf8_string_magic */
|
||||
|
||||
/**
|
||||
* Compare utf-8 string and magic string for equality
|
||||
*
|
||||
* @return true if strings are equal
|
||||
* false otherwise
|
||||
*/
|
||||
bool
|
||||
lit_compare_utf8_string_and_magic_string (const lit_utf8_byte_t *string_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string_size, /**< string size in bytes */
|
||||
lit_magic_string_id_t magic_string_id) /**< magic string's id */
|
||||
{
|
||||
return lit_compare_utf8_strings (string_p,
|
||||
string_size,
|
||||
lit_get_magic_string_utf8 (magic_string_id),
|
||||
lit_get_magic_string_size (magic_string_id));
|
||||
} /* lit_compare_utf8_string_and_magic_string */
|
||||
|
||||
/**
|
||||
* Compare utf-8 string and external magic string for equality
|
||||
*
|
||||
* @return true if strings are equal
|
||||
* false otherwise
|
||||
*/
|
||||
bool
|
||||
lit_compare_utf8_string_and_magic_string_ex (const lit_utf8_byte_t *string_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string_size, /**< string size in bytes */
|
||||
lit_magic_string_ex_id_t magic_string_ex_id) /**< external magic string's
|
||||
* id */
|
||||
{
|
||||
return lit_compare_utf8_strings (string_p,
|
||||
string_size,
|
||||
lit_get_magic_string_ex_utf8 (magic_string_ex_id),
|
||||
lit_get_magic_string_ex_size (magic_string_ex_id));
|
||||
} /* lit_compare_utf8_string_and_magic_string_ex */
|
||||
|
||||
/**
|
||||
* Copy magic string to buffer
|
||||
*
|
||||
* Warning:
|
||||
* the routine requires that buffer size is enough
|
||||
*
|
||||
* @return pointer to the byte next to the last copied in the buffer
|
||||
*/
|
||||
extern lit_utf8_byte_t *
|
||||
lit_copy_magic_string_to_buffer (lit_magic_string_id_t id, /**< magic string id */
|
||||
lit_utf8_byte_t *buffer_p, /**< destination buffer */
|
||||
ssize_t buffer_size) /**< size of buffer */
|
||||
{
|
||||
const lit_utf8_byte_t *magic_string_bytes_p = lit_get_magic_string_utf8 (id);
|
||||
lit_utf8_size_t magic_string_bytes_count = lit_get_magic_string_size (id);
|
||||
|
||||
const lit_utf8_byte_t *str_iter_p = magic_string_bytes_p;
|
||||
lit_utf8_byte_t *buf_iter_p = buffer_p;
|
||||
ssize_t bytes_copied = 0;
|
||||
|
||||
while (magic_string_bytes_count--)
|
||||
{
|
||||
bytes_copied ++;
|
||||
JERRY_ASSERT (bytes_copied <= buffer_size);
|
||||
|
||||
*buf_iter_p++ = *str_iter_p++;
|
||||
}
|
||||
|
||||
return buf_iter_p;
|
||||
} /* lit_copy_magic_string_to_buffer */
|
||||
|
||||
@@ -44,18 +44,35 @@ typedef uint32_t lit_magic_string_ex_id_t;
|
||||
extern void lit_magic_strings_init (void);
|
||||
extern void lit_magic_strings_ex_init (void);
|
||||
|
||||
extern void lit_magic_strings_ex_set (const ecma_char_ptr_t *,
|
||||
uint32_t,
|
||||
const ecma_length_t *);
|
||||
extern uint32_t ecma_get_magic_string_ex_count (void);
|
||||
extern uint32_t lit_get_magic_string_ex_count (void);
|
||||
|
||||
extern const ecma_char_t *lit_get_magic_string_zt (lit_magic_string_id_t);
|
||||
extern ecma_length_t lit_get_magic_string_length (lit_magic_string_id_t);
|
||||
extern const lit_utf8_byte_t *lit_get_magic_string_utf8 (lit_magic_string_id_t);
|
||||
extern lit_utf8_size_t lit_get_magic_string_size (lit_magic_string_id_t);
|
||||
|
||||
extern const ecma_char_t *lit_get_magic_string_ex_zt (lit_magic_string_ex_id_t);
|
||||
extern ecma_length_t lit_get_magic_string_ex_length (lit_magic_string_ex_id_t);
|
||||
extern const lit_utf8_byte_t *lit_get_magic_string_ex_utf8 (lit_magic_string_ex_id_t);
|
||||
extern lit_utf8_size_t lit_get_magic_string_ex_size (lit_magic_string_ex_id_t);
|
||||
|
||||
extern bool lit_is_zt_string_magic (const ecma_char_t *, lit_magic_string_id_t *);
|
||||
extern bool lit_is_zt_ex_string_magic (const ecma_char_t *, lit_magic_string_ex_id_t *);
|
||||
extern void lit_magic_strings_ex_set (const lit_utf8_byte_t **,
|
||||
uint32_t count,
|
||||
const lit_utf8_size_t *);
|
||||
|
||||
extern bool lit_is_utf8_string_magic (const lit_utf8_byte_t *,
|
||||
lit_utf8_size_t,
|
||||
lit_magic_string_id_t *);
|
||||
extern bool lit_is_ex_utf8_string_magic (const lit_utf8_byte_t *,
|
||||
lit_utf8_size_t,
|
||||
lit_magic_string_ex_id_t *);
|
||||
|
||||
extern bool lit_compare_utf8_string_and_magic_string (const lit_utf8_byte_t *,
|
||||
lit_utf8_size_t,
|
||||
lit_magic_string_id_t);
|
||||
|
||||
extern bool lit_compare_utf8_string_and_magic_string_ex (const lit_utf8_byte_t *,
|
||||
lit_utf8_size_t,
|
||||
lit_magic_string_ex_id_t);
|
||||
|
||||
extern lit_utf8_byte_t *lit_copy_magic_string_to_buffer (lit_magic_string_id_t,
|
||||
lit_utf8_byte_t *buffer_p,
|
||||
ssize_t buffer_size);
|
||||
|
||||
#endif /* LIT_MAGIC_STRINGS_H */
|
||||
|
||||
@@ -0,0 +1,579 @@
|
||||
/* Copyright 2015 Samsung Electronics Co., Ltd.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "lit-strings.h"
|
||||
|
||||
#include "jrt-libc-includes.h"
|
||||
|
||||
/**
|
||||
* For the formal definition of Unicode transformation formats (UTF) see Section 3.9, Unicode Encoding Forms in The
|
||||
* Unicode Standard (http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G7404, tables 3-6, 3-7).
|
||||
*/
|
||||
#define LIT_UNICODE_CODE_POINT_NULL (0x0)
|
||||
#define LIT_UNICODE_CODE_POINT_MAX (0x10FFFF)
|
||||
|
||||
#define LIT_UTF16_CODE_UNIT_MAX (0xFFFF)
|
||||
#define LIT_UTF16_FIRST_SURROGATE_CODE_POINT (0x10000)
|
||||
#define LIT_UTF16_LOW_SURROGATE_MARKER (0xDC00)
|
||||
#define LIT_UTF16_HIGH_SURROGATE_MARKER (0xD800)
|
||||
#define LIT_UTF16_HIGH_SURROGATE_MIN (0xD800)
|
||||
#define LIT_UTF16_HIGH_SURROGATE_MAX (0xDBFF)
|
||||
#define LIT_UTF16_LOW_SURROGATE_MIN (0xDC00)
|
||||
#define LIT_UTF16_LOW_SURROGATE_MAX (0xDFFF)
|
||||
#define LIT_UTF16_BITS_IN_SURROGATE (10)
|
||||
#define LIT_UTF16_LAST_10_BITS_MASK (0x3FF)
|
||||
|
||||
#define LIT_UTF8_1_BYTE_MARKER (0x00)
|
||||
#define LIT_UTF8_2_BYTE_MARKER (0xC0)
|
||||
#define LIT_UTF8_3_BYTE_MARKER (0xE0)
|
||||
#define LIT_UTF8_4_BYTE_MARKER (0xF0)
|
||||
#define LIT_UTF8_EXTRA_BYTE_MARKER (0x80)
|
||||
|
||||
#define LIT_UTF8_1_BYTE_MASK (0x80)
|
||||
#define LIT_UTF8_2_BYTE_MASK (0xE0)
|
||||
#define LIT_UTF8_3_BYTE_MASK (0xF0)
|
||||
#define LIT_UTF8_4_BYTE_MASK (0xF8)
|
||||
#define LIT_UTF8_EXTRA_BYTE_MASK (0xC0)
|
||||
|
||||
#define LIT_UTF8_LAST_7_BITS_MASK (0x7F)
|
||||
#define LIT_UTF8_LAST_6_BITS_MASK (0x3F)
|
||||
#define LIT_UTF8_LAST_5_BITS_MASK (0x1F)
|
||||
#define LIT_UTF8_LAST_4_BITS_MASK (0x0F)
|
||||
#define LIT_UTF8_LAST_3_BITS_MASK (0x07)
|
||||
#define LIT_UTF8_LAST_2_BITS_MASK (0x03)
|
||||
#define LIT_UTF8_LAST_1_BIT_MASK (0x01)
|
||||
|
||||
#define LIT_UTF8_BITS_IN_EXTRA_BYTES (6)
|
||||
|
||||
#define LIT_UTF8_1_BYTE_CODE_POINT_MAX (0x7F)
|
||||
#define LIT_UTF8_2_BYTE_CODE_POINT_MIN (0x80)
|
||||
#define LIT_UTF8_2_BYTE_CODE_POINT_MAX (0x7FF)
|
||||
#define LIT_UTF8_3_BYTE_CODE_POINT_MIN (0x800)
|
||||
#define LIT_UTF8_3_BYTE_CODE_POINT_MAX (LIT_UTF16_CODE_UNIT_MAX)
|
||||
/* 4-byte utf-8 sequences encode code points 0x10000 - 0x10FFFF; anything lower is an overlong encoding */
#define LIT_UTF8_4_BYTE_CODE_POINT_MIN (0x10000)
|
||||
#define LIT_UTF8_4_BYTE_CODE_POINT_MAX (LIT_UNICODE_CODE_POINT_MAX)
|
||||
|
||||
/**
|
||||
* Validate utf-8 string
|
||||
*
|
||||
* NOTE:
|
||||
* Isolated surrogates are allowed.
|
||||
* Correct pair of surrogates is not allowed, it should be represented as 4-byte utf-8 character.
|
||||
*
|
||||
* @return true if utf-8 string is well-formed
|
||||
* false otherwise
|
||||
*/
|
||||
bool
|
||||
lit_is_utf8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */
|
||||
lit_utf8_size_t buf_size) /**< string size */
|
||||
{
|
||||
lit_utf8_size_t idx = 0;
|
||||
|
||||
bool is_prev_code_point_high_surrogate = false;
|
||||
while (idx < buf_size)
|
||||
{
|
||||
lit_utf8_byte_t c = utf8_buf_p[idx++];
|
||||
if ((c & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
lit_code_point_t code_point = 0;
|
||||
lit_code_point_t min_code_point = 0;
|
||||
lit_utf8_size_t extra_bytes_count;
|
||||
if ((c & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
|
||||
{
|
||||
extra_bytes_count = 1;
|
||||
min_code_point = LIT_UTF8_2_BYTE_CODE_POINT_MIN;
|
||||
code_point = ((uint32_t) (c & LIT_UTF8_LAST_5_BITS_MASK));
|
||||
}
|
||||
else if ((c & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER)
|
||||
{
|
||||
extra_bytes_count = 2;
|
||||
min_code_point = LIT_UTF8_3_BYTE_CODE_POINT_MIN;
|
||||
code_point = ((uint32_t) (c & LIT_UTF8_LAST_4_BITS_MASK));
|
||||
}
|
||||
else if ((c & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
|
||||
{
|
||||
extra_bytes_count = 3;
|
||||
min_code_point = LIT_UTF8_4_BYTE_CODE_POINT_MIN;
|
||||
code_point = ((uint32_t) (c & LIT_UTF8_LAST_3_BITS_MASK));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* utf-8 string could not contain 5- and 6-byte sequences. */
|
||||
return false;
|
||||
}
|
||||
|
||||
if (idx + extra_bytes_count > buf_size)
|
||||
{
|
||||
/* utf-8 string breaks in the middle */
|
||||
return false;
|
||||
}
|
||||
|
||||
for (lit_utf8_size_t offset = 0; offset < extra_bytes_count; ++offset)
|
||||
{
|
||||
c = utf8_buf_p[idx + offset];
|
||||
if ((c & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER)
|
||||
{
|
||||
/* invalid continuation byte */
|
||||
return false;
|
||||
}
|
||||
code_point <<= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
code_point |= (c & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
}
|
||||
|
||||
if (code_point < min_code_point
|
||||
|| code_point > LIT_UNICODE_CODE_POINT_MAX)
|
||||
{
|
||||
/* utf-8 string doesn't encode valid unicode code point */
|
||||
return false;
|
||||
}
|
||||
|
||||
if (code_point >= LIT_UTF16_HIGH_SURROGATE_MIN
|
||||
&& code_point <= LIT_UTF16_HIGH_SURROGATE_MAX)
|
||||
{
|
||||
is_prev_code_point_high_surrogate = true;
|
||||
}
|
||||
else if (code_point >= LIT_UTF16_LOW_SURROGATE_MIN
|
||||
&& code_point <= LIT_UTF16_LOW_SURROGATE_MAX
|
||||
&& is_prev_code_point_high_surrogate)
|
||||
{
|
||||
/* sequence of high and low surrogate is not allowed */
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
is_prev_code_point_high_surrogate = false;
|
||||
}
|
||||
|
||||
idx += extra_bytes_count;
|
||||
}
|
||||
|
||||
return true;
|
||||
} /* lit_is_utf8_string_valid */
|
||||
|
||||
/**
|
||||
* Initialize iterator for traversing utf-8 string as a string of code units
|
||||
*
|
||||
* @return iterator
|
||||
*/
|
||||
lit_utf8_iterator_t
|
||||
lit_utf8_iterator_create (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */
|
||||
lit_utf8_size_t buf_size) /**< string size */
|
||||
{
|
||||
JERRY_ASSERT (utf8_buf_p || !buf_size);
|
||||
|
||||
lit_utf8_iterator_t buf_iter =
|
||||
{
|
||||
0,
|
||||
buf_size,
|
||||
utf8_buf_p,
|
||||
0,
|
||||
};
|
||||
|
||||
return buf_iter;
|
||||
} /* lit_utf8_iterator_create */
|
||||
|
||||
/**
|
||||
* Represents code point (>0xFFFF) as surrogate pair and returns its lower part
|
||||
*
|
||||
* @return lower code_unit of the surrogate pair
|
||||
*/
|
||||
static ecma_char_t
|
||||
convert_code_point_to_low_surrogate (lit_code_point_t code_point) /**< code point, should be > 0xFFFF */
|
||||
{
|
||||
JERRY_ASSERT (code_point > LIT_UTF16_CODE_UNIT_MAX);
|
||||
|
||||
ecma_char_t code_unit_bits;
|
||||
code_unit_bits = (ecma_char_t) (code_point & LIT_UTF16_LAST_10_BITS_MASK);
|
||||
|
||||
return (ecma_char_t) (LIT_UTF16_LOW_SURROGATE_MARKER | code_unit_bits);
|
||||
} /* convert_code_point_to_low_surrogate */
|
||||
|
||||
/**
|
||||
* Represents code point (>0xFFFF) as surrogate pair and returns its higher part
|
||||
*
|
||||
* @return higher code_unit of the surrogate pair
|
||||
*/
|
||||
static ecma_char_t
|
||||
convert_code_point_to_high_surrogate (lit_code_point_t code_point) /**< code point, should be > 0xFFFF */
|
||||
{
|
||||
JERRY_ASSERT (code_point > LIT_UTF16_CODE_UNIT_MAX);
|
||||
JERRY_ASSERT (code_point <= LIT_UNICODE_CODE_POINT_MAX);
|
||||
|
||||
ecma_char_t code_unit_bits;
|
||||
code_unit_bits = (ecma_char_t) ((code_point - LIT_UTF16_FIRST_SURROGATE_CODE_POINT) >> LIT_UTF16_BITS_IN_SURROGATE);
|
||||
|
||||
return (LIT_UTF16_HIGH_SURROGATE_MARKER | code_unit_bits);
|
||||
} /* convert_code_point_to_low_surrogate */
|
||||
|
||||
/**
|
||||
* Get next code unit form the iterated string and increment iterator to point to next code unit
|
||||
*
|
||||
* @return next code unit
|
||||
*/
|
||||
ecma_char_t
|
||||
lit_utf8_iterator_read_code_unit_and_increment (lit_utf8_iterator_t *buf_iter_p) /**< @in-out: utf-8 string iterator */
|
||||
{
|
||||
JERRY_ASSERT (!lit_utf8_iterator_reached_buffer_end (buf_iter_p));
|
||||
|
||||
if (buf_iter_p->code_point)
|
||||
{
|
||||
ecma_char_t code_unit = convert_code_point_to_low_surrogate (buf_iter_p->code_point);
|
||||
buf_iter_p->code_point = 0;
|
||||
return code_unit;
|
||||
}
|
||||
|
||||
lit_code_point_t code_point;
|
||||
buf_iter_p->buf_offset += lit_read_code_point_from_utf8 (buf_iter_p->buf_p + buf_iter_p->buf_offset,
|
||||
buf_iter_p->buf_size - buf_iter_p->buf_offset,
|
||||
&code_point);
|
||||
|
||||
if (code_point <= LIT_UTF16_CODE_UNIT_MAX)
|
||||
{
|
||||
return (ecma_char_t) code_point;
|
||||
}
|
||||
else
|
||||
{
|
||||
buf_iter_p->code_point = code_point;
|
||||
return convert_code_point_to_high_surrogate (code_point);
|
||||
}
|
||||
|
||||
JERRY_ASSERT (false);
|
||||
return ECMA_CHAR_NULL;
|
||||
} /* lit_utf8_iterator_read_code_unit_and_increment */
|
||||
|
||||
/**
|
||||
* Checks iterator reached end of the string
|
||||
*
|
||||
* @return true - the whole string was iterated
|
||||
* false - otherwise
|
||||
*/
|
||||
bool
|
||||
lit_utf8_iterator_reached_buffer_end (const lit_utf8_iterator_t *buf_iter_p) /**< utf-8 string iterator */
|
||||
{
|
||||
JERRY_ASSERT (buf_iter_p->buf_offset <= buf_iter_p->buf_size);
|
||||
|
||||
if (buf_iter_p->code_point == LIT_UNICODE_CODE_POINT_NULL && buf_iter_p->buf_offset == buf_iter_p->buf_size)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
} /* lit_utf8_iterator_reached_buffer_end */
|
||||
|
||||
/**
|
||||
* Calculate size of a zero-terminated utf-8 string
|
||||
*
|
||||
* NOTE:
|
||||
* string should not contain zero characters in the middel
|
||||
*
|
||||
* @return size of a string
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_zt_utf8_string_size (const lit_utf8_byte_t *utf8_str_p) /**< zero-terminated utf-8 string */
|
||||
{
|
||||
return (lit_utf8_size_t) strlen ((const char *) utf8_str_p);
|
||||
} /* lit_zt_utf8_string_size */
|
||||
|
||||
/**
|
||||
* Calculate length of a utf-8 string
|
||||
*
|
||||
* @return UTF-16 code units count
|
||||
*/
|
||||
ecma_length_t
|
||||
lit_utf8_string_length (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */
|
||||
lit_utf8_size_t utf8_buf_size) /**< string size */
|
||||
{
|
||||
ecma_length_t length = 0;
|
||||
lit_utf8_iterator_t buf_iter = lit_utf8_iterator_create (utf8_buf_p, utf8_buf_size);
|
||||
while (!lit_utf8_iterator_reached_buffer_end (&buf_iter))
|
||||
{
|
||||
lit_utf8_iterator_read_code_unit_and_increment (&buf_iter);
|
||||
length++;
|
||||
}
|
||||
JERRY_ASSERT (lit_utf8_iterator_reached_buffer_end (&buf_iter));
|
||||
|
||||
return length;
|
||||
} /* lit_utf8_string_length */
|
||||
|
||||
/**
|
||||
* Decodes a unicode code point from non-empty utf-8-encoded buffer
|
||||
*
|
||||
* @return number of bytes occupied by code point in the string
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_read_code_point_from_utf8 (const lit_utf8_byte_t *buf_p, /**< buffer with characters */
|
||||
lit_utf8_size_t buf_size, /**< size of the buffer in bytes */
|
||||
lit_code_point_t *code_point) /**< @out: code point */
|
||||
{
|
||||
JERRY_ASSERT (buf_p && buf_size);
|
||||
|
||||
lit_utf8_byte_t c = (uint8_t) buf_p[0];
|
||||
if ((c & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
|
||||
{
|
||||
*code_point = (uint32_t) (c & LIT_UTF8_LAST_7_BITS_MASK);
|
||||
return 1;
|
||||
}
|
||||
|
||||
lit_code_point_t ret = LIT_UNICODE_CODE_POINT_NULL;
|
||||
ecma_length_t bytes_count = 0;
|
||||
if ((c & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
|
||||
{
|
||||
bytes_count = 2;
|
||||
ret = ((uint32_t) (c & LIT_UTF8_LAST_5_BITS_MASK));
|
||||
}
|
||||
else if ((c & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER)
|
||||
{
|
||||
bytes_count = 3;
|
||||
ret = ((uint32_t) (c & LIT_UTF8_LAST_4_BITS_MASK));
|
||||
}
|
||||
else if ((c & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
|
||||
{
|
||||
bytes_count = 4;
|
||||
ret = ((uint32_t) (c & LIT_UTF8_LAST_3_BITS_MASK));
|
||||
}
|
||||
else
|
||||
{
|
||||
JERRY_ASSERT (false);
|
||||
}
|
||||
|
||||
JERRY_ASSERT (buf_size >= bytes_count);
|
||||
|
||||
for (uint32_t i = 1; i < bytes_count; ++i)
|
||||
{
|
||||
ret <<= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
ret |= (buf_p[i] & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
}
|
||||
|
||||
*code_point = ret;
|
||||
return bytes_count;
|
||||
} /* lit_read_code_point_from_utf8 */
|
||||
|
||||
|
||||
/**
|
||||
* Calculate hash from last LIT_STRING_HASH_LAST_BYTES_COUNT characters from the buffer.
|
||||
*
|
||||
* @return ecma-string's hash
|
||||
*/
|
||||
lit_string_hash_t
|
||||
lit_utf8_string_calc_hash_last_bytes (const lit_utf8_byte_t *utf8_buf_p, /**< characters buffer */
|
||||
lit_utf8_size_t utf8_buf_size) /**< number of characters in the buffer */
|
||||
{
|
||||
JERRY_ASSERT (utf8_buf_p != NULL);
|
||||
|
||||
lit_utf8_byte_t byte1 = (utf8_buf_size > 0) ? utf8_buf_p[utf8_buf_size - 1] : 0;
|
||||
lit_utf8_byte_t byte2 = (utf8_buf_size > 1) ? utf8_buf_p[utf8_buf_size - 2] : 0;
|
||||
|
||||
uint32_t t1 = (uint32_t) byte1 + (uint32_t) byte2;
|
||||
uint32_t t2 = t1 * 0x24418b66;
|
||||
uint32_t t3 = (t2 >> 16) ^ (t2 & 0xffffu);
|
||||
uint32_t t4 = (t3 >> 8) ^ (t3 & 0xffu);
|
||||
|
||||
return (lit_string_hash_t) t4;
|
||||
} /* lit_utf8_string_calc_hash_last_bytes */
|
||||
|
||||
/**
|
||||
* Return code unit at the specified position in string
|
||||
*
|
||||
* NOTE:
|
||||
* code_unit_offset should be less then string's length
|
||||
*
|
||||
* @return code unit value
|
||||
*/
|
||||
ecma_char_t
|
||||
lit_utf8_string_code_unit_at (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */
|
||||
lit_utf8_size_t utf8_buf_size, /**< string size in bytes */
|
||||
ecma_length_t code_unit_offset) /**< ofset of a code_unit */
|
||||
{
|
||||
lit_utf8_iterator_t iter = lit_utf8_iterator_create (utf8_buf_p, utf8_buf_size);
|
||||
ecma_char_t code_unit;
|
||||
|
||||
do
|
||||
{
|
||||
JERRY_ASSERT (!lit_utf8_iterator_reached_buffer_end (&iter));
|
||||
code_unit = lit_utf8_iterator_read_code_unit_and_increment (&iter);
|
||||
}
|
||||
while (code_unit_offset--);
|
||||
|
||||
return code_unit;
|
||||
} /* lit_utf8_string_code_unit_at */
|
||||
|
||||
/**
|
||||
* Return number of bytes occupied by a unicode character in utf-8 representation
|
||||
*
|
||||
* @return size of a unicode character in utf-8 format
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_get_unicode_char_size_by_utf8_first_byte (lit_utf8_byte_t first_byte) /**< first byte of a utf-8 byte sequence */
|
||||
{
|
||||
if ((first_byte & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
else if ((first_byte & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
else if ((first_byte & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER)
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
JERRY_ASSERT ((first_byte & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER);
|
||||
return 4;
|
||||
}
|
||||
} /* lit_get_unicode_char_size_by_utf8_first_byte */
|
||||
|
||||
/**
|
||||
* Convert code_unit to utf-8 representation
|
||||
*
|
||||
* @return bytes count, stored required to represent specified code unit
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_code_unit_to_utf8 (ecma_char_t code_unit, /**< code unit */
|
||||
lit_utf8_byte_t *buf_p) /**< buffer where to store the result,
|
||||
* its size should be at least MAX_BYTES_IN_CODE_UNIT */
|
||||
{
|
||||
return lit_code_point_to_utf8 (code_unit, buf_p);
|
||||
} /* lit_code_unit_to_utf8 */
|
||||
|
||||
/**
|
||||
* Convert code point to utf-8 representation
|
||||
*
|
||||
* @return bytes count, stored required to represent specified code unit
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_code_point_to_utf8 (lit_code_point_t code_point, /**< code point */
|
||||
lit_utf8_byte_t *buf) /**< buffer where to store the result,
|
||||
* its size should be at least 4 bytes */
|
||||
{
|
||||
if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
buf[0] = (lit_utf8_byte_t) code_point;
|
||||
return 1;
|
||||
}
|
||||
else if (code_point <= LIT_UTF8_2_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
uint32_t code_point_bits = code_point;
|
||||
lit_utf8_byte_t second_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_point_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t first_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_5_BITS_MASK);
|
||||
JERRY_ASSERT (first_byte_bits == code_point_bits);
|
||||
|
||||
buf[0] = LIT_UTF8_2_BYTE_MARKER | first_byte_bits;
|
||||
buf[1] = LIT_UTF8_EXTRA_BYTE_MARKER | second_byte_bits;
|
||||
return 2;
|
||||
}
|
||||
else if (code_point <= LIT_UTF8_3_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
uint32_t code_point_bits = code_point;
|
||||
lit_utf8_byte_t third_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_point_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t second_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_point_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t first_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_4_BITS_MASK);
|
||||
JERRY_ASSERT (first_byte_bits == code_point_bits);
|
||||
|
||||
buf[0] = LIT_UTF8_3_BYTE_MARKER | first_byte_bits;
|
||||
buf[1] = LIT_UTF8_EXTRA_BYTE_MARKER | second_byte_bits;
|
||||
buf[2] = LIT_UTF8_EXTRA_BYTE_MARKER | third_byte_bits;
|
||||
return 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MAX);
|
||||
|
||||
uint32_t code_point_bits = code_point;
|
||||
lit_utf8_byte_t fourth_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_point_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t third_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_point_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t second_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_point_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t first_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_3_BITS_MASK);
|
||||
JERRY_ASSERT (first_byte_bits == code_point_bits);
|
||||
|
||||
buf[0] = LIT_UTF8_4_BYTE_MARKER | first_byte_bits;
|
||||
buf[1] = LIT_UTF8_EXTRA_BYTE_MARKER | second_byte_bits;
|
||||
buf[2] = LIT_UTF8_EXTRA_BYTE_MARKER | third_byte_bits;
|
||||
buf[3] = LIT_UTF8_EXTRA_BYTE_MARKER | fourth_byte_bits;
|
||||
return 4;
|
||||
}
|
||||
} /* lit_code_unit_to_utf8 */
|
||||
|
||||
/**
|
||||
* Compare utf-8 string to utf-8 string
|
||||
*
|
||||
* @return true - if strings are equal;
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_compare_utf8_strings (const lit_utf8_byte_t *string1_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string1_size, /**< string size */
|
||||
const lit_utf8_byte_t *string2_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string2_size) /**< string size */
|
||||
{
|
||||
if (string1_size != string2_size)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return memcmp (string1_p, string2_p, string1_size) == 0;
|
||||
} /* lit_compare_utf8_strings */
|
||||
|
||||
/**
|
||||
* Relational compare of utf-8 strings
|
||||
*
|
||||
* First string is less than second string if:
|
||||
* - strings are not equal;
|
||||
* - first string is prefix of second or is lexicographically less than second.
|
||||
*
|
||||
* @return true - if first string is less than second string,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool lit_compare_utf8_strings_relational (const lit_utf8_byte_t *string1_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string1_size, /**< string size */
|
||||
const lit_utf8_byte_t *string2_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string2_size) /**< string size */
|
||||
{
|
||||
lit_utf8_iterator_t iter1 = lit_utf8_iterator_create (string1_p, string1_size);
|
||||
lit_utf8_iterator_t iter2 = lit_utf8_iterator_create (string2_p, string2_size);
|
||||
|
||||
while (!lit_utf8_iterator_reached_buffer_end (&iter1)
|
||||
&& !lit_utf8_iterator_reached_buffer_end (&iter2))
|
||||
{
|
||||
ecma_char_t code_point1 = lit_utf8_iterator_read_code_unit_and_increment (&iter1);
|
||||
ecma_char_t code_point2 = lit_utf8_iterator_read_code_unit_and_increment (&iter2);
|
||||
if (code_point1 < code_point2)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else if (code_point1 > code_point2)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return (lit_utf8_iterator_reached_buffer_end (&iter1) && !lit_utf8_iterator_reached_buffer_end (&iter2));
|
||||
} /* lit_compare_utf8_strings_relational */
|
||||
@@ -0,0 +1,80 @@
|
||||
/* Copyright 2015 Samsung Electronics Co., Ltd.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LIT_UNICODE_HELPERS_H
|
||||
#define LIT_UNICODE_HELPERS_H
|
||||
|
||||
#include "jrt.h"
|
||||
#include "lit-globals.h"
|
||||
|
||||
/**
|
||||
* Null character (used in a few cases as a utf-8 string end marker)
|
||||
*/
|
||||
#define LIT_BYTE_NULL (0)
|
||||
|
||||
/**
|
||||
* Represents an iterator over utf-8 buffer
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
lit_utf8_size_t buf_offset; /* current offset in the buffer */
|
||||
lit_utf8_size_t buf_size; /* buffer length */
|
||||
const lit_utf8_byte_t *buf_p; /* buffer */
|
||||
lit_code_point_t code_point; /* code point is saved here when processed Unicode character is higher than
|
||||
* 0xFFFF */
|
||||
} lit_utf8_iterator_t;
|
||||
|
||||
/* validation */
|
||||
bool lit_is_utf8_string_valid (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
|
||||
/* iteration */
|
||||
lit_utf8_iterator_t lit_utf8_iterator_create (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
ecma_char_t lit_utf8_iterator_read_code_unit_and_increment (lit_utf8_iterator_t *);
|
||||
bool lit_utf8_iterator_reached_buffer_end (const lit_utf8_iterator_t *);
|
||||
|
||||
/* size */
|
||||
lit_utf8_size_t lit_zt_utf8_string_size (const lit_utf8_byte_t *);
|
||||
|
||||
/* length */
|
||||
ecma_length_t lit_utf8_string_length (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
|
||||
/* hash */
|
||||
lit_string_hash_t lit_utf8_string_calc_hash_last_bytes (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
|
||||
/* code unit access */
|
||||
ecma_char_t lit_utf8_string_code_unit_at (const lit_utf8_byte_t *, lit_utf8_size_t, ecma_length_t);
|
||||
lit_utf8_size_t lit_get_unicode_char_size_by_utf8_first_byte (lit_utf8_byte_t);
|
||||
|
||||
/* conversion */
|
||||
lit_utf8_size_t lit_code_unit_to_utf8 (ecma_char_t, lit_utf8_byte_t *);
|
||||
lit_utf8_size_t lit_code_point_to_utf8 (lit_code_point_t, lit_utf8_byte_t *);
|
||||
|
||||
/* comparison */
|
||||
bool lit_compare_utf8_strings (const lit_utf8_byte_t *,
|
||||
lit_utf8_size_t,
|
||||
const lit_utf8_byte_t *,
|
||||
lit_utf8_size_t);
|
||||
|
||||
bool lit_compare_utf8_strings_relational (const lit_utf8_byte_t *string1_p,
|
||||
lit_utf8_size_t,
|
||||
const lit_utf8_byte_t *string2_p,
|
||||
lit_utf8_size_t);
|
||||
|
||||
/* read code point from buffer */
|
||||
lit_utf8_size_t lit_read_code_point_from_utf8 (const lit_utf8_byte_t *,
|
||||
lit_utf8_size_t,
|
||||
lit_code_point_t *);
|
||||
|
||||
#endif /* LIT_UNICODE_HELPERS_H */
|
||||
Reference in New Issue
Block a user