Add core Unicode functionality.

Add UTF-8 processing routines.
Change ecma_char_t from char/uint16_t to uint16_t.
Apply all UTF-8 processing routines.
Change char to jerry_api_char in API functions' declarations.

JerryScript-DCO-1.0-Signed-off-by: Andrey Shitov a.shitov@samsung.com
This commit is contained in:
Andrey Shitov
2015-06-29 19:17:17 +03:00
parent c4b0cd2196
commit fd9ff8e3bd
56 changed files with 2468 additions and 1480 deletions
+51 -60
View File
@@ -16,6 +16,7 @@
#include "lit-literal-storage.h"
#include "ecma-helpers.h"
#include "lit-literal.h"
#include "lit-magic-strings.h"
/**
* Literal storage
@@ -57,18 +58,18 @@ lit_charset_record_t::set_prev (rcs_record_t *prev_rec_p) /**< pointer to the re
* Set the charset of the record
*/
void
lit_charset_record_t::set_charset (const ecma_char_t *str, /**< buffer containing characters to set */
size_t size) /**< size of the buffer in bytes */
lit_charset_record_t::set_charset (const lit_utf8_byte_t *str, /**< buffer containing characters to set */
lit_utf8_size_t size) /**< size of the buffer in bytes */
{
JERRY_ASSERT (header_size () + size == get_size () - get_alignment_bytes_count ());
rcs_record_iterator_t it ((rcs_recordset_t *)&lit_storage, (rcs_record_t *)this);
it.skip (header_size ());
for (size_t i = 0; i < get_length (); ++i)
for (lit_utf8_size_t i = 0; i < get_length (); ++i)
{
it.write<ecma_char_t> (str[i]);
it.skip<ecma_char_t> ();
it.write<lit_utf8_byte_t> (str[i]);
it.skip<lit_utf8_byte_t> ();
}
} /* lit_charset_record_t::set_charset */
@@ -77,38 +78,39 @@ lit_charset_record_t::set_charset (const ecma_char_t *str, /**< buffer containin
*
* @return number of code units written to the buffer
*/
ecma_length_t
lit_charset_record_t::get_charset (ecma_char_t *buff, /**< output buffer */
lit_utf8_size_t
lit_charset_record_t::get_charset (lit_utf8_byte_t *buff, /**< output buffer */
size_t size) /**< size of the output buffer in bytes */
{
JERRY_ASSERT (buff && size >= sizeof (ecma_char_t));
JERRY_ASSERT (buff && size >= sizeof (lit_utf8_byte_t));
rcs_record_iterator_t it ((rcs_recordset_t *)&lit_storage, (rcs_record_t *)this);
it.skip (header_size ());
ecma_length_t len = get_length ();
size_t i;
lit_utf8_size_t len = get_length ();
lit_utf8_size_t i;
for (i = 0; i < len && size > sizeof (ecma_char_t); ++i)
for (i = 0; i < len && size > 0; ++i)
{
buff[i] = it.read<ecma_char_t> ();
it.skip<ecma_char_t> ();
size -= sizeof (ecma_char_t);
buff[i] = it.read<lit_utf8_byte_t> ();
it.skip<lit_utf8_byte_t> ();
size -= sizeof (lit_utf8_byte_t);
}
return (ecma_length_t) i;
return i;
} /* lit_charset_record_t::get_charset */
/**
* Compares characters from the record to the string
*
* @return 0 if strings are equal
* -1 if str2 is greater
* 1 if str2 is less
* -1 if str_to_compare_with is greater
* 1 if str_to_compare_with is less
*/
int
lit_charset_record_t::compare_zt (const ecma_char_t *str_to_compare_with, /**< buffer with string to compare */
size_t length) /**< length of the string in buffer str2 */
lit_charset_record_t::compare_utf8 (const lit_utf8_byte_t *str_to_compare_with, /**< buffer with string to compare */
lit_utf8_size_t str_size) /**< size of the string */
{
TODO ("Support utf-8 in comparison.");
size_t i;
if (get_length () == 0)
@@ -132,9 +134,9 @@ lit_charset_record_t::compare_zt (const ecma_char_t *str_to_compare_with, /**< b
it_this.skip (header_size ());
for (i = 0; i < get_length () && i < length; i++)
for (i = 0; i < get_length () && i < str_size; i++)
{
ecma_char_t chr = it_this.read<ecma_char_t> ();
lit_utf8_byte_t chr = it_this.read<lit_utf8_byte_t> ();
if (chr > str_to_compare_with[i])
{
@@ -145,10 +147,10 @@ lit_charset_record_t::compare_zt (const ecma_char_t *str_to_compare_with, /**< b
return -1;
}
it_this.skip<ecma_char_t> ();
it_this.skip<lit_utf8_byte_t> ();
}
if (i < length)
if (i < str_size)
{
return -1;
}
@@ -163,7 +165,7 @@ lit_charset_record_t::compare_zt (const ecma_char_t *str_to_compare_with, /**< b
* false otherwise
*/
bool
lit_charset_record_t::equal (lit_charset_record_t *rec) /**< charset record to compare with */
lit_charset_record_t::is_equal (lit_charset_record_t *rec) /**< charset record to compare with */
{
if (get_length () != rec->get_length ())
{
@@ -176,31 +178,19 @@ lit_charset_record_t::equal (lit_charset_record_t *rec) /**< charset record to c
it_this.skip (header_size ());
it_record.skip (rec->header_size ());
for (ecma_length_t i = 0; i < get_length (); i++)
for (lit_utf8_size_t i = 0; i < get_length (); i++)
{
if (it_this.read<ecma_char_t> () != it_record.read<ecma_char_t> ())
if (it_this.read<lit_utf8_byte_t> () != it_record.read<lit_utf8_byte_t> ())
{
return false;
}
it_this.skip<ecma_char_t> ();
it_record.skip<ecma_char_t> ();
it_this.skip<lit_utf8_byte_t> ();
it_record.skip<lit_utf8_byte_t> ();
}
return true;
} /* lit_charset_record_t::equal */
/**
* Compares this lit_charset_record_t records with zero-terminated string for equality
*
* @return true if compared instances are equal
* false otherwise
*/
bool
lit_charset_record_t::equal_zt (const ecma_char_t *str) /**< zero-terminated string */
{
return equal_non_zt (str, ecma_zt_string_length (str));
} /* lit_charset_record_t::equal_zt */
} /* lit_charset_record_t::is_equal */
/**
* Compare this lit_charset_record_t record with string (which could contain '\0' characters) for equality
@@ -209,24 +199,24 @@ lit_charset_record_t::equal_zt (const ecma_char_t *str) /**< zero-terminated str
* false otherwise
*/
bool
lit_charset_record_t::equal_non_zt (const ecma_char_t *str, /**< string to compare with */
ecma_length_t len) /**< length of the string */
lit_charset_record_t::is_equal_utf8_string (const lit_utf8_byte_t *str, /**< string to compare with */
lit_utf8_size_t str_size) /**< size of the string in bytes */
{
rcs_record_iterator_t it_this (&lit_storage, this);
it_this.skip (header_size ());
for (ecma_length_t i = 0; i < get_length () && i < len; i++)
for (lit_utf8_size_t i = 0; i < get_length () && i < str_size; i++)
{
if (it_this.read<ecma_char_t> () != str[i])
if (it_this.read<lit_utf8_byte_t> () != str[i])
{
return false;
}
it_this.skip<ecma_char_t> ();
it_this.skip<lit_utf8_byte_t> ();
}
return get_length () == len;
return get_length () == str_size;
} /* lit_charset_record_t::is_equal_utf8_string */
/**
@@ -235,9 +225,9 @@ lit_charset_record_t::equal_non_zt (const ecma_char_t *str, /**< string to compa
* @return pointer to the created record
*/
lit_charset_record_t *
lit_literal_storage_t::create_charset_record (const ecma_char_t *str, /**< string to be placed in the record */
size_t buf_size) /**< size in bytes of the buffer which holds the
* string */
lit_literal_storage_t::create_charset_record (const lit_utf8_byte_t *str, /**< string to be placed in the record */
lit_utf8_size_t buf_size) /**< size in bytes of the buffer which holds the
* string */
{
const size_t alignment = lit_charset_record_t::size (buf_size) - (lit_charset_record_t::header_size () + buf_size);
@@ -245,7 +235,7 @@ lit_literal_storage_t::create_charset_record (const ecma_char_t *str, /**< strin
ret->set_alignment_bytes_count (alignment);
ret->set_charset (str, buf_size);
ret->set_hash (ecma_chars_buffer_calc_hash_last_chars (str, ret->get_length ()));
ret->set_hash (lit_utf8_string_calc_hash_last_bytes (str, ret->get_length ()));
return ret;
} /* lit_literal_storage_t::create_charset_record */
@@ -319,8 +309,9 @@ lit_literal_storage_t::dump ()
for (size_t i = 0; i < lit_p->get_length (); ++i)
{
printf ("%c", it_this.read<ecma_char_t> ());
it_this.skip<ecma_char_t> ();
FIXME ("Support proper printing of characters which occupy more than one byte.")
printf ("%c", it_this.read<lit_utf8_byte_t> ());
it_this.skip<lit_utf8_byte_t> ();
}
printf (" : STRING");
@@ -330,7 +321,7 @@ lit_literal_storage_t::dump ()
case LIT_MAGIC_STR:
{
lit_magic_string_id_t id = lit_magic_record_get_magic_str_id (rec_p);
printf ("%s : MAGIC STRING", lit_get_magic_string_zt (id));
printf ("%s : MAGIC STRING", lit_get_magic_string_utf8 (id));
printf (" [id=%d] ", id);
break;
@@ -338,7 +329,7 @@ lit_literal_storage_t::dump ()
case LIT_MAGIC_STR_EX:
{
lit_magic_string_ex_id_t id = lit_magic_record_ex_get_magic_str_id (rec_p);
printf ("%s : EXT MAGIC STRING", lit_get_magic_string_ex_zt (id));
printf ("%s : EXT MAGIC STRING", lit_get_magic_string_ex_utf8 (id));
printf (" [id=%d] ", id);
break;
@@ -353,8 +344,8 @@ lit_literal_storage_t::dump ()
}
else
{
ecma_char_t buff[ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER];
ecma_number_to_zt_string (lit_p->get_number (), buff, ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER);
lit_utf8_byte_t buff[ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER];
ecma_number_to_utf8_string (lit_p->get_number (), buff, sizeof (buff));
printf ("%s : NUMBER", buff);
}
@@ -465,12 +456,12 @@ lit_literal_storage_t::get_record_size (rcs_record_t* rec_p) /**< pointer to a r
}
} /* lit_literal_storage_t::get_record_size */
template void rcs_record_iterator_t::skip<ecma_char_t> ();
template void rcs_record_iterator_t::skip<uint8_t> ();
template void rcs_record_iterator_t::skip<uint16_t> ();
template void rcs_record_iterator_t::skip<uint32_t> ();
template void rcs_record_iterator_t::write<ecma_char_t> (ecma_char_t);
template ecma_char_t rcs_record_iterator_t::read<ecma_char_t> ();
template void rcs_record_iterator_t::write<uint8_t> (uint8_t);
template uint8_t rcs_record_iterator_t::read<uint8_t> ();
template void rcs_record_iterator_t::write<ecma_number_t> (ecma_number_t);
template ecma_number_t rcs_record_iterator_t::read<ecma_number_t> ();