diff --git a/src/transliterate.c b/src/transliterate.c index 18eea5c4..32198c7e 100644 --- a/src/transliterate.c +++ b/src/transliterate.c @@ -1,6 +1,5 @@ #include #include "transliterate.h" -#include "transliteration_scripts_data.c" #include "file_utils.h" #define TRANSLITERATION_TABLE_SIGNATURE 0xAAAAAAAA @@ -991,6 +990,14 @@ void transliteration_table_destroy(void) { kh_destroy(str_transliterator, trans_table->transliterators); } + if (trans_table->script_languages) { + kh_destroy(script_language_index, trans_table->script_languages); + } + + if (trans_table->transliterator_names) { + cstring_array_destroy(trans_table->transliterator_names); + } + if (trans_table->steps) { step_array_destroy(trans_table->steps); } @@ -1010,6 +1017,7 @@ void transliteration_table_destroy(void) { free(trans_table); } + transliteration_table_t *transliteration_table_init(void) { transliteration_table_t *trans_table = get_transliteration_table(); @@ -1026,6 +1034,16 @@ transliteration_table_t *transliteration_table_init(void) { goto exit_trans_table_created; } + trans_table->script_languages = kh_init(script_language_index); + if (trans_table->script_languages == NULL) { + goto exit_trans_table_created; + } + + trans_table->transliterator_names = cstring_array_new(); + if (trans_table->transliterator_names == NULL) { + goto exit_trans_table_created; + } + trans_table->steps = step_array_new(); if (trans_table->steps == NULL) { goto exit_trans_table_created; @@ -1132,6 +1150,33 @@ bool transliteration_table_add_transliterator(transliterator_t *trans) { return true; } +bool transliteration_table_add_script_language(script_language_t script_language, transliterator_index_t index) { + if (trans_table == NULL) { + return false; + } + + int ret; + khiter_t k = kh_put(script_language_index, trans_table->script_languages, script_language, &ret); + kh_value(trans_table->script_languages, k) = index; + + return true; +} + +transliterator_index_t get_transliterator_index_for_script_language(script_t script, char *language) { + if (trans_table == NULL || language == NULL || strlen(language) >= MAX_LANGUAGE_LEN) { + return NULL_TRANSLITERATOR_INDEX; + } + + script_language_t script_lang; + script_lang.script = script; + strcpy(script_lang.language, language); + + khiter_t k; + k = kh_get(script_language_index, trans_table->script_languages, script_lang); + return (k != kh_end(trans_table->script_languages)) ? kh_value(trans_table->script_languages, k) : NULL_TRANSLITERATOR_INDEX; +} + + char *transliterator_replace_strings(trie_t *trie, cstring_array *replacements, char *input) { phrase_array *phrases; char_array *str; @@ -1413,6 +1458,86 @@ bool transliteration_table_read(FILE *f) { log_debug("Read transliterators\n"); + size_t num_script_languages; + if (!file_read_uint64(f, (uint64_t *)&num_script_languages)) { + goto exit_trans_table_load_error; + } + + log_debug("num_script_languages = %zu\n", num_script_languages); + + script_language_t script_language; + transliterator_index_t index; + + size_t language_len = 0; + char language[MAX_LANGUAGE_LEN] = ""; + + for (i = 0; i < num_script_languages; i++) { + if (!file_read_uint32(f, (uint32_t *)&script_language.script)) { + goto exit_trans_table_load_error; + } + + if (!file_read_uint64(f, (uint64_t *)&language_len) || language_len >= MAX_LANGUAGE_LEN) { + goto exit_trans_table_load_error; + } + + if (language_len == 0) { + script_language.language[0] = '\0'; + } else if (!file_read_chars(f, (char *)language, language_len)) { + goto exit_trans_table_load_error; + } else { + strcpy(script_language.language, language); + } + + + if (!file_read_uint64(f, (uint64_t *)&index.transliterator_index)) { + goto exit_trans_table_load_error; + } + + if (!file_read_uint64(f, (uint64_t *)&index.num_transliterators)) { + goto exit_trans_table_load_error; + } + + log_debug("Adding script language key={%d, %s}, value={%d, %d}\n", script_language.script, script_language.language, index.transliterator_index, index.num_transliterators); + + transliteration_table_add_script_language(script_language, index); + } + + size_t trans_table_num_strings; + + if (!file_read_uint64(f, (uint64_t *)&trans_table_num_strings)) { + goto exit_trans_table_load_error; + } + + log_debug("trans_table_num_strings=%zu\n", trans_table_num_strings); + + size_t trans_name_str_len; + + if (!file_read_uint64(f, (uint64_t *)&trans_name_str_len)) { + goto exit_trans_table_load_error; + } + + log_debug("Creating char_array with size=%zu\n", trans_name_str_len); + + char_array *array = char_array_new_size(trans_name_str_len); + + if (!file_read_chars(f, array->a, trans_name_str_len)) { + goto exit_trans_table_load_error; + } + + array->n = trans_name_str_len; + + cstring_array_destroy(trans_table->transliterator_names); + log_debug("Destroyed current cstring_array\n"); + + log_debug("char_array len=%zu\n", array->n); + + trans_table->transliterator_names = cstring_array_from_char_array(array); + log_debug("Set trans_table->transliterator_names\n"); + + if (cstring_array_num_strings(trans_table->transliterator_names) != trans_table_num_strings) { + goto exit_trans_table_load_error; + } + size_t num_steps; if (!file_read_uint64(f, (uint64_t *)&num_steps)) { @@ -1586,9 +1711,59 @@ bool transliteration_table_write(FILE *f) { } }) + int i; + + size_t num_script_languages = kh_size(trans_table->script_languages); + + if (!file_write_uint64(f, (uint64_t)num_script_languages)) { + return false; + } + + script_language_t script_language; + transliterator_index_t index; + + kh_foreach(trans_table->script_languages, script_language, index, { + if (!file_write_uint32(f, (uint32_t)script_language.script)) { + return false; + } + + size_t language_len = strlen(script_language.language); + + if (!file_write_uint64(f, (uint64_t)language_len)) { + return false; + } + + if (language_len > 0 && !file_write_chars(f, script_language.language, language_len)) { + return false; + } + + if (!file_write_uint64(f, (uint64_t)index.transliterator_index)) { + return false; + } + + if (!file_write_uint64(f, (uint64_t)index.num_transliterators)) { + return false; + } + }) + + size_t num_trans_names = trans_table->transliterator_names->indices->n; + + if (!file_write_uint64(f, (uint64_t)num_trans_names)) { + return false; + } + + size_t trans_names_str_len = trans_table->transliterator_names->str->n; + + if (!file_write_uint64(f, (uint64_t)trans_names_str_len)) { + return false; + } + + if (!file_write_chars(f, trans_table->transliterator_names->str->a, trans_names_str_len)) { + return false; + } + transliteration_step_t *step; - int i; size_t num_steps = trans_table->steps->n;