From 2e54ca3575640170f755d39fc6b7c514db01df3b Mon Sep 17 00:00:00 2001 From: Al Date: Sun, 21 Jun 2015 05:42:10 -0500 Subject: [PATCH] [transliteration] including script data file, adding len to transliterate API for tokenized transliteration --- src/transliterate.c | 10 +++++----- src/transliterate.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/transliterate.c b/src/transliterate.c index f7f0e3c4..18eea5c4 100644 --- a/src/transliterate.c +++ b/src/transliterate.c @@ -1,5 +1,6 @@ #include #include "transliterate.h" +#include "transliteration_scripts_data.c" #include "file_utils.h" #define TRANSLITERATION_TABLE_SIGNATURE 0xAAAAAAAA @@ -641,7 +642,7 @@ static inline phrase_array *phrase_array_create_if_null(phrase_array *phrases) { } -char *transliterate(char *trans_name, char *str) { +char *transliterate(char *trans_name, char *str, size_t len) { if (trans_name == NULL || str == NULL || trans_table == NULL) return NULL; trie_t *trie = trans_table->trie; @@ -651,7 +652,6 @@ char *transliterate(char *trans_name, char *str) { return NULL; } - size_t len = strlen(str); log_debug("len = %zu\n", len); str = strdup(str); @@ -728,8 +728,6 @@ char *transliterate(char *trans_name, char *str) { trie_prefix_result_t prev_result; trie_node_t node; - len = strlen(str); - new_str = char_array_new_size(len); transliteration_state_t state = TRANSLITERATION_DEFAULT_STATE; @@ -957,6 +955,7 @@ char *transliterate(char *trans_name, char *str) { if (utf8proc_normalized != NULL) { char *old_str = str; str = (char *)utf8proc_normalized; + len = strlen(str); free(old_str); } log_debug("Got unicode normalization step, new str=%s, len=%lu\n", str, strlen(str)); @@ -964,8 +963,9 @@ char *transliterate(char *trans_name, char *str) { // Recursive call here shouldn't hurt too much, happens in only a few languages and only 2-3 calls deep log_debug("Got STEP_TYPE_TRANSFORM, step=%s\n", step_name); char *old_str = str; - str = transliterate(step_name, str); + str = transliterate(step_name, str, len); log_debug("Transform result = %s\n", str); + len = strlen(str); free(old_str); } diff --git a/src/transliterate.h b/src/transliterate.h index 2067eff2..44654541 100644 --- a/src/transliterate.h +++ b/src/transliterate.h @@ -129,7 +129,7 @@ void transliterator_destroy(transliterator_t *self); bool transliteration_table_add_transliterator(transliterator_t *trans); transliterator_t *get_transliterator(char *name); -char *transliterate(char *trans_name, char *str); +char *transliterate(char *trans_name, char *str, size_t len); bool transliteration_table_write(FILE *file); bool transliteration_table_save(char *filename);