[transliteration] Include the script data file and add a len parameter to the transliterate API to support tokenized transliteration

This commit is contained in:
Al
2015-06-21 05:42:10 -05:00
parent 79530ae974
commit 2e54ca3575
2 changed files with 6 additions and 6 deletions

View File

@@ -1,5 +1,6 @@
#include <math.h>
#include "transliterate.h"
#include "transliteration_scripts_data.c"
#include "file_utils.h"
#define TRANSLITERATION_TABLE_SIGNATURE 0xAAAAAAAA
@@ -641,7 +642,7 @@ static inline phrase_array *phrase_array_create_if_null(phrase_array *phrases) {
}
char *transliterate(char *trans_name, char *str) {
char *transliterate(char *trans_name, char *str, size_t len) {
if (trans_name == NULL || str == NULL || trans_table == NULL) return NULL;
trie_t *trie = trans_table->trie;
@@ -651,7 +652,6 @@ char *transliterate(char *trans_name, char *str) {
return NULL;
}
size_t len = strlen(str);
log_debug("len = %zu\n", len);
str = strdup(str);
@@ -728,8 +728,6 @@ char *transliterate(char *trans_name, char *str) {
trie_prefix_result_t prev_result;
trie_node_t node;
len = strlen(str);
new_str = char_array_new_size(len);
transliteration_state_t state = TRANSLITERATION_DEFAULT_STATE;
@@ -957,6 +955,7 @@ char *transliterate(char *trans_name, char *str) {
if (utf8proc_normalized != NULL) {
char *old_str = str;
str = (char *)utf8proc_normalized;
len = strlen(str);
free(old_str);
}
log_debug("Got unicode normalization step, new str=%s, len=%lu\n", str, strlen(str));
@@ -964,8 +963,9 @@ char *transliterate(char *trans_name, char *str) {
// The recursive call here shouldn't hurt too much: it happens in only a few languages and is only 2-3 calls deep
log_debug("Got STEP_TYPE_TRANSFORM, step=%s\n", step_name);
char *old_str = str;
str = transliterate(step_name, str);
str = transliterate(step_name, str, len);
log_debug("Transform result = %s\n", str);
len = strlen(str);
free(old_str);
}

View File

@@ -129,7 +129,7 @@ void transliterator_destroy(transliterator_t *self);
bool transliteration_table_add_transliterator(transliterator_t *trans);
transliterator_t *get_transliterator(char *name);
char *transliterate(char *trans_name, char *str);
char *transliterate(char *trans_name, char *str, size_t len);
bool transliteration_table_write(FILE *file);
bool transliteration_table_save(char *filename);