[transliteration] Adding structs/header stuff for transliterator lookup by script/language

This commit is contained in:
Al
2015-06-23 15:34:38 -05:00
parent 8fb6a28e9c
commit c3143e5291

View File

@@ -10,6 +10,7 @@
#include "string_utils.h"
#include "trie.h"
#include "trie_search.h"
#include "unicode_scripts.h"
/* Default path of the transliteration data file. This is a relative path
 * (NOTE(review): presumably resolved against the process working directory — confirm). */
#define DEFAULT_TRANSLITERATION_PATH "../data/transliteration/transliteration.dat"
@@ -66,10 +67,32 @@ VECTOR_INIT_FREE_DATA(transliteration_replacement_array, transliteration_replace
/* Instantiates the khash map type str_transliterator: C-string keys mapped to
 * transliterator_t * values
 * (NOTE(review): presumably transliterator name -> transliterator — confirm against callers). */
KHASH_MAP_INIT_STR(str_transliterator, transliterator_t *)
/*
 * Hash and equality callbacks for hash tables keyed by a (script, language) pair.
 *
 * kh_script_lang_hash: XOR of the string hash of key.language with key.script
 * widened to uint64_t; a NULL language contributes 0 to the XOR.
 *
 * kh_script_lang_equal: non-zero only when both scripts are equal and the two
 * language strings compare equal with strcmp. The scripts are checked first,
 * so strcmp is only reached when they match. Unlike the hash, there is no
 * NULL guard on language here.
 */
#define kh_script_lang_hash(key) ((((key).language != NULL) ? kh_str_hash_func((key).language) : 0) ^ (uint64_t)(key).script)
#define kh_script_lang_equal(a, b) (!(((a).script != (b).script) || strcmp((a).language, (b).language) != 0))
/* Size in bytes of the language buffer in script_language_t.
 * kh_script_lang_equal compares that buffer with strcmp, so it has to hold a
 * NUL terminator, which leaves room for at most MAX_LANGUAGE_LEN - 1 characters. */
#define MAX_LANGUAGE_LEN 4
/* Key type of the script_language_index hash table: a script paired with a
 * language string stored inline in the struct. */
typedef struct script_language {
/* Script part of the key (NOTE(review): script_t presumably comes from unicode_scripts.h — confirm). */
script_t script;
/* Language part of the key, stored as a NUL-terminated string. */
char language[MAX_LANGUAGE_LEN];
} script_language_t;
/* Describes a contiguous run of transliterators. foreach_transliterator walks
 * positions [transliterator_index, transliterator_index + num_transliterators)
 * of trans_table->transliterator_names. */
typedef struct transliterator_index {
/* Position of the first entry in the run. */
size_t transliterator_index;
/* Number of entries in the run. */
size_t num_transliterators;
} transliterator_index_t;
/* Compound literal with both fields set to zero, i.e. an empty run
 * (NOTE(review): presumably what a lookup yields when nothing matches — confirm). */
#define NULL_TRANSLITERATOR_INDEX (transliterator_index_t) {0, 0}
/* Instantiates the khash table type script_language_index, mapping
 * script_language_t keys to transliterator_index_t values with the
 * kh_script_lang_hash / kh_script_lang_equal callbacks. The literal 1 is
 * khash's kh_is_map argument (a map with values rather than a key-only set). */
KHASH_INIT(script_language_index, script_language_t, transliterator_index_t, 1, kh_script_lang_hash, kh_script_lang_equal)
typedef struct transliteration_table {
khash_t(str_transliterator) *transliterators;
step_array *steps;
khash_t(script_language_index) *script_languages;
cstring_array *transliterator_names;
step_array *steps;
trie_t *trie;
transliteration_replacement_array *replacements;
@@ -131,6 +154,18 @@ bool transliteration_table_add_transliterator(transliterator_t *trans);
/* Looks up a transliterator by name.
 * NOTE(review): the result for an unknown name is not visible here — presumably NULL; confirm. */
transliterator_t *get_transliterator(char *name);
/* Transliterates str using the transliterator named trans_name.
 * NOTE(review): len is presumably the byte length of str, and ownership of the
 * returned string is not visible from this declaration — confirm both. */
char *transliterate(char *trans_name, char *str, size_t len);
/* Associates a (script, language) key with a transliterator index range.
 * NOTE(review): the bool result presumably reports success — confirm. */
bool transliteration_table_add_script_language(script_language_t script_language, transliterator_index_t index);
/* Returns the transliterator index range for a (script, language) pair; the
 * foreach_transliterator macro uses the result as its iteration range. */
transliterator_index_t get_transliterator_index_for_script_language(script_t script, char *language);
/*
 * Iterates over the transliterator names registered for a (script, language)
 * pair.
 *
 *   script, language    forwarded to get_transliterator_index_for_script_language()
 *   transliterator_var  lvalue that receives each name in turn
 *   code                statement(s) executed once per name
 *
 * A variable named trans_table must be in scope where the macro is expanded;
 * names are read from trans_table->transliterator_names. Iteration stops early
 * if a name lookup yields NULL.
 *
 * The loop counter is a size_t so it matches the size_t fields of
 * transliterator_index_t: no narrowing on initialization and no mixed
 * signed/unsigned comparison in the loop condition.
 *
 * The semicolon after while (0) is kept deliberately so that existing call
 * sites written without a trailing semicolon keep compiling.
 */
#define foreach_transliterator(script, language, transliterator_var, code) do { \
    transliterator_index_t __index = get_transliterator_index_for_script_language((script), (language)); \
    for (size_t __i = __index.transliterator_index; __i < __index.transliterator_index + __index.num_transliterators; __i++) { \
        transliterator_var = cstring_array_get_token(trans_table->transliterator_names, __i); \
        if (transliterator_var == NULL) break; \
        code; \
    } \
} while (0);
/* Writes the transliteration table to an already-open stream.
 * NOTE(review): the bool result presumably reports success — confirm. */
bool transliteration_table_write(FILE *file);
/* Saves the transliteration table to the file named by filename.
 * NOTE(review): the bool result presumably reports success — confirm. */
bool transliteration_table_save(char *filename);