From 5c2839e534c87e79bcbaf5da7c7e4c225fd593ef Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 12 Jun 2015 16:10:53 -0400 Subject: [PATCH] [numx] header and table builder changes to support whole words languages --- src/numex.h | 12 +++++++----- src/numex_rule.h | 1 + src/numex_table_builder.c | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/numex.h b/src/numex.h index f966c7b7..901584b4 100644 --- a/src/numex.h +++ b/src/numex.h @@ -18,6 +18,7 @@ extern "C" { #include "string_utils.h" #include "tokens.h" #include "trie.h" +#include "trie_search.h" #define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR "/numex/numex.dat" @@ -97,6 +98,7 @@ VECTOR_INIT_FREE_DATA(ordinal_indicator_array, ordinal_indicator_t *, ordinal_in typedef struct numex_language { char *name; + bool whole_tokens_only; size_t rules_index; size_t num_rules; size_t ordinals_index; @@ -114,25 +116,25 @@ typedef struct { numex_table_t *get_numex_table(void); -numex_language_t *numex_language_new(char *name, size_t rules_index, size_t num_rules, size_t ordinals_index, size_t num_ordinals); +numex_language_t *numex_language_new(char *name, bool whole_tokens_only, size_t rules_index, size_t num_rules, size_t ordinals_index, size_t num_ordinals); void numex_language_destroy(numex_language_t *self); bool numex_table_add_language(numex_language_t *language); numex_language_t *get_numex_language(char *name); -typedef struct numex_phrase { +typedef struct numex_result { int64_t value; gender_t gender; grammatical_category_t category; bool is_ordinal; size_t start; size_t len; -} numex_phrase_t; +} numex_result_t; -VECTOR_INIT(numex_phrase_array, numex_phrase_t) +VECTOR_INIT(numex_result_array, numex_result_t) -numex_phrase_array *convert_numeric_expressions(char *str, char *lang); +numex_result_array *convert_numeric_expressions(char *str, char *lang); bool numex_table_write(FILE *file); bool numex_table_save(char *filename); diff --git a/src/numex_rule.h b/src/numex_rule.h index cd9b57b6..da2d586a 100644 --- a/src/numex_rule.h +++ b/src/numex_rule.h @@ -11,6 +11,7 @@ typedef struct numex_rule_source { typedef struct numex_language_source { char *name; + bool whole_tokens_only; size_t rule_index; size_t num_rules; size_t ordinal_indicator_index; diff --git a/src/numex_table_builder.c b/src/numex_table_builder.c index f922196c..60b7638d 100644 --- a/src/numex_table_builder.c +++ b/src/numex_table_builder.c @@ -123,7 +123,7 @@ int main(int argc, char **argv) { trie_add(numex_table->trie, str_key, value); } - numex_language_t *language = numex_language_new(lang_source.name, lang_source.rule_index, lang_source.num_rules, lang_source.ordinal_indicator_index, lang_source.num_ordinal_indicators); + numex_language_t *language = numex_language_new(lang_source.name, lang_source.whole_tokens_only, lang_source.rule_index, lang_source.num_rules, lang_source.ordinal_indicator_index, lang_source.num_ordinal_indicators); numex_table_add_language(language); }