#ifndef NUMEX_H #define NUMEX_H #ifdef __cplusplus extern "C" { #endif #include #include #include #include #include "collections.h" #include "config.h" #include "constants.h" #include "klib/khash.h" #include "string_utils.h" #include "tokens.h" #include "trie.h" #define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR "/numex/numex.dat" #define GENDER_MASCULINE_PREFIX "m" #define GENDER_FEMININE_PREFIX "f" #define GENDER_NEUTER_PREFIX "n" #define GENDER_NONE_PREFIX "d" typedef enum { GENDER_MASCULINE, GENDER_FEMININE, GENDER_NEUTER, GENDER_NONE } gender_t; #define CATEGORY_PLURAL_PREFIX "p" #define CATEGORY_DEFAULT_PREFIX "s" typedef enum { CATEGORY_PLURAL, CATEGORY_DEFAULT } grammatical_category_t; typedef enum { NUMEX_LEFT_CONTEXT_NONE, NUMEX_LEFT_CONTEXT_ADD, NUMEX_LEFT_CONTEXT_MULTIPLY } numex_left_context; typedef enum { NUMEX_RIGHT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD } numex_right_context; typedef enum { NUMEX_CARDINAL_RULE, NUMEX_ORDINAL_RULE, NUMEX_ORDINAL_INDICATOR_RULE, NUMEX_DECIMAL_RULE, NUMEX_NEGATION_RULE, NUMEX_STOPWORD, NUMEX_NULL } numex_rule_type; typedef struct numex_rule { numex_left_context left_context_type; numex_right_context right_context_type; numex_rule_type rule_type; gender_t gender; grammatical_category_t category; uint32_t radix; int64_t value; } numex_rule_t; #define NUMEX_NULL_RULE_INDEX 0 #define NUMEX_STOPWORD_INDEX 1 #define NUMEX_NULL_RULE (numex_rule_t) {NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_NONE, NUMEX_NULL, GENDER_NONE, CATEGORY_DEFAULT, 0, 0} #define NUMEX_STOPWORD_RULE (numex_rule_t) {NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_NONE, NUMEX_STOPWORD, GENDER_NONE, CATEGORY_DEFAULT, 0, 0} VECTOR_INIT(numex_rule_array, numex_rule_t) #define ORDINAL_NAMESPACE_CHAR "o" typedef struct ordinal_indicator { char *key; gender_t gender; grammatical_category_t category; char *suffix; } ordinal_indicator_t; ordinal_indicator_t *ordinal_indicator_new(char *key, gender_t gender, grammatical_category_t category, char *suffix); void ordinal_indicator_destroy(ordinal_indicator_t *self); VECTOR_INIT_FREE_DATA(ordinal_indicator_array, ordinal_indicator_t *, ordinal_indicator_destroy) typedef struct numex_language { char *name; size_t rules_index; size_t num_rules; size_t ordinals_index; size_t num_ordinals; } numex_language_t; KHASH_MAP_INIT_STR(str_numex_language, numex_language_t *) typedef struct { khash_t(str_numex_language) *languages; trie_t *trie; numex_rule_array *rules; ordinal_indicator_array *ordinal_indicators; } numex_table_t; numex_table_t *get_numex_table(void); numex_language_t *numex_language_new(char *name, size_t rules_index, size_t num_rules, size_t ordinals_index, size_t num_ordinals); void numex_language_destroy(numex_language_t *self); bool numex_table_add_language(numex_language_t *language); numex_language_t *get_numex_language(char *name); typedef struct numex_phrase { int64_t value; gender_t gender; grammatical_category_t category; bool is_ordinal; size_t start; size_t len; } numex_phrase_t; VECTOR_INIT(numex_phrase_array, numex_phrase_t) numex_phrase_array *convert_numeric_expressions(char *text, char *lang); bool numex_table_write(FILE *file); bool numex_table_save(char *filename); bool numex_module_setup(char *filename); void numex_module_teardown(void); #ifdef __cplusplus } #endif #endif