[numex] Header and table-builder changes to support whole-word languages

This commit is contained in:
Al
2015-06-12 16:10:53 -04:00
parent 1c4657b631
commit 5c2839e534
3 changed files with 9 additions and 6 deletions

View File

@@ -18,6 +18,7 @@ extern "C" {
#include "string_utils.h"
#include "tokens.h"
#include "trie.h"
#include "trie_search.h"
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR "/numex/numex.dat"
@@ -97,6 +98,7 @@ VECTOR_INIT_FREE_DATA(ordinal_indicator_array, ordinal_indicator_t *, ordinal_in
typedef struct numex_language {
char *name;
bool whole_tokens_only;
size_t rules_index;
size_t num_rules;
size_t ordinals_index;
@@ -114,25 +116,25 @@ typedef struct {
numex_table_t *get_numex_table(void);
numex_language_t *numex_language_new(char *name, size_t rules_index, size_t num_rules, size_t ordinals_index, size_t num_ordinals);
numex_language_t *numex_language_new(char *name, bool whole_tokens_only, size_t rules_index, size_t num_rules, size_t ordinals_index, size_t num_ordinals);
void numex_language_destroy(numex_language_t *self);
bool numex_table_add_language(numex_language_t *language);
numex_language_t *get_numex_language(char *name);
typedef struct numex_phrase {
typedef struct numex_result {
int64_t value;
gender_t gender;
grammatical_category_t category;
bool is_ordinal;
size_t start;
size_t len;
} numex_phrase_t;
} numex_result_t;
VECTOR_INIT(numex_phrase_array, numex_phrase_t)
VECTOR_INIT(numex_result_array, numex_result_t)
numex_phrase_array *convert_numeric_expressions(char *str, char *lang);
numex_result_array *convert_numeric_expressions(char *str, char *lang);
bool numex_table_write(FILE *file);
bool numex_table_save(char *filename);

View File

@@ -11,6 +11,7 @@ typedef struct numex_rule_source {
typedef struct numex_language_source {
char *name;
bool whole_tokens_only;
size_t rule_index;
size_t num_rules;
size_t ordinal_indicator_index;

View File

@@ -123,7 +123,7 @@ int main(int argc, char **argv) {
trie_add(numex_table->trie, str_key, value);
}
numex_language_t *language = numex_language_new(lang_source.name, lang_source.rule_index, lang_source.num_rules, lang_source.ordinal_indicator_index, lang_source.num_ordinal_indicators);
numex_language_t *language = numex_language_new(lang_source.name, lang_source.whole_tokens_only, lang_source.rule_index, lang_source.num_rules, lang_source.ordinal_indicator_index, lang_source.num_ordinal_indicators);
numex_table_add_language(language);
}