65 lines
3.0 KiB
C
65 lines
3.0 KiB
C
#ifndef EXPAND_H
|
|
#define EXPAND_H
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
|
|
#include "libpostal.h"
|
|
|
|
#include "address_dictionary.h"
|
|
#include "collections.h"
|
|
#include "klib/khash.h"
|
|
#include "klib/ksort.h"
|
|
#include "trie_search.h"
|
|
|
|
typedef struct phrase_language {
|
|
char *language;
|
|
phrase_t phrase;
|
|
} phrase_language_t;
|
|
|
|
/*
 * Invokes the project's VECTOR_INIT macro with the name
 * phrase_language_array and element type phrase_language_t.
 * NOTE(review): presumably this generates a growable array type and its
 * helper functions - confirm against the macro's definition.
 */
VECTOR_INIT(phrase_language_array, phrase_language_t)
|
|
|
|
/*
 * Strict "less than" ordering for phrase_language_t values:
 *   - an element with a smaller phrase.start sorts first;
 *   - when phrase.start is equal, the element with the larger phrase.len
 *     sorts first.
 * Both arguments are expanded more than once, so pass expressions without
 * side effects.
 */
#define ks_lt_phrase_language(a, b) \
    ((a).phrase.start != (b).phrase.start \
        ? ((a).phrase.start < (b).phrase.start) \
        : ((a).phrase.len > (b).phrase.len))
|
|
|
|
/*
 * Invokes klib's KSORT_INIT with the name phrase_language_array, element
 * type phrase_language_t and ks_lt_phrase_language as the less-than
 * comparison.
 * NOTE(review): if KSORT_INIT emits non-static function definitions,
 * including this header from more than one translation unit could produce
 * duplicate symbols - confirm against klib/ksort.h.
 */
KSORT_INIT(phrase_language_array, phrase_language_t, ks_lt_phrase_language)
|
|
|
|
/*
 * Takes the normalize options by value and returns a uint64_t.
 * NOTE(review): presumably a bitmask of token-level normalization flags
 * derived from `options` - confirm against the definition.
 */
uint64_t get_normalize_token_options(libpostal_normalize_options_t options);
|
|
/*
 * Takes the normalize options by value and returns a uint64_t.
 * NOTE(review): presumably a bitmask of string-level normalization flags
 * derived from `options` - confirm against the definition.
 */
uint64_t get_normalize_string_options(libpostal_normalize_options_t options);
|
|
|
|
/*
 * NOTE(review): by its name, appends normalized forms of `token` (presumably
 * a span within `str`) to `strings` according to `options` - confirm against
 * the definition. Returns nothing.
 */
void add_normalized_strings_token(cstring_array *strings, char *str, token_t token, libpostal_normalize_options_t options);
|
|
/*
 * NOTE(review): by its name, post-processes `str` according to `options`
 * and appends the result to `strings` - confirm against the definition.
 * Returns nothing.
 */
void add_postprocessed_string(cstring_array *strings, char *str, libpostal_normalize_options_t options);
|
|
|
|
/*
 * Returns a pointer to an address_expansion_array for the given phrase and
 * options.
 * NOTE(review): whether the result can be NULL, and who owns the returned
 * array, is not visible from this header - confirm before freeing.
 */
address_expansion_array *valid_affix_expansions(phrase_t phrase, libpostal_normalize_options_t options);
|
|
|
|
/*
 * NOTE(review): "cat" suggests this concatenates text for `expansion` onto
 * `key`, using `str`, `token` and `phrase` as inputs - confirm against the
 * definition. Returns nothing.
 */
void cat_affix_expansion(char_array *key, char *str, address_expansion_t expansion, token_t token, phrase_t phrase, libpostal_normalize_options_t options);
|
|
/*
 * Takes a prefix phrase and a suffix phrase for `token`, plus a
 * `with_period` flag, and returns a bool.
 * NOTE(review): presumably adds affix expansions to `tree` and returns true
 * when something was added - confirm against the definition.
 */
bool add_affix_expansions(string_tree_t *tree, char *str, char *lang, token_t token, phrase_t prefix, phrase_t suffix, libpostal_normalize_options_t options, bool with_period);
|
|
|
|
/*
 * NOTE(review): presumably looks for prefix/suffix phrases in `token` for
 * language `lang` and adds their expansions to `tree`; the meaning of the
 * bool result is not visible from this header - confirm.
 */
bool expand_affixes(string_tree_t *tree, char *str, char *lang, token_t token, libpostal_normalize_options_t options);
|
|
/*
 * Same parameter list and return type as expand_affixes.
 * NOTE(review): presumably the variant that handles tokens containing a
 * period - confirm against the definition.
 */
bool expand_affixes_period(string_tree_t *tree, char *str, char *lang, token_t token, libpostal_normalize_options_t options);
|
|
/*
 * Unlike the expand_affixes functions, takes no language argument.
 * NOTE(review): by its name, adds either period-affix expansions or the
 * plain token to `tree`; the meaning of the bool result is not visible from
 * this header - confirm.
 */
bool add_period_affixes_or_token(string_tree_t *tree, char *str, token_t token, libpostal_normalize_options_t options);
|
|
|
|
/*
 * Takes the current token, an index `i` and the previous token, and returns
 * a bool.
 * NOTE(review): `i` is presumably the position of `token` in its token
 * array, and the result presumably reports whether an ordinal suffix was
 * normalized into `tree` - confirm against the definition.
 */
bool normalize_ordinal_suffixes(string_tree_t *tree, char *str, char *lang, token_t token, size_t i, token_t prev_token, libpostal_normalize_options_t options);
|
|
|
|
/*
 * Takes a whole token array rather than a single token. Returns nothing.
 * NOTE(review): presumably adds normalized forms of each token of `str` to
 * `tree` - confirm against the definition.
 */
void add_normalized_strings_tokenized(string_tree_t *tree, char *str, token_array *tokens, libpostal_normalize_options_t options);
|
|
|
|
|
|
/*
 * Predicate over a phrase and a 32-bit `address_components` value.
 * NOTE(review): `address_components` is presumably a bitmask of address
 * component flags; "ignorable" semantics are defined elsewhere - confirm.
 */
bool address_phrase_is_ignorable_for_components(phrase_t phrase, uint32_t address_components);
|
|
/*
 * Predicate over a phrase and a 32-bit `address_components` value.
 * NOTE(review): presumably tests whether the phrase may be ignored when it
 * sits at the edge of the input - confirm against the definition.
 */
bool address_phrase_is_edge_ignorable_for_components(phrase_t phrase, uint32_t address_components);
|
|
/*
 * Predicate over a phrase and a 32-bit `address_components` value.
 * NOTE(review): presumably tests whether the phrase can act as a root
 * (non-removable) term for those components - confirm against the
 * definition.
 */
bool address_phrase_is_possible_root_for_components(phrase_t phrase, uint32_t address_components);
|
|
/*
 * Predicate over a phrase and a 32-bit `address_components` value.
 * NOTE(review): "specifier" semantics are defined elsewhere - confirm
 * against the definition.
 */
bool address_phrase_is_specifier_for_components(phrase_t phrase, uint32_t address_components);
|
|
/*
 * Predicate over a phrase and a 32-bit `address_components` value.
 * NOTE(review): presumably tests whether the phrase applies to any of the
 * requested components - confirm against the definition.
 */
bool address_phrase_is_valid_for_components(phrase_t phrase, uint32_t address_components);
|
|
|
|
|
|
/*
 * Selects how phrases are treated during expansion; passed as the
 * `phrase_option` argument of expand_address_phrase_option.
 *
 * The enumerators carry no explicit values, so they are numbered 0, 1, 2 in
 * declaration order.
 */
typedef enum {
    EXPAND_PHRASES, /* NOTE(review): presumably replace phrases with their expansions - confirm */
    KEEP_PHRASES,   /* NOTE(review): presumably leave phrases as written - confirm */
    DELETE_PHRASES  /* NOTE(review): presumably drop phrases from the output - confirm */
} expansion_phrase_option_t;
|
|
|
|
/*
 * Takes an input string, normalize options and a size_t out-pointer, and
 * returns a cstring_array pointer.
 * NOTE(review): `*n` presumably receives the number of expansions produced;
 * NULL-return conditions and ownership of the result are not visible from
 * this header - confirm.
 */
cstring_array *expand_address(char *input, libpostal_normalize_options_t options, size_t *n);
|
|
/*
 * Same parameters as expand_address plus an expansion_phrase_option_t
 * selecting how phrases are treated.
 * NOTE(review): `*n` presumably receives the number of expansions produced;
 * ownership of the result is not visible from this header - confirm.
 */
cstring_array *expand_address_phrase_option(char *input, libpostal_normalize_options_t options, size_t *n, expansion_phrase_option_t phrase_option);
|
|
/*
 * Same parameter list and return type as expand_address.
 * NOTE(review): presumably produces "root" expansions with removable
 * phrases dropped - confirm against the definition.
 */
cstring_array *expand_address_root(char *input, libpostal_normalize_options_t options, size_t *n);
|
|
/*
 * Takes an array of C strings and its element count `n`. Returns nothing.
 * NOTE(review): presumably frees each of the `n` strings and then the array
 * itself - confirm against the definition before relying on it.
 */
void expansion_array_destroy(char **expansions, size_t n);
|
|
|
|
#endif
|