[expand] moving expand into its own module so that its internal methods can be exposed; libpostal.c now calls into the new module
This commit is contained in:
@@ -12,7 +12,7 @@ DEFAULT_INCLUDES = -I.. -I/usr/local/include
|
||||
CFLAGS =
|
||||
|
||||
lib_LTLIBRARIES = libpostal.la
|
||||
libpostal_la_SOURCES = strndup.c libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c
|
||||
libpostal_la_SOURCES = strndup.c libpostal.c expand.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c
|
||||
libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS)
|
||||
libpostal_la_CFLAGS = $(CFLAGS_O2) -D LIBPOSTAL_EXPORTS
|
||||
libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ -no-undefined
|
||||
|
||||
1086
src/expand.c
Normal file
1086
src/expand.c
Normal file
File diff suppressed because it is too large
Load Diff
52
src/expand.h
Normal file
52
src/expand.h
Normal file
@@ -0,0 +1,52 @@
|
||||
#ifndef EXPAND_H
|
||||
#define EXPAND_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "libpostal.h"
|
||||
|
||||
#include "address_dictionary.h"
|
||||
#include "collections.h"
|
||||
#include "klib/khash.h"
|
||||
#include "klib/ksort.h"
|
||||
#include "trie_search.h"
|
||||
|
||||
/*
 * Pairs a phrase_t with a language string.
 * NOTE(review): ownership/lifetime of `language` is not visible in this
 * header -- confirm against the code that fills these in (expand.c).
 */
typedef struct phrase_language {
|
||||
char *language;
|
||||
phrase_t phrase;
|
||||
} phrase_language_t;
|
||||
|
||||
VECTOR_INIT(phrase_language_array, phrase_language_t)
|
||||
|
||||
/*
 * "Less than" predicate over phrase_language_t values: a orders before b
 * when a's phrase has a smaller start, or when both phrases have the same
 * start and a's phrase has the larger len (longer phrase first on ties).
 * Both arguments are expanded more than once, so pass side-effect-free
 * expressions.
 */
#define ks_lt_phrase_language(a, b) ((a).phrase.start < (b).phrase.start || ((a).phrase.start == (b).phrase.start && (a).phrase.len > (b).phrase.len))
|
||||
|
||||
/*
 * Instantiates the ksort routines for phrase_language_t under the name
 * phrase_language_array, using the predicate above as the comparison.
 * NOTE(review): this expands in every translation unit that includes this
 * header -- confirm the linkage of the generated functions in klib/ksort.h.
 */
KSORT_INIT(phrase_language_array, phrase_language_t, ks_lt_phrase_language)
|
||||
|
||||
uint64_t get_normalize_token_options(libpostal_normalize_options_t options);
|
||||
uint64_t get_normalize_string_options(libpostal_normalize_options_t options);
|
||||
|
||||
void add_normalized_strings_token(cstring_array *strings, char *str, token_t token, libpostal_normalize_options_t options);
|
||||
void add_postprocessed_string(cstring_array *strings, char *str, libpostal_normalize_options_t options);
|
||||
|
||||
address_expansion_array *valid_affix_expansions(phrase_t phrase, libpostal_normalize_options_t options);
|
||||
|
||||
void cat_affix_expansion(char_array *key, char *str, address_expansion_t expansion, token_t token, phrase_t phrase, libpostal_normalize_options_t options);
|
||||
bool add_affix_expansions(string_tree_t *tree, char *str, char *lang, token_t token, phrase_t prefix, phrase_t suffix, libpostal_normalize_options_t options, bool with_period);
|
||||
|
||||
bool expand_affixes(string_tree_t *tree, char *str, char *lang, token_t token, libpostal_normalize_options_t options);
|
||||
bool expand_affixes_period(string_tree_t *tree, char *str, char *lang, token_t token, libpostal_normalize_options_t options);
|
||||
bool add_period_affixes_or_token(string_tree_t *tree, char *str, token_t token, libpostal_normalize_options_t options);
|
||||
|
||||
string_tree_t *add_string_alternatives(char *str, libpostal_normalize_options_t options);
|
||||
|
||||
bool normalize_ordinal_suffixes(string_tree_t *tree, char *str, char *lang, token_t token, size_t i, token_t prev_token, libpostal_normalize_options_t options);
|
||||
|
||||
void add_normalized_strings_tokenized(string_tree_t *tree, char *str, token_array *tokens, libpostal_normalize_options_t options);
|
||||
|
||||
void expand_alternative(cstring_array *strings, khash_t(str_set) *unique_strings, char *str, libpostal_normalize_options_t options);
|
||||
char **expand_address(char *input, libpostal_normalize_options_t options, size_t *n);
|
||||
char **expand_address_root(char *input, libpostal_normalize_options_t options, size_t *n);
|
||||
void expansion_array_destroy(char **expansions, size_t n);
|
||||
|
||||
#endif
|
||||
1103
src/libpostal.c
1103
src/libpostal.c
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user