Merge branch 'master' into parser-data
This commit is contained in:
@@ -1,4 +1,7 @@
|
||||
language: c
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
env:
|
||||
global:
|
||||
- secure: "bHrAu46oecEj3gjamT+XWXtf2J0ZJCFa8tUdgM4evscaJiiwv1TtsGXyhIj/ai7DlRIPVJUtBUy6uoGGjr6GT43zTrzSxYAOMdVXZYsnTDcdL1/0dbwcIK6/u0EI377s1buGIxG1fHveWKXuXwJWDAw4KS+5HU88a42+zMbhKe4="
|
||||
|
||||
1
resources/dictionaries/it/no_number.txt
Normal file
1
resources/dictionaries/it/no_number.txt
Normal file
@@ -0,0 +1 @@
|
||||
senza numero civico|snc|s.n.c.|s n c
|
||||
@@ -15,12 +15,12 @@ corso|c.so|cso
|
||||
corte|c.te|cte
|
||||
cortile|c.ile|ctile
|
||||
ferrata|f.rata|frata
|
||||
fondamenta|f.ta|fta
|
||||
fondamenta|f.ta|fta|fon
|
||||
forca|f.ca|fca
|
||||
forcella|forc.la|forcla
|
||||
frazione|fraz.e|fraze
|
||||
frazione|fraz.e|fraze|frz
|
||||
frazioni|fraz.i|frazi
|
||||
galleria|gall.a|galla
|
||||
galleria|gall.a|galla|gal
|
||||
gallerie|gall.e|galle
|
||||
genna|g.na|gna
|
||||
genne|g.ne|gne
|
||||
@@ -32,26 +32,35 @@ piazzale|p.le|ple
|
||||
piazzetta|p.ta|pta
|
||||
ponte|p.te|pte
|
||||
porta|p.ta|pta
|
||||
porto|pto
|
||||
provinciale
|
||||
regione|reg
|
||||
salizada|s.da|sda
|
||||
stazione|staz
|
||||
strada|s|str.a|stra|str
|
||||
strada comunale|sc|s.c.|s.c|s c
|
||||
strada provinciale|sp|s.p.|s.p|s p|str provinciale|s provinciale|str.prov|str prov|s.prov|s prov
|
||||
strada comunale|sc|s.c.|s.c|s c|stc
|
||||
strada provinciale|sp|s.p.|s.p|s p|str provinciale|s provinciale|str.prov|str prov|s.prov|s prov|stp
|
||||
strada regionale|sr|s.r.|s.r|s r|str.reg|str reg|str r|str.r|str regionale|s regionale
|
||||
strada statale|ss|s.s.|s.s|s s|str.s|str s|str.st|str st|str statale|s statale
|
||||
strada vicinale|sv|s.v.|s.v|s v|str.vic|str vic|str vicinale|s vicinale
|
||||
strada statale|ss|s.s.|s.s|s s|str.s|str s|str.st|str st|str statale|s statale|sts
|
||||
strada vicinale|sv|s.v.|s.v|s v|str.vic|str vic|str vicinale|s vicinale|svc
|
||||
stretta|str.ta|strta
|
||||
strette|str.te|strte
|
||||
stretti|str.ti|strti
|
||||
stretto|str.to|strto
|
||||
superstrada
|
||||
tangenziale
|
||||
traversa
|
||||
traversa|tra
|
||||
traversa nuova|tnu
|
||||
traversa privata|tpr
|
||||
traversa strada statale|tra s.s.
|
||||
traversa vicinale|tvc
|
||||
via|v
|
||||
via comunale|vc|v.c.|v.c|v c
|
||||
via provinciale|vp|v.p.|v.p|v p|v provinciale
|
||||
via statale|vs|v.s.|v.s|v s
|
||||
via vicinale|vvc
|
||||
viale|v.le|vle|vl
|
||||
vicolo|v.lo|vlo
|
||||
vico
|
||||
vico|vic
|
||||
vocabolo|voc
|
||||
zona industriale|zin
|
||||
|
||||
@@ -10,4 +10,4 @@ giardino|g.no|gno
|
||||
numero|nº|no|n.º|n.o|n|#
|
||||
officina|off.a|off
|
||||
palazzo|pal.zo|palzo
|
||||
piano|pº|p.º|p.o|p.nº|p.no|pnº|pno
|
||||
piano|pº|p.º|p.o|p.nº|p.no|pnº|pno
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
|
||||
#define ADDRESS_DICTIONARY_SIGNATURE 0xBABABABA
|
||||
|
||||
#define ADDRESS_DICTIONARY_SETUP_ERROR "address_dictionary module not setup, call libpostal_setup() or address_dictionary_module_setup()\n"
|
||||
|
||||
address_dictionary_t *address_dict = NULL;
|
||||
|
||||
address_dictionary_t *get_address_dictionary(void) {
|
||||
@@ -12,28 +14,47 @@ address_dictionary_t *get_address_dictionary(void) {
|
||||
}
|
||||
|
||||
address_expansion_array *address_dictionary_get_expansions(char *key) {
|
||||
if (address_dict == NULL || address_dict->expansions == NULL) return NULL;
|
||||
if (address_dict == NULL || address_dict->expansions == NULL) {
|
||||
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||
return NULL;
|
||||
}
|
||||
khiter_t k = kh_get(str_expansions, address_dict->expansions, key);
|
||||
return k != kh_end(address_dict->expansions) ? kh_value(address_dict->expansions, k) : NULL;
|
||||
}
|
||||
|
||||
int32_t address_dictionary_next_canonical_index(void) {
|
||||
if (address_dict == NULL || address_dict->canonical == NULL) return -1;
|
||||
if (address_dict == NULL || address_dict->canonical == NULL) {
|
||||
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||
return -1;
|
||||
}
|
||||
return (int32_t)cstring_array_num_strings(address_dict->canonical);
|
||||
}
|
||||
|
||||
bool address_dictionary_add_canonical(char *canonical) {
|
||||
if (address_dict == NULL || address_dict->canonical == NULL) return false;
|
||||
if (address_dict == NULL || address_dict->canonical == NULL) {
|
||||
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||
return false;
|
||||
}
|
||||
cstring_array_add_string(address_dict->canonical, canonical);
|
||||
return true;
|
||||
}
|
||||
|
||||
char *address_dictionary_get_canonical(uint32_t index) {
|
||||
if (address_dict == NULL || address_dict->canonical == NULL || index > cstring_array_num_strings(address_dict->canonical)) return NULL;
|
||||
if (address_dict == NULL || address_dict->canonical == NULL) {
|
||||
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||
return NULL;
|
||||
} else if (index > cstring_array_num_strings(address_dict->canonical)) {
|
||||
return NULL;
|
||||
}
|
||||
return cstring_array_get_string(address_dict->canonical, index);
|
||||
}
|
||||
|
||||
bool address_dictionary_add_expansion(char *name, char *language, address_expansion_t expansion) {
|
||||
if (address_dict == NULL || address_dict->expansions == NULL) {
|
||||
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (name == NULL) return false;
|
||||
|
||||
int ret;
|
||||
@@ -162,6 +183,10 @@ static trie_prefix_result_t get_language_prefix(char *lang) {
|
||||
|
||||
bool search_address_dictionaries_with_phrases(char *str, char *lang, phrase_array **phrases) {
|
||||
if (str == NULL) return false;
|
||||
if (address_dict == NULL) {
|
||||
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||
return false;
|
||||
}
|
||||
|
||||
trie_prefix_result_t prefix = get_language_prefix(lang);
|
||||
|
||||
@@ -185,6 +210,10 @@ phrase_array *search_address_dictionaries(char *str, char *lang) {
|
||||
|
||||
bool search_address_dictionaries_tokens_with_phrases(char *str, token_array *tokens, char *lang, phrase_array **phrases) {
|
||||
if (str == NULL) return false;
|
||||
if (address_dict == NULL) {
|
||||
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||
return false;
|
||||
}
|
||||
|
||||
trie_prefix_result_t prefix = get_language_prefix(lang);
|
||||
|
||||
@@ -208,6 +237,10 @@ phrase_array *search_address_dictionaries_tokens(char *str, token_array *tokens,
|
||||
|
||||
phrase_t search_address_dictionaries_prefix(char *str, size_t len, char *lang) {
|
||||
if (str == NULL) return NULL_PHRASE;
|
||||
if (address_dict == NULL) {
|
||||
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||
return NULL_PHRASE;
|
||||
}
|
||||
|
||||
trie_prefix_result_t prefix = get_language_prefix(lang);
|
||||
|
||||
@@ -221,6 +254,10 @@ phrase_t search_address_dictionaries_prefix(char *str, size_t len, char *lang) {
|
||||
|
||||
phrase_t search_address_dictionaries_suffix(char *str, size_t len, char *lang) {
|
||||
if (str == NULL) return NULL_PHRASE;
|
||||
if (address_dict == NULL) {
|
||||
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||
return NULL_PHRASE;
|
||||
}
|
||||
|
||||
trie_prefix_result_t prefix = get_language_prefix(lang);
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -803,13 +803,18 @@ address_parser_response_t *address_parser_response_new(void) {
|
||||
address_parser_response_t *address_parser_parse(char *address, char *language, char *country, address_parser_context_t *context) {
|
||||
if (address == NULL || context == NULL) return NULL;
|
||||
|
||||
address_parser_t *parser = get_address_parser();
|
||||
if (parser == NULL) {
|
||||
log_error("parser is not setup, call libpostal_setup_address_parser()\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char *normalized = address_parser_normalize_string(address);
|
||||
bool is_normalized = normalized != NULL;
|
||||
if (!is_normalized) {
|
||||
normalized = address;
|
||||
}
|
||||
|
||||
address_parser_t *parser = get_address_parser();
|
||||
averaged_perceptron_t *model = parser->model;
|
||||
|
||||
token_array *tokens = tokenize(normalized);
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
|
||||
#define LANGUAGE_CLASSIFIER_SIGNATURE 0xCCCCCCCC
|
||||
|
||||
#define LANGUAGE_CLASSIFIER_SETUP_ERROR "language_classifier not loaded, run libpostal_setup_language_classifier()\n"
|
||||
|
||||
#define MIN_PROB (0.05 - DBL_EPSILON)
|
||||
|
||||
static language_classifier_t *language_classifier = NULL;
|
||||
@@ -58,7 +60,7 @@ language_classifier_response_t *classify_languages(char *address) {
|
||||
language_classifier_t *classifier = get_language_classifier();
|
||||
|
||||
if (classifier == NULL) {
|
||||
log_error("classifier NULL\n");
|
||||
log_error(LANGUAGE_CLASSIFIER_SETUP_ERROR);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
@@ -158,8 +158,6 @@ string_tree_t *normalize_string_languages(char *str, uint64_t options, size_t nu
|
||||
ptr += script_len;
|
||||
}
|
||||
|
||||
char_array *normalized = char_array_new_size(len);
|
||||
|
||||
add_latin_alternatives(tree, str, len, options);
|
||||
|
||||
size_t non_latin_scripts = kh_size(scripts);
|
||||
|
||||
12
src/numex.c
12
src/numex.c
@@ -3,8 +3,12 @@
|
||||
#include "numex.h"
|
||||
#include "file_utils.h"
|
||||
|
||||
#include "log/log.h"
|
||||
|
||||
#define NUMEX_TABLE_SIGNATURE 0xBBBBBBBB
|
||||
|
||||
#define NUMEX_SETUP_ERROR "numex module not setup, call libpostal_setup() or numex_module_setup()\n"
|
||||
|
||||
#define SEPARATOR_TOKENS "-"
|
||||
|
||||
#define FLOOR_LOG_BASE(num, base) floor((log((float)num) / log((float)base)) + FLT_EPSILON)
|
||||
@@ -118,6 +122,7 @@ void numex_language_destroy(numex_language_t *self) {
|
||||
|
||||
bool numex_table_add_language(numex_language_t *language) {
|
||||
if (numex_table == NULL) {
|
||||
log_error(NUMEX_SETUP_ERROR);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -130,6 +135,7 @@ bool numex_table_add_language(numex_language_t *language) {
|
||||
|
||||
numex_language_t *get_numex_language(char *name) {
|
||||
if (numex_table == NULL) {
|
||||
log_error(NUMEX_SETUP_ERROR);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -616,7 +622,10 @@ static inline numex_rule_t get_numex_rule(size_t i) {
|
||||
}
|
||||
|
||||
numex_result_array *convert_numeric_expressions(char *str, char *lang) {
|
||||
if (numex_table == NULL) return NULL;
|
||||
if (numex_table == NULL) {
|
||||
log_error(NUMEX_SETUP_ERROR);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
trie_t *trie = numex_table->trie;
|
||||
if (trie == NULL) return NULL;
|
||||
@@ -897,6 +906,7 @@ numex_result_array *convert_numeric_expressions(char *str, char *lang) {
|
||||
|
||||
char *get_ordinal_suffix(char *numeric_string, char *lang, numex_result_t result) {
|
||||
if (numex_table == NULL) {
|
||||
log_error(NUMEX_SETUP_ERROR);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
#include "transliterate.h"
|
||||
#include "file_utils.h"
|
||||
|
||||
#include "log/log.h"
|
||||
|
||||
#define TRANSLITERATION_TABLE_SIGNATURE 0xAAAAAAAA
|
||||
|
||||
#define NFD "NFD"
|
||||
@@ -664,7 +666,14 @@ static char *replace_groups(trie_t *trie, char *str, char *replacement, group_ca
|
||||
}
|
||||
|
||||
char *transliterate(char *trans_name, char *str, size_t len) {
|
||||
if (trans_name == NULL || str == NULL || trans_table == NULL) return NULL;
|
||||
if (trans_name == NULL || str == NULL) return NULL;
|
||||
|
||||
transliteration_table_t *trans_table = get_transliteration_table();
|
||||
|
||||
if (trans_table == NULL) {
|
||||
log_error("transliteration table is NULL. Call libpostal_setup() or transliteration_module_setup()\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
trie_t *trie = trans_table->trie;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user