Merge branch 'master' into parser-data
This commit is contained in:
@@ -1,4 +1,7 @@
|
|||||||
language: c
|
language: c
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- master
|
||||||
env:
|
env:
|
||||||
global:
|
global:
|
||||||
- secure: "bHrAu46oecEj3gjamT+XWXtf2J0ZJCFa8tUdgM4evscaJiiwv1TtsGXyhIj/ai7DlRIPVJUtBUy6uoGGjr6GT43zTrzSxYAOMdVXZYsnTDcdL1/0dbwcIK6/u0EI377s1buGIxG1fHveWKXuXwJWDAw4KS+5HU88a42+zMbhKe4="
|
- secure: "bHrAu46oecEj3gjamT+XWXtf2J0ZJCFa8tUdgM4evscaJiiwv1TtsGXyhIj/ai7DlRIPVJUtBUy6uoGGjr6GT43zTrzSxYAOMdVXZYsnTDcdL1/0dbwcIK6/u0EI377s1buGIxG1fHveWKXuXwJWDAw4KS+5HU88a42+zMbhKe4="
|
||||||
|
|||||||
1
resources/dictionaries/it/no_number.txt
Normal file
1
resources/dictionaries/it/no_number.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
senza numero civico|snc|s.n.c.|s n c
|
||||||
@@ -15,12 +15,12 @@ corso|c.so|cso
|
|||||||
corte|c.te|cte
|
corte|c.te|cte
|
||||||
cortile|c.ile|ctile
|
cortile|c.ile|ctile
|
||||||
ferrata|f.rata|frata
|
ferrata|f.rata|frata
|
||||||
fondamenta|f.ta|fta
|
fondamenta|f.ta|fta|fon
|
||||||
forca|f.ca|fca
|
forca|f.ca|fca
|
||||||
forcella|forc.la|forcla
|
forcella|forc.la|forcla
|
||||||
frazione|fraz.e|fraze
|
frazione|fraz.e|fraze|frz
|
||||||
frazioni|fraz.i|frazi
|
frazioni|fraz.i|frazi
|
||||||
galleria|gall.a|galla
|
galleria|gall.a|galla|gal
|
||||||
gallerie|gall.e|galle
|
gallerie|gall.e|galle
|
||||||
genna|g.na|gna
|
genna|g.na|gna
|
||||||
genne|g.ne|gne
|
genne|g.ne|gne
|
||||||
@@ -32,26 +32,35 @@ piazzale|p.le|ple
|
|||||||
piazzetta|p.ta|pta
|
piazzetta|p.ta|pta
|
||||||
ponte|p.te|pte
|
ponte|p.te|pte
|
||||||
porta|p.ta|pta
|
porta|p.ta|pta
|
||||||
|
porto|pto
|
||||||
provinciale
|
provinciale
|
||||||
|
regione|reg
|
||||||
salizada|s.da|sda
|
salizada|s.da|sda
|
||||||
stazione|staz
|
stazione|staz
|
||||||
strada|s|str.a|stra|str
|
strada|s|str.a|stra|str
|
||||||
strada comunale|sc|s.c.|s.c|s c
|
strada comunale|sc|s.c.|s.c|s c|stc
|
||||||
strada provinciale|sp|s.p.|s.p|s p|str provinciale|s provinciale|str.prov|str prov|s.prov|s prov
|
strada provinciale|sp|s.p.|s.p|s p|str provinciale|s provinciale|str.prov|str prov|s.prov|s prov|stp
|
||||||
strada regionale|sr|s.r.|s.r|s r|str.reg|str reg|str r|str.r|str reginale|s reginale
|
strada regionale|sr|s.r.|s.r|s r|str.reg|str reg|str r|str.r|str reginale|s reginale
|
||||||
strada statale|ss|s.s.|s.s|s s|str.s|str s|str.st|str st|str statale|s statale
|
strada statale|ss|s.s.|s.s|s s|str.s|str s|str.st|str st|str statale|s statale|sts
|
||||||
strada vicinale|sv|s.v.|s.v|s v|str.vic|str vic|str vicinale|s vicinale
|
strada vicinale|sv|s.v.|s.v|s v|str.vic|str vic|str vicinale|s vicinale|svc
|
||||||
stretta|str.ta|strta
|
stretta|str.ta|strta
|
||||||
strette|str.te|strte
|
strette|str.te|strte
|
||||||
stretti|str.ti|strti
|
stretti|str.ti|strti
|
||||||
stretto|str.to|strto
|
stretto|str.to|strto
|
||||||
superstrada
|
superstrada
|
||||||
tangenziale
|
tangenziale
|
||||||
traversa
|
traversa|tra
|
||||||
|
traversa nuova|tnu
|
||||||
|
traversa privata|tpr
|
||||||
|
traversa strada statale|tra s.s.
|
||||||
|
traversa vicinale|tvc
|
||||||
via|v
|
via|v
|
||||||
via comunale|vc|v.c.|v.c|v c
|
via comunale|vc|v.c.|v.c|v c
|
||||||
via provinciale|vp|v.p.|v.p|v p|v provinciale
|
via provinciale|vp|v.p.|v.p|v p|v provinciale
|
||||||
via statale|vs|v.s.|v.s|v s
|
via statale|vs|v.s.|v.s|v s
|
||||||
|
via vicinale|vvc
|
||||||
viale|v.le|vle|vl
|
viale|v.le|vle|vl
|
||||||
vicolo|v.lo|vlo
|
vicolo|v.lo|vlo
|
||||||
vico
|
vico|vic
|
||||||
|
vocabolo|voc
|
||||||
|
zona industriale|zin
|
||||||
|
|||||||
@@ -10,4 +10,4 @@ giardino|g.no|gno
|
|||||||
numero|nº|no|n.º|n.o|n|#
|
numero|nº|no|n.º|n.o|n|#
|
||||||
officina|off.a|off
|
officina|off.a|off
|
||||||
palazzo|pal.zo|palzo
|
palazzo|pal.zo|palzo
|
||||||
piano|pº|p.º|p.o|p.nº|p.no|pnº|pno
|
piano|pº|p.º|p.o|p.nº|p.no|pnº|pno
|
||||||
|
|||||||
@@ -5,6 +5,8 @@
|
|||||||
|
|
||||||
#define ADDRESS_DICTIONARY_SIGNATURE 0xBABABABA
|
#define ADDRESS_DICTIONARY_SIGNATURE 0xBABABABA
|
||||||
|
|
||||||
|
#define ADDRESS_DICTIONARY_SETUP_ERROR "address_dictionary module not setup, call libpostal_setup() or address_dictionary_module_setup()\n"
|
||||||
|
|
||||||
address_dictionary_t *address_dict = NULL;
|
address_dictionary_t *address_dict = NULL;
|
||||||
|
|
||||||
address_dictionary_t *get_address_dictionary(void) {
|
address_dictionary_t *get_address_dictionary(void) {
|
||||||
@@ -12,28 +14,47 @@ address_dictionary_t *get_address_dictionary(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
address_expansion_array *address_dictionary_get_expansions(char *key) {
|
address_expansion_array *address_dictionary_get_expansions(char *key) {
|
||||||
if (address_dict == NULL || address_dict->expansions == NULL) return NULL;
|
if (address_dict == NULL || address_dict->expansions == NULL) {
|
||||||
|
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
khiter_t k = kh_get(str_expansions, address_dict->expansions, key);
|
khiter_t k = kh_get(str_expansions, address_dict->expansions, key);
|
||||||
return k != kh_end(address_dict->expansions) ? kh_value(address_dict->expansions, k) : NULL;
|
return k != kh_end(address_dict->expansions) ? kh_value(address_dict->expansions, k) : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t address_dictionary_next_canonical_index(void) {
|
int32_t address_dictionary_next_canonical_index(void) {
|
||||||
if (address_dict == NULL || address_dict->canonical == NULL) return -1;
|
if (address_dict == NULL || address_dict->canonical == NULL) {
|
||||||
|
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
return (int32_t)cstring_array_num_strings(address_dict->canonical);
|
return (int32_t)cstring_array_num_strings(address_dict->canonical);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool address_dictionary_add_canonical(char *canonical) {
|
bool address_dictionary_add_canonical(char *canonical) {
|
||||||
if (address_dict == NULL || address_dict->canonical == NULL) return false;
|
if (address_dict == NULL || address_dict->canonical == NULL) {
|
||||||
|
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
cstring_array_add_string(address_dict->canonical, canonical);
|
cstring_array_add_string(address_dict->canonical, canonical);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *address_dictionary_get_canonical(uint32_t index) {
|
char *address_dictionary_get_canonical(uint32_t index) {
|
||||||
if (address_dict == NULL || address_dict->canonical == NULL || index > cstring_array_num_strings(address_dict->canonical)) return NULL;
|
if (address_dict == NULL || address_dict->canonical == NULL) {
|
||||||
|
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||||
|
return NULL;
|
||||||
|
} else if (index > cstring_array_num_strings(address_dict->canonical)) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
return cstring_array_get_string(address_dict->canonical, index);
|
return cstring_array_get_string(address_dict->canonical, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool address_dictionary_add_expansion(char *name, char *language, address_expansion_t expansion) {
|
bool address_dictionary_add_expansion(char *name, char *language, address_expansion_t expansion) {
|
||||||
|
if (address_dict == NULL || address_dict->expansions == NULL) {
|
||||||
|
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (name == NULL) return false;
|
if (name == NULL) return false;
|
||||||
|
|
||||||
int ret;
|
int ret;
|
||||||
@@ -162,6 +183,10 @@ static trie_prefix_result_t get_language_prefix(char *lang) {
|
|||||||
|
|
||||||
bool search_address_dictionaries_with_phrases(char *str, char *lang, phrase_array **phrases) {
|
bool search_address_dictionaries_with_phrases(char *str, char *lang, phrase_array **phrases) {
|
||||||
if (str == NULL) return false;
|
if (str == NULL) return false;
|
||||||
|
if (address_dict == NULL) {
|
||||||
|
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
trie_prefix_result_t prefix = get_language_prefix(lang);
|
trie_prefix_result_t prefix = get_language_prefix(lang);
|
||||||
|
|
||||||
@@ -185,6 +210,10 @@ phrase_array *search_address_dictionaries(char *str, char *lang) {
|
|||||||
|
|
||||||
bool search_address_dictionaries_tokens_with_phrases(char *str, token_array *tokens, char *lang, phrase_array **phrases) {
|
bool search_address_dictionaries_tokens_with_phrases(char *str, token_array *tokens, char *lang, phrase_array **phrases) {
|
||||||
if (str == NULL) return false;
|
if (str == NULL) return false;
|
||||||
|
if (address_dict == NULL) {
|
||||||
|
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
trie_prefix_result_t prefix = get_language_prefix(lang);
|
trie_prefix_result_t prefix = get_language_prefix(lang);
|
||||||
|
|
||||||
@@ -208,6 +237,10 @@ phrase_array *search_address_dictionaries_tokens(char *str, token_array *tokens,
|
|||||||
|
|
||||||
phrase_t search_address_dictionaries_prefix(char *str, size_t len, char *lang) {
|
phrase_t search_address_dictionaries_prefix(char *str, size_t len, char *lang) {
|
||||||
if (str == NULL) return NULL_PHRASE;
|
if (str == NULL) return NULL_PHRASE;
|
||||||
|
if (address_dict == NULL) {
|
||||||
|
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||||
|
return NULL_PHRASE;
|
||||||
|
}
|
||||||
|
|
||||||
trie_prefix_result_t prefix = get_language_prefix(lang);
|
trie_prefix_result_t prefix = get_language_prefix(lang);
|
||||||
|
|
||||||
@@ -221,6 +254,10 @@ phrase_t search_address_dictionaries_prefix(char *str, size_t len, char *lang) {
|
|||||||
|
|
||||||
phrase_t search_address_dictionaries_suffix(char *str, size_t len, char *lang) {
|
phrase_t search_address_dictionaries_suffix(char *str, size_t len, char *lang) {
|
||||||
if (str == NULL) return NULL_PHRASE;
|
if (str == NULL) return NULL_PHRASE;
|
||||||
|
if (address_dict == NULL) {
|
||||||
|
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||||
|
return NULL_PHRASE;
|
||||||
|
}
|
||||||
|
|
||||||
trie_prefix_result_t prefix = get_language_prefix(lang);
|
trie_prefix_result_t prefix = get_language_prefix(lang);
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -803,13 +803,18 @@ address_parser_response_t *address_parser_response_new(void) {
|
|||||||
address_parser_response_t *address_parser_parse(char *address, char *language, char *country, address_parser_context_t *context) {
|
address_parser_response_t *address_parser_parse(char *address, char *language, char *country, address_parser_context_t *context) {
|
||||||
if (address == NULL || context == NULL) return NULL;
|
if (address == NULL || context == NULL) return NULL;
|
||||||
|
|
||||||
|
address_parser_t *parser = get_address_parser();
|
||||||
|
if (parser == NULL) {
|
||||||
|
log_error("parser is not setup, call libpostal_setup_address_parser()\n");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
char *normalized = address_parser_normalize_string(address);
|
char *normalized = address_parser_normalize_string(address);
|
||||||
bool is_normalized = normalized != NULL;
|
bool is_normalized = normalized != NULL;
|
||||||
if (!is_normalized) {
|
if (!is_normalized) {
|
||||||
normalized = address;
|
normalized = address;
|
||||||
}
|
}
|
||||||
|
|
||||||
address_parser_t *parser = get_address_parser();
|
|
||||||
averaged_perceptron_t *model = parser->model;
|
averaged_perceptron_t *model = parser->model;
|
||||||
|
|
||||||
token_array *tokens = tokenize(normalized);
|
token_array *tokens = tokenize(normalized);
|
||||||
|
|||||||
@@ -10,6 +10,8 @@
|
|||||||
|
|
||||||
#define LANGUAGE_CLASSIFIER_SIGNATURE 0xCCCCCCCC
|
#define LANGUAGE_CLASSIFIER_SIGNATURE 0xCCCCCCCC
|
||||||
|
|
||||||
|
#define LANGUAGE_CLASSIFIER_SETUP_ERROR "language_classifier not loaded, run libpostal_setup_language_classifier()\n"
|
||||||
|
|
||||||
#define MIN_PROB (0.05 - DBL_EPSILON)
|
#define MIN_PROB (0.05 - DBL_EPSILON)
|
||||||
|
|
||||||
static language_classifier_t *language_classifier = NULL;
|
static language_classifier_t *language_classifier = NULL;
|
||||||
@@ -58,7 +60,7 @@ language_classifier_response_t *classify_languages(char *address) {
|
|||||||
language_classifier_t *classifier = get_language_classifier();
|
language_classifier_t *classifier = get_language_classifier();
|
||||||
|
|
||||||
if (classifier == NULL) {
|
if (classifier == NULL) {
|
||||||
log_error("classifier NULL\n");
|
log_error(LANGUAGE_CLASSIFIER_SETUP_ERROR);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -158,8 +158,6 @@ string_tree_t *normalize_string_languages(char *str, uint64_t options, size_t nu
|
|||||||
ptr += script_len;
|
ptr += script_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
char_array *normalized = char_array_new_size(len);
|
|
||||||
|
|
||||||
add_latin_alternatives(tree, str, len, options);
|
add_latin_alternatives(tree, str, len, options);
|
||||||
|
|
||||||
size_t non_latin_scripts = kh_size(scripts);
|
size_t non_latin_scripts = kh_size(scripts);
|
||||||
|
|||||||
12
src/numex.c
12
src/numex.c
@@ -3,8 +3,12 @@
|
|||||||
#include "numex.h"
|
#include "numex.h"
|
||||||
#include "file_utils.h"
|
#include "file_utils.h"
|
||||||
|
|
||||||
|
#include "log/log.h"
|
||||||
|
|
||||||
#define NUMEX_TABLE_SIGNATURE 0xBBBBBBBB
|
#define NUMEX_TABLE_SIGNATURE 0xBBBBBBBB
|
||||||
|
|
||||||
|
#define NUMEX_SETUP_ERROR "numex module not setup, call libpostal_setup() or numex_module_setup()\n"
|
||||||
|
|
||||||
#define SEPARATOR_TOKENS "-"
|
#define SEPARATOR_TOKENS "-"
|
||||||
|
|
||||||
#define FLOOR_LOG_BASE(num, base) floor((log((float)num) / log((float)base)) + FLT_EPSILON)
|
#define FLOOR_LOG_BASE(num, base) floor((log((float)num) / log((float)base)) + FLT_EPSILON)
|
||||||
@@ -118,6 +122,7 @@ void numex_language_destroy(numex_language_t *self) {
|
|||||||
|
|
||||||
bool numex_table_add_language(numex_language_t *language) {
|
bool numex_table_add_language(numex_language_t *language) {
|
||||||
if (numex_table == NULL) {
|
if (numex_table == NULL) {
|
||||||
|
log_error(NUMEX_SETUP_ERROR);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -130,6 +135,7 @@ bool numex_table_add_language(numex_language_t *language) {
|
|||||||
|
|
||||||
numex_language_t *get_numex_language(char *name) {
|
numex_language_t *get_numex_language(char *name) {
|
||||||
if (numex_table == NULL) {
|
if (numex_table == NULL) {
|
||||||
|
log_error(NUMEX_SETUP_ERROR);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -616,7 +622,10 @@ static inline numex_rule_t get_numex_rule(size_t i) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
numex_result_array *convert_numeric_expressions(char *str, char *lang) {
|
numex_result_array *convert_numeric_expressions(char *str, char *lang) {
|
||||||
if (numex_table == NULL) return NULL;
|
if (numex_table == NULL) {
|
||||||
|
log_error(NUMEX_SETUP_ERROR);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
trie_t *trie = numex_table->trie;
|
trie_t *trie = numex_table->trie;
|
||||||
if (trie == NULL) return NULL;
|
if (trie == NULL) return NULL;
|
||||||
@@ -897,6 +906,7 @@ numex_result_array *convert_numeric_expressions(char *str, char *lang) {
|
|||||||
|
|
||||||
char *get_ordinal_suffix(char *numeric_string, char *lang, numex_result_t result) {
|
char *get_ordinal_suffix(char *numeric_string, char *lang, numex_result_t result) {
|
||||||
if (numex_table == NULL) {
|
if (numex_table == NULL) {
|
||||||
|
log_error(NUMEX_SETUP_ERROR);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,8 @@
|
|||||||
#include "transliterate.h"
|
#include "transliterate.h"
|
||||||
#include "file_utils.h"
|
#include "file_utils.h"
|
||||||
|
|
||||||
|
#include "log/log.h"
|
||||||
|
|
||||||
#define TRANSLITERATION_TABLE_SIGNATURE 0xAAAAAAAA
|
#define TRANSLITERATION_TABLE_SIGNATURE 0xAAAAAAAA
|
||||||
|
|
||||||
#define NFD "NFD"
|
#define NFD "NFD"
|
||||||
@@ -664,7 +666,14 @@ static char *replace_groups(trie_t *trie, char *str, char *replacement, group_ca
|
|||||||
}
|
}
|
||||||
|
|
||||||
char *transliterate(char *trans_name, char *str, size_t len) {
|
char *transliterate(char *trans_name, char *str, size_t len) {
|
||||||
if (trans_name == NULL || str == NULL || trans_table == NULL) return NULL;
|
if (trans_name == NULL || str == NULL) return NULL;
|
||||||
|
|
||||||
|
transliteration_table_t *trans_table = get_transliteration_table();
|
||||||
|
|
||||||
|
if (trans_table == NULL) {
|
||||||
|
log_error("transliteration table is NULL. Call libpostal_setup() or transliteration_module_setup()\n");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
trie_t *trie = trans_table->trie;
|
trie_t *trie = trans_table->trie;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user