Merge branch 'master' into parser-data
This commit is contained in:
@@ -207,6 +207,7 @@ Libpostal is designed to be used by higher-level languages. If you don't see yo
|
||||
**Unofficial language bindings**
|
||||
|
||||
- LuaJIT: [lua-resty-postal](https://github.com/bungle/lua-resty-postal)
|
||||
- R: [poster](https://github.com/ironholds/poster)
|
||||
|
||||
**Database extensions**
|
||||
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
senza numero civico|snc|s.n.c.|s n c
|
||||
senza numero|sn
|
||||
|
||||
@@ -2,6 +2,7 @@ accesso|acc.so|accso
|
||||
alto|alt
|
||||
alzaia|alz
|
||||
androna|and
|
||||
angolo|ang.|ang
|
||||
arco|arc
|
||||
autostrada|aut.da|autda|aut
|
||||
audostradale|aut.sle|autsle|aut sle|aut.ale|autale
|
||||
|
||||
@@ -22,6 +22,18 @@ address_expansion_array *address_dictionary_get_expansions(char *key) {
|
||||
return k != kh_end(address_dict->expansions) ? kh_value(address_dict->expansions, k) : NULL;
|
||||
}
|
||||
|
||||
/*
 * Report whether an expansion lists the given dictionary id.
 *
 * Walks the first expansion.num_dictionaries entries of
 * expansion.dictionary_ids in order and returns true as soon as one
 * equals dictionary_id; returns false when no entry matches (including
 * when there are no entries to examine).
 *
 * NOTE(review): this is a plain `inline` definition (neither `static` nor
 * `extern`). It presumably relies on a non-inline prototype being visible
 * in this translation unit so that an external definition is emitted --
 * confirm against the header that declares it.
 */
inline bool address_expansion_in_dictionary(address_expansion_t expansion, uint16_t dictionary_id) {
    uint32_t idx = 0;

    while (idx < expansion.num_dictionaries) {
        if (expansion.dictionary_ids[idx] == dictionary_id) {
            return true;
        }
        idx++;
    }

    return false;
}
|
||||
|
||||
|
||||
|
||||
int32_t address_dictionary_next_canonical_index(void) {
|
||||
if (address_dict == NULL || address_dict->canonical == NULL) {
|
||||
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||
|
||||
@@ -67,6 +67,7 @@ phrase_t search_address_dictionaries_prefix(char *str, size_t len, char *lang);
|
||||
phrase_t search_address_dictionaries_suffix(char *str, size_t len, char *lang);
|
||||
|
||||
address_expansion_array *address_dictionary_get_expansions(char *key);
|
||||
bool address_expansion_in_dictionary(address_expansion_t expansion, uint16_t dictionary_id);
|
||||
char *address_dictionary_get_canonical(uint32_t index);
|
||||
int32_t address_dictionary_next_canonical_index(void);
|
||||
bool address_dictionary_add_canonical(char *canonical);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -303,7 +303,8 @@ static string_tree_t *add_string_alternatives(char *str, normalize_options_t opt
|
||||
|
||||
for (int j = 0; j < expansions->n; j++) {
|
||||
address_expansion_t expansion = expansions->a[j];
|
||||
if ((expansion.address_components & options.address_components) == 0) {
|
||||
|
||||
if ((expansion.address_components & options.address_components) == 0 && !address_expansion_in_dictionary(expansion, DICTIONARY_AMBIGUOUS_EXPANSION)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
if [ "$#" -lt 3 ]; then
|
||||
echo "Usage: ./libpostal_data [upload|download] [base|geodb] data_dir"
|
||||
exit 1
|
||||
@@ -29,6 +31,8 @@ GEODB_MODULE_DIR=geodb
|
||||
PARSER_MODULE_DIR=address_parser
|
||||
LANGUAGE_CLASSIFIER_MODULE_DIR=language_classifier
|
||||
|
||||
export LC_ALL=C
|
||||
|
||||
EPOCH_DATE="Jan 1 00:00:00 1970"
|
||||
|
||||
MB=$((1024*1024))
|
||||
|
||||
@@ -69,6 +69,7 @@ TEST test_expansions(void) {
|
||||
normalize_options_t options = get_libpostal_default_options();
|
||||
|
||||
CHECK_CALL(test_expansion_contains_with_languages("123 Main St. #2f", "123 main street number 2f", options, 1, "en"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("S St. NW", "s street northwest", options, 1, "en"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("Marktstrasse", "markt strasse", options, 1, "de"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("Hoofdstraat", "hoofdstraat", options, 1, "nl"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("มงแตร", "มงแตร", options, 1, "th"));
|
||||
|
||||
Reference in New Issue
Block a user