Merge branch 'master' into parser-data
This commit is contained in:
@@ -207,6 +207,7 @@ Libpostal is designed to be used by higher-level languages. If you don't see yo
|
|||||||
**Unofficial language bindings**
|
**Unofficial language bindings**
|
||||||
|
|
||||||
- LuaJIT: [lua-resty-postal](https://github.com/bungle/lua-resty-postal)
|
- LuaJIT: [lua-resty-postal](https://github.com/bungle/lua-resty-postal)
|
||||||
|
- R: [poster](https://github.com/ironholds/poster)
|
||||||
|
|
||||||
**Database extensions**
|
**Database extensions**
|
||||||
|
|
||||||
|
|||||||
@@ -1 +1,2 @@
|
|||||||
senza numero civico|snc|s.n.c.|s n c
|
senza numero civico|snc|s.n.c.|s n c
|
||||||
|
senza numero|sn
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ accesso|acc.so|accso
|
|||||||
alto|alt
|
alto|alt
|
||||||
alzaia|alz
|
alzaia|alz
|
||||||
androna|and
|
androna|and
|
||||||
|
angolo|ang.|ang
|
||||||
arco|arc
|
arco|arc
|
||||||
autostrada|aut.da|autda|aut
|
autostrada|aut.da|autda|aut
|
||||||
autostradale|aut.sle|autsle|aut sle|aut.ale|autale
|
autostradale|aut.sle|autsle|aut sle|aut.ale|autale
|
||||||
|
|||||||
@@ -22,6 +22,18 @@ address_expansion_array *address_dictionary_get_expansions(char *key) {
|
|||||||
return k != kh_end(address_dict->expansions) ? kh_value(address_dict->expansions, k) : NULL;
|
return k != kh_end(address_dict->expansions) ? kh_value(address_dict->expansions, k) : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Report whether dictionary_id is listed among the dictionaries attached to
 * the given expansion.
 *
 * Scans expansion.dictionary_ids[0 .. expansion.num_dictionaries) in order and
 * stops at the first match.
 *
 * Returns true if a matching id is found, false otherwise (including when
 * num_dictionaries is 0).
 */
inline bool address_expansion_in_dictionary(address_expansion_t expansion, uint16_t dictionary_id) {
    bool found = false;
    uint32_t idx = 0;

    /* Leave the loop as soon as a match is seen or the ids are exhausted. */
    while (!found && idx < expansion.num_dictionaries) {
        found = (expansion.dictionary_ids[idx] == dictionary_id);
        idx++;
    }

    return found;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int32_t address_dictionary_next_canonical_index(void) {
|
int32_t address_dictionary_next_canonical_index(void) {
|
||||||
if (address_dict == NULL || address_dict->canonical == NULL) {
|
if (address_dict == NULL || address_dict->canonical == NULL) {
|
||||||
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
|
||||||
|
|||||||
@@ -67,6 +67,7 @@ phrase_t search_address_dictionaries_prefix(char *str, size_t len, char *lang);
|
|||||||
phrase_t search_address_dictionaries_suffix(char *str, size_t len, char *lang);
|
phrase_t search_address_dictionaries_suffix(char *str, size_t len, char *lang);
|
||||||
|
|
||||||
address_expansion_array *address_dictionary_get_expansions(char *key);
|
address_expansion_array *address_dictionary_get_expansions(char *key);
|
||||||
|
/* Returns true if dictionary_id is one of the expansion's dictionary_ids, false otherwise. */
bool address_expansion_in_dictionary(address_expansion_t expansion, uint16_t dictionary_id);
|
||||||
char *address_dictionary_get_canonical(uint32_t index);
|
char *address_dictionary_get_canonical(uint32_t index);
|
||||||
int32_t address_dictionary_next_canonical_index(void);
|
int32_t address_dictionary_next_canonical_index(void);
|
||||||
bool address_dictionary_add_canonical(char *canonical);
|
bool address_dictionary_add_canonical(char *canonical);
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -303,7 +303,8 @@ static string_tree_t *add_string_alternatives(char *str, normalize_options_t opt
|
|||||||
|
|
||||||
for (int j = 0; j < expansions->n; j++) {
|
for (int j = 0; j < expansions->n; j++) {
|
||||||
address_expansion_t expansion = expansions->a[j];
|
address_expansion_t expansion = expansions->a[j];
|
||||||
if ((expansion.address_components & options.address_components) == 0) {
|
|
||||||
|
if ((expansion.address_components & options.address_components) == 0 && !address_expansion_in_dictionary(expansion, DICTIONARY_AMBIGUOUS_EXPANSION)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
if [ "$#" -lt 3 ]; then
|
if [ "$#" -lt 3 ]; then
|
||||||
echo "Usage: ./libpostal_data [upload|download] [base|geodb] data_dir"
|
echo "Usage: ./libpostal_data [upload|download] [base|geodb] data_dir"
|
||||||
exit 1
|
exit 1
|
||||||
@@ -29,6 +31,8 @@ GEODB_MODULE_DIR=geodb
|
|||||||
PARSER_MODULE_DIR=address_parser
|
PARSER_MODULE_DIR=address_parser
|
||||||
LANGUAGE_CLASSIFIER_MODULE_DIR=language_classifier
|
LANGUAGE_CLASSIFIER_MODULE_DIR=language_classifier
|
||||||
|
|
||||||
|
export LC_ALL=C
|
||||||
|
|
||||||
EPOCH_DATE="Jan 1 00:00:00 1970"
|
EPOCH_DATE="Jan 1 00:00:00 1970"
|
||||||
|
|
||||||
MB=$((1024*1024))
|
MB=$((1024*1024))
|
||||||
|
|||||||
@@ -69,6 +69,7 @@ TEST test_expansions(void) {
|
|||||||
normalize_options_t options = get_libpostal_default_options();
|
normalize_options_t options = get_libpostal_default_options();
|
||||||
|
|
||||||
CHECK_CALL(test_expansion_contains_with_languages("123 Main St. #2f", "123 main street number 2f", options, 1, "en"));
|
CHECK_CALL(test_expansion_contains_with_languages("123 Main St. #2f", "123 main street number 2f", options, 1, "en"));
|
||||||
|
CHECK_CALL(test_expansion_contains_with_languages("S St. NW", "s street northwest", options, 1, "en"));
|
||||||
CHECK_CALL(test_expansion_contains_with_languages("Marktstrasse", "markt strasse", options, 1, "de"));
|
CHECK_CALL(test_expansion_contains_with_languages("Marktstrasse", "markt strasse", options, 1, "de"));
|
||||||
CHECK_CALL(test_expansion_contains_with_languages("Hoofdstraat", "hoofdstraat", options, 1, "nl"));
|
CHECK_CALL(test_expansion_contains_with_languages("Hoofdstraat", "hoofdstraat", options, 1, "nl"));
|
||||||
CHECK_CALL(test_expansion_contains_with_languages("มงแตร", "มงแตร", options, 1, "th"));
|
CHECK_CALL(test_expansion_contains_with_languages("มงแตร", "มงแตร", options, 1, "th"));
|
||||||
|
|||||||
Reference in New Issue
Block a user