Merge branch 'master' into parser-data

This commit is contained in:
Al
2016-05-11 12:21:00 -04:00
9 changed files with 2967 additions and 2938 deletions

View File

@@ -207,6 +207,7 @@ Libpostal is designed to be used by higher-level languages. If you don't see yo
**Unofficial language bindings**
- LuaJIT: [lua-resty-postal](https://github.com/bungle/lua-resty-postal)
- R: [poster](https://github.com/ironholds/poster)
**Database extensions**

View File

@@ -1 +1,2 @@
senza numero civico|snc|s.n.c.|s n c
senza numero|sn

View File

@@ -2,6 +2,7 @@ accesso|acc.so|accso
alto|alt
alzaia|alz
androna|and
angolo|ang.|ang
arco|arc
autostrada|aut.da|autda|aut
audostradale|aut.sle|autsle|aut sle|aut.ale|autale

View File

@@ -22,6 +22,18 @@ address_expansion_array *address_dictionary_get_expansions(char *key) {
return k != kh_end(address_dict->expansions) ? kh_value(address_dict->expansions, k) : NULL;
}
/*
 * Report whether an expansion is tagged with a given dictionary.
 *
 * Scans the first expansion.num_dictionaries entries of
 * expansion.dictionary_ids for dictionary_id.
 *
 * Returns true on the first match; false if no entry matches or the
 * expansion lists no dictionaries.
 *
 * Deliberately not declared `inline`: the header prototype is a plain
 * external declaration and the function is called from other translation
 * units. A bare C99 `inline` definition only yields an external symbol when
 * that non-inline prototype happens to be visible; dropping the specifier
 * guarantees the symbol is always emitted.
 */
bool address_expansion_in_dictionary(address_expansion_t expansion, uint16_t dictionary_id) {
    for (uint32_t i = 0; i < expansion.num_dictionaries; i++) {
        if (expansion.dictionary_ids[i] == dictionary_id) {
            return true;
        }
    }

    return false;
}
int32_t address_dictionary_next_canonical_index(void) {
if (address_dict == NULL || address_dict->canonical == NULL) {
log_error(ADDRESS_DICTIONARY_SETUP_ERROR);

View File

@@ -67,6 +67,7 @@ phrase_t search_address_dictionaries_prefix(char *str, size_t len, char *lang);
phrase_t search_address_dictionaries_suffix(char *str, size_t len, char *lang);
address_expansion_array *address_dictionary_get_expansions(char *key);
bool address_expansion_in_dictionary(address_expansion_t expansion, uint16_t dictionary_id);
char *address_dictionary_get_canonical(uint32_t index);
int32_t address_dictionary_next_canonical_index(void);
bool address_dictionary_add_canonical(char *canonical);

File diff suppressed because it is too large Load Diff

View File

@@ -303,7 +303,8 @@ static string_tree_t *add_string_alternatives(char *str, normalize_options_t opt
for (int j = 0; j < expansions->n; j++) {
address_expansion_t expansion = expansions->a[j];
if ((expansion.address_components & options.address_components) == 0) {
if ((expansion.address_components & options.address_components) == 0 && !address_expansion_in_dictionary(expansion, DICTIONARY_AMBIGUOUS_EXPANSION)) {
continue;
}

View File

@@ -1,5 +1,7 @@
#!/usr/bin/env bash
set -e
if [ "$#" -lt 3 ]; then
echo "Usage: ./libpostal_data [upload|download] [base|geodb] data_dir"
exit 1
@@ -29,6 +31,8 @@ GEODB_MODULE_DIR=geodb
PARSER_MODULE_DIR=address_parser
LANGUAGE_CLASSIFIER_MODULE_DIR=language_classifier
export LC_ALL=C
EPOCH_DATE="Jan 1 00:00:00 1970"
MB=$((1024*1024))

View File

@@ -69,6 +69,7 @@ TEST test_expansions(void) {
normalize_options_t options = get_libpostal_default_options();
CHECK_CALL(test_expansion_contains_with_languages("123 Main St. #2f", "123 main street number 2f", options, 1, "en"));
CHECK_CALL(test_expansion_contains_with_languages("S St. NW", "s street northwest", options, 1, "en"));
CHECK_CALL(test_expansion_contains_with_languages("Marktstrasse", "markt strasse", options, 1, "de"));
CHECK_CALL(test_expansion_contains_with_languages("Hoofdstraat", "hoofdstraat", options, 1, "nl"));
CHECK_CALL(test_expansion_contains_with_languages("มงแตร", "มงแตร", options, 1, "th"));