[osm] Adjusting priors for country code expansion

This commit is contained in:
Al
2015-10-03 15:13:16 -04:00
parent 0b98a26426
commit 77be2fe433

View File

@@ -516,7 +516,7 @@ def build_address_format_training_data(language_rtree, infile, out_dir, tag_comp
''' '''
# 1. use the country name in the current language or the country's local language # 1. use the country name in the current language or the country's local language
if address_country and random.random() < 0.7: if address_country and random.random() < 0.8:
localized = None localized = None
if language and language not in (AMBIGUOUS_LANGUAGE, UNKNOWN_LANGUAGE): if language and language not in (AMBIGUOUS_LANGUAGE, UNKNOWN_LANGUAGE):
localized = language_country_names.get(language, {}).get(address_country.upper()) localized = language_country_names.get(language, {}).get(address_country.upper())
@@ -527,7 +527,7 @@ def build_address_format_training_data(language_rtree, infile, out_dir, tag_comp
if localized: if localized:
address_components[AddressFormatter.COUNTRY] = localized address_components[AddressFormatter.COUNTRY] = localized
# 2. country's name in a language sampled from the distribution of languages on the Internet # 2. country's name in a language sampled from the distribution of languages on the Internet
elif address_country and random.random() < 0.7: elif address_country and random.random() < 0.5:
lang = sample_random_language() lang = sample_random_language()
lang_country = language_country_names.get(lang, {}).get(address_country.upper()) lang_country = language_country_names.get(lang, {}).get(address_country.upper())
if lang_country: if lang_country: