[osm/formatting] Adding in more ISO alpha-3 codes for countries in the training data

This commit is contained in:
Al
2015-11-28 14:08:07 -05:00
parent d3040036ec
commit 15d9e00121

View File

@@ -393,6 +393,8 @@ def build_address_format_training_data(admin_rtree, language_rtree, neighborhood
remove_keys = OSM_IGNORE_KEYS
alpha3_codes = {c.alpha2: c.alpha3 for c in pycountry.countries}
for node_id, value, deps in parse_osm(infile):
try:
latitude, longitude = latlon_to_decimal(value['lat'], value['lon'])
@@ -471,7 +473,12 @@ def build_address_format_training_data(admin_rtree, language_rtree, neighborhood
lang_country = language_country_names.get(non_local_language, {}).get(address_country.upper())
if lang_country:
address_components[AddressFormatter.COUNTRY] = lang_country
# 3. Implicit: the rest of the time keep the country code
# 3. 10% of the time: use the country's alpha-3 ISO code
elif address_country and r < 0.8:
iso_code_alpha3 = alpha3_codes.get(address_country)
if iso_code_alpha3:
address_components[AddressFormatter.COUNTRY] = iso_code_alpha3
# 4. Implicit: the rest of the time keep the alpha-2 country code
'''
States