[osm] Shortening state names obtained from reverse geocoding for relevant countries

This commit is contained in:
Al
2015-11-22 22:09:31 -05:00
parent 8b035814c7
commit 2695b5dd26
2 changed files with 13 additions and 2 deletions

View File

@@ -58,7 +58,7 @@ from geodata.coordinates.conversion import *
from geodata.countries.country_names import *
from geodata.language_id.disambiguation import *
from geodata.language_id.sample import sample_random_language
-from geodata.states.state_abbreviations import STATE_ABBREVIATIONS
+from geodata.states.state_abbreviations import STATE_ABBREVIATIONS, STATE_EXPANSIONS
from geodata.language_id.polygon_lookup import country_and_languages
from geodata.i18n.languages import *
from geodata.address_formatting.formatter import AddressFormatter
@@ -554,7 +554,10 @@ def build_address_format_training_data(admin_rtree, language_rtree, neighborhood
for component, vals in poly_components.iteritems():
if component not in address_components:
-address_components[component] = u', '.join(vals)
+value = u', '.join(vals)
+if component == AddressFormatter.STATE and random.random() < 0.7:
+value = STATE_EXPANSIONS.get(address_country, {}).get(value, value)
+address_components[component] = value
'''
Neighborhoods