[osm] Shortening state names obtained from reverse geocoding for relevant countries
This commit is contained in:
@@ -58,7 +58,7 @@ from geodata.coordinates.conversion import *
|
|||||||
from geodata.countries.country_names import *
|
from geodata.countries.country_names import *
|
||||||
from geodata.language_id.disambiguation import *
|
from geodata.language_id.disambiguation import *
|
||||||
from geodata.language_id.sample import sample_random_language
|
from geodata.language_id.sample import sample_random_language
|
||||||
from geodata.states.state_abbreviations import STATE_ABBREVIATIONS
|
from geodata.states.state_abbreviations import STATE_ABBREVIATIONS, STATE_EXPANSIONS
|
||||||
from geodata.language_id.polygon_lookup import country_and_languages
|
from geodata.language_id.polygon_lookup import country_and_languages
|
||||||
from geodata.i18n.languages import *
|
from geodata.i18n.languages import *
|
||||||
from geodata.address_formatting.formatter import AddressFormatter
|
from geodata.address_formatting.formatter import AddressFormatter
|
||||||
@@ -554,7 +554,10 @@ def build_address_format_training_data(admin_rtree, language_rtree, neighborhood
|
|||||||
|
|
||||||
for component, vals in poly_components.iteritems():
|
for component, vals in poly_components.iteritems():
|
||||||
if component not in address_components:
|
if component not in address_components:
|
||||||
address_components[component] = u', '.join(vals)
|
value = u', '.join(vals)
|
||||||
|
if component == AddressFormatter.STATE and random.random() < 0.7:
|
||||||
|
value = STATE_EXPANSIONS.get(address_country, {}).get(value, value)
|
||||||
|
address_components[component] = value
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Neighborhoods
|
Neighborhoods
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
STATE_ABBREVIATIONS = {
|
STATE_ABBREVIATIONS = {
|
||||||
'US': {
|
'US': {
|
||||||
@@ -83,3 +84,10 @@ STATE_ABBREVIATIONS = {
|
|||||||
'WA': {'en': 'Western Australia'},
|
'WA': {'en': 'Western Australia'},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Reverse lookup derived from STATE_ABBREVIATIONS: for every country key it
# maps each full state name (across all listed languages) back to its
# abbreviation. For instance, an entry 'WA': {'en': 'Western Australia'}
# yields STATE_EXPANSIONS[country]['Western Australia'] == 'WA'.
# If the same full name appears under more than one abbreviation within a
# country, the one visited last wins.
STATE_EXPANSIONS = defaultdict(dict)

# items()/values() behave the same as iteritems()/itervalues() for this
# purpose and also work on Python 3.
for country, values in STATE_ABBREVIATIONS.items():
    for abbrev, expansions in values.items():
        # Only the expanded names matter here; the language keys are ignored.
        for expansion in expansions.values():
            STATE_EXPANSIONS[country][expansion] = abbrev
|
||||||
|
|||||||
Reference in New Issue
Block a user