[fix] ISO code and simple/international name checks should be on the polygons
This commit is contained in:
@@ -392,7 +392,7 @@ def build_address_format_training_data(admin_rtree, language_rtree, neighborhood
|
|||||||
|
|
||||||
remove_keys = OSM_IGNORE_KEYS
|
remove_keys = OSM_IGNORE_KEYS
|
||||||
|
|
||||||
for key, value, deps in parse_osm(infile):
|
for node_id, value, deps in parse_osm(infile):
|
||||||
try:
|
try:
|
||||||
latitude, longitude = latlon_to_decimal(value['lat'], value['lon'])
|
latitude, longitude = latlon_to_decimal(value['lat'], value['lon'])
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -526,49 +526,50 @@ def build_address_format_training_data(admin_rtree, language_rtree, neighborhood
|
|||||||
|
|
||||||
poly_components = defaultdict(list)
|
poly_components = defaultdict(list)
|
||||||
|
|
||||||
for component, values in osm_components.iteritems():
|
for component, components_values in osm_components.iteritems():
|
||||||
seen = set()
|
seen = set()
|
||||||
|
|
||||||
# Choose which name to use with given probabilities
|
# Choose which name to use with given probabilities
|
||||||
r = random.random()
|
r = random.random()
|
||||||
if iso_code_key in value and r < 0.3:
|
if r < 0.7:
|
||||||
if r < 0.1 and iso_code3_key in value:
|
|
||||||
key = iso_code3_key
|
|
||||||
else:
|
|
||||||
key = iso_code_key
|
|
||||||
elif language == 'en' and not non_local_language and r < 0.7:
|
|
||||||
# Particularly to address the US (prefer United States,
|
|
||||||
# not United States of America) but may capture variations
|
|
||||||
# in other English-speaking countries as well.
|
|
||||||
if simple_name_key in value:
|
|
||||||
key = raw_key = simple_name_key
|
|
||||||
elif international_name_key in value:
|
|
||||||
key = raw_key = international_name_key
|
|
||||||
else:
|
|
||||||
key = name_key
|
|
||||||
raw_key = raw_name_key
|
|
||||||
elif r < 0.1:
|
|
||||||
# 10% of the time use the short name
|
|
||||||
key = short_name_key
|
|
||||||
raw_key = raw_short_name_key
|
|
||||||
elif r < 0.2:
|
|
||||||
# 10% of the time use the official name
|
|
||||||
key = official_name_key
|
|
||||||
raw_key = raw_official_name_key
|
|
||||||
elif r < 0.3:
|
|
||||||
# 10% of the time use the alt name
|
|
||||||
key = alt_name_key
|
|
||||||
raw_key = raw_alt_name_key
|
|
||||||
else:
|
|
||||||
# 70% of the time use the name tag
|
# 70% of the time use the name tag
|
||||||
key = name_key
|
key = name_key
|
||||||
raw_key = raw_name_key
|
raw_key = raw_name_key
|
||||||
|
elif r < 0.8:
|
||||||
|
# 10% of the time use the short name
|
||||||
|
key = short_name_key
|
||||||
|
raw_key = raw_short_name_key
|
||||||
|
elif r < 0.9:
|
||||||
|
# 10% of the time use the official name
|
||||||
|
key = official_name_key
|
||||||
|
raw_key = raw_official_name_key
|
||||||
|
else:
|
||||||
|
# 10% of the time use the alt name
|
||||||
|
key = alt_name_key
|
||||||
|
raw_key = raw_alt_name_key
|
||||||
|
|
||||||
for value in values:
|
for component_value in components_values:
|
||||||
name = value.get(key, value.get(raw_key))
|
r = random.random()
|
||||||
|
name = None
|
||||||
|
|
||||||
|
if iso_code3_key in component_value and r < 0.1:
|
||||||
|
name = component_value[iso_code3_key]
|
||||||
|
elif iso_code_key in component_value and r < 0.3:
|
||||||
|
name = component_value[iso_code_key]
|
||||||
|
elif language == 'en' and not non_local_language and r < 0.7:
|
||||||
|
# Particularly to address the US (prefer United States,
|
||||||
|
# not United States of America) but may capture variations
|
||||||
|
# in other English-speaking countries as well.
|
||||||
|
if simple_name_key in component_value:
|
||||||
|
name = component_value[simple_name_key]
|
||||||
|
elif international_name_key in component_value:
|
||||||
|
name = component_value[international_name_key]
|
||||||
|
|
||||||
if not name:
|
if not name:
|
||||||
name = value.get(name_key, value.get(raw_name_key))
|
name = component_value.get(key, component_value.get(raw_key))
|
||||||
|
|
||||||
|
if not name:
|
||||||
|
name = component_value.get(name_key, component_value.get(raw_name_key))
|
||||||
|
|
||||||
if not name:
|
if not name:
|
||||||
continue
|
continue
|
||||||
|
|||||||
Reference in New Issue
Block a user