[osm] Randomly select up to n components for state_district OSM boundaries. For all other fields, select one name at random.

This commit is contained in:
Al
2015-12-09 00:20:20 -05:00
parent 2c254ebc5e
commit aeb72d7d26

View File

@@ -635,8 +635,13 @@ def build_address_format_training_data(admin_rtree, language_rtree, neighborhood
seen.add((component, name))
for component, vals in poly_components.iteritems():
if component not in address_components or non_local_language and random.random() < 0.4:
val = u', '.join(vals)
if component not in address_components or (non_local_language and random.random() < 0.4):
if component == AddressFormatter.STATE_DISTRICT and random.random() < 0.5:
num = random.randrange(1, len(vals) + 1)
val = u', '.join(vals[:num])
else:
val = random.choice(vals)
if component == AddressFormatter.STATE and random.random() < 0.7:
val = STATE_EXPANSIONS.get(address_country, {}).get(val, val)
address_components[component] = val