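[osm] Adding GeoNames abbreviated city names in a small percentage of cases (about 10% of the time, and only when an abbreviation exists and the language is not flagged as non-local) to get variations like NYC, BK, SF, etc. in the training data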

This commit is contained in:
Al
2015-12-03 18:00:05 -05:00
parent 25e89bcc41
commit 5af95ee613

View File

@@ -642,7 +642,13 @@ def build_address_format_training_data(admin_rtree, language_rtree, neighborhood
if not names or lang not in names:
continue
city = names[lang][0][0]
if 'abbr' not in names or non_local_language:
# Use the common city name in the target language
city = names[lang][0][0]
elif random.random() < 0.1:
# Use an abbreviation: NYC, BK, SF, etc.
city = random.choice(names['abbr'])[0]
if not city or not city.strip():
continue
address_components[AddressFormatter.CITY] = city