From 543048bc26796d865cfe6b23c19a12e6ebf6c7c2 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 28 Jul 2016 02:37:12 -0400 Subject: [PATCH] [osm] use CLDR country names with random probability --- resources/parser/data_sets/osm.yaml | 1 + scripts/geodata/osm/formatter.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/resources/parser/data_sets/osm.yaml b/resources/parser/data_sets/osm.yaml index 9f01241d..ad2f24b5 100644 --- a/resources/parser/data_sets/osm.yaml +++ b/resources/parser/data_sets/osm.yaml @@ -14,6 +14,7 @@ intersections: places: drop_postcode_probability: 0.6 + cldr_country_probability: 0.5 streets: abbreviate_probability: 0.3 diff --git a/scripts/geodata/osm/formatter.py b/scripts/geodata/osm/formatter.py index 6006f84d..4eb0dea6 100644 --- a/scripts/geodata/osm/formatter.py +++ b/scripts/geodata/osm/formatter.py @@ -459,6 +459,8 @@ class OSMAddressFormatter(object): if num_references > 1000: num_references = 1000 + cldr_country_prob = float(nested_get(self.config, ('places', 'cldr_country_probability'), default=0.0)) + for name_tag in ('name', 'alt_name', 'loc_name', 'short_name', 'int_name'): if more_than_one_official_language: name = tags.get(name_tag) @@ -477,6 +479,11 @@ class OSMAddressFormatter(object): language_suffix=language_suffix, drop_duplicate_city_names=False) + if random.random() < cldr_country_prob and AddressFormatter.COUNTRY in address_components: + address_country = self.cldr_country_name(country, language) + if address_country: + address_components[AddressFormatter.COUNTRY] = address_country + place_tags.append((address_components, None, True)) for language, is_default in local_languages: @@ -501,6 +508,10 @@ class OSMAddressFormatter(object): random_key=is_default, language_suffix=language_suffix, drop_duplicate_city_names=False) + if random.random() < cldr_country_prob and AddressFormatter.COUNTRY in address_components: + address_country = self.cldr_country_name(country, language) + if address_country: + address_components[AddressFormatter.COUNTRY] = address_country place_tags.append((address_components, language, is_default)) @@ -526,6 +537,11 @@ class OSMAddressFormatter(object): language_suffix=language_suffix, drop_duplicate_city_names=False) + if random.random() < cldr_country_prob and AddressFormatter.COUNTRY in address_components: + address_country = self.cldr_country_name(country, language) + if address_country: + address_components[AddressFormatter.COUNTRY] = address_country + place_tags.append((address_components, language, False)) if postal_codes: