From c40ad99ec739227d21c374bcd83792cd2acdc8d6 Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 2 Aug 2016 14:52:12 -0400 Subject: [PATCH] [osm] removing postcode phrase from place training data and adding CLDR countries only after all the other normalizations --- scripts/geodata/osm/formatter.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/scripts/geodata/osm/formatter.py b/scripts/geodata/osm/formatter.py index 0ffcb560..c27138ad 100644 --- a/scripts/geodata/osm/formatter.py +++ b/scripts/geodata/osm/formatter.py @@ -557,11 +557,6 @@ class OSMAddressFormatter(object): revised_place_tags = [] for address_components, language, is_default in place_tags: - if (AddressFormatter.COUNTRY in address_components or place_config.include_component(AddressFormatter.COUNTRY, containing_ids, country=country)) and random.random() < cldr_country_prob: - address_country = self.components.cldr_country_name(country, language) - if address_country: - address_components[AddressFormatter.COUNTRY] = address_country - revised_address_components = place_config.dropout_components(address_components, osm_components, country=country, population=population) revised_address_components[component_name] = address_components[component_name] self.components.drop_invalid_components(revised_address_components) @@ -573,7 +568,10 @@ class OSMAddressFormatter(object): self.components.remove_numeric_boundary_names(revised_address_components) self.components.cleanup_boundary_names(revised_address_components) - self.components.add_postcode_phrase(revised_address_components, language, country=country) + if (AddressFormatter.COUNTRY in address_components or place_config.include_component(AddressFormatter.COUNTRY, containing_ids, country=country)) and random.random() < cldr_country_prob: + address_country = self.components.cldr_country_name(country, language) + if address_country: + address_components[AddressFormatter.COUNTRY] = address_country if revised_address_components: revised_place_tags.append((revised_address_components, language, is_default))