From aa1f4fdd20e517d5b18ae5e5ac44a309ef7a0f2f Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 22 Nov 2016 09:51:04 -0500 Subject: [PATCH] [places] adding section called city_replacements to places config, for countries where something like the state_district/county, suburb or city_district should stand in for the city when one cannot be reverse geocoded (unincorporated county addresses, etc.) --- resources/places/countries/global.yaml | 24 ++++++++++++++++++++++++ scripts/geodata/places/config.py | 6 +++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/resources/places/countries/global.yaml b/resources/places/countries/global.yaml index eb862918..5823d754 100644 --- a/resources/places/countries/global.yaml +++ b/resources/places/countries/global.yaml @@ -3,6 +3,7 @@ # and standalone place queries) global: + city_replacements: [] # Probability of including individual components components: suburb: @@ -61,6 +62,10 @@ countries: # Australia au: + city_replacements: + - suburb + - city_district + - state_district components: suburb: dependencies: @@ -84,6 +89,10 @@ countries: # Brazil br: + city_replacements: + - suburb + - city_district + - state_district components: suburb: dependencies: @@ -102,6 +111,10 @@ countries: # Canada ca: + city_replacements: + - suburb + - city_district + - state_district components: suburb: dependencies: @@ -130,6 +143,10 @@ countries: # United Kingdom gb: + city_replacements: + - suburb + - city_district + - state_district components: state_district: probability: 0.15 @@ -216,6 +233,9 @@ countries: # Mexico mx: + city_replacements: + - suburb + - city_district components: suburb: dependencies: @@ -258,6 +278,10 @@ countries: # United States us: + city_replacements: + - suburb + - city_district + - state_district # Definitions new_york_county: &new_york_county id: 2552485 # New York County (Manhattan, NY) diff --git a/scripts/geodata/places/config.py b/scripts/geodata/places/config.py index afcada2c..32fe3c04 100644 --- a/scripts/geodata/places/config.py +++ b/scripts/geodata/places/config.py @@ -159,10 +159,14 @@ class PlaceConfig(object): new_components = components.copy() + city_replacements = set() + if AddressFormatter.CITY not in components: + city_replacements = set(self.get_property(('city_replacements', ), country=country)) + for component in admin_components: include = self.include_component(component, containing_ids, country=country, population=population) - if not include: + if not include and component not in city_replacements: # Note: this check is for cities that have the same name as their admin # areas e.g. Luxembourg, Luxembourg. In cases like this, if we were to drop # city, we don't want to include country on its own. This should help the parser