From 7f851810d252829e4b27f0dd8260aaffd7e29bdf Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 11 Jan 2017 16:17:35 -0500 Subject: [PATCH] [addresses] formatting addresses in Brasilia, so e.g. "Bloco B" is never part of the street name or building name, it's the house number. place=neighbourhood maps to nothing in Brasilia as these are basically subdivisions whose streets are identically named --- resources/boundaries/osm/br.yaml | 27 +++++++++++++-------- scripts/geodata/addresses/components.py | 31 +++++++++++++++++++++++++ scripts/geodata/osm/components.py | 22 +++++++++--------- 3 files changed, 59 insertions(+), 21 deletions(-) diff --git a/resources/boundaries/osm/br.yaml b/resources/boundaries/osm/br.yaml index b02c1e16..dcba7233 100644 --- a/resources/boundaries/osm/br.yaml +++ b/resources/boundaries/osm/br.yaml @@ -1,12 +1,19 @@ --- - admin_level: - "2": "country" - "3": "country_region" - "4": "state" - "5": "state_district" - "6": "state_district" - "7": "state_district" - "8": "city" - "9": "city_district" - "10": "suburb" + admin_level: + "2": "country" + "3": "country_region" + "4": "state" + "5": "state_district" + "6": "state_district" + "7": "state_district" + "8": "city" + "9": "city_district" + "10": "suburb" + overrides: + contained_by: + relation: + # Brasilia + "2758138": + place: + "neighborhood": null \ No newline at end of file diff --git a/scripts/geodata/addresses/components.py b/scripts/geodata/addresses/components.py index 50c99bd6..c67747a1 100644 --- a/scripts/geodata/addresses/components.py +++ b/scripts/geodata/addresses/components.py @@ -907,6 +907,33 @@ class AddressComponents(object): street = six.u('Calle {}').format(street) return street + BRASILIA_RELATION_ID = '2758138' + + @classmethod + def is_in(cls, osm_components, component_id, component_type='relation'): + for c in osm_components: + if c.get('type') == component_type and c.get('id') == component_id: + return True + return False + + brasilia_street_name_regex = re.compile('(?:\\s*\-\\s*)?\\bbloco\\b.*$', re.I | re.U) + brasilia_building_regex = re.compile('^\\s*bloco.*$', re.I | re.U) + + @classmethod + def format_brasilia_address(cls, address_components): + ''' + Brasília, Brazil's capital, uses a grid-like system + ''' + + street = address_components.get(AddressFormatter.ROAD) + if street: + address_components[AddressFormatter.ROAD] = street = cls.brasilia_street_name_regex.sub(six.u(''), street) + + name = address_components.get(AddressFormatter.HOUSE) + + if name and cls.brasilia_building_regex.match(name): + address_components[AddressFormatter.HOUSE_NUMBER] = address_components.pop(AddressFormatter.HOUSE) + street_unit_suffix_regex = re.compile("^(.+?)(?:\\s+\(?\\s*(?:unit|apartment|apt\.?|suite|ste\.?|bldg\.?|lot)\\b(?:(?:\\s*#|\\s+(?:number|no|no.)\\b)?)).*$", re.I) unit_type_regexes = {} @@ -961,6 +988,8 @@ class AddressComponents(object): for props, lat, lon, dist in self.places_index.nearest_points(latitude, longitude): component = self.categorize_osm_component(country, props, containing_components) + if component is None: + continue have_sub_city = any((key in grouped_components and key in city_replacements for key in (AddressFormatter.SUBURB, AddressFormatter.CITY_DISTRICT))) @@ -1707,6 +1736,8 @@ class AddressComponents(object): self.replace_country_name(address_components, country, non_local_language or language) self.country_specific_cleanup(address_components, country) + if self.is_in(osm_components, self.BRASILIA_RELATION_ID): + self.format_brasilia_address(address_components) self.add_admin_boundaries(address_components, osm_components, country, language, latitude, longitude, diff --git a/scripts/geodata/osm/components.py b/scripts/geodata/osm/components.py index 7d566686..c12c2c8c 100644 --- a/scripts/geodata/osm/components.py +++ b/scripts/geodata/osm/components.py @@ -6,7 +6,7 @@ import yaml from copy import deepcopy from geodata.address_formatting.formatter import AddressFormatter -from geodata.configs.utils import recursive_merge +from geodata.configs.utils import recursive_merge, DoesNotExist from geodata.encoding import safe_encode @@ -147,36 +147,36 @@ class OSMAddressComponents(object): # place=city, place=suburb, etc. override per-country boundaries if not global_overrides_last: for k, v in values: - containing_component = self.global_keys_override.get(k, {}).get(v, None) + containing_component = self.global_keys_override.get(k, {}).get(v, DoesNotExist) - if containing_component is not None: + if containing_component is not DoesNotExist: return containing_component if k != self.ADMIN_LEVEL and k in config: - containing_component = config.get(k, {}).get(v, None) - if containing_component: + containing_component = config.get(k, {}).get(v, DoesNotExist) + if containing_component is not DoesNotExist: return containing_component # admin_level tags are mapped per country for k, v in values: - containing_component = config.get(k, {}).get(v, None) + containing_component = config.get(k, {}).get(v, DoesNotExist) - if containing_component is not None: + if containing_component is not DoesNotExist: return containing_component # other place keys like place=state, etc. serve as a backup # when no admin_level tags are available for k, v in values: - containing_component = self.global_keys.get(k, {}).get(v, None) + containing_component = self.global_keys.get(k, {}).get(v, DoesNotExist) - if containing_component is not None: + if containing_component is not DoesNotExist: return containing_component if global_overrides_last: for k, v in values: - containing_component = self.global_keys_override.get(k, {}).get(v, None) + containing_component = self.global_keys_override.get(k, {}).get(v, DoesNotExist) - if containing_component is not None: + if containing_component is not DoesNotExist: return containing_component return None