From ba0f097d78a2cf473bcd4e03f73aa2caf17d8285 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 12 Jan 2017 12:05:42 -0500 Subject: [PATCH] [boundaries] adding check for valid name key in formatted places, and removing short_name from the Sao Paulo relation as well --- resources/boundaries/names/global.yaml | 9 +++++++++ scripts/geodata/boundaries/names.py | 3 +++ scripts/geodata/osm/formatter.py | 8 +++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/resources/boundaries/names/global.yaml b/resources/boundaries/names/global.yaml index 6f7865ce..ba113e23 100644 --- a/resources/boundaries/names/global.yaml +++ b/resources/boundaries/names/global.yaml @@ -86,6 +86,15 @@ names: probability: 0.09 - alternative: official_name probability: 0.01 + - id: 298285 # Sao Paulo (relation) + type: relation + default: name + probability: 0.9 + alternatives: + - alternative: alt_name + probability: 0.09 + - alternative: official_name + probability: 0.01 - id: 556706 # New Zealand type: relation default: name:en diff --git a/scripts/geodata/boundaries/names.py b/scripts/geodata/boundaries/names.py index d8fa7326..9da5372d 100644 --- a/scripts/geodata/boundaries/names.py +++ b/scripts/geodata/boundaries/names.py @@ -110,6 +110,9 @@ class BoundaryNames(object): def _string_as_regex(self, s): return safe_decode(s).replace(six.u('.'), six.u('\\.')) + def valid_name(self, object_type, object_id, name): + return name in self.exceptions.get((object_type, object_id), ((), ()))[0] + def name_key_dist(self, props, component): object_type = props.get('type') object_id = safe_encode(props.get('id', '')) diff --git a/scripts/geodata/osm/formatter.py b/scripts/geodata/osm/formatter.py index e3925598..a5d0a43d 100644 --- a/scripts/geodata/osm/formatter.py +++ b/scripts/geodata/osm/formatter.py @@ -606,7 +606,7 @@ class OSMAddressFormatter(object): except Exception: return (), None - if 'name' not in tags: + if 'name' not in tags and not any((t.startswith('name:') for t in tags)): return (), None osm_components = self.components.osm_reverse_geocoded_components(latitude, longitude) @@ -732,7 +732,13 @@ class OSMAddressFormatter(object): revised_tags = self.fix_component_encodings(revised_tags) + object_type = tags.get('type') + object_id = tags.get('id') + for name_tag in ('name', 'alt_name', 'loc_name', 'short_name', 'int_name', 'name:simple', 'official_name'): + if not boundary_names.valid_name(object_type, object_id, name_tag): + continue + if more_than_one_official_language: name = tags.get(name_tag) language_suffix = ''