From afbb79b81dc2fc60a951f6d66cb122adeed1a55c Mon Sep 17 00:00:00 2001 From: Al Date: Sun, 31 Jul 2016 20:40:44 -0400 Subject: [PATCH] [osm/parser] Making a much lower probability of generating sub-building components for named venues (usually on the ground floor, etc.) --- resources/parser/data_sets/osm.yaml | 1 + scripts/geodata/osm/formatter.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/resources/parser/data_sets/osm.yaml b/resources/parser/data_sets/osm.yaml index ad2f24b5..69a9fa25 100644 --- a/resources/parser/data_sets/osm.yaml +++ b/resources/parser/data_sets/osm.yaml @@ -24,6 +24,7 @@ venues: address_probability: 0.4 abbreviate_probability: 0.3 separate_probability: 0.0 + sub_building_probability: 0.05 countries: jp: diff --git a/scripts/geodata/osm/formatter.py b/scripts/geodata/osm/formatter.py index 2c42ee3a..70f623da 100644 --- a/scripts/geodata/osm/formatter.py +++ b/scripts/geodata/osm/formatter.py @@ -689,7 +689,7 @@ class OSMAddressFormatter(object): building_tags = self.normalize_address_components(building_tags) for k, v in six.iteritems(building_tags): - if k not in revised_tags and k in (AddressFormatter.HOUSE_NUMBER, AddressFormatter.ROAD): + if k not in revised_tags and k in (AddressFormatter.HOUSE_NUMBER, AddressFormatter.ROAD, AddressFormatter.POSTCODE): revised_tags[k] = v elif k == AddressFormatter.HOUSE: building_venue_names.append(v) @@ -698,7 +698,8 @@ class OSMAddressFormatter(object): if subdivision_components: zone = self.zone(subdivision_components) - add_sub_building_components = AddressFormatter.HOUSE_NUMBER in revised_tags + venue_sub_building_prob = float(nested_get(self.config, ('venues', 'sub_building_probability'), default=0.0)) + add_sub_building_components = AddressFormatter.HOUSE_NUMBER in revised_tags and (AddressFormatter.HOUSE not in revised_tags or random.random() < venue_sub_building_prob) address_components, country, language = self.components.expanded(revised_tags, latitude, longitude, language=namespaced_language, num_floors=num_floors, num_basements=num_basements,