From 8b87d224c946b2227cada34ffcccca254721498e Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 23 May 2016 05:28:37 -0400 Subject: [PATCH] [parser/osm] Adding address sans name for venues probabilistically --- resources/parser/data_sets/osm.yaml | 1 + scripts/geodata/osm/formatter.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/resources/parser/data_sets/osm.yaml b/resources/parser/data_sets/osm.yaml index 140311b4..2cad6f70 100644 --- a/resources/parser/data_sets/osm.yaml +++ b/resources/parser/data_sets/osm.yaml @@ -17,6 +17,7 @@ streets: separate_probability: 0.2 venues: + address_probability: 0.4 abbreviate_probability: 0.3 separate_probability: 0.0 diff --git a/scripts/geodata/osm/formatter.py b/scripts/geodata/osm/formatter.py index 084a7538..da9fce54 100644 --- a/scripts/geodata/osm/formatter.py +++ b/scripts/geodata/osm/formatter.py @@ -211,10 +211,18 @@ class OSMAddressFormatter(object): def formatted_addresses_with_venue_names(self, address_components, venue_names, country, language=None, tag_components=True, minimal_only=False): # Since venue names are only one-per-record, this wrapper will try them all (name, alt_name, etc.) formatted_addresses = [] + if AddressFormatter.HOUSE not in address_components or not venue_names: return [self.formatter.format_address(address_components, country, language=language, tag_components=tag_components, minimal_only=minimal_only)] + address_prob = float(nested_get(self.config, ('venues', 'address_probability'), default=0.0)) + if random.random() < address_prob: + address_components.pop(AddressFormatter.HOUSE) + formatted_address = self.formatter.format_address(address_components, country, language=language, + tag_components=tag_components, minimal_only=minimal_only) + formatted_addresses.append(formatted_address) + for venue_name in venue_names: if venue_name: address_components[AddressFormatter.HOUSE] = venue_name