diff --git a/resources/parser/data_sets/openaddresses.yaml b/resources/parser/data_sets/openaddresses.yaml index 3a8056e0..62e6d60c 100644 --- a/resources/parser/data_sets/openaddresses.yaml +++ b/resources/parser/data_sets/openaddresses.yaml @@ -11,6 +11,7 @@ global: abbreviate_toponym_probability: 0.3 + address_only_probability: 0.1 place_only_probability: 0.2 place_and_postcode_probability: 0.1 diff --git a/scripts/geodata/openaddresses/formatter.py b/scripts/geodata/openaddresses/formatter.py index 421bf03b..2228860d 100644 --- a/scripts/geodata/openaddresses/formatter.py +++ b/scripts/geodata/openaddresses/formatter.py @@ -218,13 +218,13 @@ class OpenAddressesFormatter(object): separate_unit_prob = float(self.get_property('separate_unit_probability', *configs) or 0.0) abbreviate_toponym_prob = float(self.get_property('abbreviate_toponym_probability', *configs)) - add_osm_boundaries = bool(self.get_property('add_osm_boundaries', *configs) or False) add_osm_neighborhoods = bool(self.get_property('add_osm_neighborhoods', *configs) or False) non_numeric_units = bool(self.get_property('non_numeric_units', *configs) or False) numeric_postcodes_only = bool(self.get_property('numeric_postcodes_only', *configs) or False) postcode_strip_non_digit_chars = bool(self.get_property('postcode_strip_non_digit_chars', *configs) or False) + address_only_probability = float(self.get_property('address_only_probability', *configs)) place_only_probability = float(self.get_property('place_only_probability', *configs)) place_and_postcode_probability = float(self.get_property('place_and_postcode_probability', *configs)) @@ -452,6 +452,13 @@ class OpenAddressesFormatter(object): minimal_only=False, tag_components=tag_components) yield (language, country, formatted) + if random.random() < address_only_probability: + address_only_components = self.components.drop_places(components) + address_only_components = self.components.drop_postcode(address_only_components) + formatted = self.formatter.format_address(address_only_components, country, language=language, + minimal_only=False, tag_components=tag_components) + yield (language, country, formatted) + rand_val = random.random() if street and house_number and rand_val < drop_address_probability: