diff --git a/resources/parser/data_sets/openaddresses.yaml b/resources/parser/data_sets/openaddresses.yaml
index 68455fd1..1227c076 100644
--- a/resources/parser/data_sets/openaddresses.yaml
+++ b/resources/parser/data_sets/openaddresses.yaml
@@ -9,6 +9,8 @@ global:
     abbreviate_unit_probability: 0.3
     separate_unit_probability: 0.2
 
+    abbreviate_toponym_probability: 0.3
+
     place_only_probability: 0.2
     place_and_postcode_probability: 0.1
 
diff --git a/scripts/geodata/openaddresses/formatter.py b/scripts/geodata/openaddresses/formatter.py
index 9efb9175..c5673170 100644
--- a/scripts/geodata/openaddresses/formatter.py
+++ b/scripts/geodata/openaddresses/formatter.py
@@ -216,6 +216,7 @@ class OpenAddressesFormatter(object):
         separate_street_prob = float(self.get_property('separate_street_probability', *configs) or 0.0)
         abbreviate_unit_prob = float(self.get_property('abbreviate_unit_probability', *configs))
         separate_unit_prob = float(self.get_property('separate_unit_probability', *configs) or 0.0)
+        abbreviate_toponym_prob = float(self.get_property('abbreviate_toponym_probability', *configs))
 
         add_osm_boundaries = bool(self.get_property('add_osm_boundaries', *configs) or False)
         add_osm_neighborhoods = bool(self.get_property('add_osm_neighborhoods', *configs) or False)
@@ -374,6 +375,15 @@ class OpenAddressesFormatter(object):
                         components.pop(AddressFormatter.UNIT)
                         unit = None
 
+                # Pass every boundary component that is present through abbreviate()
+                # and store the result back under that component's own key.
+                for component_key in AddressFormatter.BOUNDARY_COMPONENTS:
+                    component = components.get(component_key, None)
+                    if component is not None:
+                        component = abbreviate(toponym_gazetteer, component, language,
+                                               abbreviate_prob=abbreviate_toponym_prob)
+                        components[component_key] = component
+
                 # CLDR country name
                 country_name = self.cldr_country_name(country, language, configs)
                 if country_name: