From b25f5f26aee46ef902fe62b33c443174d2d9a374 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 27 Jan 2017 13:17:07 -0500 Subject: [PATCH] [openaddresses] not requiring street name in former Soviet countries (may be village + house_number). Only allowing address-only if street is present --- scripts/geodata/openaddresses/formatter.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/geodata/openaddresses/formatter.py b/scripts/geodata/openaddresses/formatter.py index 0922c34d..7abf4d9d 100644 --- a/scripts/geodata/openaddresses/formatter.py +++ b/scripts/geodata/openaddresses/formatter.py @@ -15,6 +15,7 @@ from geodata.address_expansions.address_dictionaries import address_phrase_dicti from geodata.address_expansions.gazetteers import street_types_gazetteer, unit_types_gazetteer, toponym_abbreviations_gazetteer from geodata.address_formatting.formatter import AddressFormatter from geodata.addresses.components import AddressComponents +from geodata.countries.constants import Countries from geodata.countries.names import country_names from geodata.encoding import safe_decode, safe_encode from geodata.i18n.languages import get_country_languages @@ -48,8 +49,6 @@ SPANISH = 'es' PORTUGUESE = 'pt' RUSSIAN = 'ru' -JAPAN = 'jp' - class OpenAddressesFormatter(object): field_regex_replacements = { @@ -394,8 +393,10 @@ class OpenAddressesFormatter(object): unit = components.get(AddressFormatter.UNIT, None) + street_not_required = country == Countries.JAPAN or country in Countries.FORMER_SOVIET_COUNTRIES + # If there's a postcode, we can still use just the city/state/postcode, otherwise discard - if ((not street or country == JAPAN) and not house_number) or (street and house_number and (street.lower() == house_number.lower())) or (unit and street and street.lower() == unit.lower()): + if ((not street or street_not_required) and not house_number) or (street and house_number and (street.lower() == house_number.lower())) or (unit and street and street.lower() == unit.lower()): if not postcode: continue components = self.components.drop_address(components) @@ -502,7 +503,7 @@ class OpenAddressesFormatter(object): minimal_only=False, tag_components=tag_components) yield (language, country, formatted) - if random.random() < address_only_probability: + if random.random() < address_only_probability and street: address_only_components = self.components.drop_places(components) address_only_components = self.components.drop_postcode(address_only_components) formatted = self.formatter.format_address(address_only_components, country, language=language,