[openaddresses] Do not require a street name in former Soviet countries (an address may consist of just village + house_number). Only allow the address-only variant when a street is present.
This commit is contained in:
@@ -15,6 +15,7 @@ from geodata.address_expansions.address_dictionaries import address_phrase_dicti
|
|||||||
from geodata.address_expansions.gazetteers import street_types_gazetteer, unit_types_gazetteer, toponym_abbreviations_gazetteer
|
from geodata.address_expansions.gazetteers import street_types_gazetteer, unit_types_gazetteer, toponym_abbreviations_gazetteer
|
||||||
from geodata.address_formatting.formatter import AddressFormatter
|
from geodata.address_formatting.formatter import AddressFormatter
|
||||||
from geodata.addresses.components import AddressComponents
|
from geodata.addresses.components import AddressComponents
|
||||||
|
from geodata.countries.constants import Countries
|
||||||
from geodata.countries.names import country_names
|
from geodata.countries.names import country_names
|
||||||
from geodata.encoding import safe_decode, safe_encode
|
from geodata.encoding import safe_decode, safe_encode
|
||||||
from geodata.i18n.languages import get_country_languages
|
from geodata.i18n.languages import get_country_languages
|
||||||
@@ -48,8 +49,6 @@ SPANISH = 'es'
|
|||||||
PORTUGUESE = 'pt'
|
PORTUGUESE = 'pt'
|
||||||
RUSSIAN = 'ru'
|
RUSSIAN = 'ru'
|
||||||
|
|
||||||
JAPAN = 'jp'
|
|
||||||
|
|
||||||
|
|
||||||
class OpenAddressesFormatter(object):
|
class OpenAddressesFormatter(object):
|
||||||
field_regex_replacements = {
|
field_regex_replacements = {
|
||||||
@@ -394,8 +393,10 @@ class OpenAddressesFormatter(object):
|
|||||||
|
|
||||||
unit = components.get(AddressFormatter.UNIT, None)
|
unit = components.get(AddressFormatter.UNIT, None)
|
||||||
|
|
||||||
|
street_not_required = country == Countries.JAPAN or country in Countries.FORMER_SOVIET_COUNTRIES
|
||||||
|
|
||||||
# If there's a postcode, we can still use just the city/state/postcode, otherwise discard
|
# If there's a postcode, we can still use just the city/state/postcode, otherwise discard
|
||||||
if ((not street or country == JAPAN) and not house_number) or (street and house_number and (street.lower() == house_number.lower())) or (unit and street and street.lower() == unit.lower()):
|
if ((not street or street_not_required) and not house_number) or (street and house_number and (street.lower() == house_number.lower())) or (unit and street and street.lower() == unit.lower()):
|
||||||
if not postcode:
|
if not postcode:
|
||||||
continue
|
continue
|
||||||
components = self.components.drop_address(components)
|
components = self.components.drop_address(components)
|
||||||
@@ -502,7 +503,7 @@ class OpenAddressesFormatter(object):
|
|||||||
minimal_only=False, tag_components=tag_components)
|
minimal_only=False, tag_components=tag_components)
|
||||||
yield (language, country, formatted)
|
yield (language, country, formatted)
|
||||||
|
|
||||||
if random.random() < address_only_probability:
|
if random.random() < address_only_probability and street:
|
||||||
address_only_components = self.components.drop_places(components)
|
address_only_components = self.components.drop_places(components)
|
||||||
address_only_components = self.components.drop_postcode(address_only_components)
|
address_only_components = self.components.drop_postcode(address_only_components)
|
||||||
formatted = self.formatter.format_address(address_only_components, country, language=language,
|
formatted = self.formatter.format_address(address_only_components, country, language=language,
|
||||||
|
|||||||
Reference in New Issue
Block a user