[addresses] adding country-specific cleanups for Kingston (city=Kingston 12 split into city=Kingston, postcode=12) and Dublin (e.g. Dublin 3 specified various ways will be treated as a city_district, whereas Eirecodes are treated as postal codes)
This commit is contained in:
@@ -3,6 +3,7 @@ import operator
|
|||||||
import os
|
import os
|
||||||
import pycountry
|
import pycountry
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
import six
|
import six
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
@@ -45,7 +46,6 @@ this_dir = os.path.realpath(os.path.dirname(__file__))
|
|||||||
PARSER_DEFAULT_CONFIG = os.path.join(this_dir, os.pardir, os.pardir, os.pardir,
|
PARSER_DEFAULT_CONFIG = os.path.join(this_dir, os.pardir, os.pardir, os.pardir,
|
||||||
'resources', 'parser', 'default.yaml')
|
'resources', 'parser', 'default.yaml')
|
||||||
|
|
||||||
|
|
||||||
class ComponentDependencies(object):
|
class ComponentDependencies(object):
|
||||||
'''
|
'''
|
||||||
Declare an address component and its dependencies e.g.
|
Declare an address component and its dependencies e.g.
|
||||||
@@ -133,6 +133,9 @@ class AddressComponents(object):
|
|||||||
ALPHANUMERIC_PHRASE = 'alphanumeric'
|
ALPHANUMERIC_PHRASE = 'alphanumeric'
|
||||||
STANDALONE_PHRASE = 'standalone'
|
STANDALONE_PHRASE = 'standalone'
|
||||||
|
|
||||||
|
IRELAND = 'ie'
|
||||||
|
JAMAICA = 'jm'
|
||||||
|
|
||||||
class zones:
|
class zones:
|
||||||
COMMERCIAL = 'commercial'
|
COMMERCIAL = 'commercial'
|
||||||
RESIDENTIAL = 'residential'
|
RESIDENTIAL = 'residential'
|
||||||
@@ -675,6 +678,87 @@ class AddressComponents(object):
|
|||||||
else:
|
else:
|
||||||
return ':{}'.format(language_script or non_local_language or language)
|
return ':{}'.format(language_script or non_local_language or language)
|
||||||
|
|
||||||
|
# e.g. Dublin 3
|
||||||
|
dublin_postal_district_regex_str = '(?:[1-9]|1[1-9]|2[0-4]|6w)'
|
||||||
|
dublin_postal_district_regex = re.compile('^{}$'.format(dublin_postal_district_regex_str), re.I)
|
||||||
|
dublin_city_district_regex = re.compile('dublin {}$'.format(dublin_postal_district_regex_str), re.I)
|
||||||
|
|
||||||
|
def format_dublin_postal_district(self, address_components):
|
||||||
|
'''
|
||||||
|
Dublin postal districts
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
Since the introduction of the Eire code, former Dublin postcodes
|
||||||
|
are basically being used as what we would call a city_district in
|
||||||
|
libpostal, so fix that here.
|
||||||
|
|
||||||
|
If addr:city is given as "Dublin 3", make it city_district instead
|
||||||
|
If addr:city is given as "Dublin" or "City of Dublin" and addr:postcode
|
||||||
|
is given as "3", remove city/postcode and make it city_district "Dublin 3"
|
||||||
|
'''
|
||||||
|
|
||||||
|
city = address_components.get(AddressFormatter.CITY)
|
||||||
|
# Change to city_district
|
||||||
|
if city and self.dublin_city_district_regex.match(city):
|
||||||
|
address_components[AddressFormatter.CITY_DISTRICT] = address_components.pop(AddressFormatter.CITY)
|
||||||
|
postcode = address_components.get(AddressFormatter.POSTCODE)
|
||||||
|
if postcode and (self.dublin_postal_district_regex.match(postcode) or self.dublin_city_district_regex.match(postcode)):
|
||||||
|
address_components.pop(AddressFormatter.POSTCODE)
|
||||||
|
return True
|
||||||
|
elif city.lower() in ('dublin', 'city of dublin', 'dublin city') and AddressFormatter.POSTCODE in address_components:
|
||||||
|
postcode = address_components[AddressFormatter.POSTCODE]
|
||||||
|
if self.dublin_postal_district_regex.match(postcode):
|
||||||
|
address_components.pop(AddressFormatter.CITY)
|
||||||
|
address_components[AddressFormatter.CITY_DISTRICT] = 'Dublin {}'.format(address_components.pop(AddressFormatter.POSTCODE))
|
||||||
|
return True
|
||||||
|
elif self.dublin_city_district_regex.match(postcode):
|
||||||
|
address_components[AddressFormatter.CITY_DISTRICT] = address_components.pop(AddressFormatter.POSTCODE)
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
# e.g. Kingston 5
|
||||||
|
kingston_postcode_regex = re.compile('(kingston )?([1-9]|1[1-9]|20|c\.?s\.?o\.?)$', re.I)
|
||||||
|
|
||||||
|
def format_kingston_postcode(self, address_components):
|
||||||
|
'''
|
||||||
|
Kingston postcodes
|
||||||
|
------------------
|
||||||
|
Jamaica does not have a postcode system, except in Kingston where
|
||||||
|
there are postal zones 1-20 plus the Central Sorting Office (CSO).
|
||||||
|
|
||||||
|
These are not always consistently labeled in OSM, so normalize here.
|
||||||
|
|
||||||
|
If city is given as "Kingston 20", separate into city="Kingston", postcode="20"
|
||||||
|
'''
|
||||||
|
city = address_components.get(AddressFormatter.CITY)
|
||||||
|
postcode = address_components.get(AddressFormatter.POSTCODE)
|
||||||
|
|
||||||
|
if city:
|
||||||
|
match = self.kingston_postcode_regex.match(city)
|
||||||
|
if match:
|
||||||
|
city, postcode = match.groups()
|
||||||
|
if city:
|
||||||
|
address_components[AddressFormatter.CITY] = city
|
||||||
|
else:
|
||||||
|
address_components.pop(AddressFormatter.CITY)
|
||||||
|
|
||||||
|
if postcode:
|
||||||
|
address_components[AddressFormatter.POSTCODE] = postcode
|
||||||
|
|
||||||
|
return True
|
||||||
|
elif postcode:
|
||||||
|
match = self.kingston_postcode_regex.match(postcode)
|
||||||
|
if match:
|
||||||
|
city, postcode = match.groups()
|
||||||
|
if city and AddressFormatter.CITY not in address_components:
|
||||||
|
address_components[AddressFormatter.CITY] = city
|
||||||
|
|
||||||
|
if postcode:
|
||||||
|
address_components[AddressFormatter.POSTCODE] = postcode
|
||||||
|
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
def add_admin_boundaries(self, address_components,
|
def add_admin_boundaries(self, address_components,
|
||||||
osm_components,
|
osm_components,
|
||||||
country, language,
|
country, language,
|
||||||
@@ -1138,6 +1222,12 @@ class AddressComponents(object):
|
|||||||
else:
|
else:
|
||||||
address_components.pop(AddressFormatter.HOUSE_NUMBER, None)
|
address_components.pop(AddressFormatter.HOUSE_NUMBER, None)
|
||||||
|
|
||||||
|
def country_specific_cleanup(self, address_components, country):
|
||||||
|
if country == self.IRELAND:
|
||||||
|
return self.format_dublin_postal_district(address_components)
|
||||||
|
elif country == self.JAMAICA:
|
||||||
|
return self.format_kingston_postcode(address_components)
|
||||||
|
|
||||||
def add_house_number_phrase(self, address_components, language, country=None):
|
def add_house_number_phrase(self, address_components, language, country=None):
|
||||||
house_number = address_components.get(AddressFormatter.HOUSE_NUMBER, None)
|
house_number = address_components.get(AddressFormatter.HOUSE_NUMBER, None)
|
||||||
if not is_numeric(house_number) and (not house_number or house_number.lower() not in self.latin_alphabet_lower):
|
if not is_numeric(house_number) and (not house_number or house_number.lower() not in self.latin_alphabet_lower):
|
||||||
@@ -1292,6 +1382,9 @@ class AddressComponents(object):
|
|||||||
|
|
||||||
street = address_components.get(AddressFormatter.ROAD)
|
street = address_components.get(AddressFormatter.ROAD)
|
||||||
|
|
||||||
|
self.cleanup_boundary_names(address_components)
|
||||||
|
self.country_specific_cleanup(address_components, country)
|
||||||
|
|
||||||
self.replace_name_affixes(address_components, non_local_language or language)
|
self.replace_name_affixes(address_components, non_local_language or language)
|
||||||
|
|
||||||
self.replace_names(address_components)
|
self.replace_names(address_components)
|
||||||
@@ -1303,7 +1396,6 @@ class AddressComponents(object):
|
|||||||
self.cleanup_house_number(address_components)
|
self.cleanup_house_number(address_components)
|
||||||
|
|
||||||
self.remove_numeric_boundary_names(address_components)
|
self.remove_numeric_boundary_names(address_components)
|
||||||
self.cleanup_boundary_names(address_components)
|
|
||||||
self.add_house_number_phrase(address_components, language, country=country)
|
self.add_house_number_phrase(address_components, language, country=country)
|
||||||
self.add_postcode_phrase(address_components, language, country=country)
|
self.add_postcode_phrase(address_components, language, country=country)
|
||||||
self.add_metro_station_phrase(address_components, language, country=country)
|
self.add_metro_station_phrase(address_components, language, country=country)
|
||||||
|
|||||||
Reference in New Issue
Block a user