[addresses] adding new config for postal codes around the world. Allows appending the ISO alpha-2 country code to the beginning of the postcode as in e.g. SI-1000 (only used if the postcode begins with a digit). This system was used for postal codes in continental Europe as a recommendation from the CEPT. Now 7 member states still use it, so in those countries add the country-code with higher probability. The config also contains the license plate codes for countries where e.g. L-1234 might be used instead of LU-1234. Allows configuring in which countries postcodes should be validated using Google's per-country validation regexes (and the ability to override with a custom regex), and in which countries other admin component names should be stripped.
This commit is contained in:
0
scripts/geodata/postal_codes/__init__.py
Normal file
0
scripts/geodata/postal_codes/__init__.py
Normal file
51
scripts/geodata/postal_codes/config.py
Normal file
51
scripts/geodata/postal_codes/config.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import copy
|
||||
import operator
|
||||
import os
|
||||
import random
|
||||
import six
|
||||
import yaml
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries
|
||||
from geodata.address_formatting.formatter import AddressFormatter
|
||||
from geodata.configs.utils import nested_get, recursive_merge
|
||||
|
||||
|
||||
# Directory containing this module, with symlinks resolved.
this_dir = os.path.realpath(os.path.dirname(__file__))

# Path to the postal codes YAML config: three directory levels above this
# module, then resources/postal_codes/config.yaml.
POSTAL_CODES_CONFIG_FILE = os.path.join(
    this_dir,
    os.pardir, os.pardir, os.pardir,
    'resources', 'postal_codes', 'config.yaml'
)
|
||||
|
||||
|
||||
class PostalCodesConfig(object):
    """Postal code settings loaded from a YAML file, with per-country overrides.

    The loaded document must contain a ``global`` mapping and may contain a
    ``countries`` mapping keyed by country code. For every listed country a
    merged config is precomputed by combining a deep copy of the global
    config with that country's section via ``recursive_merge`` (presumably
    country values take precedence -- confirm against ``recursive_merge``).
    """

    def __init__(self, config_file=POSTAL_CODES_CONFIG_FILE):
        """Load ``config_file`` and build the merged per-country configs.

        :param config_file: path to the YAML config file.
        :raises KeyError: if the loaded mapping has no ``global`` key.
        """
        self.cache = {}

        # Use a context manager so the file handle is closed deterministically
        # rather than leaked. safe_load is used because the config is plain
        # data, and yaml.load() without an explicit Loader is deprecated
        # (and rejected outright by PyYAML >= 6).
        with open(config_file) as f:
            postal_codes_config = yaml.safe_load(f)

        self.global_config = postal_codes_config['global']
        self.country_configs = {}

        # A present-but-empty "countries:" key parses as None; treat it the
        # same as a missing section.
        countries = postal_codes_config.pop('countries', None) or {}

        for country, country_config in six.iteritems(countries):
            # Merge into a deep copy so the merge cannot touch
            # self.global_config.
            global_config_copy = copy.deepcopy(self.global_config)
            self.country_configs[country] = recursive_merge(global_config_copy, country_config)

        # The None key stands for "no country" and maps to the global config.
        self.country_configs[None] = self.global_config

    def get_property(self, key, country=None, default=None):
        """Look up a config value, using the country's merged config if any.

        :param key: either a dotted string (``"a.b.c"``, split on ``.``) or
            an already-split sequence of keys, passed to ``nested_get``.
        :param country: country code; it is lowercased before the lookup.
            When falsy, or when no non-empty config exists for it, the
            global config is used instead.
        :param default: forwarded to ``nested_get`` as its ``default``.
        :return: whatever ``nested_get`` returns for the selected config.
        """
        if isinstance(key, six.string_types):
            key = key.split(u'.')

        config = self.global_config
        if country:
            # An unknown country or an empty merged config falls back to
            # the global config.
            config = self.country_configs.get(country.lower()) or config

        return nested_get(config, key, default=default)
|
||||
|
||||
# Module-level shared instance, built from the default config file at import time.
postal_codes_config = PostalCodesConfig()
|
||||
56
scripts/geodata/postal_codes/phrases.py
Normal file
56
scripts/geodata/postal_codes/phrases.py
Normal file
@@ -0,0 +1,56 @@
|
||||
import random
|
||||
|
||||
from geodata.configs.utils import alternative_probabilities
|
||||
from geodata.math.sampling import weighted_choice, cdf
|
||||
from geodata.postal_codes.config import postal_codes_config
|
||||
from geodata.postal_codes.validation import postcode_regexes
|
||||
|
||||
|
||||
class PostalCodes(object):
    """Helpers for validating postal codes and prefixing them with a country code.

    All behavior is driven by ``postal_codes_config`` (per-country settings)
    and ``postcode_regexes`` (per-country compiled regexes).
    """

    @classmethod
    def is_valid(cls, postal_code, country):
        """Return True if ``postal_code`` fully matches the country's regex.

        When no regex is available for ``country`` the postal code is
        accepted. Surrounding whitespace is stripped before matching.
        """
        regex = postcode_regexes.get(country)
        if not regex:
            return True

        postal_code = postal_code.strip()
        m = regex.match(postal_code)
        # match() anchors only at the start, so additionally require that the
        # match consumed the entire string.
        return bool(m) and m.end() == len(postal_code)

    @classmethod
    def needs_validation(cls, country):
        """Return the ``validate_postcode`` setting for ``country`` (default False)."""
        return postal_codes_config.get_property('validate_postcode', country=country, default=False)

    @classmethod
    def should_strip_components(cls, country_code):
        """Return the ``strip_components`` setting for ``country_code``."""
        return postal_codes_config.get_property('strip_components', country=country_code)

    @classmethod
    def add_country_code(cls, postal_code, country):
        """Randomly prefix ``postal_code`` with a country code phrase.

        The stripped postal code is returned unchanged when the
        ``add_country_code`` setting is falsy, when the random draw is not
        below the configured probability, or when the code is empty or does
        not start with a digit. Otherwise the result is
        ``<phrase><separator><postal_code>`` where the phrase is sampled from
        ``country_code_phrase`` (or is the upper-cased ``country`` if that
        setting is absent) and the separator is ``-`` with probability
        ``country_code_hyphen_probability``, else empty.

        :param postal_code: the postal code string.
        :param country: country code used for the config lookups.
        :return: the (possibly prefixed) postal code.
        """
        postal_code = postal_code.strip()
        if not postal_codes_config.get_property('add_country_code', country=country):
            return postal_code

        # The original lookup used the misspelled key 'country_code_probablity'.
        # Prefer the correctly spelled key and fall back to the legacy
        # spelling so this works whichever one the config file uses.
        cc_probability = postal_codes_config.get_property('country_code_probability', country=country, default=None)
        if cc_probability is None:
            cc_probability = postal_codes_config.get_property('country_code_probablity', country=country, default=0.0)

        # The random draw is evaluated first (as before) so the sequence of
        # random numbers consumed does not depend on the postal code.
        if random.random() >= cc_probability or not postal_code or not postal_code[0].isdigit():
            return postal_code

        country_code_phrases = postal_codes_config.get_property('country_code_phrase', country=country, default=None)
        if country_code_phrases is None:
            country_code_phrase = country.upper()
        else:
            alternates, probs = alternative_probabilities(country_code_phrases)
            probs_cdf = cdf(probs)
            country_code_phrase = weighted_choice(alternates, probs_cdf)

        cc_hyphen_probability = postal_codes_config.get_property('country_code_hyphen_probability', country=country, default=0.0)

        separator = u'-' if random.random() < cc_hyphen_probability else u''

        return u'{}{}{}'.format(country_code_phrase, separator, postal_code)
|
||||
33
scripts/geodata/postal_codes/validation.py
Normal file
33
scripts/geodata/postal_codes/validation.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import re
|
||||
from geodata.i18n.google import google_i18n
|
||||
from geodata.postal_codes.config import postal_codes_config
|
||||
|
||||
|
||||
class PostcodeRegexes(object):
    """Lazily compiled, cached per-country postcode regexes.

    A regex comes from the ``override_regex`` config setting when present,
    otherwise from the ``zip`` field of the ``google_i18n`` response for the
    country. Patterns are compiled case-insensitively.
    """

    def __init__(self):
        self.responses = {}
        # Lowercased country code -> compiled regex, or None when the lookup
        # found a response without a postcode expression.
        self.postcode_regexes = {}

    def get(self, country_code):
        """Return the compiled postcode regex for ``country_code``, or None.

        :param country_code: country code; the cache key is its lowercase form.
        :return: a compiled regex, or None when no expression is available.
        """
        key = country_code.lower()

        # Use a membership test rather than .get(): a cached None is a real
        # (negative) result and must not trigger the lookups again.
        if key in self.postcode_regexes:
            return self.postcode_regexes[key]

        override_regex = postal_codes_config.get_property('override_regex', country=country_code)
        if override_regex:
            ret = re.compile(override_regex, re.I)
            self.postcode_regexes[key] = ret
            return ret

        response = google_i18n.get(country_code)
        if not response:
            # Deliberately not cached (as before), so a later call can retry.
            return None

        postcode_expression = response.get('zip')
        ret = re.compile(postcode_expression, re.I) if postcode_expression else None
        self.postcode_regexes[key] = ret
        return ret
|
||||
|
||||
|
||||
# Module-level shared instance; compiled regexes are cached on it per country code.
postcode_regexes = PostcodeRegexes()
|
||||
Reference in New Issue
Block a user