[addresses] adding new config for postal codes around the world. Allows appending the ISO alpha-2 country code to the beginning of the postcode as in e.g. SI-1000 (only used if the postcode begins with a digit). This system was used for postal codes in continental Europe as a recommendation from the CEPT. Now 7 member states still use it, so in those countries add the country-code with higher probability. The config also contains the license plate codes for countries where e.g. L-1234 might be used instead of LU-1234. Allows configuring in which countries postcodes should be validated using Google's per-country validation regexes (and the ability to override with a custom regex), and in which countries other admin component names should be stripped.
This commit is contained in:
56
scripts/geodata/postal_codes/phrases.py
Normal file
56
scripts/geodata/postal_codes/phrases.py
Normal file
@@ -0,0 +1,56 @@
|
||||
import random
|
||||
|
||||
from geodata.configs.utils import alternative_probabilities
|
||||
from geodata.math.sampling import weighted_choice, cdf
|
||||
from geodata.postal_codes.config import postal_codes_config
|
||||
from geodata.postal_codes.validation import postcode_regexes
|
||||
|
||||
|
||||
class PostalCodes(object):
|
||||
@classmethod
|
||||
def is_valid(cls, postal_code, country):
|
||||
regex = postcode_regexes.get(country)
|
||||
|
||||
if regex:
|
||||
postal_code = postal_code.strip()
|
||||
m = regex.match(postal_code)
|
||||
if m and m.end() == len(postal_code):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def needs_validation(cls, country):
|
||||
return postal_codes_config.get_property('validate_postcode', country=country, default=False)
|
||||
|
||||
@classmethod
|
||||
def should_strip_components(cls, country_code):
|
||||
return postal_codes_config.get_property('strip_components', country=country_code)
|
||||
|
||||
@classmethod
|
||||
def add_country_code(cls, postal_code, country):
|
||||
postal_code = postal_code.strip()
|
||||
if not postal_codes_config.get_property('add_country_code', country=country):
|
||||
return postal_code
|
||||
|
||||
cc_probability = postal_codes_config.get_property('country_code_probablity', country=country, default=0.0)
|
||||
if random.random() >= cc_probability or not postal_code or not postal_code[0].isdigit():
|
||||
return postal_code
|
||||
|
||||
country_code_phrases = postal_codes_config.get_property('country_code_phrase', country=country, default=None)
|
||||
if country_code_phrases is None:
|
||||
country_code_phrase = country.upper()
|
||||
else:
|
||||
alternates, probs = alternative_probabilities(country_code_phrases)
|
||||
probs_cdf = cdf(probs)
|
||||
country_code_phrase = weighted_choice(alternates, probs_cdf)
|
||||
|
||||
cc_hyphen_probability = postal_codes_config.get_property('country_code_hyphen_probability', country=country, default=0.0)
|
||||
|
||||
separator = u''
|
||||
r = random.random()
|
||||
if r < cc_hyphen_probability:
|
||||
separator = u'-'
|
||||
|
||||
return u'{}{}{}'.format(country_code_phrase, separator, postal_code)
|
||||
Reference in New Issue
Block a user