[po boxes] random PO box generation

This commit is contained in:
Al
2016-05-18 00:43:01 -04:00
parent 3860cd031f
commit fc94753481
4 changed files with 42 additions and 6 deletions

View File

@@ -2,14 +2,13 @@ import random
import six
from geodata.addresses.config import address_config
from geodata.addresses.numbering import NumberedComponent, sample_alphabet, latin_alphabet
from geodata.encoding import safe_decode
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
class Floor(NumberedComponent):
config_key = 'levels'
# When we don't know the number of floors, use a Zipfian distribution
# to choose randomly between 1 and max_floors with 1 being much more
# likely than 2, etc.

View File

@@ -107,7 +107,6 @@ class NumberedComponent(object):
ALPHA = 'alpha'
ALPHA_PLUS_NUMERIC = 'alpha_plus_numeric'
NUMERIC_PLUS_ALPHA = 'numeric_plus_alpha'
DIRECTIONAL = 'directional'
@classmethod
def choose_alphanumeric_type(cls, key, language, country=None):
@@ -116,7 +115,7 @@ class NumberedComponent(object):
values = []
probs = []
for num_type in (cls.NUMERIC, cls.ALPHA, cls.ALPHA_PLUS_NUMERIC, cls.NUMERIC_PLUS_ALPHA, cls.DIRECTIONAL):
for num_type in (cls.NUMERIC, cls.ALPHA, cls.ALPHA_PLUS_NUMERIC, cls.NUMERIC_PLUS_ALPHA):
key = '{}_probability'.format(num_type)
prob = alphanumeric_props.get(key)
if prob is not None:

View File

@@ -1,8 +1,10 @@
import random
import six
from geodata.addresses.config import address_config
from geodata.addresses.numbering import NumberedComponent, sample_alphabet, latin_alphabet
from geodata.encoding import safe_decode
from geodata.math.sampling import cdf, weighted_choice
class POBox(NumberedComponent):
@@ -24,9 +26,44 @@ class POBox(NumberedComponent):
return six.u('').join([safe_decode(cls.random_digits(num_digits)), suffix])
@classmethod
def random_letter(cls, alphabet=latin_alphabet):
def random_letter(cls, language, country=None):
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
return sample_alphabet(alphabet)
@classmethod
def random(cls, language, country=None):
    """Generate a random PO box identifier for a language/country.

    The kind of identifier (numeric, alpha, alpha+numeric or
    numeric+alpha) is picked by ``cls.choose_alphanumeric_type`` using the
    ``po_boxes.alphanumeric`` config key.  For every kind except pure
    alpha, the number of digits is sampled from the ``po_boxes.digits``
    config, whose entries are read as mappings with ``length`` and
    ``probability`` keys.

    :param language: language code passed through to the config lookups.
    :param country: optional country code passed through to the config
        lookups.
    :return: the generated box identifier, or ``None`` when the chosen
        type is not one of the four handled here.
    """
    num_type, num_type_props = cls.choose_alphanumeric_type('po_boxes.alphanumeric', language, country=country)

    # Pure-alpha boxes need no digit configuration at all.
    if num_type == cls.ALPHA:
        return cls.random_letter(language, country=country)

    digit_config = address_config.get_property('po_boxes.digits', language, country=country, default=[])

    # Read each entry once, 'length' before 'probability', then split the
    # pairs into the two parallel lists the sampler expects.
    pairs = [(entry['length'], entry['probability']) for entry in digit_config]
    lengths = [length for length, _ in pairs]
    weights = cdf([weight for _, weight in pairs])

    num_digits = weighted_choice(lengths, weights)
    number = cls.random_digits(num_digits)

    if num_type == cls.NUMERIC:
        return safe_decode(number)

    # Mixed types: draw the letter first, then decide on the separator, so
    # the random draws happen in a fixed order.
    letter = cls.random_letter(language, country=country)

    whitespace_probability = float(num_type_props.get('whitespace_probability', 0.0))
    separator = six.u('')
    if whitespace_probability and random.random() < whitespace_probability:
        separator = six.u(' ')

    if num_type == cls.ALPHA_PLUS_NUMERIC:
        parts = (letter, separator, number)
    elif num_type == cls.NUMERIC_PLUS_ALPHA:
        parts = (number, separator, letter)
    else:
        # Unrecognised type: nothing to build.
        return None

    return six.u('{}{}{}').format(*parts)
@classmethod
def phrase(cls, box_number, language, country=None):
return cls.numeric_phrase('po_boxes.alphanumeric', safe_decode(box_number), language,

View File

@@ -5,6 +5,7 @@ from geodata.addresses.config import address_config
from geodata.addresses.directions import RelativeDirection
from geodata.addresses.floors import Floor
from geodata.addresses.numbering import NumberedComponent, sample_alphabet, latin_alphabet
from geodata.configs.utils import nested_get
from geodata.encoding import safe_decode
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
@@ -67,7 +68,7 @@ class Unit(NumberedComponent):
else:
number = weighted_choice(cls.positive_units_letters, cls.positive_units_letters_cdf)
whitespace_probability = num_type_props.get('{}_whitespace_probability'.format(num_type))
whitespace_probability = nested_get(num_type_props, (num_type, 'whitespace_probability'))
whitespace_phrase = six.u(' ') if whitespace_probability and random.random() < whitespace_probability else six.u('')
if num_type == cls.ALPHA_PLUS_NUMERIC: