Initial fork commit

This commit is contained in:
2025-09-06 22:03:29 -04:00
commit 2d238cd339
1748 changed files with 932506 additions and 0 deletions

View File

View File

@@ -0,0 +1,59 @@
import random
import six
from geodata.addresses.config import address_config
from geodata.addresses.numbering import NumberedComponent
from geodata.encoding import safe_decode
from geodata.configs.utils import nested_get
from geodata.addresses.numbering import NumberedComponent, sample_alphabet, latin_alphabet
from geodata.encoding import safe_decode
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
class Block(NumberedComponent):
    '''
    Random block identifiers (e.g. "3", "B", "B 3") and the phrases that
    wrap them, configured under the "blocks" keys of the address config.
    '''
    max_blocks = 10

    # Candidate block numbers 1..max_blocks and the CDF of a Zipfian
    # distribution (exponent 2.0) used to sample them
    block_range = range(1, max_blocks + 1)
    block_range_probs = zipfian_distribution(len(block_range), 2.0)
    block_range_cdf = cdf(block_range_probs)

    @classmethod
    def random(cls, language, country=None):
        '''
        Sample a block identifier for the given language/country.

        Returns None when no alphanumeric type is configured, and also when
        the chosen type is not one of numeric, alpha, alpha_plus_numeric or
        numeric_plus_alpha.
        '''
        num_type, num_type_props = cls.choose_alphanumeric_type('blocks.alphanumeric', language, country=country)
        if num_type is None:
            return None

        if num_type == cls.NUMERIC:
            return safe_decode(weighted_choice(cls.block_range, cls.block_range_cdf))

        # Every remaining type needs a letter. Use the configured alphabet,
        # falling back to Latin when the alphabet_probability draw fails.
        letters = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
        letters_prob = address_config.get_property('alphabet_probability', language, country=country, default=None)
        if letters_prob is not None and random.random() >= letters_prob:
            letters = latin_alphabet
        letter = sample_alphabet(letters, 2.0)

        if num_type == cls.ALPHA:
            return safe_decode(letter)

        number = weighted_choice(cls.block_range, cls.block_range_cdf)

        # Optional single space between the letter and the number
        space_prob = float(num_type_props.get('whitespace_probability', 0.0))
        separator = six.u('')
        if space_prob and random.random() < space_prob:
            separator = six.u(' ')

        if num_type == cls.ALPHA_PLUS_NUMERIC:
            pieces = (letter, separator, number)
        elif num_type == cls.NUMERIC_PLUS_ALPHA:
            pieces = (number, separator, letter)
        else:
            return None
        return six.u('{}{}{}').format(*pieces)

    @classmethod
    def phrase(cls, block, language, country=None):
        '''
        With probability blocks.alphanumeric_phrase_probability (default 0.0)
        return a phrase built around the block via numeric_phrase using the
        "qualifiers" dictionary; otherwise, or when block is None, return None.
        '''
        if block is None:
            return None

        threshold = address_config.get_property('blocks.alphanumeric_phrase_probability', language, country=country, default=0.0)
        if not (random.random() < threshold):
            return None
        return cls.numeric_phrase('blocks.alphanumeric', block, language,
                                  dictionaries=['qualifiers'], country=country)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,152 @@
import copy
import os
import six
import yaml
from collections import Mapping
from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries
from geodata.configs.utils import nested_get, DoesNotExist, recursive_merge, alternative_probabilities
from geodata.math.sampling import cdf, check_probability_distribution
this_dir = os.path.realpath(os.path.dirname(__file__))
ADDRESS_CONFIG_DIR = os.path.join(this_dir, os.pardir, os.pardir, os.pardir,
'resources', 'addresses')
DICTIONARIES_DIR = os.path.join(this_dir, os.pardir, os.pardir, os.pardir,
'resources', 'dictionaries')
class AddressConfig(object):
    '''
    Loads the per-language address YAML configs and exposes property
    lookups plus cached probability distributions over phrase alternatives.
    '''

    def __init__(self, config_dir=ADDRESS_CONFIG_DIR, dictionaries_dir=DICTIONARIES_DIR):
        '''
        Read every *.yaml file in config_dir (file name minus ".yaml" is the
        language key) and index the address phrase dictionaries.

        dictionaries_dir is accepted for interface compatibility but is not
        read by this constructor.
        '''
        self.address_configs = {}
        self.cache = {}

        for filename in os.listdir(config_dir):
            if not filename.endswith('.yaml'):
                continue

            # Open relative to the directory that was actually listed (the
            # config_dir argument) and close the handle deterministically.
            with open(os.path.join(config_dir, filename)) as f:
                # NOTE(review): yaml.load can construct arbitrary Python
                # objects; if these configs can ever come from an untrusted
                # source, switch to yaml.safe_load.
                config = yaml.load(f)

            countries = config.pop('countries', {})

            # Each country config is the language-level config overlaid with
            # the country-specific section.
            for k in list(countries):
                country_config = countries[k]
                config_copy = copy.deepcopy(config)
                countries[k] = recursive_merge(config_copy, country_config)

            config['countries'] = countries

            lang = filename.rsplit('.yaml')[0]
            self.address_configs[lang] = config

        # (language, dictionary) => {first phrase of a row: remaining phrases}
        self.sample_phrases = {}

        for language in address_phrase_dictionaries.languages:
            for dictionary in address_phrase_dictionaries.language_dictionaries[language]:
                by_canonical = {}
                for phrases in address_phrase_dictionaries.phrases[(language, dictionary)]:
                    by_canonical[phrases[0]] = phrases[1:]
                self.sample_phrases[(language, dictionary)] = by_canonical

    def get_property(self, key, language, country=None, default=None):
        '''
        Look up a dot-separated key in the config for language, preferring
        the country-specific config when one exists for country. Returns
        default when the key is absent.
        '''
        keys = key.split('.')
        config = self.address_configs.get(language, {})

        if country:
            country_config = config.get('countries', {}).get(country, {})
            if country_config:
                config = country_config

        value = nested_get(config, keys)
        if value is not DoesNotExist:
            return value

        return default

    def cache_key(self, prop, language, dictionaries=(), country=None):
        '''Hashable key identifying one alternative_probabilities request.'''
        return (prop, language, country, tuple(dictionaries))

    def alternative_probabilities(self, prop, language, dictionaries=(), country=None):
        '''
        Get a probability distribution over alternatives.

        Returns (forms, cdf) where forms is a list of (phrase, properties)
        tuples, or (None, None) when the property is missing or has no
        alternatives. Successful results are cached per cache_key.
        '''
        key = self.cache_key(prop, language, dictionaries, country=country)
        if key not in self.cache:
            properties = self.get_property(prop, language, country=country, default=None)

            if properties is None:
                return None, None

            alternatives, probs = alternative_probabilities(properties)
            if alternatives is None:
                return None, None

            forms = []
            form_probs = []

            for props, prob in zip(alternatives, probs):
                phrases, phrase_probs = self.form_probabilities(props, language, dictionaries=dictionaries)
                forms.extend([(p, props) for p in phrases])
                form_probs.extend([prob * p for p in phrase_probs])

            sample_probability = properties.get('sample_probability')
            if sample_probability is not None:
                sample_phrases = []
                for dictionary in dictionaries:
                    # Default must be a mapping: it is iterated as key/value pairs
                    phrases = self.sample_phrases.get((language, dictionary), {})
                    for canonical, surface_forms in six.iteritems(phrases):
                        sample_phrases.append(canonical)
                        sample_phrases.extend(surface_forms)

                # With nothing to sample, skip the split instead of dividing
                # by zero; the distribution check below reports the shortfall.
                if sample_phrases:
                    # Note: use the outer properties dictionary e.g. units.alphanumeric
                    forms.extend([(p, properties) for p in sample_phrases])
                    form_probs.extend([float(sample_probability) / len(sample_phrases)] * len(sample_phrases))

            try:
                check_probability_distribution(form_probs)
            except AssertionError:
                print('values were: {}'.format(forms))
                raise

            form_probs_cdf = cdf(form_probs)
            self.cache[key] = (forms, form_probs_cdf)
        return self.cache[key]

    def form_probabilities(self, properties, language, dictionaries=()):
        '''
        Build the surface forms for a single alternative: its canonical
        phrase, optional abbreviation, and optionally phrases sampled from
        the given dictionaries. Returns (alternatives, probs) as parallel lists.
        '''
        probs = []
        alternatives = []
        canonical_prob = properties.get('canonical_probability', 1.0)
        canonical = properties['canonical']

        alternatives.append(canonical)
        probs.append(canonical_prob)

        if 'abbreviated_probability' in properties:
            probs.append(properties['abbreviated_probability'])
            abbreviated = properties['abbreviated']
            assert isinstance(abbreviated, six.string_types)
            alternatives.append(abbreviated)

        if properties.get('sample', False) and 'sample_probability' in properties:
            sample_prob = properties['sample_probability']
            samples = set()
            for dictionary in dictionaries:
                phrases = self.sample_phrases.get((language, dictionary), {})
                samples |= set(phrases.get(canonical, []))
            if 'sample_exclude' in properties:
                samples -= set(properties['sample_exclude'])
            if samples:
                # Split the sample probability evenly across sampled phrases
                share = sample_prob / float(len(samples))
                for phrase in samples:
                    probs.append(share)
                    alternatives.append(phrase)
            else:
                # Nothing to sample: renormalize the existing probabilities
                total = sum(probs)
                probs = [p / total for p in probs]

        try:
            check_probability_distribution(probs)
        except AssertionError:
            print('values were: {}'.format(alternatives))
            raise

        return alternatives, probs
address_config = AddressConfig()

View File

@@ -0,0 +1,37 @@
import six
from geodata.addresses.config import address_config
from geodata.encoding import safe_decode
from geodata.math.sampling import weighted_choice
class Conjunction(object):
    '''
    Joins a list of phrases with a conjunction sampled from the address
    config (config key "and").
    '''
    DEFAULT_WHITESPACE_JOIN = ', '
    DEFAULT_NON_WHITESPACE_JOIN = ''

    key = 'and'

    @classmethod
    def join(cls, phrases, language, country=None):
        '''
        Join phrases: the last max_phrase_join items (default 2) are joined
        with the sampled conjunction, anything before them with default_join.

        Raises ValueError when phrases has no __iter__ attribute.
        '''
        if not hasattr(phrases, '__iter__'):
            raise ValueError('Param phrases must be iterable')

        candidates, candidates_cdf = address_config.alternative_probabilities(cls.key, language, country=country)
        conjunction, conj_props = weighted_choice(candidates, candidates_cdf)

        use_space = conj_props.get('whitespace', True)
        decoded = [safe_decode(p) for p in phrases]
        tail_size = conj_props.get('max_phrase_join', 2)

        lead = six.u('')
        if len(decoded) > tail_size:
            if use_space:
                fallback = cls.DEFAULT_WHITESPACE_JOIN
            else:
                fallback = cls.DEFAULT_NON_WHITESPACE_JOIN
            separator = safe_decode(conj_props.get('default_join', fallback))
            # The trailing empty string leaves a separator after the last
            # leading phrase, right before the conjunction-joined tail
            lead = separator.join(decoded[:-tail_size] + [six.u('')])

        if use_space:
            pad = six.u(' ')
            conjunction = six.u('{}{}{}').format(pad, conjunction, pad)

        tail = conjunction.join(decoded[-tail_size:])
        return six.u('').join([lead, tail])

View File

@@ -0,0 +1,19 @@
import random
from geodata.addresses.config import address_config
from geodata.addresses.numbering import NumberedComponent
from geodata.encoding import safe_decode
class ConscriptionNumber(NumberedComponent):
    '''
    Phrases for conscription numbers, configured under the
    "conscription_numbers.alphanumeric" key of the address config.
    '''

    @classmethod
    def phrase(cls, number, language, country=None):
        '''
        Return a phrase for number built by numeric_phrase with the
        "house_numbers" dictionary, or None when number is None.
        '''
        if number is None:
            return number

        return cls.numeric_phrase('conscription_numbers.alphanumeric', safe_decode(number), language,
                                  dictionaries=['house_numbers'], country=country)

View File

@@ -0,0 +1,42 @@
import operator
import six
from geodata.graph.topsort import topsort
class ComponentDependencies(object):
    '''
    Declare an address component and its dependencies e.g.
    a house_number cannot be used in the absence of a road name.

    Dependencies are stored as integer bitsets; each component name is
    assigned one bit, shared across all instances of this class.
    '''

    # name => single-bit integer, assigned in order of first use
    component_bit_values = {}

    def __init__(self, graph):
        '''
        graph maps each component to an iterable of the components it
        depends on (empty/None meaning no declared dependencies).
        '''
        self.dependencies = {}
        # len(graph) low bits set. Written arithmetically so that an empty
        # graph yields 0 instead of failing to parse an empty bit string.
        self.all_values = (1 << len(graph)) - 1
        self.dependency_order = list(topsort(graph))

        for component, deps in six.iteritems(graph):
            # A component without declared dependencies gets the all-ones mask
            self.dependencies[component] = self.component_bitset(deps) if deps else self.all_values

    def __getitem__(self, key):
        return self.dependencies.__getitem__(key)

    def __contains__(self, key):
        return self.dependencies.__contains__(key)

    @classmethod
    def get_component_bit_value(cls, name):
        '''Return the bit for name, assigning the next free bit on first use.'''
        val = cls.component_bit_values.get(name)
        if val is None:
            num_values = len(cls.component_bit_values)
            val = 1 << num_values
            cls.component_bit_values[name] = val
        return val

    @classmethod
    def component_bitset(cls, components):
        '''OR together the bits of all components (0 for an empty iterable).'''
        bitset = 0
        for name in components:
            bitset |= cls.get_component_bit_value(name)
        return bitset

View File

@@ -0,0 +1,37 @@
from geodata.addresses.config import address_config
from geodata.addresses.numbering import NumericPhrase
from geodata.math.sampling import weighted_choice
class RelativeDirection(NumericPhrase):
    '''Numeric phrase read from the "directions" config key, sampling from the "unit_directions" dictionary.'''
    key = 'directions'
    dictionaries = ['unit_directions']
class AnteroposteriorDirection(RelativeDirection):
    '''RelativeDirection restricted to the "directions.anteroposterior" config key.'''
    key = 'directions.anteroposterior'
class LateralDirection(RelativeDirection):
    '''RelativeDirection restricted to the "directions.lateral" config key.'''
    key = 'directions.lateral'
class CardinalDirection(NumericPhrase):
    '''Numeric phrase read from the "cardinal_directions" config key, sampling from the "cardinal_directions" dictionary.'''
    key = 'cardinal_directions'
    dictionaries = ['cardinal_directions']
class Direction(object):
    '''Picks between a cardinal and a relative direction phrase.'''
    CARDINAL = 'cardinal'
    RELATIVE = 'relative'

    @classmethod
    def random(cls, language, country=None, cardinal_proability=0.5):
        '''
        Return a CardinalDirection phrase with probability
        cardinal_proability, otherwise a RelativeDirection phrase. Both are
        requested without a number.
        '''
        kinds = [cls.CARDINAL, cls.RELATIVE]
        kinds_cdf = [cardinal_proability, 1.0]

        if weighted_choice(kinds, kinds_cdf) == cls.CARDINAL:
            phrase_cls = CardinalDirection
        else:
            phrase_cls = RelativeDirection
        return phrase_cls.phrase(None, language, country=country)

View File

@@ -0,0 +1,66 @@
import random
import six
from geodata.addresses.config import address_config
from geodata.addresses.numbering import NumberedComponent
from geodata.encoding import safe_decode
from geodata.configs.utils import nested_get
from geodata.addresses.directions import RelativeDirection
from geodata.addresses.floors import Floor
from geodata.addresses.numbering import NumberedComponent, Digits, sample_alphabet, latin_alphabet
from geodata.encoding import safe_decode
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
class Entrance(NumberedComponent):
    '''
    Random entrance identifiers (e.g. "2", "1-3", "B", "B-2") and the
    phrases around them, configured under the "entrances" keys of the
    address config.
    '''
    max_entrances = 10

    # Candidate entrance numbers 1..max_entrances and the CDF of a Zipfian
    # distribution (exponent 2.0) used to sample them
    entrance_range = range(1, max_entrances + 1)
    entrance_range_probs = zipfian_distribution(len(entrance_range), 2.0)
    entrance_range_cdf = cdf(entrance_range_probs)

    @classmethod
    def random(cls, language, country=None):
        '''
        Sample an entrance identifier for the given language/country.

        Returns None when no alphanumeric type is configured, and also when
        the chosen type is not handled by any branch below.
        '''
        num_type, num_type_props = cls.choose_alphanumeric_type('entrances.alphanumeric', language, country=country)
        if num_type is None:
            return None

        if num_type == cls.NUMERIC:
            return safe_decode(weighted_choice(cls.entrance_range, cls.entrance_range_cdf))

        if num_type == cls.HYPHENATED_NUMBER:
            # Second number is the first plus another sampled offset, so it
            # is always the larger of the two
            first = weighted_choice(cls.entrance_range, cls.entrance_range_cdf)
            second = first + weighted_choice(cls.entrance_range, cls.entrance_range_cdf)
            return u'{}-{}'.format(first, second)

        # Every remaining type needs a letter. Use the configured alphabet,
        # falling back to Latin when the alphabet_probability draw fails.
        letters = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
        letters_prob = address_config.get_property('alphabet_probability', language, country=country, default=None)
        if letters_prob is not None and random.random() >= letters_prob:
            letters = latin_alphabet
        letter = sample_alphabet(letters, 2.0)

        if num_type == cls.ALPHA:
            return safe_decode(letter)

        number = weighted_choice(cls.entrance_range, cls.entrance_range_cdf)

        # One draw decides the separator: space, hyphen, or nothing
        space_prob = float(num_type_props.get('whitespace_probability', 0.0))
        hyphen_prob = float(num_type_props.get('hyphen_probability', 0.0))
        draw = random.random()
        if draw < space_prob:
            separator = u' '
        elif draw < (space_prob + hyphen_prob):
            separator = u'-'
        else:
            separator = u''

        if num_type == cls.ALPHA_PLUS_NUMERIC:
            pieces = (letter, separator, number)
        elif num_type == cls.NUMERIC_PLUS_ALPHA:
            pieces = (number, separator, letter)
        else:
            return None
        return six.u('{}{}{}').format(*pieces)

    @classmethod
    def phrase(cls, entrance, language, country=None):
        '''
        Return a phrase built around entrance via numeric_phrase using the
        "entrances" dictionary, or None when entrance is None.
        '''
        if entrance is not None:
            return cls.numeric_phrase('entrances.alphanumeric', entrance, language,
                                      dictionaries=['entrances'], country=country)
        return None

View File

@@ -0,0 +1,165 @@
import random
import six
from geodata.addresses.config import address_config
from geodata.addresses.numbering import NumberedComponent, sample_alphabet, latin_alphabet
from geodata.encoding import safe_decode
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
from geodata.numbers.spellout import numeric_expressions
class Floor(NumberedComponent):
    '''
    Random floor/level identifiers and the phrases describing them,
    configured under the "levels" keys of the address config.
    '''
    # When we don't know the number of floors, use a Zipfian distribution
    # to choose randomly among floors 0..max_floors followed by the
    # basements -1..-max_basements, with earlier entries much more likely.
    max_floors = 10
    max_basements = 2
    # list() keeps the concatenation valid where range() does not return a list
    numbered_floors = list(range(max_floors + 1)) + list(range(-1, -max_basements - 1, -1))
    floor_probs = zipfian_distribution(len(numbered_floors), 0.75)
    floor_probs_cdf = cdf(floor_probs)

    # For use with letters e.g. A0 is probably not as common
    floors_letters = list(range(1, max_floors + 1)) + [0]
    floors_letters_probs = zipfian_distribution(len(floors_letters), 2.0)
    floors_letters_cdf = cdf(floors_letters_probs)

    @classmethod
    def sample_floors(cls, num_floors, num_basements=0):
        '''Uniformly pick a floor in [-num_basements, num_floors - 1] (upper bound 0 when num_floors <= 0).'''
        num_floors = int(num_floors)
        return random.randint(-num_basements, (num_floors - 1) if num_floors > 0 else 0)

    @classmethod
    def sample_floors_range(cls, min_floor, max_floor):
        '''Uniformly pick a floor in [min_floor, max_floor - 1], or min_floor when the range is empty.'''
        return random.randint(min_floor, (max_floor - 1) if max_floor > min_floor else min_floor)

    @classmethod
    def random_int(cls, language, country=None, num_floors=None, num_basements=None):
        '''
        Sample an integer floor. Without a usable num_floors the Zipfian
        numbered_floors distribution is used; buildings taller than
        max_floors are sampled from floors above max_floors only.
        '''
        number = None
        if num_floors is not None:
            try:
                num_floors = int(num_floors)
            except (ValueError, TypeError):
                return weighted_choice(cls.numbered_floors, cls.floor_probs_cdf)

            if num_floors <= cls.max_floors:
                number = cls.sample_floors(num_floors, num_basements=num_basements or 0)
            else:
                number = cls.sample_floors_range(cls.max_floors + 1, num_floors)
        else:
            number = weighted_choice(cls.numbered_floors, cls.floor_probs_cdf)

        return number

    @classmethod
    def random_from_int(cls, number, language, country=None):
        '''
        Render an integer floor using an alphanumeric type sampled from
        "levels.alphanumeric". Returns None when no type is configured or
        the sampled type is not handled.
        '''
        num_type, num_type_props = cls.choose_alphanumeric_type('levels.alphanumeric', language, country=country)
        if num_type is None:
            return None

        numbering_starts_at = int(address_config.get_property('levels.numbering_starts_at', language, country=country, default=0))

        # Only non-negative floors are shifted by the configured offset
        if number >= 0:
            number += numbering_starts_at

        if num_type == cls.NUMERIC:
            return safe_decode(number)
        elif num_type == cls.ROMAN_NUMERAL:
            roman_numeral = numeric_expressions.roman_numeral(number)
            if roman_numeral is not None:
                return roman_numeral
            else:
                return safe_decode(number)
        elif num_type == cls.HYPHENATED_NUMBER:
            # sample_floors_range is a classmethod, so it must be reached
            # through cls; a bare name lookup raises NameError here.
            number2 = number + cls.sample_floors_range(1, cls.max_floors)
            return u'{}-{}'.format(number, number2)
        else:
            alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
            alphabet_probability = address_config.get_property('alphabet_probability', language, country=country, default=None)
            if alphabet_probability is not None and random.random() >= alphabet_probability:
                alphabet = latin_alphabet
            letter = sample_alphabet(alphabet)
            if num_type == cls.ALPHA:
                return letter
            else:
                # The number combined with a letter is re-sampled from the
                # letter-specific distribution, not taken from the input
                number = weighted_choice(cls.floors_letters, cls.floors_letters_cdf)

                if num_type == cls.ALPHA_PLUS_NUMERIC:
                    return six.u('{}{}').format(letter, number)
                elif num_type == cls.NUMERIC_PLUS_ALPHA:
                    return six.u('{}{}').format(number, letter)

        return None

    @classmethod
    def random(cls, language, country=None, num_floors=None, num_basements=None):
        '''Sample an integer floor with random_int and render it with random_from_int.'''
        number = cls.random_int(language, country=country, num_floors=num_floors, num_basements=num_basements)
        return cls.random_from_int(number, language, country=country)

    @classmethod
    def phrase(cls, floor, language, country=None, num_floors=None):
        '''
        Build a phrase for floor, preferring a configured alias
        ("levels.aliases": half floors, sub-basements, top floor, or an
        exact floor number) over the generic "levels.alphanumeric" phrase.
        Returns None when floor is None.
        '''
        if floor is None:
            return None

        integer_floor = False
        floor = safe_decode(floor)
        try:
            floor = int(floor)
            integer_floor = True
        except (ValueError, TypeError):
            try:
                floor = float(floor)
                integer_floor = int(floor) == floor
            except (ValueError, TypeError):
                # Not numeric at all (e.g. a letter): use the generic phrase
                return cls.numeric_phrase('levels.alphanumeric', floor, language,
                                          dictionaries=['level_types_numbered'], country=country)

        numbering_starts_at = int(address_config.get_property('levels.numbering_starts_at', language, country=country, default=0))
        try:
            num_floors = int(num_floors)
            top_floor = num_floors if numbering_starts_at == 1 else num_floors - 1
            is_top = num_floors and floor == top_floor
        except (ValueError, TypeError):
            # num_floors missing or unparseable: cannot identify the top floor
            is_top = False

        alias_prefix = 'levels.aliases'
        aliases = address_config.get_property(alias_prefix, language, country=country)
        if aliases:
            alias = None

            if not integer_floor and floor >= 0 and 'half_floors' in aliases:
                floor = int(floor)
                alias = 'half_floors'
            elif not integer_floor and floor < 0 and 'half_floors_negative' in aliases:
                floor = int(floor)
                alias = 'half_floors_negative'
            elif floor < -1 and '<-1' in aliases:
                alias = '<-1'
            elif is_top and 'top' in aliases:
                alias = 'top'
            elif safe_decode(floor) in aliases:
                alias = safe_decode(floor)

            floor = safe_decode(floor)

            if alias:
                alias_props = aliases.get(alias)

                # Aliases upon aliases, e.g. for something like "Upper Mezzanine"
                # where it's an alias for "1" under the half_floors key
                if safe_decode(floor) in alias_props.get('aliases', {}):
                    alias_prefix = '{}.{}.aliases'.format(alias_prefix, alias)
                    alias = safe_decode(floor)

            if alias:
                return cls.numeric_phrase('{}.{}'.format(alias_prefix, alias), floor, language,
                                          dictionaries=['level_types_basement',
                                                        'level_types_mezzanine',
                                                        'level_types_numbered',
                                                        'level_types_standalone',
                                                        'level_types_sub_basement'],
                                          country=country)

        return cls.numeric_phrase('levels.alphanumeric', floor, language,
                                  dictionaries=['level_types_numbered'], country=country)

View File

@@ -0,0 +1,26 @@
import random
from geodata.addresses.config import address_config
from geodata.addresses.numbering import NumberedComponent
from geodata.encoding import safe_decode
class HouseNumber(NumberedComponent):
    '''
    Phrases for house numbers, including the "no number" case, configured
    under the "house_numbers" keys of the address config.
    '''

    @classmethod
    def phrase(cls, number, language, country=None):
        '''
        With the configured probability return a numeric_phrase for number;
        otherwise return the decoded number (or None when number is None).
        '''
        if number is None:
            prob_key = 'house_numbers.no_number_probability'
            key = 'house_numbers.no_number'
            dictionaries = ['no_number']
            fallback = None
        else:
            prob_key = 'house_numbers.alphanumeric_phrase_probability'
            key = 'house_numbers.alphanumeric'
            dictionaries = ['house_numbers', 'number']
            fallback = safe_decode(number)

        threshold = address_config.get_property(prob_key, language, country=country, default=0.0)
        if not (random.random() < threshold):
            return fallback

        # NOTE(review): number is passed through safe_decode even when it is
        # None - confirm what safe_decode(None) yields for the no-number case.
        return cls.numeric_phrase(key, safe_decode(number), language,
                                  dictionaries=dictionaries, country=country)

View File

@@ -0,0 +1,24 @@
from geodata.addresses.config import address_config
import random
from geodata.addresses.config import address_config
from geodata.addresses.numbering import NumericPhrase
from geodata.encoding import safe_decode
class MetroStationPhrase(NumericPhrase):
    '''Numeric phrase read from the "metro_stations.alphanumeric" config key, sampling from the "qualifiers" dictionary.'''
    key = 'metro_stations.alphanumeric'
    dictionaries = ['qualifiers']
class MetroStation(object):
    '''Occasionally wraps a metro station name in a configured phrase.'''

    @classmethod
    def phrase(cls, station, language, country=None):
        '''
        With probability metro_stations.alphanumeric_phrase_probability
        (default 0.0) return a MetroStationPhrase for station; otherwise,
        or when station is None, return None.
        '''
        if station is not None:
            threshold = address_config.get_property('metro_stations.alphanumeric_phrase_probability', language, country=country, default=0.0)
            if random.random() < threshold:
                return MetroStationPhrase.phrase(station, language, country=country)
        return None

View File

@@ -0,0 +1,434 @@
# -*- coding: utf-8 -*-
import random
import six
from geodata.addresses.config import address_config
from geodata.encoding import safe_decode
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
from geodata.math.floats import isclose
from geodata.numbers.ordinals import ordinal_expressions
from geodata.numbers.spellout import numeric_expressions
from geodata.text.tokenize import tokenize, token_types
alphabets = {}
def sample_alphabet(alphabet, b=1.5):
    '''
    Sample an "alphabet" using a Zipfian distribution (frequent items are very
    frequent, long tail of infrequent items). If we look at something like
    unit numbers, "Unit A" or "Unit B" are much more likely than "Unit X" or
    "Unit Z" simply because most dwellings only have a few units. Sampling
    letters from a Zipfian distribution rather than uniformly means that instead
    of every letter having the same likelihood (1/26), letters toward the beginning
    of the alphabet are much more likely to be selected. Letters toward the end can
    still be selected sometimes, but are not very likely.

    Note letters don't necessarily need to be sorted alphabetically, just in order
    of frequency.

    alphabet: iterable of letters, most frequent first.
    b: exponent passed to zipfian_distribution.
    Returns one element of alphabet.
    '''
    alphabet = tuple(alphabet)

    # The CDF depends on the exponent as well as on the letters, so both are
    # part of the memoization key. Keying on the alphabet alone would make
    # every later call reuse whichever exponent happened to come first.
    cache_key = (alphabet, b)
    probs_cdf = alphabets.get(cache_key)
    if probs_cdf is None:
        probs_cdf = cdf(zipfian_distribution(len(alphabet), b))
        alphabets[cache_key] = probs_cdf

    return weighted_choice(alphabet, probs_cdf)
latin_alphabet = [chr(i) for i in range(ord('A'), ord('Z') + 1)]
class Digits(object):
    '''
    Rewrites strings of ASCII digits into alternative representations:
    spelled-out words, Unicode full-width digits or Roman numerals.
    '''
    ASCII = 'ascii'
    SPELLOUT = 'spellout'
    UNICODE_FULL_WIDTH = 'unicode_full_width'
    ROMAN_NUMERAL = 'roman_numeral'

    CARDINAL = 'cardinal'
    ORDINAL = 'ordinal'

    # ASCII digit => full-width digit (U+FF10..U+FF19). Each value must be a
    # distinct non-empty character, otherwise the reverse map below collapses
    # and rewrite_full_width() deletes digits instead of widening them.
    unicode_full_width_map = {
        '0': safe_decode('０'),
        '1': safe_decode('１'),
        '2': safe_decode('２'),
        '3': safe_decode('３'),
        '4': safe_decode('４'),
        '5': safe_decode('５'),
        '6': safe_decode('６'),
        '7': safe_decode('７'),
        '8': safe_decode('８'),
        '9': safe_decode('９'),
    }

    # full-width digit => ASCII digit
    full_width_digit_map = {
        v: k for k, v in six.iteritems(unicode_full_width_map)
    }

    @classmethod
    def rewrite_full_width(cls, s):
        '''Replace each ASCII digit in s with its full-width form.'''
        return six.u('').join([cls.unicode_full_width_map.get(c, c) for c in s])

    @classmethod
    def rewrite_standard_width(cls, s):
        '''Replace each full-width digit in s with its ASCII form.'''
        return six.u('').join([cls.full_width_digit_map.get(c, c) for c in s])

    @classmethod
    def rewrite_roman_numeral(cls, s):
        '''Return s as a Roman numeral when s is all digits and a numeral is available, else s unchanged.'''
        roman_numeral = None
        if s.isdigit():
            roman_numeral = numeric_expressions.roman_numeral(s)

        if roman_numeral:
            return roman_numeral
        else:
            return s

    @classmethod
    def rewrite_spellout(cls, s, lang, num_type, props):
        '''
        Spell out the all-digit string s in lang as a cardinal or ordinal
        (per num_type), title-cased; props may carry "gender" and
        "category". Returns s unchanged when no spellout is produced.
        '''
        if not s.isdigit():
            return s

        num = int(s)
        spellout = None
        gender = props.get('gender')
        category = props.get('category')

        if num_type == cls.CARDINAL:
            spellout = numeric_expressions.spellout_cardinal(num, lang, gender=gender, category=category)
        elif num_type == cls.ORDINAL:
            spellout = numeric_expressions.spellout_ordinal(num, lang, gender=gender, category=category)

        if spellout:
            return spellout.title()
        return s

    @classmethod
    def rewrite(cls, d, lang, props, num_type=CARDINAL):
        '''
        Randomly rewrite the digit string d according to the
        "<type>_probability" entries in props; any probability mass not
        assigned to a rewrite keeps the ASCII form. Returns d untouched when
        props is empty.
        '''
        if not props:
            return d

        d = safe_decode(d)

        values = []
        probs = []

        for digit_type in (cls.SPELLOUT, cls.UNICODE_FULL_WIDTH, cls.ROMAN_NUMERAL):
            key = '{}_probability'.format(digit_type)
            if key in props:
                values.append(digit_type)
                probs.append(props[key])

        # Leftover probability mass goes to leaving the digits as-is
        if not isclose(sum(probs), 1.0):
            values.append(cls.ASCII)
            probs.append(1.0 - sum(probs))

        probs = cdf(probs)
        digit_type = weighted_choice(values, probs)

        if digit_type == cls.ASCII:
            return d
        elif digit_type == cls.SPELLOUT:
            return cls.rewrite_spellout(d, lang, num_type, props)
        elif digit_type == cls.ROMAN_NUMERAL:
            roman_numeral = cls.rewrite_roman_numeral(d)
            # Optionally attach the language's ordinal suffix to the numeral
            if random.random() < props.get('ordinal_suffix_probability', 0.0):
                ordinal_suffix = ordinal_expressions.get_suffix(d, lang, gender=props.get('gender', None))
                if ordinal_suffix:
                    roman_numeral = six.u('{}{}').format(roman_numeral, ordinal_suffix)
            return roman_numeral
        elif digit_type == cls.UNICODE_FULL_WIDTH:
            return cls.rewrite_full_width(d)
        else:
            return d
class NumericPhrase(object):
    '''
    Base class for phrases that may be combined with a number. Subclasses
    set `key` (config property) and `dictionaries` (phrase dictionaries).
    '''
    key = None

    NUMERIC = 'numeric'
    NUMERIC_AFFIX = 'numeric_affix'

    @classmethod
    def pick_phrase_and_type(cls, number, language, country=None):
        '''
        Sample a phrase and how it combines with the number.

        Returns (num_type, phrase, props). When nothing is configured for
        cls.key the result is (None, decoded number or None, None).
        '''
        values, probs = address_config.alternative_probabilities(cls.key, language, dictionaries=cls.dictionaries, country=country)
        if not values:
            return None, safe_decode(number) if number is not None else None, None

        phrase, phrase_props = weighted_choice(values, probs)

        values = []
        probs = []

        for num_type in (cls.NUMERIC, cls.NUMERIC_AFFIX):
            prob = phrase_props.get('{}_probability'.format(num_type), None)
            if prob is not None:
                values.append(num_type)
                probs.append(prob)

        if not probs:
            num_type = cls.NUMERIC
        else:
            probs = cdf(probs)
            num_type = weighted_choice(values, probs)

        return num_type, phrase, phrase_props[num_type]

    @classmethod
    def combine_with_number(cls, number, phrase, num_type, props, whitespace_default=False):
        '''
        Place phrase to the left or right of number per props['direction'].

        props may also carry 'affix', 'zero_pad', 'zero_char', 'whitespace',
        'whitespace_probability' and 'title_case'. Returns just the phrase
        when number is None, and the bare number when the direction is
        neither 'left' nor 'right'.
        '''
        if num_type == cls.NUMERIC_AFFIX:
            phrase = props['affix']
            # number may be None (phrase-only requests): nothing to pad then
            if 'zero_pad' in props and number is not None and number.isdigit():
                number = number.rjust(props['zero_pad'], props.get('zero_char', '0'))

        direction = props['direction']
        whitespace = props.get('whitespace', whitespace_default)

        whitespace_probability = props.get('whitespace_probability')
        if whitespace_probability is not None:
            whitespace = random.random() < whitespace_probability

        if props.get('title_case', True):
            # Title case unless the config specifies otherwise
            phrase = phrase.title()

        if number is None:
            return phrase

        whitespace_phrase = u' ' if whitespace else u''
        # Phrase goes to the left of the number
        if direction == 'left':
            return u'{}{}{}'.format(phrase, whitespace_phrase, number)
        # Phrase goes to the right of the number
        elif direction == 'right':
            return u'{}{}{}'.format(number, whitespace_phrase, phrase)
        # Need to specify a direction, otherwise return naked number
        else:
            return safe_decode(number)

    @classmethod
    def phrase(cls, number, language, country=None):
        '''
        Return a randomly chosen phrase combined with number, or the decoded
        number (None when number is None) if nothing is configured.
        '''
        num_type, phrase, props = cls.pick_phrase_and_type(number, language, country=country)
        if props is None:
            # No configuration: pick_phrase_and_type already put the decoded
            # number (or None) in the phrase slot, and there are no props to
            # combine with.
            return phrase
        whitespace_default = num_type == cls.NUMERIC
        return cls.combine_with_number(number, phrase, num_type, props, whitespace_default=whitespace_default)
class Number(NumericPhrase):
    '''Numeric phrase read from the "numbers" config key, sampling from the "number" dictionary.'''
    key = 'numbers'
    dictionaries = ['number']
class NumberedComponent(object):
    '''
    Base class for address components that carry a number or letter
    (floors, units, blocks, ...). Provides sampling of the alphanumeric
    type and construction of the phrase around a number.
    '''
    NUMERIC = 'numeric'
    ALPHA = 'alpha'
    ALPHA_PLUS_NUMERIC = 'alpha_plus_numeric'
    NUMERIC_PLUS_ALPHA = 'numeric_plus_alpha'
    HYPHENATED_NUMBER = 'hyphenated_number'
    ROMAN_NUMERAL = 'roman_numeral'

    @classmethod
    def choose_alphanumeric_type(cls, key, language, country=None):
        '''
        Sample one of the alphanumeric types from the "<type>_probability"
        entries found under key. Returns (num_type, num_type_props), or
        (None, None) when the key is missing or lists no probabilities.
        '''
        alphanumeric_props = address_config.get_property(key, language, country=country, default=None)
        if alphanumeric_props is None:
            return None, None

        values = []
        probs = []

        for num_type in (cls.NUMERIC, cls.ALPHA, cls.ALPHA_PLUS_NUMERIC, cls.NUMERIC_PLUS_ALPHA, cls.HYPHENATED_NUMBER, cls.ROMAN_NUMERAL):
            prob = alphanumeric_props.get('{}_probability'.format(num_type))
            if prob is not None:
                values.append(num_type)
                probs.append(prob)

        if not values:
            return None, None

        probs = cdf(probs)
        num_type = weighted_choice(values, probs)
        num_type_props = alphanumeric_props.get(num_type, {})

        return num_type, num_type_props

    @classmethod
    def numeric_phrase(cls, key, num, language, country=None, dictionaries=(), strict_numeric=False, is_alpha=False):
        '''
        Build a phrase for num (e.g. "Floor 2", "2nd Floor", "2/F") from the
        alternatives configured under key.

        key: config property holding the phrase alternatives.
        num: the number/identifier, or None for a phrase without a number.
        dictionaries: phrase dictionaries to sample alternatives from.
        strict_numeric: return num undecorated if it contains letters.
        is_alpha: try the "<key>.alpha" alternatives first.

        Returns the phrase, the bare number, or None when the configuration
        excludes this number.
        '''
        has_alpha = False
        has_numeric = True
        is_integer = False
        is_none = False
        if num is not None:
            try:
                int(num)
                is_integer = True
            except ValueError:
                try:
                    float(num)
                except ValueError:
                    # Not a plain number: inspect tokens for digits/letters
                    tokens = tokenize(safe_decode(num))
                    has_numeric = False
                    for t, c in tokens:
                        if c == token_types.NUMERIC:
                            has_numeric = True
                        if any((ch.isalpha() for ch in t)):
                            has_alpha = True

                    if strict_numeric and has_alpha:
                        return safe_decode(num)
        else:
            is_none = True

        values, probs = None, None

        if is_alpha:
            values, probs = address_config.alternative_probabilities('{}.alpha'.format(key), language, dictionaries=dictionaries, country=country)

        # Pick a phrase given the probability distribution from the config
        if values is None:
            values, probs = address_config.alternative_probabilities(key, language, dictionaries=dictionaries, country=country)

        if not values:
            return safe_decode(num) if not is_none else None

        phrase, phrase_props = weighted_choice(values, probs)

        values = []
        probs = []

        # Dictionaries are lowercased, so title case here
        if phrase_props.get('title_case', True):
            phrase = phrase.title()

        # There are a few ways we can express the number itself
        #
        # 1. Alias it as some standalone word like basement (for floor "-1")
        # 2. Use the number itself, so "Floor 2"
        # 3. Append/prepend an affix e.g. 2/F for second floor
        # 4. As an ordinal expression e.g. "2nd Floor"
        have_standalone = False
        have_null = False
        for num_type in ('standalone', 'null', 'numeric', 'numeric_affix', 'ordinal'):
            prob = phrase_props.get('{}_probability'.format(num_type))
            if prob is not None:
                if num_type == 'standalone':
                    have_standalone = True
                elif num_type == 'null':
                    have_null = True
                values.append(num_type)
                probs.append(prob)
            elif num_type in phrase_props:
                # Props given without a probability: take this type for sure
                # and stop looking at later types
                values.append(num_type)
                probs.append(1.0)
                break

        if not probs or is_none:
            return phrase

        # If we're using something like "Floor A" or "Unit 2L", remove ordinal/affix items
        if has_alpha:
            kept = [(v, p) for v, p in zip(values, probs) if v in ('numeric', 'null', 'standalone')]
            # Built explicitly rather than via zip(*kept): unpacking the
            # result of zip(*[]) into two names raises when nothing survives
            # the filter.
            values = tuple(v for v, _ in kept)
            probs = tuple(p for _, p in kept)
            total = float(sum(probs))
            if not kept or isclose(total, 0.0):
                return None

            probs = [p / total for p in probs]

        probs = cdf(probs)

        if len(values) < 2:
            if have_standalone:
                num_type = 'standalone'
            elif have_null:
                num_type = 'null'
            else:
                num_type = 'numeric'
        else:
            num_type = weighted_choice(values, probs)

        if num_type == 'standalone':
            return phrase
        elif num_type == 'null':
            return safe_decode(num)

        props = phrase_props[num_type]

        if is_integer:
            num_int = int(num)
            if phrase_props.get('number_abs_value', False):
                num_int = abs(num_int)
                num = num_int

            if 'number_min_abs_value' in phrase_props and num_int < phrase_props['number_min_abs_value']:
                return None

            if 'number_max_abs_value' in phrase_props and num_int > phrase_props['number_max_abs_value']:
                return None

            if phrase_props.get('number_subtract_abs_value'):
                num_int -= phrase_props['number_subtract_abs_value']
                num = num_int

        num = safe_decode(num)
        digits_props = props.get('digits')
        if digits_props:
            # Inherit the gender and category e.g. for ordinals
            # NOTE(review): this writes into the config dict itself.
            for k in ('gender', 'category'):
                if k in props:
                    digits_props[k] = props[k]
            num = Digits.rewrite(num, language, digits_props, num_type=Digits.CARDINAL if num_type != 'ordinal' else Digits.ORDINAL)

        # Do we add the numeric phrase e.g. Floor No 1
        add_number_phrase = props.get('add_number_phrase', False)
        if add_number_phrase and random.random() < props['add_number_phrase_probability']:
            num = Number.phrase(num, language, country=country)

        whitespace_default = True

        if num_type == 'numeric_affix':
            phrase = props['affix']
            if props.get('upper_case', True):
                phrase = phrase.upper()
            if 'zero_pad' in props and num.isdigit():
                num = num.rjust(props['zero_pad'], props.get('zero_char', '0'))
            whitespace_default = False
        elif num_type == 'ordinal' and safe_decode(num).isdigit():
            ordinal_expression = ordinal_expressions.suffixed_number(num, language, gender=props.get('gender', None))

            if ordinal_expression is not None:
                num = ordinal_expression

        # Sometimes drop the phrase entirely, e.g. just "2nd" or "2L"
        if 'null_phrase_probability' in props and (num_type == 'ordinal' or (has_alpha and (has_numeric or 'null_phrase_alpha_only' in props))):
            if random.random() < props['null_phrase_probability']:
                return num

        direction = props['direction']
        whitespace = props.get('whitespace', whitespace_default)

        whitespace_probability = props.get('whitespace_probability')
        if whitespace_probability is not None:
            whitespace = random.random() < whitespace_probability

        # Occasionally switch up if direction_probability is specified
        if random.random() > props.get('direction_probability', 1.0):
            if direction == 'left':
                direction = 'right'
            elif direction == 'right':
                direction = 'left'

        whitespace_phrase = six.u(' ') if whitespace else six.u('')
        # Phrase goes to the left of the number
        if direction == 'left':
            return six.u('{}{}{}').format(phrase, whitespace_phrase, num)
        # Phrase goes to the right of the number
        elif direction == 'right':
            return six.u('{}{}{}').format(num, whitespace_phrase, phrase)
        # Need to specify a direction, otherwise return naked number
        else:
            return safe_decode(num)

View File

@@ -0,0 +1,76 @@
import random
import six
from geodata.addresses.config import address_config
from geodata.addresses.numbering import NumberedComponent, Digits, sample_alphabet, latin_alphabet
from geodata.encoding import safe_decode
from geodata.math.sampling import cdf, weighted_choice
class POBox(NumberedComponent):
    """Random PO box identifiers and the phrases that label them."""

    @classmethod
    def random_digits(cls, num_digits):
        """Return a random integer with exactly ``num_digits`` digits."""
        # Note: PO Boxes can have leading zeros but that is not important for
        # the parser since it only cares about how many digits a number has,
        # so the draw starts at the smallest number with that many digits.
        smallest = 10 ** (num_digits - 1)
        largest = (10 ** num_digits) - 1
        return random.randint(smallest, largest)

    @classmethod
    def random_digits_with_prefix(cls, num_digits, prefix=six.u('')):
        """Return ``prefix`` followed by ``num_digits`` random digits."""
        digits = safe_decode(cls.random_digits(num_digits))
        return six.u('').join((prefix, digits))

    @classmethod
    def random_digits_with_suffix(cls, num_digits, suffix=six.u('')):
        """Return ``num_digits`` random digits followed by ``suffix``."""
        digits = safe_decode(cls.random_digits(num_digits))
        return six.u('').join((digits, suffix))

    @classmethod
    def random_letter(cls, language, country=None):
        """Sample one letter from the configured alphabet (Latin by default)."""
        letters = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
        return sample_alphabet(letters)

    @classmethod
    def random(cls, language, country=None):
        """Generate a random PO box identifier.

        The alphanumeric type is chosen from the 'po_boxes.alphanumeric'
        config. Returns None when no type is chosen, or when the chosen type
        is not one of the four handled below.
        """
        num_type, num_type_props = cls.choose_alphanumeric_type('po_boxes.alphanumeric', language, country=country)
        if num_type is None:
            return None

        # Letter-only boxes never need a number
        if num_type == cls.ALPHA:
            return cls.random_letter(language, country=country)

        # Pick how many digits the number has from the configured
        # length/probability pairs, then draw a number of that length
        digit_config = address_config.get_property('po_boxes.digits', language, country=country, default=[])
        lengths = [entry['length'] for entry in digit_config]
        length_cdf = cdf([entry['probability'] for entry in digit_config])

        num_digits = weighted_choice(lengths, length_cdf)
        number = Digits.rewrite(cls.random_digits(num_digits), language, num_type_props)

        if num_type == cls.NUMERIC:
            return safe_decode(number)

        letter = cls.random_letter(language, country=country)

        # A zero/absent probability skips the random draw entirely
        whitespace_probability = float(num_type_props.get('whitespace_probability', 0.0))
        if whitespace_probability and random.random() < whitespace_probability:
            separator = six.u(' ')
        else:
            separator = six.u('')

        if num_type == cls.ALPHA_PLUS_NUMERIC:
            return six.u('{}{}{}').format(letter, separator, number)
        if num_type == cls.NUMERIC_PLUS_ALPHA:
            return six.u('{}{}{}').format(number, separator, letter)
        return None

    @classmethod
    def phrase(cls, box_number, language, country=None):
        """Render ``box_number`` as a PO box phrase; None stays None."""
        if box_number is not None:
            return cls.numeric_phrase('po_boxes.alphanumeric', safe_decode(box_number), language,
                                      dictionaries=['post_office'], country=country)
        return None

View File

@@ -0,0 +1,11 @@
from geodata.addresses.numbering import NumberedComponent
from geodata.encoding import safe_decode
class PostCode(NumberedComponent):
    """Renders postal codes through the 'postcodes.alphanumeric' config."""

    @classmethod
    def phrase(cls, postcode, language, country=None):
        """Return the phrase for ``postcode``, or None when it is None.

        The postcode is handed to ``numeric_phrase`` as-is together with the
        'postcodes' dictionary.
        """
        if postcode is not None:
            return cls.numeric_phrase('postcodes.alphanumeric', postcode, language,
                                      dictionaries=['postcodes'], country=country)
        return None

View File

@@ -0,0 +1,66 @@
import random
import six
from geodata.addresses.config import address_config
from geodata.addresses.numbering import NumberedComponent
from geodata.encoding import safe_decode
from geodata.configs.utils import nested_get
from geodata.addresses.directions import RelativeDirection
from geodata.addresses.floors import Floor
from geodata.addresses.numbering import NumberedComponent, Digits, sample_alphabet, latin_alphabet
from geodata.encoding import safe_decode
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
class Staircase(NumberedComponent):
    """Random staircase identifiers and the phrases that label them."""

    # Staircase numbers 1..max_staircases, sampled with a Zipfian
    # distribution so that low numbers are drawn far more often
    max_staircases = 10
    staircase_range = range(1, max_staircases + 1)
    staircase_range_probs = zipfian_distribution(len(staircase_range), 2.0)
    staircase_range_cdf = cdf(staircase_range_probs)

    @classmethod
    def random(cls, language, country=None):
        """Generate a random staircase identifier.

        The alphanumeric type comes from the 'staircases.alphanumeric'
        config. Returns None when no type is chosen, or when the chosen type
        is not handled by any branch below.
        """
        num_type, num_type_props = cls.choose_alphanumeric_type('staircases.alphanumeric', language, country=country)
        if num_type is None:
            return None

        if num_type == cls.NUMERIC:
            return safe_decode(weighted_choice(cls.staircase_range, cls.staircase_range_cdf))

        if num_type == cls.HYPHENATED_NUMBER:
            # The second number is the first plus a positive offset, so the
            # pair always reads as an ascending range
            first = weighted_choice(cls.staircase_range, cls.staircase_range_cdf)
            second = first + weighted_choice(cls.staircase_range, cls.staircase_range_cdf)
            return u'{}-{}'.format(first, second)

        # Everything else involves a letter. Use the configured alphabet, but
        # fall back to Latin when alphabet_probability is set and the draw
        # lands at or above it.
        letters = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
        letters_probability = address_config.get_property('alphabet_probability', language, country=country, default=None)
        if letters_probability is not None and random.random() >= letters_probability:
            letters = latin_alphabet
        letter = sample_alphabet(letters, 2.0)

        if num_type == cls.ALPHA:
            return safe_decode(letter)

        number = weighted_choice(cls.staircase_range, cls.staircase_range_cdf)

        # A single draw picks the separator: space, hyphen or nothing
        whitespace_probability = float(num_type_props.get('whitespace_probability', 0.0))
        hyphen_probability = float(num_type_props.get('hyphen_probability', 0.0))
        draw = random.random()
        if draw < whitespace_probability:
            separator = u' '
        elif draw < (whitespace_probability + hyphen_probability):
            separator = u'-'
        else:
            separator = u''

        if num_type == cls.ALPHA_PLUS_NUMERIC:
            return six.u('{}{}{}').format(letter, separator, number)
        if num_type == cls.NUMERIC_PLUS_ALPHA:
            return six.u('{}{}{}').format(number, separator, letter)
        return None

    @classmethod
    def phrase(cls, staircase, language, country=None):
        """Render ``staircase`` as a staircase phrase; None stays None."""
        if staircase is not None:
            return cls.numeric_phrase('staircases.alphanumeric', staircase, language,
                                      dictionaries=['staircases'], country=country)
        return None

View File

@@ -0,0 +1,285 @@
import itertools
import random
import six
from geodata.addresses.config import address_config
from geodata.addresses.directions import RelativeDirection, LateralDirection, AnteroposteriorDirection
from geodata.addresses.floors import Floor
from geodata.addresses.numbering import NumberedComponent, sample_alphabet, latin_alphabet
from geodata.configs.utils import nested_get
from geodata.encoding import safe_decode
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
from geodata.text.utils import is_numeric_strict
class Unit(NumberedComponent):
    """Random unit (apartment/suite/etc.) identifiers and their phrases."""

    # When we don't know the number of units, use a Zipfian distribution
    # to choose randomly between 1 and max_units with 1 being much more
    # likely than 2, etc.
    max_units = 99
    max_basements = 2

    # The tables below are built with list(range(...)) so that the
    # concatenations and .extend() calls work on Python 3 (where range() is
    # not a list) as well as on Python 2.
    hundreds_numbered_units_tens = [list(range(101, 110)) + [100],
                                    list(range(201, 210)) + [200],
                                    list(range(301, 310)) + [300],
                                    list(range(401, 410)) + [400],
                                    list(range(501, 510)) + [500],
                                    ]

    hundreds_numbered_units = [list(range(110, 200)),
                               list(range(210, 300)),
                               list(range(310, 400)),
                               list(range(410, 500)),
                               list(range(510, 600)),
                               ]

    thousands_numbered_units = [list(range(1001, 1030)) + [1000],
                                list(range(2001, 2030)) + [2000],
                                list(range(3001, 3030)) + [3000],
                                list(range(4001, 4030)) + [4000],
                                list(range(5001, 5030)) + [5000]
                                ]

    # Order matters: the Zipfian distribution below assigns probability by
    # position. zip(*groups) interleaves the groups (101, 201, ..., 501,
    # 102, 202, ...) before they are flattened into the list.
    numbered_units = list(range(1, 10))
    numbered_units.extend(itertools.chain.from_iterable(zip(*hundreds_numbered_units_tens)))
    numbered_units.extend(range(10, 100))
    numbered_units.extend(itertools.chain.from_iterable(zip(*hundreds_numbered_units)))
    numbered_units.extend(itertools.chain.from_iterable(zip(*thousands_numbered_units)))
    numbered_units.extend(list(range(10001, 10100)) + [10000])
    numbered_units.append(0)
    # NOTE(review): this range starts at 0 again, so 0 is listed twice. Kept
    # as-is because removing it would shift the sampling distribution.
    numbered_units.extend(range(0, -max_basements - 1, -1))

    unit_probs = zipfian_distribution(len(numbered_units), 0.7)
    unit_probs_cdf = cdf(unit_probs)

    num_digits = [2, 3, 4]
    num_digits_probs = zipfian_distribution(len(num_digits), 4.0)
    num_digits_cdf = cdf(num_digits_probs)

    # For use with floors e.g. #301 more common than #389
    positive_units_floors = list(range(1, 10)) + [0] + list(range(10, max_units + 1))
    positive_units_floors_probs = zipfian_distribution(len(positive_units_floors), 0.6)
    positive_units_floors_cdf = cdf(positive_units_floors_probs)

    # For basic positive units
    positive_units = list(range(1, max_units + 1))
    positive_units_probs = zipfian_distribution(len(positive_units), 0.6)
    positive_units_cdf = cdf(positive_units_probs)

    # For use with letters e.g. A0 less common
    positive_units_letters = list(range(1, max_units + 1)) + [0]
    positive_units_letters_probs = zipfian_distribution(len(positive_units_letters), 0.6)
    positive_units_letters_cdf = cdf(positive_units_letters_probs)

    RESIDENTIAL = 'residential'
    COMMERCIAL = 'commercial'
    INDUSTRIAL = 'industrial'
    UNIVERSITY = 'university'

    @classmethod
    def sample_num_digits(cls):
        """Sample a digit count from ``num_digits`` (2, 3 or 4)."""
        return weighted_choice(cls.num_digits, cls.num_digits_cdf)

    @classmethod
    def for_floor(cls, floor_number, num_digits=None):
        """Return a floor-prefixed unit string such as '301'.

        A unit is sampled from ``positive_units_floors``, zero-filled to
        ``num_digits`` characters (sampled when not given) and appended to
        ``floor_number``.
        """
        num_digits = num_digits if num_digits is not None else cls.sample_num_digits()
        unit = weighted_choice(cls.positive_units_floors, cls.positive_units_floors_cdf)
        return six.u('{}{}').format(floor_number, safe_decode(unit).zfill(num_digits))

    @classmethod
    def random(cls, language, country=None, num_floors=None, num_basements=None, floor=None):
        """Generate a random unit identifier.

        The alphanumeric type comes from the 'units.alphanumeric' config.
        When floor information is available (``num_floors`` or ``floor``) the
        number may be derived from the floor, subject to
        'use_floor_probability'.

        Returns None when no type is chosen or the chosen type is not handled
        by any branch below.
        """
        num_type, num_type_props = cls.choose_alphanumeric_type('units.alphanumeric', language, country=country)
        if num_type is None:
            return None

        use_floor_prob = address_config.get_property('units.alphanumeric.use_floor_probability', language, country=country, default=0.0)
        use_positive_numbers_prob = address_config.get_property('units.alphanumeric.use_positive_numbers_probability', language, country=country, default=0.0)

        if (num_floors is None and floor is None) or random.random() >= use_floor_prob:
            if random.random() >= use_positive_numbers_prob:
                number = weighted_choice(cls.numbered_units, cls.unit_probs_cdf)
            else:
                number = weighted_choice(cls.positive_units, cls.positive_units_cdf)
        else:
            # safe_decode first so an integer floor does not break .isdigit()
            if floor is None or not safe_decode(floor).isdigit():
                floor = Floor.random_int(language, country=country, num_floors=num_floors, num_basements=num_basements)

            floor_numbering_starts_at = address_config.get_property('levels.numbering_starts_at', language, country=country, default=0)
            ground_floor_starts_at = address_config.get_property('units.alphanumeric.use_floor_ground_starts_at', language, country=country, default=None)

            if ground_floor_starts_at is not None:
                # Re-base the floor so the ground floor maps to
                # ground_floor_starts_at; best-effort, a non-integer floor is
                # left untouched
                try:
                    floor = int(floor)
                    if floor >= floor_numbering_starts_at:
                        floor -= floor_numbering_starts_at
                    floor += ground_floor_starts_at
                    floor = safe_decode(floor)
                except (TypeError, ValueError):
                    pass

            use_floor_affix_prob = address_config.get_property('units.alphanumeric.use_floor_numeric_affix_probability', language, country=country, default=0.0)
            if use_floor_affix_prob and random.random() < use_floor_affix_prob:
                floor_phrase = Floor.phrase(floor, language, country=country)
                # Only works if the floor phrase is strictly numeric e.g. "1" or "H1"
                if is_numeric_strict(floor_phrase):
                    unit = weighted_choice(cls.positive_units, cls.positive_units_cdf)

                    unit_num_digits = address_config.get_property('units.alphanumeric.use_floor_unit_num_digits', language, country=country, default=None)
                    if unit_num_digits is not None:
                        unit = safe_decode(unit).zfill(unit_num_digits)

                    # Returned directly: the alphanumeric type chosen above
                    # is not applied to floor-affix units
                    return six.u('{}{}').format(floor_phrase, unit)

            floor_num_digits = address_config.get_property('units.alphanumeric.use_floor_floor_num_digits', language, country=country, default=None)
            # The floor may still be an int here (e.g. when no ground-floor
            # re-basing happened), so normalise before using string methods
            floor = safe_decode(floor)
            if floor_num_digits is not None and floor.isdigit():
                floor = floor.zfill(floor_num_digits)

            number = cls.for_floor(floor)

        if num_type == cls.NUMERIC:
            return safe_decode(number)
        elif num_type == cls.HYPHENATED_NUMBER:
            number2 = weighted_choice(cls.positive_units, cls.positive_units_cdf)
            range_prob = float(address_config.get_property('units.alphanumeric.hyphenated_number.range_probability', language, country=country, default=0.5))
            direction = address_config.get_property('units.alphanumeric.hyphenated_number.direction', language, country=country, default='right')
            direction_prob = float(address_config.get_property('units.alphanumeric.hyphenated_number.direction_probability', language, country=country, default=0.0))

            if random.random() < direction_prob:
                direction = 'left' if direction == 'right' else 'right'

            direction_right = direction == 'right'

            if random.random() < range_prob:
                # `number` is a string when it came from for_floor(), so do
                # the range arithmetic on its integer value. The original
                # `number` is still what gets formatted, which preserves any
                # zero padding.
                try:
                    base = int(number)
                except (TypeError, ValueError):
                    base = None

                if base is not None:
                    if direction_right:
                        number2 += base
                    else:
                        number2 = max(0, base - number2)

            if direction_right:
                return u'{}-{}'.format(number, number2)
            else:
                return u'{}-{}'.format(number2, number)
        else:
            # Use the configured alphabet, but fall back to Latin when
            # alphabet_probability is set and the draw lands at or above it
            alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
            alphabet_probability = address_config.get_property('alphabet_probability', language, country=country, default=None)
            if alphabet_probability is not None and random.random() >= alphabet_probability:
                alphabet = latin_alphabet

            letter = sample_alphabet(alphabet)
            if num_type == cls.ALPHA:
                return safe_decode(letter)
            else:
                # Without a known floor count, re-sample from the
                # distribution intended for letter+number units
                if num_floors is None:
                    number = weighted_choice(cls.positive_units_letters, cls.positive_units_letters_cdf)

                # A single draw picks the separator: space, hyphen or nothing
                whitespace_probability = float(num_type_props.get('whitespace_probability', 0.0))
                hyphen_probability = float(num_type_props.get('hyphen_probability', 0.0))
                whitespace_phrase = u''
                r = random.random()
                if r < whitespace_probability:
                    whitespace_phrase = u' '
                elif r < (whitespace_probability + hyphen_probability):
                    whitespace_phrase = u'-'

                if num_type == cls.ALPHA_PLUS_NUMERIC:
                    return six.u('{}{}{}').format(letter, whitespace_phrase, number)
                elif num_type == cls.NUMERIC_PLUS_ALPHA:
                    return six.u('{}{}{}').format(number, whitespace_phrase, letter)

        return None

    @classmethod
    def add_direction(cls, key, unit, language, country=None):
        """Possibly turn ``unit`` into a relative-direction phrase.

        Returns ``unit`` unchanged when the random draw is not below
        '<key>.add_direction_probability'. Otherwise returns the result of
        ``RelativeDirection.phrase``: with the integer unit when
        'add_direction_numeric' is set, or with no number when the unit is
        not an integer and 'add_direction_standalone' is set. Returns None
        when neither case applies.
        """
        add_direction_probability = address_config.get_property('{}.add_direction_probability'.format(key),
                                                                language, country=country, default=0.0)
        if not random.random() < add_direction_probability:
            return unit
        add_direction_numeric = address_config.get_property('{}.add_direction_numeric'.format(key),
                                                            language, country=country)
        try:
            unit = int(unit)
            integer_unit = True
        except (ValueError, TypeError):
            integer_unit = False

        if add_direction_numeric and integer_unit:
            return RelativeDirection.phrase(unit, language, country=country)
        elif not integer_unit:
            add_direction_standalone = address_config.get_property('{}.add_direction_standalone'.format(key),
                                                                   language, country=country)
            if add_direction_standalone:
                return RelativeDirection.phrase(None, language, country=country)

        return None

    @classmethod
    def add_quadrant(cls, key, unit, language, country=None):
        """Possibly combine ``unit`` with a lateral and an anteroposterior direction.

        Returns ``unit`` unchanged when the random draw is not below
        '<key>.add_quadrant_probability'. When
        '<key>.add_quadrant_first_direction' is neither 'lateral' nor
        'anteroposterior', returns the unit (as an int if it parsed as one).
        Returns None for a non-integer unit unless 'add_quadrant_standalone'
        is set, in which case the directions are combined with no number.
        """
        add_quadrant_probability = address_config.get_property('{}.add_quadrant_probability'.format(key),
                                                               language, country=country, default=0.0)
        if not random.random() < add_quadrant_probability:
            return unit
        add_quadrant_numeric = address_config.get_property('{}.add_quadrant_numeric'.format(key),
                                                           language, country=country)
        try:
            unit = int(unit)
            integer_unit = True
        except (ValueError, TypeError):
            integer_unit = False

        first_direction = address_config.get_property('{}.add_quadrant_first_direction'.format(key),
                                                      language, country=country)

        if first_direction == 'lateral':
            ordering = (LateralDirection, AnteroposteriorDirection)
        elif first_direction == 'anteroposterior':
            ordering = (AnteroposteriorDirection, LateralDirection)
        else:
            return unit

        if not integer_unit:
            add_quadrant_standalone = address_config.get_property('{}.add_quadrant_standalone'.format(key),
                                                                  language, country=country)
            if add_quadrant_standalone:
                unit = None
            else:
                return None

        # Apply the two directions in order; each step wraps the result of
        # the previous one
        last_num_type = None
        for c in ordering:
            num_type, phrase, props = c.pick_phrase_and_type(unit, language, country=country)
            whitespace_default = num_type == c.NUMERIC or last_num_type == c.NUMERIC
            unit = c.combine_with_number(unit, phrase, num_type, props, whitespace_default=whitespace_default)
            last_num_type = num_type

        return unit

    @classmethod
    def phrase(cls, unit, language, country=None, zone=None):
        """Render a unit phrase.

        With a ``unit``, the phrase comes from 'units.alphanumeric' (or
        'units.zones.<zone>' when ``zone`` is given), optionally after the
        unit has been rewritten with a direction or quadrant. Returns None
        when that config key is missing or empty.

        With ``unit`` None, a standalone unit phrase is sampled from
        'units.standalone' and title-cased. Returns None when there are no
        alternatives.
        """
        if unit is not None:
            key = 'units.alphanumeric' if zone is None else 'units.zones.{}'.format(zone)

            if not address_config.get_property(key, language, country=country):
                return None

            is_alpha = safe_decode(unit).isalpha()

            direction_unit = None
            add_direction = address_config.get_property('{}.add_direction'.format(key), language, country=country)
            if add_direction:
                direction_unit = cls.add_direction(key, unit, language, country=country)

            if direction_unit and direction_unit != unit:
                unit = direction_unit
                is_alpha = False
            else:
                add_quadrant = address_config.get_property('{}.add_quadrant'.format(key), language, country=country)
                if add_quadrant:
                    quadrant_unit = cls.add_quadrant(key, unit, language, country=country)
                    # add_quadrant returns None when it cannot build a
                    # quadrant for this unit. Keep the original unit then,
                    # rather than passing None on to numeric_phrase, and only
                    # clear is_alpha when the unit was actually rewritten.
                    if quadrant_unit is not None and quadrant_unit != unit:
                        unit = quadrant_unit
                        is_alpha = False

            return cls.numeric_phrase(key, safe_decode(unit), language,
                                      dictionaries=['unit_types_numbered'], country=country, is_alpha=is_alpha)
        else:
            key = 'units.standalone'
            values, probs = address_config.alternative_probabilities(key, language,
                                                                     dictionaries=['unit_types_standalone'],
                                                                     country=country)
            if values is None:
                return None
            phrase, _phrase_props = weighted_choice(values, probs)
            return phrase.title()