[addresses] Handling digit rewrites (spellout, Roman numerals, etc.) in the base class
This commit is contained in:
@@ -28,8 +28,7 @@ class Entrance(NumberedComponent):
|
|||||||
|
|
||||||
if num_type == cls.NUMERIC:
|
if num_type == cls.NUMERIC:
|
||||||
number = weighted_choice(cls.entrance_range, cls.entrance_range_cdf)
|
number = weighted_choice(cls.entrance_range, cls.entrance_range_cdf)
|
||||||
number = safe_decode(number)
|
return safe_decode(number)
|
||||||
return Digits.rewrite(number, language, num_type_props)
|
|
||||||
else:
|
else:
|
||||||
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
|
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
|
||||||
letter = sample_alphabet(alphabet, 2.0)
|
letter = sample_alphabet(alphabet, 2.0)
|
||||||
|
|||||||
@@ -45,6 +45,10 @@ class Digits(object):
|
|||||||
ASCII = 'ascii'
|
ASCII = 'ascii'
|
||||||
SPELLOUT = 'spellout'
|
SPELLOUT = 'spellout'
|
||||||
UNICODE_FULL_WIDTH = 'unicode_full_width'
|
UNICODE_FULL_WIDTH = 'unicode_full_width'
|
||||||
|
ROMAN_NUMERAL = 'roman_numeral'
|
||||||
|
|
||||||
|
CARDINAL = 'cardinal'
|
||||||
|
ORDINAL = 'ordinal'
|
||||||
|
|
||||||
unicode_full_width_map = {
|
unicode_full_width_map = {
|
||||||
'0': safe_decode('0'),
|
'0': safe_decode('0'),
|
||||||
@@ -64,18 +68,37 @@ class Digits(object):
|
|||||||
return six.u('').join([cls.unicode_full_width_map.get(c, c) for c in s])
|
return six.u('').join([cls.unicode_full_width_map.get(c, c) for c in s])
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def rewrite_spellout(cls, s, lang):
|
def rewrite_roman_numeral(cls, s):
|
||||||
|
roman_numeral = None
|
||||||
|
if s.isdigit():
|
||||||
|
roman_numeral = numeric_expressions.roman_numeral(s)
|
||||||
|
|
||||||
|
if roman_numeral:
|
||||||
|
return roman_numeral
|
||||||
|
else:
|
||||||
|
return s
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def rewrite_spellout(cls, s, lang, num_type, props):
|
||||||
if s.isdigit():
|
if s.isdigit():
|
||||||
num = int(s)
|
num = int(s)
|
||||||
cardinal = numeric_expressions.spellout_cardinal(num, lang)
|
spellout = None
|
||||||
if cardinal:
|
gender = props.get('gender')
|
||||||
return cardinal
|
category = props.get('category')
|
||||||
|
|
||||||
|
if num_type == cls.CARDINAL:
|
||||||
|
spellout = numeric_expressions.spellout_cardinal(num, lang, gender=gender, category=category)
|
||||||
|
elif num_type == cls.ORDINAL:
|
||||||
|
spellout = numeric_expressions.spellout_ordinal(num, lang, gender=gender, category=category)
|
||||||
|
|
||||||
|
if spellout:
|
||||||
|
return spellout.title()
|
||||||
return s
|
return s
|
||||||
else:
|
else:
|
||||||
return s
|
return s
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def rewrite(cls, d, lang, props):
|
def rewrite(cls, d, lang, props, num_type=CARDINAL):
|
||||||
if not props:
|
if not props:
|
||||||
return d
|
return d
|
||||||
|
|
||||||
@@ -84,7 +107,7 @@ class Digits(object):
|
|||||||
values = []
|
values = []
|
||||||
probs = []
|
probs = []
|
||||||
|
|
||||||
for digit_type in (cls.SPELLOUT, cls.UNICODE_FULL_WIDTH):
|
for digit_type in (cls.SPELLOUT, cls.UNICODE_FULL_WIDTH, cls.ROMAN_NUMERAL):
|
||||||
key = '{}_probability'.format(digit_type)
|
key = '{}_probability'.format(digit_type)
|
||||||
if key in props:
|
if key in props:
|
||||||
values.append(digit_type)
|
values.append(digit_type)
|
||||||
@@ -99,10 +122,12 @@ class Digits(object):
|
|||||||
|
|
||||||
if digit_type == cls.ASCII:
|
if digit_type == cls.ASCII:
|
||||||
return d
|
return d
|
||||||
|
elif digit_type == cls.SPELLOUT:
|
||||||
|
return cls.rewrite_spellout(d, lang, num_type, props)
|
||||||
|
elif digit_type == cls.ROMAN_NUMERAL:
|
||||||
|
return cls.rewrite_roman_numeral(d)
|
||||||
elif digit_type == cls.UNICODE_FULL_WIDTH:
|
elif digit_type == cls.UNICODE_FULL_WIDTH:
|
||||||
return cls.rewrite_full_width(d)
|
return cls.rewrite_full_width(d)
|
||||||
elif digit_type == cls.SPELLOUT:
|
|
||||||
return cls.rewrite_spellout(d, lang)
|
|
||||||
else:
|
else:
|
||||||
return d
|
return d
|
||||||
|
|
||||||
@@ -142,6 +167,7 @@ class NumericPhrase(object):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def combine_with_number(cls, number, phrase, num_type, props, whitespace_default=False):
|
def combine_with_number(cls, number, phrase, num_type, props, whitespace_default=False):
|
||||||
|
|
||||||
if num_type == cls.NUMERIC_AFFIX:
|
if num_type == cls.NUMERIC_AFFIX:
|
||||||
phrase = props['affix']
|
phrase = props['affix']
|
||||||
if 'zero_pad' in props and number.isdigit():
|
if 'zero_pad' in props and number.isdigit():
|
||||||
@@ -330,6 +356,13 @@ class NumberedComponent(object):
|
|||||||
num -= phrase_props['number_subtract_abs_value']
|
num -= phrase_props['number_subtract_abs_value']
|
||||||
|
|
||||||
num = safe_decode(num)
|
num = safe_decode(num)
|
||||||
|
digits_props = props.get('digits')
|
||||||
|
if digits_props:
|
||||||
|
# Inherit the gender and category e.g. for ordinals
|
||||||
|
for k in ('gender', 'category'):
|
||||||
|
if k in props:
|
||||||
|
digits_props[k] = props[k]
|
||||||
|
num = Digits.rewrite(num, language, digits_props, num_type=Digits.CARDINAL if num_type != 'ordinal' else Digits.ORDINAL)
|
||||||
|
|
||||||
# Do we add the numeric phrase e.g. Floor No 1
|
# Do we add the numeric phrase e.g. Floor No 1
|
||||||
add_number_phrase = props.get('add_number_phrase', False)
|
add_number_phrase = props.get('add_number_phrase', False)
|
||||||
@@ -338,31 +371,7 @@ class NumberedComponent(object):
|
|||||||
|
|
||||||
whitespace_default = True
|
whitespace_default = True
|
||||||
|
|
||||||
if num_type == 'numeric' and safe_decode(num).isdigit():
|
if num_type == 'numeric_affix':
|
||||||
values = []
|
|
||||||
probs = []
|
|
||||||
for cardinal_type in ('roman_numeral', 'spellout'):
|
|
||||||
key = '{}_probability'.format(cardinal_type)
|
|
||||||
if key in props:
|
|
||||||
values.append(cardinal_type)
|
|
||||||
probs.append(props[key])
|
|
||||||
|
|
||||||
values.append(None)
|
|
||||||
probs.append(1.0 - sum(probs))
|
|
||||||
|
|
||||||
probs = cdf(probs)
|
|
||||||
|
|
||||||
cardinal_type = weighted_choice(values, probs)
|
|
||||||
cardinal_expression = None
|
|
||||||
if cardinal_type == 'roman_numeral':
|
|
||||||
cardinal_expression = numeric_expressions.roman_numeral(num)
|
|
||||||
elif cardinal_type == 'spellout':
|
|
||||||
cardinal_expression = numeric_expressions.spellout_cardinal(num, language, gender=props.get('gender', None))
|
|
||||||
|
|
||||||
if cardinal_expression is not None:
|
|
||||||
num = cardinal_expression
|
|
||||||
|
|
||||||
elif num_type == 'numeric_affix':
|
|
||||||
phrase = props['affix']
|
phrase = props['affix']
|
||||||
if props.get('upper_case', True):
|
if props.get('upper_case', True):
|
||||||
phrase = phrase.upper()
|
phrase = phrase.upper()
|
||||||
@@ -370,30 +379,7 @@ class NumberedComponent(object):
|
|||||||
num = num.rjust(props['zero_pad'], props.get('zero_char', '0'))
|
num = num.rjust(props['zero_pad'], props.get('zero_char', '0'))
|
||||||
whitespace_default = False
|
whitespace_default = False
|
||||||
elif num_type == 'ordinal' and safe_decode(num).isdigit():
|
elif num_type == 'ordinal' and safe_decode(num).isdigit():
|
||||||
values = []
|
ordinal_expression = ordinal_expressions.suffixed_number(num, language, gender=props.get('gender', None))
|
||||||
probs = []
|
|
||||||
|
|
||||||
for ordinal_type in ('roman_numeral', 'spellout'):
|
|
||||||
key = '{}_probability'.format(ordinal_type)
|
|
||||||
if key in props:
|
|
||||||
values.append(ordinal_type)
|
|
||||||
probs.append(props[key])
|
|
||||||
|
|
||||||
values.append('digit_suffix')
|
|
||||||
probs.append(1.0 - sum(probs))
|
|
||||||
|
|
||||||
probs = cdf(probs)
|
|
||||||
|
|
||||||
ordinal_type = weighted_choice(values, probs)
|
|
||||||
|
|
||||||
ordinal_expression = None
|
|
||||||
if ordinal_type == 'digit_suffix':
|
|
||||||
ordinal_expression = ordinal_expressions.suffixed_number(num, language, gender=props.get('gender', None))
|
|
||||||
|
|
||||||
elif ordinal_type == 'roman_numeral':
|
|
||||||
ordinal_expression = numeric_expressions.roman_numeral(num)
|
|
||||||
elif ordinal_type == 'spellout':
|
|
||||||
ordinal_expression = numeric_expressions.spellout_ordinal(num, language, gender=props.get('gender', None))
|
|
||||||
|
|
||||||
if ordinal_expression is not None:
|
if ordinal_expression is not None:
|
||||||
num = ordinal_expression
|
num = ordinal_expression
|
||||||
|
|||||||
@@ -28,8 +28,7 @@ class Staircase(NumberedComponent):
|
|||||||
|
|
||||||
if num_type == cls.NUMERIC:
|
if num_type == cls.NUMERIC:
|
||||||
number = weighted_choice(cls.staircase_range, cls.staircase_range_cdf)
|
number = weighted_choice(cls.staircase_range, cls.staircase_range_cdf)
|
||||||
number = safe_decode(number)
|
return safe_decode(number)
|
||||||
return Digits.rewrite(number, language, num_type_props)
|
|
||||||
else:
|
else:
|
||||||
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
|
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
|
||||||
letter = sample_alphabet(alphabet, 2.0)
|
letter = sample_alphabet(alphabet, 2.0)
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import six
|
|||||||
from geodata.addresses.config import address_config
|
from geodata.addresses.config import address_config
|
||||||
from geodata.addresses.directions import RelativeDirection, LateralDirection, AnteroposteriorDirection
|
from geodata.addresses.directions import RelativeDirection, LateralDirection, AnteroposteriorDirection
|
||||||
from geodata.addresses.floors import Floor
|
from geodata.addresses.floors import Floor
|
||||||
from geodata.addresses.numbering import NumberedComponent, Digits, sample_alphabet, latin_alphabet
|
from geodata.addresses.numbering import NumberedComponent, sample_alphabet, latin_alphabet
|
||||||
from geodata.configs.utils import nested_get
|
from geodata.configs.utils import nested_get
|
||||||
from geodata.encoding import safe_decode
|
from geodata.encoding import safe_decode
|
||||||
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
|
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
|
||||||
@@ -71,7 +71,7 @@ class Unit(NumberedComponent):
|
|||||||
else:
|
else:
|
||||||
number = weighted_choice(cls.positive_units, cls.positive_units_cdf)
|
number = weighted_choice(cls.positive_units, cls.positive_units_cdf)
|
||||||
else:
|
else:
|
||||||
if floor is None:
|
if floor is None or not floor.isdigit():
|
||||||
floor = Floor.random_int(language, country=country, num_floors=num_floors, num_basements=num_basements)
|
floor = Floor.random_int(language, country=country, num_floors=num_floors, num_basements=num_basements)
|
||||||
|
|
||||||
floor_numbering_starts_at = address_config.get_property('levels.numbering_starts_at', language, country=country, default=0)
|
floor_numbering_starts_at = address_config.get_property('levels.numbering_starts_at', language, country=country, default=0)
|
||||||
@@ -107,8 +107,7 @@ class Unit(NumberedComponent):
|
|||||||
number = cls.for_floor(floor)
|
number = cls.for_floor(floor)
|
||||||
|
|
||||||
if num_type == cls.NUMERIC:
|
if num_type == cls.NUMERIC:
|
||||||
number = safe_decode(number)
|
return safe_decode(number)
|
||||||
return Digits.rewrite(number, language, num_type_props)
|
|
||||||
else:
|
else:
|
||||||
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
|
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
|
||||||
letter = sample_alphabet(alphabet)
|
letter = sample_alphabet(alphabet)
|
||||||
|
|||||||
Reference in New Issue
Block a user