[addresses] Add the ability to hyphenate the generated unit/floor numbers, either as ranges or as simple hyphenated numbers, including hyphenated variants of the letter + number and number + letter forms. Implemented for English, but something similar can be done in the other configs.

This commit is contained in:
Al
2017-03-27 01:48:25 -04:00
parent 56f00250c2
commit 217de3a8a2
6 changed files with 74 additions and 11 deletions

View File

@@ -29,6 +29,10 @@ class Entrance(NumberedComponent):
if num_type == cls.NUMERIC:
number = weighted_choice(cls.entrance_range, cls.entrance_range_cdf)
return safe_decode(number)
elif num_type == cls.HYPHENATED_NUMBER:
number = weighted_choice(cls.entrance_range, cls.entrance_range_cdf)
number2 = number + weighted_choice(cls.entrance_range, cls.entrance_range_cdf)
return u'{}-{}'.format(number, number2)
else:
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
alphabet_probability = address_config.get_property('alphabet_probability', language, country=country, default=None)
@@ -41,7 +45,13 @@ class Entrance(NumberedComponent):
number = weighted_choice(cls.entrance_range, cls.entrance_range_cdf)
whitespace_probability = float(num_type_props.get('whitespace_probability', 0.0))
whitespace_phrase = six.u(' ') if whitespace_probability and random.random() < whitespace_probability else six.u('')
hyphen_probability = float(num_type_props.get('hyphen_probability', 0.0))
whitespace_phrase = u''
r = random.random()
if r < whitespace_probability:
whitespace_phrase = u' '
elif r < (whitespace_probability + hyphen_probability):
whitespace_phrase = u'-'
if num_type == cls.ALPHA_PLUS_NUMERIC:
return six.u('{}{}{}').format(letter, whitespace_phrase, number)

View File

@@ -71,6 +71,9 @@ class Floor(NumberedComponent):
return roman_numeral
else:
return safe_decode(number)
elif num_type == cls.HYPHENATED_NUMBER:
number2 = number + sample_floors_range(1, cls.max_floors)
return u'{}-{}'.format(number, number2)
else:
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
alphabet_probability = address_config.get_property('alphabet_probability', language, country=country, default=None)

View File

@@ -226,6 +226,7 @@ class NumberedComponent(object):
ALPHA = 'alpha'
ALPHA_PLUS_NUMERIC = 'alpha_plus_numeric'
NUMERIC_PLUS_ALPHA = 'numeric_plus_alpha'
HYPHENATED_NUMBER = 'hyphenated_number'
ROMAN_NUMERAL = 'roman_numeral'
@classmethod
@@ -237,7 +238,7 @@ class NumberedComponent(object):
values = []
probs = []
for num_type in (cls.NUMERIC, cls.ALPHA, cls.ALPHA_PLUS_NUMERIC, cls.NUMERIC_PLUS_ALPHA, cls.ROMAN_NUMERAL):
for num_type in (cls.NUMERIC, cls.ALPHA, cls.ALPHA_PLUS_NUMERIC, cls.NUMERIC_PLUS_ALPHA, cls.HYPHENATED_NUMBER, cls.ROMAN_NUMERAL):
key = '{}_probability'.format(num_type)
prob = alphanumeric_props.get(key)
if prob is not None:

View File

@@ -29,6 +29,10 @@ class Staircase(NumberedComponent):
if num_type == cls.NUMERIC:
number = weighted_choice(cls.staircase_range, cls.staircase_range_cdf)
return safe_decode(number)
elif num_type == cls.HYPHENATED_NUMBER:
number = weighted_choice(cls.staircase_range, cls.staircase_range_cdf)
number2 = number + weighted_choice(cls.staircase_range, cls.staircase_range_cdf)
return u'{}-{}'.format(number, number2)
else:
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
alphabet_probability = address_config.get_property('alphabet_probability', language, country=country, default=None)
@@ -41,7 +45,13 @@ class Staircase(NumberedComponent):
number = weighted_choice(cls.staircase_range, cls.staircase_range_cdf)
whitespace_probability = float(num_type_props.get('whitespace_probability', 0.0))
whitespace_phrase = six.u(' ') if whitespace_probability and random.random() < whitespace_probability else six.u('')
hyphen_probability = float(num_type_props.get('hyphen_probability', 0.0))
whitespace_phrase = u''
r = random.random()
if r < whitespace_probability:
whitespace_phrase = u' '
elif r < (whitespace_probability + hyphen_probability):
whitespace_phrase = u'-'
if num_type == cls.ALPHA_PLUS_NUMERIC:
return six.u('{}{}{}').format(letter, whitespace_phrase, number)

View File

@@ -139,6 +139,26 @@ class Unit(NumberedComponent):
if num_type == cls.NUMERIC:
return safe_decode(number)
elif num_type == cls.HYPHENATED_NUMBER:
number2 = weighted_choice(cls.positive_units, cls.positive_units_cdf)
range_prob = float(address_config.get_property('units.alphanumeric.hyphenated_number.range_probability', language, country=country, default=0.5))
direction = address_config.get_property('units.alphanumeric.hyphenated_number.direction', language, country=country, default='right')
direction_prob = float(address_config.get_property('units.alphanumeric.hyphenated_number.direction_probability', language, country=country, default=0.0))
if random.random() < direction_prob:
direction = 'left' if direction == 'right' else 'right'
direction_right = direction == 'right'
if random.random() < range_prob:
if direction_right:
number2 += number
else:
number2 = max(0, number - number2)
if direction == 'right':
return u'{}-{}'.format(number, number2)
else:
return u'{}-{}'.format(number2, number)
else:
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
alphabet_probability = address_config.get_property('alphabet_probability', language, country=country, default=None)
@@ -151,8 +171,14 @@ class Unit(NumberedComponent):
if num_floors is None:
number = weighted_choice(cls.positive_units_letters, cls.positive_units_letters_cdf)
whitespace_probability = nested_get(num_type_props, (num_type, 'whitespace_probability'))
whitespace_phrase = six.u(' ') if whitespace_probability and random.random() < whitespace_probability else six.u('')
whitespace_probability = float(num_type_props.get('whitespace_probability', 0.0))
hyphen_probability = float(num_type_props.get('hyphen_probability', 0.0))
whitespace_phrase = u''
r = random.random()
if r < whitespace_probability:
whitespace_phrase = u' '
elif r < (whitespace_probability + hyphen_probability):
whitespace_phrase = u'-'
if num_type == cls.ALPHA_PLUS_NUMERIC:
return six.u('{}{}{}').format(letter, whitespace_phrase, number)