[addresses] Add the ability to hyphenate the generated unit/floor numbers, either as ranges or as simple hyphenated numbers, including hyphenated variants of the letter + number and number + letter forms. This is implemented for English, but something similar can be done in the other configs.
This commit is contained in:
@@ -142,7 +142,6 @@ levels:
|
||||
# Numbered floors
|
||||
floor: &floor
|
||||
canonical: floor
|
||||
plural: floors
|
||||
abbreviated: fl
|
||||
canonical_probability: 0.5 # With this probability, use canonical version
|
||||
abbreviated_probability: 0.4 # With this probability, use abbreviated version
|
||||
@@ -150,6 +149,9 @@ levels:
|
||||
sample_exclude:
|
||||
- / f # Exclude this abbreviation since it's used as an affix
|
||||
sample: true
|
||||
plural:
|
||||
canonical: floors
|
||||
abbreviated: fls
|
||||
# e.g. Floor 1
|
||||
numeric:
|
||||
direction: left # Floor/Fl goes to the left of the number
|
||||
@@ -500,15 +502,19 @@ levels:
|
||||
probability: 0.025
|
||||
- alternative: *storey
|
||||
probability: 0.025
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
numeric_probability: 0.96 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. Floor A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. Floor 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. Floor A2
|
||||
hyphenated_number_probability: 0.03 # e.g. 11-10
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
hyphenated_number:
|
||||
range_probability: 0.5
|
||||
direction: right
|
||||
direction_probability: 0.6
|
||||
|
||||
# Intersections
|
||||
# =============
|
||||
@@ -1180,15 +1186,22 @@ units:
|
||||
probability: 0.01
|
||||
- alternative: *apartment
|
||||
probability: 0.1
|
||||
numeric_probability: 0.9 # e.g. Flat 1
|
||||
numeric_probability: 0.87 # e.g. Flat 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. Flat A
|
||||
hyphenated_number_probability: 0.03 # e.g. 11-10
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
whitespace_probability: 0.2
|
||||
hyphen_probability: 0.2
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
whitespace_probability: 0.2
|
||||
hyphen_probability: 0.2
|
||||
hyphenated_number:
|
||||
range_probability: 0.5
|
||||
direction: right
|
||||
direction_probability: 0.6
|
||||
|
||||
# Separate random probability for adding directions like 2L, 2R, etc.
|
||||
add_direction: true
|
||||
|
||||
@@ -29,6 +29,10 @@ class Entrance(NumberedComponent):
|
||||
if num_type == cls.NUMERIC:
|
||||
number = weighted_choice(cls.entrance_range, cls.entrance_range_cdf)
|
||||
return safe_decode(number)
|
||||
elif num_type == cls.HYPHENATED_NUMBER:
|
||||
number = weighted_choice(cls.entrance_range, cls.entrance_range_cdf)
|
||||
number2 = number + weighted_choice(cls.entrance_range, cls.entrance_range_cdf)
|
||||
return u'{}-{}'.format(number, number2)
|
||||
else:
|
||||
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
|
||||
alphabet_probability = address_config.get_property('alphabet_probability', language, country=country, default=None)
|
||||
@@ -41,7 +45,13 @@ class Entrance(NumberedComponent):
|
||||
number = weighted_choice(cls.entrance_range, cls.entrance_range_cdf)
|
||||
|
||||
whitespace_probability = float(num_type_props.get('whitespace_probability', 0.0))
|
||||
whitespace_phrase = six.u(' ') if whitespace_probability and random.random() < whitespace_probability else six.u('')
|
||||
hyphen_probability = float(num_type_props.get('hyphen_probability', 0.0))
|
||||
whitespace_phrase = u''
|
||||
r = random.random()
|
||||
if r < whitespace_probability:
|
||||
whitespace_phrase = u' '
|
||||
elif r < (whitespace_probability + hyphen_probability):
|
||||
whitespace_phrase = u'-'
|
||||
|
||||
if num_type == cls.ALPHA_PLUS_NUMERIC:
|
||||
return six.u('{}{}{}').format(letter, whitespace_phrase, number)
|
||||
|
||||
@@ -71,6 +71,9 @@ class Floor(NumberedComponent):
|
||||
return roman_numeral
|
||||
else:
|
||||
return safe_decode(number)
|
||||
elif num_type == cls.HYPHENATED_NUMBER:
|
||||
number2 = number + sample_floors_range(1, cls.max_floors)
|
||||
return u'{}-{}'.format(number, number2)
|
||||
else:
|
||||
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
|
||||
alphabet_probability = address_config.get_property('alphabet_probability', language, country=country, default=None)
|
||||
|
||||
@@ -226,6 +226,7 @@ class NumberedComponent(object):
|
||||
ALPHA = 'alpha'
|
||||
ALPHA_PLUS_NUMERIC = 'alpha_plus_numeric'
|
||||
NUMERIC_PLUS_ALPHA = 'numeric_plus_alpha'
|
||||
HYPHENATED_NUMBER = 'hyphenated_number'
|
||||
ROMAN_NUMERAL = 'roman_numeral'
|
||||
|
||||
@classmethod
|
||||
@@ -237,7 +238,7 @@ class NumberedComponent(object):
|
||||
values = []
|
||||
probs = []
|
||||
|
||||
for num_type in (cls.NUMERIC, cls.ALPHA, cls.ALPHA_PLUS_NUMERIC, cls.NUMERIC_PLUS_ALPHA, cls.ROMAN_NUMERAL):
|
||||
for num_type in (cls.NUMERIC, cls.ALPHA, cls.ALPHA_PLUS_NUMERIC, cls.NUMERIC_PLUS_ALPHA, cls.HYPHENATED_NUMBER, cls.ROMAN_NUMERAL):
|
||||
key = '{}_probability'.format(num_type)
|
||||
prob = alphanumeric_props.get(key)
|
||||
if prob is not None:
|
||||
|
||||
@@ -29,6 +29,10 @@ class Staircase(NumberedComponent):
|
||||
if num_type == cls.NUMERIC:
|
||||
number = weighted_choice(cls.staircase_range, cls.staircase_range_cdf)
|
||||
return safe_decode(number)
|
||||
elif num_type == cls.HYPHENATED_NUMBER:
|
||||
number = weighted_choice(cls.staircase_range, cls.staircase_range_cdf)
|
||||
number2 = number + weighted_choice(cls.staircase_range, cls.staircase_range_cdf)
|
||||
return u'{}-{}'.format(number, number2)
|
||||
else:
|
||||
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
|
||||
alphabet_probability = address_config.get_property('alphabet_probability', language, country=country, default=None)
|
||||
@@ -41,7 +45,13 @@ class Staircase(NumberedComponent):
|
||||
number = weighted_choice(cls.staircase_range, cls.staircase_range_cdf)
|
||||
|
||||
whitespace_probability = float(num_type_props.get('whitespace_probability', 0.0))
|
||||
whitespace_phrase = six.u(' ') if whitespace_probability and random.random() < whitespace_probability else six.u('')
|
||||
hyphen_probability = float(num_type_props.get('hyphen_probability', 0.0))
|
||||
whitespace_phrase = u''
|
||||
r = random.random()
|
||||
if r < whitespace_probability:
|
||||
whitespace_phrase = u' '
|
||||
elif r < (whitespace_probability + hyphen_probability):
|
||||
whitespace_phrase = u'-'
|
||||
|
||||
if num_type == cls.ALPHA_PLUS_NUMERIC:
|
||||
return six.u('{}{}{}').format(letter, whitespace_phrase, number)
|
||||
|
||||
@@ -139,6 +139,26 @@ class Unit(NumberedComponent):
|
||||
|
||||
if num_type == cls.NUMERIC:
|
||||
return safe_decode(number)
|
||||
elif num_type == cls.HYPHENATED_NUMBER:
|
||||
number2 = weighted_choice(cls.positive_units, cls.positive_units_cdf)
|
||||
range_prob = float(address_config.get_property('units.alphanumeric.hyphenated_number.range_probability', language, country=country, default=0.5))
|
||||
direction = address_config.get_property('units.alphanumeric.hyphenated_number.direction', language, country=country, default='right')
|
||||
direction_prob = float(address_config.get_property('units.alphanumeric.hyphenated_number.direction_probability', language, country=country, default=0.0))
|
||||
|
||||
if random.random() < direction_prob:
|
||||
direction = 'left' if direction == 'right' else 'right'
|
||||
|
||||
direction_right = direction == 'right'
|
||||
|
||||
if random.random() < range_prob:
|
||||
if direction_right:
|
||||
number2 += number
|
||||
else:
|
||||
number2 = max(0, number - number2)
|
||||
if direction == 'right':
|
||||
return u'{}-{}'.format(number, number2)
|
||||
else:
|
||||
return u'{}-{}'.format(number2, number)
|
||||
else:
|
||||
alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet)
|
||||
alphabet_probability = address_config.get_property('alphabet_probability', language, country=country, default=None)
|
||||
@@ -151,8 +171,14 @@ class Unit(NumberedComponent):
|
||||
if num_floors is None:
|
||||
number = weighted_choice(cls.positive_units_letters, cls.positive_units_letters_cdf)
|
||||
|
||||
whitespace_probability = nested_get(num_type_props, (num_type, 'whitespace_probability'))
|
||||
whitespace_phrase = six.u(' ') if whitespace_probability and random.random() < whitespace_probability else six.u('')
|
||||
whitespace_probability = float(num_type_props.get('whitespace_probability', 0.0))
|
||||
hyphen_probability = float(num_type_props.get('hyphen_probability', 0.0))
|
||||
whitespace_phrase = u''
|
||||
r = random.random()
|
||||
if r < whitespace_probability:
|
||||
whitespace_phrase = u' '
|
||||
elif r < (whitespace_probability + hyphen_probability):
|
||||
whitespace_phrase = u'-'
|
||||
|
||||
if num_type == cls.ALPHA_PLUS_NUMERIC:
|
||||
return six.u('{}{}{}').format(letter, whitespace_phrase, number)
|
||||
|
||||
Reference in New Issue
Block a user