diff --git a/resources/addresses/en.yaml b/resources/addresses/en.yaml index c7f9f559..f2b71461 100644 --- a/resources/addresses/en.yaml +++ b/resources/addresses/en.yaml @@ -142,7 +142,6 @@ levels: # Numbered floors floor: &floor canonical: floor - plural: floors abbreviated: fl canonical_probability: 0.5 # With this probability, use canonical version abbreviated_probability: 0.4 # With this probability, use abbreviated version @@ -150,6 +149,9 @@ levels: sample_exclude: - / f # Exclude this abbreviation since it's used as an affix sample: true + plural: + canonical: floors + abbreviated: fls # e.g. Floor 1 numeric: direction: left # Floor/Fl goes to the left of the number @@ -500,15 +502,19 @@ levels: probability: 0.025 - alternative: *storey probability: 0.025 - numeric_probability: 0.99 # With this probability, pick an integer + numeric_probability: 0.96 # With this probability, pick an integer alpha_probability: 0.0098 # With this probability, pick a letter e.g. Floor A numeric_plus_alpha_probability: 0.0001 # e.g. Floor 2A alpha_plus_numeric_probability: 0.0001 # e.g. Floor A2 + hyphenated_number_probability: 0.03 # e.g. 11-10 numeric_plus_alpha: whitespace_probability: 0.1 alpha_plus_numeric: whitespace_probability: 0.1 - + hyphenated_number: + range_probability: 0.5 + direction: right + direction_probability: 0.6 # Intersections # ============= @@ -1180,15 +1186,22 @@ units: probability: 0.01 - alternative: *apartment probability: 0.1 - numeric_probability: 0.9 # e.g. Flat 1 + numeric_probability: 0.87 # e.g. Flat 1 numeric_plus_alpha_probability: 0.03 # e.g. 1A alpha_plus_numeric_probability: 0.03 # e.g. A1 alpha_probability: 0.04 # e.g. Flat A + hyphenated_number_probability: 0.03 # e.g. 11-10 alpha_plus_numeric: - whitespace_probability: 0.1 + whitespace_probability: 0.2 + hyphen_probability: 0.2 numeric_plus_alpha: - whitespace_probability: 0.1 + whitespace_probability: 0.2 + hyphen_probability: 0.2 + hyphenated_number: + range_probability: 0.5 + direction: right + direction_probability: 0.6 # Separate random probability for adding directions like 2L, 2R, etc. add_direction: true diff --git a/scripts/geodata/addresses/entrances.py b/scripts/geodata/addresses/entrances.py index 7652afd6..098b5a6a 100644 --- a/scripts/geodata/addresses/entrances.py +++ b/scripts/geodata/addresses/entrances.py @@ -29,6 +29,10 @@ class Entrance(NumberedComponent): if num_type == cls.NUMERIC: number = weighted_choice(cls.entrance_range, cls.entrance_range_cdf) return safe_decode(number) + elif num_type == cls.HYPHENATED_NUMBER: + number = weighted_choice(cls.entrance_range, cls.entrance_range_cdf) + number2 = number + weighted_choice(cls.entrance_range, cls.entrance_range_cdf) + return u'{}-{}'.format(number, number2) else: alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet) alphabet_probability = address_config.get_property('alphabet_probability', language, country=country, default=None) @@ -41,7 +45,13 @@ class Entrance(NumberedComponent): number = weighted_choice(cls.entrance_range, cls.entrance_range_cdf) whitespace_probability = float(num_type_props.get('whitespace_probability', 0.0)) - whitespace_phrase = six.u(' ') if whitespace_probability and random.random() < whitespace_probability else six.u('') + hyphen_probability = float(num_type_props.get('hyphen_probability', 0.0)) + whitespace_phrase = u'' + r = random.random() + if r < whitespace_probability: + whitespace_phrase = u' ' + elif r < (whitespace_probability + hyphen_probability): + whitespace_phrase = u'-' if num_type == cls.ALPHA_PLUS_NUMERIC: return six.u('{}{}{}').format(letter, whitespace_phrase, number) diff --git a/scripts/geodata/addresses/floors.py b/scripts/geodata/addresses/floors.py index a325fd05..ef269c74 100644 --- a/scripts/geodata/addresses/floors.py +++ b/scripts/geodata/addresses/floors.py @@ -71,6 +71,9 @@ class Floor(NumberedComponent): return roman_numeral else: return safe_decode(number) + elif num_type == cls.HYPHENATED_NUMBER: + number2 = number + sample_floors_range(1, cls.max_floors) + return u'{}-{}'.format(number, number2) else: alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet) alphabet_probability = address_config.get_property('alphabet_probability', language, country=country, default=None) diff --git a/scripts/geodata/addresses/numbering.py b/scripts/geodata/addresses/numbering.py index c7649803..ad96b408 100644 --- a/scripts/geodata/addresses/numbering.py +++ b/scripts/geodata/addresses/numbering.py @@ -226,6 +226,7 @@ class NumberedComponent(object): ALPHA = 'alpha' ALPHA_PLUS_NUMERIC = 'alpha_plus_numeric' NUMERIC_PLUS_ALPHA = 'numeric_plus_alpha' + HYPHENATED_NUMBER = 'hyphenated_number' ROMAN_NUMERAL = 'roman_numeral' @classmethod @@ -237,7 +238,7 @@ class NumberedComponent(object): values = [] probs = [] - for num_type in (cls.NUMERIC, cls.ALPHA, cls.ALPHA_PLUS_NUMERIC, cls.NUMERIC_PLUS_ALPHA, cls.ROMAN_NUMERAL): + for num_type in (cls.NUMERIC, cls.ALPHA, cls.ALPHA_PLUS_NUMERIC, cls.NUMERIC_PLUS_ALPHA, cls.HYPHENATED_NUMBER, cls.ROMAN_NUMERAL): key = '{}_probability'.format(num_type) prob = alphanumeric_props.get(key) if prob is not None: diff --git a/scripts/geodata/addresses/staircases.py b/scripts/geodata/addresses/staircases.py index 0b128c30..03e02c1b 100644 --- a/scripts/geodata/addresses/staircases.py +++ b/scripts/geodata/addresses/staircases.py @@ -29,6 +29,10 @@ class Staircase(NumberedComponent): if num_type == cls.NUMERIC: number = weighted_choice(cls.staircase_range, cls.staircase_range_cdf) return safe_decode(number) + elif num_type == cls.HYPHENATED_NUMBER: + number = weighted_choice(cls.staircase_range, cls.staircase_range_cdf) + number2 = number + weighted_choice(cls.staircase_range, cls.staircase_range_cdf) + return u'{}-{}'.format(number, number2) else: alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet) alphabet_probability = address_config.get_property('alphabet_probability', language, country=country, default=None) @@ -41,7 +45,13 @@ class Staircase(NumberedComponent): number = weighted_choice(cls.staircase_range, cls.staircase_range_cdf) whitespace_probability = float(num_type_props.get('whitespace_probability', 0.0)) - whitespace_phrase = six.u(' ') if whitespace_probability and random.random() < whitespace_probability else six.u('') + hyphen_probability = float(num_type_props.get('hyphen_probability', 0.0)) + whitespace_phrase = u'' + r = random.random() + if r < whitespace_probability: + whitespace_phrase = u' ' + elif r < (whitespace_probability + hyphen_probability): + whitespace_phrase = u'-' if num_type == cls.ALPHA_PLUS_NUMERIC: return six.u('{}{}{}').format(letter, whitespace_phrase, number) diff --git a/scripts/geodata/addresses/units.py b/scripts/geodata/addresses/units.py index 01de6c38..323b5d48 100644 --- a/scripts/geodata/addresses/units.py +++ b/scripts/geodata/addresses/units.py @@ -139,6 +139,26 @@ class Unit(NumberedComponent): if num_type == cls.NUMERIC: return safe_decode(number) + elif num_type == cls.HYPHENATED_NUMBER: + number2 = weighted_choice(cls.positive_units, cls.positive_units_cdf) + range_prob = float(address_config.get_property('units.alphanumeric.hyphenated_number.range_probability', language, country=country, default=0.5)) + direction = address_config.get_property('units.alphanumeric.hyphenated_number.direction', language, country=country, default='right') + direction_prob = float(address_config.get_property('units.alphanumeric.hyphenated_number.direction_probability', language, country=country, default=0.0)) + + if random.random() < direction_prob: + direction = 'left' if direction == 'right' else 'right' + + direction_right = direction == 'right' + + if random.random() < range_prob: + if direction_right: + number2 += number + else: + number2 = max(0, number - number2) + if direction == 'right': + return u'{}-{}'.format(number, number2) + else: + return u'{}-{}'.format(number2, number) else: alphabet = address_config.get_property('alphabet', language, country=country, default=latin_alphabet) alphabet_probability = address_config.get_property('alphabet_probability', language, country=country, default=None) @@ -151,8 +171,14 @@ class Unit(NumberedComponent): if num_floors is None: number = weighted_choice(cls.positive_units_letters, cls.positive_units_letters_cdf) - whitespace_probability = nested_get(num_type_props, (num_type, 'whitespace_probability')) - whitespace_phrase = six.u(' ') if whitespace_probability and random.random() < whitespace_probability else six.u('') + whitespace_probability = float(num_type_props.get('whitespace_probability', 0.0)) + hyphen_probability = float(num_type_props.get('hyphen_probability', 0.0)) + whitespace_phrase = u'' + r = random.random() + if r < whitespace_probability: + whitespace_phrase = u' ' + elif r < (whitespace_probability + hyphen_probability): + whitespace_phrase = u'-' if num_type == cls.ALPHA_PLUS_NUMERIC: return six.u('{}{}{}').format(letter, whitespace_phrase, number)