From 793671d0b9a4ec7a3468877a5b5d47337b07e6a7 Mon Sep 17 00:00:00 2001 From: Al Date: Sun, 3 Jul 2016 23:41:49 -0400 Subject: [PATCH] [addresses] Sample from higher floors in buildings higher than 10 stories since those are relatively rare and we get enough lower numbered floors from random sampling --- scripts/geodata/addresses/floors.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/scripts/geodata/addresses/floors.py b/scripts/geodata/addresses/floors.py index 04e4f5a1..49031415 100644 --- a/scripts/geodata/addresses/floors.py +++ b/scripts/geodata/addresses/floors.py @@ -3,7 +3,7 @@ import six from geodata.addresses.config import address_config -from geodata.addresses.numbering import NumberedComponent, Digits, sample_alphabet, latin_alphabet +from geodata.addresses.numbering import NumberedComponent, sample_alphabet, latin_alphabet from geodata.encoding import safe_decode from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf from geodata.numbers.spellout import numeric_expressions @@ -30,16 +30,23 @@ class Floor(NumberedComponent): return random.randint(-num_basements, (num_floors - 1) if num_floors > 0 else 0) @classmethod - def sample_positive_floors(cls, num_floors, zeroth_floor_prob=0.001): - num_floors = int(num_floors) - if random.random() < zeroth_floor_prob: - return 0 - return random.randint(1, (num_floors - 1) if num_floors > 1 else 1) + def sample_floors_range(cls, min_floor, max_floor): + return random.randint(min_floor, (max_floor - 1) if max_floor > min_floor else min_floor) @classmethod def random_int(cls, language, country=None, num_floors=None, num_basements=None): + number = None if num_floors is not None: - number = cls.sample_floors(num_floors, num_basements or 0) + try: + num_floors = int(num_floors) + except (ValueError, TypeError): + return weighted_choice(cls.numbered_floors, cls.floor_probs_cdf) + + if num_floors <= cls.max_floors: + number = cls.sample_floors(num_floors, num_basements=num_basements or 0) + else: + number = cls.sample_floors_range(cls.max_floors + 1, num_floors) + else: number = weighted_choice(cls.numbered_floors, cls.floor_probs_cdf) @@ -57,8 +64,7 @@ class Floor(NumberedComponent): number += numbering_starts_at if num_type == cls.NUMERIC: - number = safe_decode(number) - return Digits.rewrite(number, language, num_type_props) + return safe_decode(number) elif num_type == cls.ROMAN_NUMERAL: roman_numeral = numeric_expressions.roman_numeral(number) if roman_numeral is not None: