From 974cef7ec4a0244cfc548ebec53b385b8ca4fc4b Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 28 Apr 2016 13:01:41 -0400 Subject: [PATCH] [addresses] Fixing direction_probability, adding ability to have phrases which only apply to numbers, adding the possibility of null phrases to non-numeric "numbers" e.g. A-Z, etc. --- scripts/geodata/addresses/numbering.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/scripts/geodata/addresses/numbering.py b/scripts/geodata/addresses/numbering.py index 80dd767b..448f7c65 100644 --- a/scripts/geodata/addresses/numbering.py +++ b/scripts/geodata/addresses/numbering.py @@ -5,7 +5,7 @@ from geodata.addresses.config import address_config from geodata.encoding import safe_decode from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf from geodata.numbers.ordinals import ordinal_expressions - +from geodata.text.tokenize import tokenize, token_types alphabets = {} @@ -104,7 +104,7 @@ class Number(NumericPhrase): class NumberedComponent(object): @classmethod - def numeric_phrase(cls, key, num, language, country=None, dictionaries=()): + def numeric_phrase(cls, key, num, language, country=None, dictionaries=(), strict_numeric=False): is_alpha = False is_none = False if num is not None: @@ -114,7 +114,11 @@ class NumberedComponent(object): try: num = float(num) except ValueError: - is_alpha = True + if not all((c == token_types.NUMERIC) for t, c in tokenize(safe_decode(num))): + if strict_numeric: + return safe_decode(num) + is_alpha = True + else: is_none = True @@ -162,7 +166,7 @@ class NumberedComponent(object): # If we're using something like "Floor A" or "Unit 2L", remove ordinal/affix items if is_alpha: - values, probs = zip(*[(v, p) for v, p in zip(values, probs) if v in ('numeric', 'standalone')]) + values, probs = zip(*[(v, p) for v, p in zip(values, probs) if v in ('numeric', 'null', 'standalone')]) total = sum(probs) probs = [p / total for p in probs] @@ -218,7 +222,7 @@ class NumberedComponent(object): whitespace = props.get('whitespace', whitespace_default) # Occasionally switch up if direction_probability is specified - if random.random() < props.get('direction_probability', 0.0): + if random.random() > props.get('direction_probability', 1.0): if direction == 'left': direction = 'right' elif direction == 'right':