[addresses] Fixing direction_probability, adding the ability to have phrases which only apply to numbers, and adding the possibility of null phrases for non-numeric "numbers" (e.g. A-Z).

This commit is contained in:
Al
2016-04-28 13:01:41 -04:00
parent 4a56c12881
commit 974cef7ec4

View File

@@ -5,7 +5,7 @@ from geodata.addresses.config import address_config
from geodata.encoding import safe_decode from geodata.encoding import safe_decode
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
from geodata.numbers.ordinals import ordinal_expressions from geodata.numbers.ordinals import ordinal_expressions
from geodata.text.tokenize import tokenize, token_types
alphabets = {} alphabets = {}
@@ -104,7 +104,7 @@ class Number(NumericPhrase):
class NumberedComponent(object): class NumberedComponent(object):
@classmethod @classmethod
def numeric_phrase(cls, key, num, language, country=None, dictionaries=()): def numeric_phrase(cls, key, num, language, country=None, dictionaries=(), strict_numeric=False):
is_alpha = False is_alpha = False
is_none = False is_none = False
if num is not None: if num is not None:
@@ -114,7 +114,11 @@ class NumberedComponent(object):
try: try:
num = float(num) num = float(num)
except ValueError: except ValueError:
is_alpha = True if not all((c == token_types.NUMERIC) for t, c in tokenize(safe_decode(num))):
if strict_numeric:
return safe_decode(num)
is_alpha = True
else: else:
is_none = True is_none = True
@@ -162,7 +166,7 @@ class NumberedComponent(object):
# If we're using something like "Floor A" or "Unit 2L", remove ordinal/affix items # If we're using something like "Floor A" or "Unit 2L", remove ordinal/affix items
if is_alpha: if is_alpha:
values, probs = zip(*[(v, p) for v, p in zip(values, probs) if v in ('numeric', 'standalone')]) values, probs = zip(*[(v, p) for v, p in zip(values, probs) if v in ('numeric', 'null', 'standalone')])
total = sum(probs) total = sum(probs)
probs = [p / total for p in probs] probs = [p / total for p in probs]
@@ -218,7 +222,7 @@ class NumberedComponent(object):
whitespace = props.get('whitespace', whitespace_default) whitespace = props.get('whitespace', whitespace_default)
# Occasionally switch up if direction_probability is specified # Occasionally switch up if direction_probability is specified
if random.random() < props.get('direction_probability', 0.0): if random.random() > props.get('direction_probability', 1.0):
if direction == 'left': if direction == 'left':
direction = 'right' direction = 'right'
elif direction == 'right': elif direction == 'right':