[addresses] Fixing direction_probability, adding the ability to have phrases which apply only to numeric values (strict_numeric), and allowing null phrases for non-numeric "numbers" (e.g. A-Z).

This commit is contained in:
Al
2016-04-28 13:01:41 -04:00
parent ac00f294c0
commit 3d765e9eca

View File

@@ -5,7 +5,7 @@ from geodata.addresses.config import address_config
from geodata.encoding import safe_decode
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
from geodata.numbers.ordinals import ordinal_expressions
from geodata.text.tokenize import tokenize, token_types
alphabets = {}
@@ -104,7 +104,7 @@ class Number(NumericPhrase):
class NumberedComponent(object):
@classmethod
def numeric_phrase(cls, key, num, language, country=None, dictionaries=()):
def numeric_phrase(cls, key, num, language, country=None, dictionaries=(), strict_numeric=False):
is_alpha = False
is_none = False
if num is not None:
@@ -114,7 +114,11 @@ class NumberedComponent(object):
try:
num = float(num)
except ValueError:
is_alpha = True
if not all((c == token_types.NUMERIC) for t, c in tokenize(safe_decode(num))):
if strict_numeric:
return safe_decode(num)
is_alpha = True
else:
is_none = True
@@ -162,7 +166,7 @@ class NumberedComponent(object):
# If we're using something like "Floor A" or "Unit 2L", remove ordinal/affix items
if is_alpha:
values, probs = zip(*[(v, p) for v, p in zip(values, probs) if v in ('numeric', 'standalone')])
values, probs = zip(*[(v, p) for v, p in zip(values, probs) if v in ('numeric', 'null', 'standalone')])
total = sum(probs)
probs = [p / total for p in probs]
@@ -218,7 +222,7 @@ class NumberedComponent(object):
whitespace = props.get('whitespace', whitespace_default)
# direction_probability is the chance of keeping the configured direction; otherwise flip it (the default of 1.0 never flips)
if random.random() < props.get('direction_probability', 0.0):
if random.random() > props.get('direction_probability', 1.0):
if direction == 'left':
direction = 'right'
elif direction == 'right':