[addresses] Fixing direction_probability, adding the ability to have phrases that apply only to numbers, and adding the possibility of null phrases for non-numeric "numbers" (e.g. A-Z).
This commit is contained in:
@@ -5,7 +5,7 @@ from geodata.addresses.config import address_config
|
|||||||
from geodata.encoding import safe_decode
|
from geodata.encoding import safe_decode
|
||||||
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
|
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
|
||||||
from geodata.numbers.ordinals import ordinal_expressions
|
from geodata.numbers.ordinals import ordinal_expressions
|
||||||
|
from geodata.text.tokenize import tokenize, token_types
|
||||||
|
|
||||||
alphabets = {}
|
alphabets = {}
|
||||||
|
|
||||||
@@ -104,7 +104,7 @@ class Number(NumericPhrase):
|
|||||||
|
|
||||||
class NumberedComponent(object):
|
class NumberedComponent(object):
|
||||||
@classmethod
|
@classmethod
|
||||||
def numeric_phrase(cls, key, num, language, country=None, dictionaries=()):
|
def numeric_phrase(cls, key, num, language, country=None, dictionaries=(), strict_numeric=False):
|
||||||
is_alpha = False
|
is_alpha = False
|
||||||
is_none = False
|
is_none = False
|
||||||
if num is not None:
|
if num is not None:
|
||||||
@@ -114,7 +114,11 @@ class NumberedComponent(object):
|
|||||||
try:
|
try:
|
||||||
num = float(num)
|
num = float(num)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
is_alpha = True
|
if not all((c == token_types.NUMERIC) for t, c in tokenize(safe_decode(num))):
|
||||||
|
if strict_numeric:
|
||||||
|
return safe_decode(num)
|
||||||
|
is_alpha = True
|
||||||
|
|
||||||
else:
|
else:
|
||||||
is_none = True
|
is_none = True
|
||||||
|
|
||||||
@@ -162,7 +166,7 @@ class NumberedComponent(object):
|
|||||||
|
|
||||||
# If we're using something like "Floor A" or "Unit 2L", remove ordinal/affix items
|
# If we're using something like "Floor A" or "Unit 2L", remove ordinal/affix items
|
||||||
if is_alpha:
|
if is_alpha:
|
||||||
values, probs = zip(*[(v, p) for v, p in zip(values, probs) if v in ('numeric', 'standalone')])
|
values, probs = zip(*[(v, p) for v, p in zip(values, probs) if v in ('numeric', 'null', 'standalone')])
|
||||||
total = sum(probs)
|
total = sum(probs)
|
||||||
probs = [p / total for p in probs]
|
probs = [p / total for p in probs]
|
||||||
|
|
||||||
@@ -218,7 +222,7 @@ class NumberedComponent(object):
|
|||||||
whitespace = props.get('whitespace', whitespace_default)
|
whitespace = props.get('whitespace', whitespace_default)
|
||||||
|
|
||||||
# Occasionally switch up if direction_probability is specified
|
# Occasionally switch up if direction_probability is specified
|
||||||
if random.random() < props.get('direction_probability', 0.0):
|
if random.random() > props.get('direction_probability', 1.0):
|
||||||
if direction == 'left':
|
if direction == 'left':
|
||||||
direction = 'right'
|
direction = 'right'
|
||||||
elif direction == 'right':
|
elif direction == 'right':
|
||||||
|
|||||||
Reference in New Issue
Block a user