[addresses] Adding Digits, which allows for replacing numbers with their unicode full-width equivalents or doing number spellout
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import random
|
||||
import six
|
||||
|
||||
@@ -40,6 +41,72 @@ def sample_alphabet(alphabet, b=1.5):
|
||||
latin_alphabet = [chr(i) for i in range(ord('A'), ord('Z') + 1)]
|
||||
|
||||
|
||||
class Digits(object):
|
||||
ASCII = 'ascii'
|
||||
SPELLOUT = 'spellout'
|
||||
UNICODE_FULL_WIDTH = 'unicode_full_width'
|
||||
|
||||
unicode_full_width_map = {
|
||||
'0': safe_decode('0'),
|
||||
'1': safe_decode('1'),
|
||||
'2': safe_decode('2'),
|
||||
'3': safe_decode('3'),
|
||||
'4': safe_decode('4'),
|
||||
'5': safe_decode('5'),
|
||||
'6': safe_decode('6'),
|
||||
'7': safe_decode('7'),
|
||||
'8': safe_decode('8'),
|
||||
'9': safe_decode('9'),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def rewrite_full_width(cls, s):
|
||||
return six.u('').join([cls.unicode_full_width_map.get(c, c) for c in s])
|
||||
|
||||
@classmethod
|
||||
def rewrite_spellout(cls, s, lang):
|
||||
if s.isdigit():
|
||||
num = int(s)
|
||||
cardinal = numeric_expressions.spellout_cardinal(num, lang)
|
||||
if cardinal:
|
||||
return cardinal
|
||||
return s
|
||||
else:
|
||||
return s
|
||||
|
||||
@classmethod
|
||||
def rewrite(cls, d, lang, props):
|
||||
if not props:
|
||||
return d
|
||||
|
||||
d = safe_decode(d)
|
||||
|
||||
values = []
|
||||
probs = []
|
||||
|
||||
for digit_type in (cls.SPELLOUT, cls.UNICODE_FULL_WIDTH):
|
||||
key = '{}_probability'.format(digit_type)
|
||||
if key in props:
|
||||
values.append(digit_type)
|
||||
probs.append(props[key])
|
||||
|
||||
if not isclose(sum(probs), 1.0):
|
||||
values.append(cls.ASCII)
|
||||
probs.append(1.0 - sum(probs))
|
||||
|
||||
probs = cdf(probs)
|
||||
digit_type = weighted_choice(values, probs)
|
||||
|
||||
if digit_type == cls.ASCII:
|
||||
return d
|
||||
elif digit_type == cls.UNICODE_FULL_WIDTH:
|
||||
return cls.rewrite_full_width(d)
|
||||
elif digit_type == cls.SPELLOUT:
|
||||
return cls.rewrite_spellout(d, lang)
|
||||
else:
|
||||
return d
|
||||
|
||||
|
||||
class NumericPhrase(object):
|
||||
key = None
|
||||
|
||||
@@ -50,7 +117,7 @@ class NumericPhrase(object):
|
||||
def pick_phrase_and_type(cls, number, language, country=None):
|
||||
values, probs = address_config.alternative_probabilities(cls.key, language, dictionaries=cls.dictionaries, country=country)
|
||||
if not values:
|
||||
return safe_decode(number) if number is not None else None
|
||||
return None, safe_decode(number) if number is not None else None, None
|
||||
|
||||
phrase, phrase_props = weighted_choice(values, probs)
|
||||
|
||||
@@ -135,6 +202,9 @@ class NumberedComponent(object):
|
||||
values.append(num_type)
|
||||
probs.append(prob)
|
||||
|
||||
if not values:
|
||||
return None, None
|
||||
|
||||
probs = cdf(probs)
|
||||
num_type = weighted_choice(values, probs)
|
||||
num_type_props = alphanumeric_props.get(num_type, {})
|
||||
|
||||
Reference in New Issue
Block a user