[addresses] Adding Digits, which allows for replacing numbers with their unicode full-width equivalents or doing number spellout
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
import random
|
import random
|
||||||
import six
|
import six
|
||||||
|
|
||||||
@@ -40,6 +41,72 @@ def sample_alphabet(alphabet, b=1.5):
|
|||||||
latin_alphabet = [chr(i) for i in range(ord('A'), ord('Z') + 1)]
|
latin_alphabet = [chr(i) for i in range(ord('A'), ord('Z') + 1)]
|
||||||
|
|
||||||
|
|
||||||
|
class Digits(object):
    """Randomly rewrite a digit string in an alternative representation.

    Three representations are supported:

    * ``ASCII`` - the input is returned unchanged
    * ``SPELLOUT`` - the number is spelled out as a cardinal via
      ``numeric_expressions.spellout_cardinal``
    * ``UNICODE_FULL_WIDTH`` - each ASCII digit is replaced by its
      full-width form (U+FF10..U+FF19)
    """

    ASCII = 'ascii'
    SPELLOUT = 'spellout'
    UNICODE_FULL_WIDTH = 'unicode_full_width'

    # ASCII digit -> full-width digit. Written with explicit escapes so the
    # mapping cannot be silently flattened to an identity map by tools that
    # normalise or re-encode the source file.
    unicode_full_width_map = {
        '0': u'\uff10',
        '1': u'\uff11',
        '2': u'\uff12',
        '3': u'\uff13',
        '4': u'\uff14',
        '5': u'\uff15',
        '6': u'\uff16',
        '7': u'\uff17',
        '8': u'\uff18',
        '9': u'\uff19',
    }

    @classmethod
    def rewrite_full_width(cls, s):
        """Return ``s`` with every ASCII digit replaced by its full-width form.

        Characters that are not ASCII digits are copied through unchanged.
        """
        full_width = cls.unicode_full_width_map
        return u''.join(full_width.get(c, c) for c in s)

    @classmethod
    def rewrite_spellout(cls, s, lang):
        """Return the cardinal spellout of ``s`` for ``lang``, or ``s`` itself.

        ``s`` is returned unchanged when it is not a plain integer string or
        when ``spellout_cardinal`` yields a falsy result.
        """
        if not s.isdigit():
            return s

        # isdigit() is also true for characters such as superscript digits
        # that int() refuses to parse; fall back to the input in that case.
        try:
            num = int(s)
        except ValueError:
            return s

        cardinal = numeric_expressions.spellout_cardinal(num, lang)
        return cardinal if cardinal else s

    @classmethod
    def rewrite(cls, d, lang, props):
        """Pick a digit representation at random and apply it to ``d``.

        :param d: the digit string to rewrite
        :param lang: language passed through to the spellout rewrite
        :param props: mapping that may contain ``spellout_probability`` and
            ``unicode_full_width_probability``; whatever probability mass is
            left over is assigned to leaving ``d`` as ASCII
        :return: ``d`` unchanged (not decoded) when ``props`` is empty or
            None, otherwise the decoded and possibly rewritten string
        """
        if not props:
            return d

        d = safe_decode(d)

        values = []
        probs = []

        for digit_type in (cls.SPELLOUT, cls.UNICODE_FULL_WIDTH):
            key = '{}_probability'.format(digit_type)
            if key in props:
                values.append(digit_type)
                probs.append(props[key])

        # Only add the ASCII fallback when there is probability mass left;
        # a configured total above 1.0 must not produce a negative weight.
        total = sum(probs)
        if total < 1.0 and not isclose(total, 1.0):
            values.append(cls.ASCII)
            probs.append(1.0 - total)

        # NOTE(review): weighted_choice appears to expect cumulative
        # probabilities as produced by cdf() - confirm against its definition.
        probs = cdf(probs)
        digit_type = weighted_choice(values, probs)

        if digit_type == cls.UNICODE_FULL_WIDTH:
            return cls.rewrite_full_width(d)
        if digit_type == cls.SPELLOUT:
            return cls.rewrite_spellout(d, lang)
        # ASCII, or any unexpected value: leave the digits as they are.
        return d
|
||||||
|
|
||||||
|
|
||||||
class NumericPhrase(object):
|
class NumericPhrase(object):
|
||||||
key = None
|
key = None
|
||||||
|
|
||||||
@@ -50,7 +117,7 @@ class NumericPhrase(object):
|
|||||||
def pick_phrase_and_type(cls, number, language, country=None):
|
def pick_phrase_and_type(cls, number, language, country=None):
|
||||||
values, probs = address_config.alternative_probabilities(cls.key, language, dictionaries=cls.dictionaries, country=country)
|
values, probs = address_config.alternative_probabilities(cls.key, language, dictionaries=cls.dictionaries, country=country)
|
||||||
if not values:
|
if not values:
|
||||||
return safe_decode(number) if number is not None else None
|
return None, safe_decode(number) if number is not None else None, None
|
||||||
|
|
||||||
phrase, phrase_props = weighted_choice(values, probs)
|
phrase, phrase_props = weighted_choice(values, probs)
|
||||||
|
|
||||||
@@ -135,6 +202,9 @@ class NumberedComponent(object):
|
|||||||
values.append(num_type)
|
values.append(num_type)
|
||||||
probs.append(prob)
|
probs.append(prob)
|
||||||
|
|
||||||
|
if not values:
|
||||||
|
return None, None
|
||||||
|
|
||||||
probs = cdf(probs)
|
probs = cdf(probs)
|
||||||
num_type = weighted_choice(values, probs)
|
num_type = weighted_choice(values, probs)
|
||||||
num_type_props = alphanumeric_props.get(num_type, {})
|
num_type_props = alphanumeric_props.get(num_type, {})
|
||||||
|
|||||||
Reference in New Issue
Block a user