Initial fork commit
This commit is contained in:
0
scripts/geodata/numbers/__init__.py
Normal file
0
scripts/geodata/numbers/__init__.py
Normal file
219
scripts/geodata/numbers/numex.py
Normal file
219
scripts/geodata/numbers/numex.py
Normal file
@@ -0,0 +1,219 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
import yaml
|
||||
|
||||
# Absolute directory of this script; every repository-relative path below is
# derived from it.
this_dir = os.path.realpath(os.path.dirname(__file__))
# Put the directory two levels up on sys.path so the ``geodata`` package
# imports that follow resolve when this file is run directly as a script.
sys.path.append(os.path.realpath(os.path.join(this_dir, os.pardir, os.pardir)))
|
||||
|
||||
from geodata.encoding import safe_encode
|
||||
from geodata.i18n.unicode_paths import DATA_DIR
|
||||
|
||||
|
||||
class InvalidNumexRuleException(Exception):
    """Raised when a numex YAML rule contains a key outside the accepted set."""
|
||||
|
||||
# Directory holding the per-language numex YAML rule files
# (three levels above this script, then resources/numex).
NUMEX_DATA_DIR = os.path.join(this_dir, os.pardir, os.pardir, os.pardir,
                              'resources', 'numex')

# Default destination of the C source generated by parse_numex_rules()
# (three levels above this script, then src/numex_data.c).
NUMEX_RULES_FILE = os.path.join(this_dir, os.pardir, os.pardir, os.pardir, 'src', 'numex_data.c')
|
||||
|
||||
# ---------------------------------------------------------------------------
# Names of C-side enum constants. The Python values are the literal identifier
# text that gets substituted into the generated C source.
# ---------------------------------------------------------------------------
GENDER_MASCULINE = 'GENDER_MASCULINE'
GENDER_FEMININE = 'GENDER_FEMININE'
GENDER_NEUTER = 'GENDER_NEUTER'
GENDER_NONE = 'GENDER_NONE'

# YAML ``gender`` value -> C constant name. ``None`` covers an absent key.
gender_map = {
    'm': GENDER_MASCULINE,
    'f': GENDER_FEMININE,
    'n': GENDER_NEUTER,
    None: GENDER_NONE,
}


CATEGORY_PLURAL = 'CATEGORY_PLURAL'
CATEGORY_DEFAULT = 'CATEGORY_DEFAULT'

# Keys a rule under ``rules`` may carry; anything else is rejected.
valid_numex_keys = set(['name', 'value', 'type', 'left', 'right', 'gender', 'category', 'radix',
                        'multiply_gte', 'exact_multiple_only', 'left_separator', 'right_separator'])

# Keys an entry under ``ordinal_indicators`` may carry.
valid_ordinal_keys = set(['suffixes', 'gender', 'category'])


# YAML ``category`` value -> C constant name. ``None`` covers an absent key.
category_map = {
    'plural': CATEGORY_PLURAL,
    None: CATEGORY_DEFAULT
}

LEFT_CONTEXT_MULTIPLY = 'NUMEX_LEFT_CONTEXT_MULTIPLY'
LEFT_CONTEXT_ADD = 'NUMEX_LEFT_CONTEXT_ADD'
LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER = 'NUMEX_LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER'
LEFT_CONTEXT_NONE = 'NUMEX_LEFT_CONTEXT_NONE'

# YAML ``left`` value -> C constant name.
left_context_map = {
    'add': LEFT_CONTEXT_ADD,
    'multiply': LEFT_CONTEXT_MULTIPLY,
    'concat_only_if_number': LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER,
    None: LEFT_CONTEXT_NONE,
}

RIGHT_CONTEXT_MULTIPLY = 'NUMEX_RIGHT_CONTEXT_MULTIPLY'
RIGHT_CONTEXT_ADD = 'NUMEX_RIGHT_CONTEXT_ADD'
RIGHT_CONTEXT_NONE = 'NUMEX_RIGHT_CONTEXT_NONE'

# YAML ``right`` value -> C constant name.
right_context_map = {
    'add': RIGHT_CONTEXT_ADD,
    'multiply': RIGHT_CONTEXT_MULTIPLY,
    None: RIGHT_CONTEXT_NONE,
}

CARDINAL = 'NUMEX_CARDINAL_RULE'
ORDINAL = 'NUMEX_ORDINAL_RULE'
ORDINAL_INDICATOR = 'NUMEX_ORDINAL_INDICATOR_RULE'

# YAML ``type`` value -> C constant name. There is deliberately no ``None``
# entry: the rule parser indexes this map directly, so ``type`` is mandatory.
rule_type_map = {
    'cardinal': CARDINAL,
    'ordinal': ORDINAL,
    'ordinal_indicator': ORDINAL_INDICATOR,
}

# ---------------------------------------------------------------------------
# str.format templates for the generated C source. Doubled braces are literal
# braces in the output.
# ---------------------------------------------------------------------------
numex_key_template = u'"{key}"'
numex_rule_template = u'{{{left_context_type}, {right_context_type}, {rule_type}, {gender}, {category}, {radix}, {value}LL}}'

# Placeholder emitted in the rules array for every stopword key.
stopword_rule = u'NUMEX_STOPWORD_RULE'

ordinal_indicator_template = u'{{"{key}", {gender}, {category}, "{value}"}}'

stopwords_template = u'"{word}"'

language_template = u'{{"{language}", {whole_words_only}, {rule_index}, {num_rules}, {ordinal_indicator_index}, {num_ordinal_indicators}}}'

# NOTE(review): the leading whitespace inside this template was lost in the
# copy this was restored from; the four-space indent is cosmetic in C.
numex_rules_data_template = u'''
char *numex_keys[] = {{
    {numex_keys}
}};

numex_rule_t numex_rules[] = {{
    {numex_rules}
}};

ordinal_indicator_t ordinal_indicator_rules[] = {{
    {ordinal_indicator_rules}
}};

numex_language_source_t numex_languages[] = {{
    {languages}
}};
'''
|
||||
|
||||
|
||||
def parse_numex_rules(dirname=NUMEX_DATA_DIR, outfile=NUMEX_RULES_FILE):
    """Compile the per-language numex YAML files into one C source file.

    Every regular file in ``dirname`` whose name ends in ``.yaml`` is read; the
    part of the filename before ``.yaml`` is used as the language code. For
    each language the ``rules``, ``ordinal_indicators`` and ``stopwords``
    sections are flattened into shared arrays, and a per-language record notes
    the start index and length of its slice in each array.

    :param dirname: directory containing the ``<language>.yaml`` files.
    :param outfile: path of the C source file to (over)write.
    :raises InvalidNumexRuleException: if a rule or ordinal indicator carries
        a key outside ``valid_numex_keys`` / ``valid_ordinal_keys``.
    :raises KeyError: if a rule lacks ``type``/``name``/``value`` or uses a
        gender, type, ``left`` or ``right`` value missing from the lookup maps.
    """
    all_keys = []
    all_rules = []
    all_ordinal_indicators = []
    all_languages = []

    for filename in os.listdir(dirname):
        path = os.path.join(dirname, filename)
        if not os.path.isfile(path) or not filename.endswith('.yaml'):
            continue

        language = filename.split('.yaml', 1)[0]

        # safe_load: the files are plain data, and a bare yaml.load() without
        # an explicit Loader is rejected by current PyYAML releases.
        with open(path) as f:
            data = yaml.safe_load(f)

        whole_words_only = data.get('whole_words_only', False)

        rules = data.get('rules', [])
        # Start of this language's slice in the shared key/rule arrays.
        rule_index = len(all_rules)

        for rule in rules:
            invalid_keys = set(rule.keys()) - valid_numex_keys
            if invalid_keys:
                raise InvalidNumexRuleException(u'Invalid keys: ({}) for language {}, rule: {}'.format(u','.join(invalid_keys), language, rule))
            gender = gender_map[rule.get('gender')]
            rule_type = rule_type_map[rule['type']]
            key = rule['name']
            value = rule['value']
            radix = rule.get('radix', 10)
            category = category_map.get(rule.get('category'))
            if category is None:
                # Unknown category: the rule is dropped rather than rejected.
                continue
            left_context_type = left_context_map[rule.get('left')]
            right_context_type = right_context_map[rule.get('right')]
            all_keys.append(numex_key_template.format(key=key))
            all_rules.append(numex_rule_template.format(
                rule_type=rule_type,
                gender=gender,
                category=category,
                left_context_type=left_context_type,
                right_context_type=right_context_type,
                value=value,
                radix=radix
            ))

        ordinal_indicator_index = len(all_ordinal_indicators)
        ordinal_indicators = data.get('ordinal_indicators', [])
        num_ordinal_indicators = 0

        for rule in ordinal_indicators:
            # Validate keys first, mirroring the handling of ``rules`` above.
            invalid_ordinal_keys = set(rule.keys()) - valid_ordinal_keys
            if invalid_ordinal_keys:
                raise InvalidNumexRuleException(u'Invalid keys ({}) in ordinal rule for language {}, rule: {}'.format(u','.join(invalid_ordinal_keys), language, rule))
            gender = gender_map[rule.get('gender')]
            category = category_map[rule.get('category')]

            for key, suffixes in rule['suffixes'].items():
                for suffix in suffixes:
                    all_ordinal_indicators.append(ordinal_indicator_template.format(
                        key=key,
                        value=suffix,
                        gender=gender,
                        category=category
                    ))
                num_ordinal_indicators += len(suffixes)

        # Stopwords share the key/rule arrays: each one contributes its text as
        # a key and the fixed stopword placeholder as its rule.
        for stopword in data.get('stopwords', []):
            all_keys.append(numex_key_template.format(key=stopword))
            all_rules.append(stopword_rule)

        # Count what was actually emitted. Using len(rules) + len(stopwords)
        # over-counts whenever a rule was skipped above, which would make this
        # language's slice run into the next language's entries.
        num_rules = len(all_rules) - rule_index

        all_languages.append(language_template.format(
            language=language,
            whole_words_only=int(whole_words_only),
            rule_index=rule_index,
            num_rules=num_rules,
            ordinal_indicator_index=ordinal_indicator_index,
            num_ordinal_indicators=num_ordinal_indicators
        ))

    # One array element per line. NOTE(review): the indent after the newline
    # is reconstructed; it only affects the layout of the generated C.
    separator = u',\n    '
    source = numex_rules_data_template.format(
        numex_keys=separator.join(all_keys),
        numex_rules=separator.join(all_rules),
        ordinal_indicator_rules=separator.join(all_ordinal_indicators),
        languages=separator.join(all_languages),
    )

    # Open the destination only once everything parsed, so a bad YAML file no
    # longer leaves a truncated output file behind; ``with`` closes the handle
    # even if the write fails.
    with open(outfile, 'w') as out:
        out.write(safe_encode(source))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Optional positional CLI arguments override ``dirname`` and ``outfile``,
    # in that order.
    parse_numex_rules(*sys.argv[1:])
|
||||
108
scripts/geodata/numbers/ordinals.py
Normal file
108
scripts/geodata/numbers/ordinals.py
Normal file
@@ -0,0 +1,108 @@
|
||||
import bisect
|
||||
import math
|
||||
import os
|
||||
import operator
|
||||
import random
|
||||
import six
|
||||
import sys
|
||||
import yaml
|
||||
|
||||
from collections import defaultdict
|
||||
from marisa_trie import BytesTrie
|
||||
|
||||
from geodata.text.phrases import PhraseFilter
|
||||
from geodata.encoding import safe_encode, safe_decode
|
||||
from geodata.i18n.unicode_paths import DATA_DIR
|
||||
|
||||
from geodata.numbers.numex import NUMEX_DATA_DIR
|
||||
|
||||
|
||||
class OrdinalSuffixTrie(PhraseFilter):
    """Trie from reversed number endings to the ordinal suffixes they take.

    Keys are stored reversed so that matching the *end* of a token becomes a
    prefix walk over the reversed token.
    """

    def __init__(self, ordinal_rules):
        """Build the trie.

        :param ordinal_rules: mapping of number ending -> iterable of suffix
            strings. The suffixes of one ending are packed into a single
            ``|``-joined UTF-8 value.
        """
        self.trie = BytesTrie([(safe_decode(k)[::-1], safe_decode('|').join(v).encode('utf-8'))
                               for k, v in six.iteritems(ordinal_rules)])
        # NOTE(review): PhraseFilter.__init__ is not called; presumably the
        # base class only needs ``trie`` and this flag -- confirm.
        self.configured = True

    def search_substring(self, s):
        """Look up the longest prefix of ``s`` that is still a prefix of some key.

        :returns: ``(values, length)`` where ``values`` is whatever
            ``BytesTrie.get`` returns for that prefix, or ``(None, 0)`` when
            ``s`` is empty or no non-empty prefix qualifies.
        """
        if len(s) == 0:
            return None, 0

        # six.moves.range keeps this lazy on Python 2 and valid on Python 3.
        for i in six.moves.range(len(s) + 1):
            if not self.trie.has_keys_with_prefix(s[:i]):
                # s[:i] went one character too far; step back to the last
                # prefix that still had keys under it.
                i -= 1
                break
        if i > 0:
            # NOTE(review): s[:i] is a prefix of some key but not necessarily
            # a key itself, in which case get() yields nothing even if a
            # shorter prefix is a key -- confirm this is intended.
            return (self.trie.get(s[:i]), i)
        else:
            return None, 0

    def search_suffix(self, token):
        """Return the list of ordinal suffixes for ``token``, or ``None``."""
        # Decode before reversing (as __init__ does for keys) so multi-byte
        # input is reversed by character rather than by byte.
        suffix_search, suffix_len = self.search_substring(safe_decode(token)[::-1])
        if suffix_search:
            # Trie values are UTF-8 bytes; decode before splitting so the
            # separator and the value are the same string type on Python 3.
            return safe_decode(suffix_search[0]).split(safe_decode('|'))
        else:
            return None
|
||||
|
||||
|
||||
class OrdinalExpressions(object):
    """Ordinal-suffix lookup (e.g. number + suffix) built from the numex YAML files."""

    def __init__(self, base_dir=NUMEX_DATA_DIR):
        """Load every ``*.yaml`` file in ``base_dir``.

        Populates:
          * ``cardinal_rules``: lang -> list of cardinal rule dicts
          * ``cardinal_rules_ones``: (lang, gender, category) -> name of the
            cardinal rule whose value is 1
          * ``ordinal_rules``: lang -> {(value, gender, category): [names]}
          * ``ordinal_suffix_rules``: (lang, gender, category) -> OrdinalSuffixTrie
        """
        self.cardinal_rules = {}
        self.cardinal_rules_ones = {}

        self.ordinal_rules = {}
        self.ordinal_suffix_rules = {}

        for filename in os.listdir(base_dir):
            if not filename.endswith('.yaml'):
                continue

            lang = filename.split('.yaml')[0]
            # ``with`` closes the handle; safe_load because a bare yaml.load()
            # without a Loader is rejected by current PyYAML releases.
            with open(os.path.join(base_dir, filename)) as f:
                data = yaml.safe_load(f)

            rules = data.get('rules')
            # Explicit sequence check: the old ``__getslice__`` probe is never
            # true on Python 3, which silently skipped every rule.
            if isinstance(rules, (list, tuple)):
                cardinals = []
                ordinals = defaultdict(list)
                for rule in rules:
                    name = rule.get('name')
                    value = rule.get('value')
                    rule_type = rule.get('type')
                    # Exact type check (not isinstance) so booleans are
                    # rejected as values.
                    if not name or type(value) not in (int, float) or rule_type not in ('cardinal', 'ordinal'):
                        continue
                    gender = rule.get('gender', None)
                    category = rule.get('category', None)
                    if rule_type == 'ordinal':
                        ordinals[(value, gender, category)].append(name)
                    else:
                        cardinals.append(rule)
                        if value == 1:
                            self.cardinal_rules_ones[(lang, gender, category)] = name

                self.cardinal_rules[lang] = cardinals
                self.ordinal_rules[lang] = ordinals

            ordinal_indicators = data.get('ordinal_indicators')
            if isinstance(ordinal_indicators, (list, tuple)):
                for rule_set in ordinal_indicators:
                    gender = rule_set.get('gender', None)
                    category = rule_set.get('category', None)
                    self.ordinal_suffix_rules[(lang, gender, category)] = OrdinalSuffixTrie(rule_set['suffixes'])

    def get_suffixes(self, num, lang, gender=None, category=None):
        """Return all candidate ordinal suffixes for ``num``, or ``None``.

        ``None`` is returned when no suffix trie exists for the exact
        ``(lang, gender, category)`` combination or nothing matches.
        """
        trie = self.ordinal_suffix_rules.get((lang, gender, category))
        if not trie:
            return None

        return trie.search_suffix(str(num))

    def get_suffix(self, num, lang, gender=None, category=None):
        """Return one suffix for ``num`` chosen at random, or ``None``."""
        suffixes = self.get_suffixes(num, lang, gender=gender, category=category)
        if not suffixes:
            return None
        return random.choice(suffixes)

    def suffixed_number(self, num, lang, gender=None, category=None):
        """Return ``num`` immediately followed by a suffix, or ``None`` if no suffix applies."""
        suffix = self.get_suffix(num, lang, gender=gender, category=category)
        if not suffix:
            return None
        return six.u('{}{}').format(safe_decode(num), safe_decode(suffix))
|
||||
|
||||
# Shared module-level instance; constructing it reads the YAML files in
# NUMEX_DATA_DIR as soon as this module is imported.
ordinal_expressions = OrdinalExpressions()
|
||||
449
scripts/geodata/numbers/spellout.py
Normal file
449
scripts/geodata/numbers/spellout.py
Normal file
@@ -0,0 +1,449 @@
|
||||
import bisect
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import six
|
||||
import yaml
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
from geodata.numbers.numex import NUMEX_DATA_DIR
|
||||
|
||||
|
||||
class NumericExpressions(object):
    """Spell integers out as cardinal/ordinal words using the numex YAML rules.

    NOTE(review): block structure was reconstructed from a copy whose
    indentation had been stripped; nesting follows the only if/else pairing
    and loop placement under which the logic is consistent -- confirm against
    the canonical file.
    """

    # Separator placed between words when neither the language nor the rule
    # supplies one.
    default_separator = ' '

    def __init__(self, base_dir=NUMEX_DATA_DIR):
        """Load every ``*.yaml`` file in ``base_dir``.

        Populates:
          * ``cardinal_rules`` / ``ordinal_rules``:
            lang -> {(value, gender, category): [rule dicts]}
          * ``cardinal_rules_sorted``: lang -> sorted distinct cardinal values
          * ``cardinal_rules_ones``: lang -> {multiply_gte: rule} for cardinal
            rules with value 1 that declare ``multiply_gte``
          * ``cardinal_rules_ones_sorted``: lang -> sorted ``multiply_gte`` keys
          * ``default_separators``: lang -> separator from the YAML file
        """
        self.cardinal_rules = {}
        self.cardinal_rules_sorted = {}
        self.cardinal_rules_ones = defaultdict(dict)
        self.cardinal_rules_ones_sorted = {}

        self.default_separators = {}

        self.ordinal_rules = {}
        self.ordinal_suffix_rules = {}

        for filename in os.listdir(base_dir):
            if not filename.endswith('.yaml'):
                continue

            lang = filename.split('.yaml')[0]
            # ``with`` closes the handle; safe_load because a bare yaml.load()
            # without a Loader is rejected by current PyYAML releases.
            with open(os.path.join(base_dir, filename)) as f:
                data = yaml.safe_load(f)

            default_separator = data.get('default_separator')
            if default_separator is not None:
                self.default_separators[lang] = default_separator

            rules = data.get('rules')
            # Explicit sequence check: the old ``__getslice__`` probe is never
            # true on Python 3, which silently skipped every rule.
            if isinstance(rules, (list, tuple)):
                cardinals = defaultdict(list)
                ordinals = defaultdict(list)
                for rule in rules:
                    name = rule.get('name')
                    value = rule.get('value')
                    rule_type = rule.get('type')
                    # Exact type check (not isinstance) so booleans are
                    # rejected as values.
                    if not name or type(value) not in (int, float) or rule_type not in ('cardinal', 'ordinal'):
                        continue
                    gender = rule.get('gender', None)
                    category = rule.get('category', None)
                    if rule_type == 'ordinal':
                        ordinals[(value, gender, category)].append(rule)
                    else:
                        cardinals[(value, gender, category)].append(rule)
                        if value == 1 and 'multiply_gte' in rule:
                            self.cardinal_rules_ones[lang][rule['multiply_gte']] = rule

                self.cardinal_rules[lang] = cardinals
                self.ordinal_rules[lang] = ordinals

                self.cardinal_rules_sorted[lang] = sorted(set([v for v, g, c in cardinals]))
                self.cardinal_rules_ones_sorted[lang] = sorted(self.cardinal_rules_ones[lang].keys())

        # Freeze into a plain dict so later lookups cannot create entries.
        self.cardinal_rules_ones = dict(self.cardinal_rules_ones)

    def spellout_cardinal(self, num, lang, gender=None, category=None, random_choice_cardinals=False):
        """Spell ``num`` out as a cardinal number in ``lang``.

        Repeatedly takes the largest rule value not exceeding what is left of
        ``num`` and emits that rule's name (with a multiplier word when the
        value fits more than once), joined by the rule/language separators.

        :param num: value to spell; converted with ``int()``.
        :param gender: gender key used when ``num`` matches a rule value exactly.
        :param category: category key used when ``num`` matches a rule value exactly.
        :param random_choice_cardinals: pick randomly among alternative rules
            for the same value instead of taking the first.
        :returns: the spelled-out text, or ``None`` when the language is
            unknown, has no cardinal rules, has no rule for zero, or ``num``
            exceeds the largest rule value.
        """
        num = int(num)
        remainder = 0

        if lang not in self.cardinal_rules:
            return None

        rules = self.cardinal_rules.get(lang)
        cardinals = self.cardinal_rules_sorted.get(lang)
        if not rules or not cardinals:
            return None

        default_separator = self.default_separators.get(lang, self.default_separator)

        if num == 0:
            cardinal = rules.get((num, gender, category))
            if cardinal:
                if not random_choice_cardinals:
                    cardinal = cardinal[0]
                else:
                    cardinal = random.choice(cardinal)
                return cardinal['name']
            else:
                return None

        cardinal_part = []

        last_rule = {}
        # Multiplicand rules (e.g. a "thousand"-style rule) parked while their
        # multiplier is spelled out first.
        left_multiply_rules = []

        while num:
            # Largest known value <= num.
            i = bisect.bisect_left(cardinals, num)
            if i > len(cardinals) - 1:
                return None
            if i > 0 and cardinals[i] > num:
                val = cardinals[i - 1]
            else:
                val = cardinals[i]

            multiple = num // val

            if val == num:
                cardinal = rules.get((num, gender, category))
            else:
                cardinal = rules.get((val, None, None), [])

            multiple_rule = None

            if multiple > 1:
                multiple_val = rules.get((multiple, None, None))
                if multiple_val:
                    if not random_choice_cardinals:
                        multiple_rule = multiple_val[0]
                    else:
                        multiple_rule = random.choice(multiple_val)
            elif multiple == 1 and lang in self.cardinal_rules_ones_sorted:
                ones_rules = self.cardinal_rules_ones_sorted[lang]
                j = bisect.bisect_right(ones_rules, val)
                if j > 0 and ones_rules[j - 1] <= num:
                    multiple_rule = self.cardinal_rules_ones[lang][ones_rules[j - 1]]

            use_multiple = multiple > 1

            is_left_multiply = False
            did_left_multiply = False

            if not use_multiple:
                rule = None
                if cardinal and not random_choice_cardinals:
                    rule = cardinal[0]
                elif cardinal:
                    rule = random.choice(cardinal)
            else:
                for rule in cardinal:
                    left_multiply = rule.get('left') == 'multiply'
                    if left_multiply:
                        if not multiple_rule:
                            # No single word for the multiplier: park this
                            # rule and spell the multiplier first.
                            left_multiply_rules.append(rule)
                            is_left_multiply = True
                            last_rule = rule
                            rule = None
                        break
                else:
                    rule = None

            if rule is not None:
                left_add = last_rule.get('left') == 'add'
                right_add = last_rule.get('right') == 'add'

                if multiple_rule:
                    if right_add and cardinal_part:
                        cardinal_part.append(last_rule.get('left_separator', default_separator))
                    cardinal_part.append(multiple_rule['name'])
                    cardinal_part.append(rule.get('left_separator', default_separator))

                if right_add:
                    if not multiple_rule and cardinal_part:
                        right_separator = last_rule.get('right_separator', default_separator)
                        cardinal_part.append(right_separator)
                    cardinal_part.append(rule['name'])
                elif left_add and cardinal_part:
                    # The previous word attaches to the left of this one:
                    # swap the order around the separator.
                    last = cardinal_part.pop()
                    cardinal_part.append(rule['name'])
                    left_separator = last_rule.get('left_separator', default_separator)
                    cardinal_part.append(left_separator)
                    cardinal_part.append(last)
                elif not left_add and not right_add:
                    cardinal_part.append(rule['name'])

                last_rule = rule

                if left_multiply_rules and 'right' not in rule and 'left' not in rule:
                    # The multiplier is complete; emit the parked multiplicand.
                    left_multiply_rule = left_multiply_rules.pop()
                    left_separator = left_multiply_rule.get('left_separator', default_separator)
                    cardinal_part.append(left_separator)
                    cardinal_part.append(left_multiply_rule['name'])
                    did_left_multiply = True
                    last_rule = left_multiply_rule

            if not is_left_multiply and not did_left_multiply:
                num -= (multiple * val)
            elif not did_left_multiply:
                remainder = num % val
                # Floor division: ``/=`` would turn num into a float on
                # Python 3 and break the bisect/dict lookups above.
                num //= val
            else:
                num = remainder
                did_left_multiply = False

        return six.u('').join(cardinal_part)

    def roman_numeral(self, num):
        """Return ``num`` as an upper-cased Roman numeral via the ``la`` rules, or ``None``."""
        numeral = self.spellout_cardinal(num, 'la')
        if numeral is None:
            return None
        return numeral.upper()

    def spellout_ordinal(self, num, lang, gender=None, category=None,
                         random_choice_cardinals=False, random_choice_ordinals=False):
        """Spell ``num`` out as an ordinal number in ``lang``.

        Leading components are spelled as cardinals; the final component is
        taken from the ordinal rules and attached according to the previous
        rule's ``left``/``right`` ``add`` context.

        :param num: value to spell; converted with ``int()``.
        :param random_choice_cardinals: pick randomly among alternative
            cardinal rules for a value instead of the first.
        :param random_choice_ordinals: same, for ordinal rules.
        :returns: the spelled-out text, or ``None`` when the language or a
            needed rule is missing, or the loop finishes without reaching a
            final ordinal component.
        """
        num = int(num)
        remainder = 0

        if lang not in self.cardinal_rules:
            return None

        rules = self.ordinal_rules.get(lang)
        cardinal_rules = self.cardinal_rules.get(lang)
        cardinals = self.cardinal_rules_sorted.get(lang)
        if not rules or not cardinal_rules or not cardinals:
            return None

        default_separator = self.default_separators.get(lang, self.default_separator)

        expression = []

        last_rule = {}
        left_multiply_rules = []

        # Direct hit: an ordinal rule exists for the whole number.
        if num == 0 or (num, gender, category) in rules:
            ordinals = rules.get((num, gender, category))
            if ordinals:
                if not random_choice_ordinals:
                    ordinal = ordinals[0]
                else:
                    ordinal = random.choice(ordinals)
                return ordinal['name']
            else:
                return None

        while num:
            # Largest known cardinal value <= num.
            i = bisect.bisect_left(cardinals, num)
            if i > len(cardinals) - 1:
                return None
            if i > 0 and cardinals[i] > num:
                val = cardinals[i - 1]
            else:
                val = cardinals[i]

            if val == num and not remainder:
                # Final component: every path through this branch returns.
                if last_rule.get('right') == 'add':
                    ordinals = rules.get((num, gender, category))
                    if ordinals:
                        if not random_choice_ordinals:
                            ordinal = ordinals[0]
                        else:
                            ordinal = random.choice(ordinals)
                        right_separator = last_rule.get('right_separator', default_separator)

                        return right_separator.join([six.u('').join(expression), ordinal['name']])
                    else:
                        return None
                elif last_rule.get('left') == 'add':
                    # The previous word becomes the ordinal and the current
                    # value is spelled as a cardinal in front of it.
                    last_num = last_rule['value']
                    ordinals = rules.get((last_num, gender, category))
                    if ordinals:
                        if not random_choice_ordinals:
                            ordinal = ordinals[0]
                        else:
                            ordinal = random.choice(ordinals)

                        last_rule = ordinal
                        expression.pop()
                        exact_cardinals = cardinal_rules.get((num, None, None))
                        if exact_cardinals:
                            if not random_choice_cardinals:
                                rule = exact_cardinals[0]
                            else:
                                rule = random.choice(exact_cardinals)
                            expression.append(rule['name'])
                        else:
                            return None
                        left_separator = last_rule.get('left_separator', default_separator)
                        return left_separator.join([six.u('').join(expression), ordinal['name']])
                    else:
                        return None
                else:
                    return None
            else:
                ordinal = rules.get((val, None, None), [])
                cardinal = cardinal_rules.get((val, None, None), [])

            multiple = num // val

            multiple_rule = None

            if multiple > 1:
                multiple_val = cardinal_rules.get((multiple, None, None))
                if multiple_val:
                    if not random_choice_cardinals:
                        multiple_rule = multiple_val[0]
                    else:
                        multiple_rule = random.choice(multiple_val)
            elif multiple == 1 and lang in self.cardinal_rules_ones_sorted:
                ones_rules = self.cardinal_rules_ones_sorted[lang]
                j = bisect.bisect_right(ones_rules, val)
                if j > 0 and ones_rules[j - 1] <= num:
                    multiple_rule = self.cardinal_rules_ones[lang][ones_rules[j - 1]]

            use_multiple = multiple > 1

            is_left_multiply = False
            did_left_multiply = False

            if not use_multiple:
                rule = None
                if ordinal and not remainder:
                    # Prefer an ordinal-typed rule that accepts a following word.
                    for rule in ordinal:
                        if rule.get('right') == 'add':
                            break
                    else:
                        rule = None

                if not rule and cardinal and not random_choice_cardinals:
                    rule = cardinal[0]
                elif not rule and cardinal:
                    rule = random.choice(cardinal)
            else:
                rule = None
                have_ordinal = False
                if ordinal:
                    for rule in ordinal:
                        left_multiply = rule.get('left') == 'multiply'
                        if left_multiply and rule.get('right') == 'add':
                            if not multiple_rule:
                                left_multiply_rules.append(rule)
                                is_left_multiply = True
                                last_rule = rule
                                rule = None
                            have_ordinal = True
                            break
                    else:
                        rule = None

                if not have_ordinal:
                    for rule in cardinal:
                        left_multiply = rule.get('left') == 'multiply'
                        if left_multiply:
                            if not multiple_rule:
                                left_multiply_rules.append(rule)
                                is_left_multiply = True
                                last_rule = rule
                                rule = None
                            break
                    else:
                        rule = None

            if rule is not None:
                left_add = last_rule.get('left') == 'add'
                right_add = last_rule.get('right') == 'add'

                if multiple_rule:
                    if right_add and expression:
                        expression.append(last_rule.get('left_separator', default_separator))
                    expression.append(multiple_rule['name'])
                    expression.append(rule.get('left_separator', default_separator))

                if right_add:
                    if not multiple_rule and expression:
                        right_separator = last_rule.get('right_separator', default_separator)
                        expression.append(right_separator)
                    expression.append(rule['name'])
                elif left_add and expression:
                    last = expression.pop()
                    expression.append(rule['name'])
                    left_separator = last_rule.get('left_separator', default_separator)
                    expression.append(left_separator)
                    expression.append(last)
                elif not left_add and not right_add:
                    expression.append(rule['name'])

                last_rule = rule

                if left_multiply_rules and 'right' not in rule and 'left' not in rule:
                    # (A leftover debug print of the popped rule was removed
                    # here; it was also a syntax error on Python 3.)
                    left_multiply_rule = left_multiply_rules.pop()
                    left_separator = left_multiply_rule.get('left_separator', default_separator)
                    expression.append(left_separator)
                    expression.append(left_multiply_rule['name'])
                    did_left_multiply = True
                    last_rule = left_multiply_rule

            if not is_left_multiply and not did_left_multiply:
                num -= (multiple * val)
            elif not did_left_multiply:
                remainder = num % val
                # Floor division keeps num an int on Python 3.
                num //= val
            else:
                num = remainder
                remainder = 0
                did_left_multiply = False

        # The loop ran out without reaching a final ordinal component.
        return None

    def spellout_cardinal_hundreds(self, num, lang, gender=None, category=None, splitter=six.u(' ')):
        """Spell ``num`` with its last two digits read separately from the rest.

        The last two digits are spelled on their own: as a cardinal when
        >= 10, as the "hundred" word when they are ``00``, and as the zero
        word followed by the digit when < 10. For each ``i`` from 1 up to
        (but excluding) ``ceil(log_100(num))``, the cardinal spelling of
        ``num // 100 ** i`` is placed in front.

        :param num: value to spell; converted with ``int()``.
        :param splitter: text placed between the parts.
        :returns: the joined text, or ``None`` when ``num`` is not positive or
            a required rule/spelling is missing.
        """
        num = int(num)
        if num <= 0:
            # The original fed 0 / negatives to math.log() and raised.
            return None

        if num % 100 >= 10:
            first_hundred = self.spellout_cardinal(num % 100, lang, gender=gender, category=category)
        elif num % 100 == 0:
            rules = self.cardinal_rules.get(lang)
            if not rules:
                return None

            cardinals = rules.get((100, gender, category))
            if not cardinals:
                return None

            # Need a "hundred" word usable as a multiplicand of any count.
            for rule in cardinals:
                if rule.get('left') == 'multiply' and not rule.get('exact_multiple_only'):
                    break
            else:
                rule = None

            if not rule:
                return None

            first_hundred = rule['name']
        else:
            rules = self.cardinal_rules.get(lang)
            if not rules:
                return None

            # Despite the name this is the units digit (num % 100 < 10 here).
            tens_place = num % 10
            zero_rules = rules.get((0, gender, category))
            if not zero_rules:
                return None

            tens_place_rules = rules.get((tens_place, gender, category))
            if not tens_place_rules:
                return None

            zero_rule = random.choice(zero_rules)
            tens_rule = random.choice(tens_place_rules)

            first_hundred = splitter.join([zero_rule['name'], tens_rule['name']])

        if not first_hundred:
            return None

        parts = [first_hundred]

        # Smallest k with 100 ** k >= num, i.e. ceil(log_100(num)) computed in
        # exact integer arithmetic instead of floating-point math.log.
        num_groups = 0
        while 100 ** num_groups < num:
            num_groups += 1

        for i in range(1, num_groups):
            part = self.spellout_cardinal(num // 100 ** i, lang, gender=gender, category=category)
            if not part:
                return None
            parts.append(part)
        return splitter.join(reversed(parts))
|
||||
|
||||
|
||||
# Shared module-level instance; constructing it reads the YAML files in
# NUMEX_DATA_DIR as soon as this module is imported.
numeric_expressions = NumericExpressions()
|
||||
Reference in New Issue
Block a user