450 lines
17 KiB
Python
450 lines
17 KiB
Python
import bisect
|
|
import math
|
|
import os
|
|
import random
|
|
import six
|
|
import yaml
|
|
|
|
from collections import defaultdict
|
|
|
|
from geodata.numbers.numex import NUMEX_DATA_DIR
|
|
|
|
|
|
class NumericExpressions(object):
|
|
default_separator = ' '
|
|
|
|
def __init__(self, base_dir=NUMEX_DATA_DIR):
|
|
self.cardinal_rules = {}
|
|
self.cardinal_rules_sorted = {}
|
|
self.cardinal_rules_ones = defaultdict(dict)
|
|
self.cardinal_rules_ones_sorted = {}
|
|
|
|
self.default_separators = {}
|
|
|
|
self.ordinal_rules = {}
|
|
self.ordinal_suffix_rules = {}
|
|
|
|
for filename in os.listdir(base_dir):
|
|
if filename.endswith('.yaml'):
|
|
lang = filename.split('.yaml')[0]
|
|
f = open(os.path.join(base_dir, filename))
|
|
data = yaml.load(f)
|
|
|
|
default_separator = data.get('default_separator')
|
|
if default_separator is not None:
|
|
self.default_separators[lang] = default_separator
|
|
|
|
rules = data.get('rules')
|
|
if rules is not None and hasattr(rules, '__getslice__'):
|
|
cardinals = defaultdict(list)
|
|
ordinals = defaultdict(list)
|
|
for rule in rules:
|
|
name = rule.get('name')
|
|
value = rule.get('value')
|
|
rule_type = rule.get('type')
|
|
if not name or type(value) not in (int, float) or rule_type not in ('cardinal', 'ordinal'):
|
|
continue
|
|
gender = rule.get('gender', None)
|
|
category = rule.get('category', None)
|
|
if rule_type == 'ordinal':
|
|
ordinals[(value, gender, category)].append(rule)
|
|
else:
|
|
cardinals[(value, gender, category)].append(rule)
|
|
if value == 1 and 'multiply_gte' in rule:
|
|
self.cardinal_rules_ones[lang][rule['multiply_gte']] = rule
|
|
|
|
self.cardinal_rules[lang] = cardinals
|
|
self.ordinal_rules[lang] = ordinals
|
|
|
|
self.cardinal_rules_sorted[lang] = sorted(set([v for v, g, c in cardinals]))
|
|
self.cardinal_rules_ones_sorted[lang] = sorted(self.cardinal_rules_ones[lang].keys())
|
|
|
|
self.cardinal_rules_ones = dict(self.cardinal_rules_ones)
|
|
|
|
def spellout_cardinal(self, num, lang, gender=None, category=None, random_choice_cardinals=False):
|
|
num = int(num)
|
|
remainder = 0
|
|
|
|
if lang not in self.cardinal_rules:
|
|
return None
|
|
|
|
rules = self.cardinal_rules.get(lang)
|
|
cardinals = self.cardinal_rules_sorted.get(lang)
|
|
if not rules or not cardinals:
|
|
return None
|
|
|
|
default_separator = self.default_separators.get(lang, self.default_separator)
|
|
|
|
if num == 0:
|
|
cardinal = rules.get((num, gender, category))
|
|
if cardinal:
|
|
if not random_choice_cardinals:
|
|
cardinal = cardinal[0]
|
|
else:
|
|
cardinal = random.choice(cardinal)
|
|
return cardinal['name']
|
|
else:
|
|
return None
|
|
|
|
cardinal_part = []
|
|
|
|
last_rule = {}
|
|
left_multiply_rules = []
|
|
|
|
while num:
|
|
i = bisect.bisect_left(cardinals, num)
|
|
if i > len(cardinals) - 1:
|
|
return None
|
|
if i > 0 and cardinals[i] > num:
|
|
val = cardinals[i - 1]
|
|
else:
|
|
val = cardinals[i]
|
|
|
|
multiple = num // val
|
|
|
|
if val == num:
|
|
cardinal = rules.get((num, gender, category))
|
|
else:
|
|
cardinal = rules.get((val, None, None), [])
|
|
|
|
multiple_rule = None
|
|
|
|
if multiple > 1:
|
|
multiple_val = rules.get((multiple, None, None))
|
|
if multiple_val:
|
|
if not random_choice_cardinals:
|
|
multiple_rule = multiple_val[0]
|
|
else:
|
|
multiple_rule = random.choice(multiple_val)
|
|
elif multiple == 1 and lang in self.cardinal_rules_ones_sorted:
|
|
ones_rules = self.cardinal_rules_ones_sorted[lang]
|
|
j = bisect.bisect_right(ones_rules, val)
|
|
if j > 0 and ones_rules[j - 1] <= num:
|
|
multiple_rule = self.cardinal_rules_ones[lang][ones_rules[j - 1]]
|
|
|
|
use_multiple = multiple > 1
|
|
|
|
is_left_multiply = False
|
|
did_left_multiply = False
|
|
|
|
if not use_multiple:
|
|
rule = None
|
|
if cardinal and not random_choice_cardinals:
|
|
rule = cardinal[0]
|
|
elif cardinal:
|
|
rule = random.choice(cardinal)
|
|
else:
|
|
for rule in cardinal:
|
|
left_multiply = rule.get('left') == 'multiply'
|
|
if left_multiply:
|
|
if not multiple_rule:
|
|
left_multiply_rules.append(rule)
|
|
is_left_multiply = True
|
|
last_rule = rule
|
|
rule = None
|
|
break
|
|
else:
|
|
rule = None
|
|
|
|
if rule is not None:
|
|
left_add = last_rule.get('left') == 'add'
|
|
right_add = last_rule.get('right') == 'add'
|
|
|
|
if multiple_rule:
|
|
if right_add and cardinal_part:
|
|
cardinal_part.append(last_rule.get('left_separator', default_separator))
|
|
cardinal_part.append(multiple_rule['name'])
|
|
cardinal_part.append(rule.get('left_separator', default_separator))
|
|
|
|
if right_add:
|
|
if not multiple_rule and cardinal_part:
|
|
right_separator = last_rule.get('right_separator', default_separator)
|
|
cardinal_part.append(right_separator)
|
|
cardinal_part.append(rule['name'])
|
|
elif left_add and cardinal_part:
|
|
last = cardinal_part.pop()
|
|
cardinal_part.append(rule['name'])
|
|
left_separator = last_rule.get('left_separator', default_separator)
|
|
cardinal_part.append(left_separator)
|
|
cardinal_part.append(last)
|
|
elif not left_add and not right_add:
|
|
cardinal_part.append(rule['name'])
|
|
|
|
last_rule = rule
|
|
|
|
if left_multiply_rules and 'right' not in rule and 'left' not in rule:
|
|
left_multiply_rule = left_multiply_rules.pop()
|
|
left_separator = left_multiply_rule.get('left_separator', default_separator)
|
|
cardinal_part.append(left_separator)
|
|
cardinal_part.append(left_multiply_rule['name'])
|
|
did_left_multiply = True
|
|
last_rule = left_multiply_rule
|
|
|
|
if not is_left_multiply and not did_left_multiply:
|
|
num -= (multiple * val)
|
|
elif not did_left_multiply:
|
|
remainder = num % val
|
|
num /= val
|
|
else:
|
|
num = remainder
|
|
did_left_multiply = False
|
|
|
|
return six.u('').join(cardinal_part)
|
|
|
|
def roman_numeral(self, num):
|
|
numeral = self.spellout_cardinal(num, 'la')
|
|
if numeral is None:
|
|
return None
|
|
return numeral.upper()
|
|
|
|
def spellout_ordinal(self, num, lang, gender=None, category=None,
|
|
random_choice_cardinals=False, random_choice_ordinals=False):
|
|
num = int(num)
|
|
remainder = 0
|
|
|
|
if lang not in self.cardinal_rules:
|
|
return None
|
|
|
|
rules = self.ordinal_rules.get(lang)
|
|
cardinal_rules = self.cardinal_rules.get(lang)
|
|
cardinals = self.cardinal_rules_sorted.get(lang)
|
|
if not rules or not cardinal_rules or not cardinals:
|
|
return None
|
|
|
|
default_separator = self.default_separators.get(lang, self.default_separator)
|
|
|
|
expression = []
|
|
|
|
last_rule = {}
|
|
left_multiply_rules = []
|
|
|
|
if num == 0 or (num, gender, category) in rules:
|
|
ordinals = rules.get((num, gender, category))
|
|
if ordinals:
|
|
if not random_choice_ordinals:
|
|
ordinal = ordinals[0]
|
|
else:
|
|
ordinal = random.choice(ordinals)
|
|
return ordinal['name']
|
|
else:
|
|
return None
|
|
|
|
while num:
|
|
i = bisect.bisect_left(cardinals, num)
|
|
if i > len(cardinals) - 1:
|
|
return None
|
|
if i > 0 and cardinals[i] > num:
|
|
val = cardinals[i - 1]
|
|
else:
|
|
val = cardinals[i]
|
|
|
|
if val == num and not remainder:
|
|
if last_rule.get('right') == 'add':
|
|
ordinals = rules.get((num, gender, category))
|
|
if ordinals:
|
|
if not random_choice_ordinals:
|
|
ordinal = ordinals[0]
|
|
else:
|
|
ordinal = random.choice(ordinals)
|
|
right_separator = last_rule.get('right_separator', default_separator)
|
|
|
|
return right_separator.join([six.u('').join(expression), ordinal['name']])
|
|
else:
|
|
return None
|
|
elif last_rule.get('left') == 'add':
|
|
last_num = last_rule['value']
|
|
ordinals = rules.get((last_num, gender, category))
|
|
if ordinals:
|
|
if not random_choice_ordinals:
|
|
ordinal = ordinals[0]
|
|
else:
|
|
ordinal = random.choice(ordinals)
|
|
|
|
last_rule = ordinal
|
|
expression.pop()
|
|
cardinals = cardinal_rules.get((num, None, None))
|
|
if cardinals:
|
|
if not random_choice_cardinals:
|
|
rule = cardinals[0]
|
|
else:
|
|
rule = random.choice(cardinals)
|
|
expression.append(rule['name'])
|
|
else:
|
|
return None
|
|
last = ordinal['name']
|
|
left_separator = last_rule.get('left_separator', default_separator)
|
|
return left_separator.join([six.u('').join(expression), ordinal['name']])
|
|
else:
|
|
return None
|
|
else:
|
|
return None
|
|
else:
|
|
ordinal = rules.get((val, None, None), [])
|
|
cardinal = cardinal_rules.get((val, None, None), [])
|
|
|
|
multiple = num // val
|
|
|
|
multiple_rule = None
|
|
|
|
if multiple > 1:
|
|
multiple_val = cardinal_rules.get((multiple, None, None))
|
|
if multiple_val:
|
|
if not random_choice_cardinals:
|
|
multiple_rule = multiple_val[0]
|
|
else:
|
|
multiple_rule = random.choice(multiple_val)
|
|
elif multiple == 1 and lang in self.cardinal_rules_ones_sorted:
|
|
ones_rules = self.cardinal_rules_ones_sorted[lang]
|
|
j = bisect.bisect_right(ones_rules, val)
|
|
if j > 0 and ones_rules[j - 1] <= num:
|
|
multiple_rule = self.cardinal_rules_ones[lang][ones_rules[j - 1]]
|
|
|
|
use_multiple = multiple > 1
|
|
|
|
is_left_multiply = False
|
|
did_left_multiply = False
|
|
|
|
if not use_multiple:
|
|
rule = None
|
|
if ordinal and not remainder:
|
|
for rule in ordinal:
|
|
if rule.get('right') == 'add':
|
|
break
|
|
else:
|
|
rule = None
|
|
|
|
if not rule and cardinal and not random_choice_cardinals:
|
|
rule = cardinal[0]
|
|
elif not rule and cardinal:
|
|
rule = random.choice(cardinal)
|
|
else:
|
|
rule = None
|
|
have_ordinal = False
|
|
if ordinal:
|
|
for rule in ordinal:
|
|
left_multiply = rule.get('left') == 'multiply'
|
|
if left_multiply and rule.get('right') == 'add':
|
|
if not multiple_rule:
|
|
left_multiply_rules.append(rule)
|
|
is_left_multiply = True
|
|
last_rule = rule
|
|
rule = None
|
|
have_ordinal = True
|
|
break
|
|
else:
|
|
rule = None
|
|
|
|
if not have_ordinal:
|
|
for rule in cardinal:
|
|
left_multiply = rule.get('left') == 'multiply'
|
|
if left_multiply:
|
|
if not multiple_rule:
|
|
left_multiply_rules.append(rule)
|
|
is_left_multiply = True
|
|
last_rule = rule
|
|
rule = None
|
|
break
|
|
else:
|
|
rule = None
|
|
|
|
if rule is not None:
|
|
left_add = last_rule.get('left') == 'add'
|
|
right_add = last_rule.get('right') == 'add'
|
|
|
|
if multiple_rule:
|
|
if right_add and expression:
|
|
expression.append(last_rule.get('left_separator', default_separator))
|
|
expression.append(multiple_rule['name'])
|
|
expression.append(rule.get('left_separator', default_separator))
|
|
|
|
if right_add:
|
|
if not multiple_rule and expression:
|
|
right_separator = last_rule.get('right_separator', default_separator)
|
|
expression.append(right_separator)
|
|
expression.append(rule['name'])
|
|
elif left_add and expression:
|
|
last = expression.pop()
|
|
expression.append(rule['name'])
|
|
left_separator = last_rule.get('left_separator', default_separator)
|
|
expression.append(left_separator)
|
|
expression.append(last)
|
|
elif not left_add and not right_add:
|
|
expression.append(rule['name'])
|
|
|
|
last_rule = rule
|
|
|
|
if left_multiply_rules and 'right' not in rule and 'left' not in rule:
|
|
left_multiply_rule = left_multiply_rules.pop()
|
|
print 'left_multiply_rule', left_multiply_rule
|
|
left_separator = left_multiply_rule.get('left_separator', default_separator)
|
|
expression.append(left_separator)
|
|
expression.append(left_multiply_rule['name'])
|
|
did_left_multiply = True
|
|
last_rule = left_multiply_rule
|
|
|
|
if not is_left_multiply and not did_left_multiply:
|
|
num -= (multiple * val)
|
|
elif not did_left_multiply:
|
|
remainder = num % val
|
|
num /= val
|
|
else:
|
|
num = remainder
|
|
remainder = 0
|
|
did_left_multiply = False
|
|
|
|
def spellout_cardinal_hundreds(self, num, lang, gender=None, category=None, splitter=six.u(' ')):
|
|
if num % 100 >= 10:
|
|
first_hundred = self.spellout_cardinal(num % 100, lang, gender=gender, category=category)
|
|
elif num % 100 == 0:
|
|
rules = self.cardinal_rules.get(lang)
|
|
if not rules:
|
|
return None
|
|
|
|
cardinals = rules.get((100, gender, category))
|
|
if not cardinals:
|
|
return None
|
|
|
|
for rule in cardinals:
|
|
if rule.get('left') == 'multiply' and not rule.get('exact_multiple_only'):
|
|
break
|
|
else:
|
|
rule = None
|
|
|
|
if not rule:
|
|
return None
|
|
|
|
first_hundred = rule['name']
|
|
else:
|
|
rules = self.cardinal_rules.get(lang)
|
|
if not rules:
|
|
return None
|
|
|
|
tens_place = num % 10
|
|
zero_rules = rules.get((0, gender, category))
|
|
if not zero_rules:
|
|
return None
|
|
|
|
tens_place_rules = rules.get((tens_place, gender, category))
|
|
if not tens_place_rules:
|
|
return None
|
|
|
|
zero_rule = random.choice(zero_rules)
|
|
tens_rule = random.choice(tens_place_rules)
|
|
|
|
first_hundred = splitter.join([zero_rule['name'], tens_rule['name']])
|
|
|
|
if not first_hundred:
|
|
return None
|
|
|
|
parts = [first_hundred]
|
|
|
|
for i in xrange(1, int(math.ceil(math.log(num, 100)))):
|
|
part = self.spellout_cardinal(num / 100 ** i, lang, gender=gender, category=category)
|
|
if not part:
|
|
return None
|
|
parts.append(part)
|
|
return splitter.join(reversed(parts))
|
|
|
|
|
|
numeric_expressions = NumericExpressions()
|