"""Category phrase configuration (scripts/geodata/categories/config.py).

Loads per-language TSV files from the ``resources/categories`` directory and
exposes the phrases they contain, grouped by language and by
``(key, value)`` pair, separately for singular and plural forms.
"""
import csv
import os
import six
import random
import sys

from collections import defaultdict

this_dir = os.path.realpath(os.path.dirname(__file__))
# Make the top-level ``geodata`` package importable when run from a checkout.
sys.path.append(os.path.realpath(os.path.join(this_dir, os.pardir, os.pardir)))

from geodata.encoding import safe_decode

CATEGORIES_DIR = os.path.join(this_dir, os.pardir, os.pardir, os.pardir,
                              'resources', 'categories')

TSV_EXTENSION = '.tsv'


class CategoryConfig(object):
    """In-memory index of category phrases read from ``<lang>.tsv`` files.

    Each file is tab-separated with one header row followed by rows of four
    columns: ``key``, ``value``, ``is_plural`` (an integer flag) and
    ``phrase``.  A file name such as ``xx_YY.tsv`` is folded into its base
    language ``xx`` (the part before the first underscore), so all variants
    of a language share one set of rules.

    Attributes:
        language_categories_singular: dict mapping base language to a dict
            of ``(key, value) -> [phrase, ...]`` for rows with a zero
            ``is_plural`` flag.
        language_categories_plural: same shape, for rows with a non-zero
            ``is_plural`` flag.
    """

    def __init__(self, base_dir=CATEGORIES_DIR):
        """Read every ``.tsv`` file in *base_dir*.

        Args:
            base_dir: directory containing the per-language TSV files.

        Raises:
            RuntimeError: if *base_dir* does not exist.
            ValueError: if a non-empty row does not have exactly four
                columns or its ``is_plural`` column is not an integer.
        """
        if not os.path.exists(base_dir):
            raise RuntimeError('{} does not exist'.format(base_dir))

        singular = defaultdict(lambda: defaultdict(list))
        plural = defaultdict(lambda: defaultdict(list))

        # Sorted so the order of phrases within each list does not depend on
        # the arbitrary order returned by os.listdir.
        for filename in sorted(os.listdir(base_dir)):
            if not filename.endswith(TSV_EXTENSION):
                continue

            # Strip only the trailing extension, then drop any variant suffix.
            lang = filename[:-len(TSV_EXTENSION)]
            base_lang = lang.split('_')[0]

            # Touch both entries so a language with a header-only file is
            # still registered (with no rules).
            singular_rules = singular[base_lang]
            plural_rules = plural[base_lang]

            # Read from base_dir (not the module default) so a custom
            # directory is honoured, and close the handle when done.
            # NOTE(review): the file is opened with the platform default
            # text handling; confirm whether phrases should be decoded
            # explicitly (safe_decode is imported but not used here).
            with open(os.path.join(base_dir, filename)) as f:
                reader = csv.reader(f, delimiter='\t')
                next(reader, None)  # skip headers; tolerate an empty file

                for row in reader:
                    if not row:
                        # csv yields [] for blank lines; nothing to record.
                        continue
                    key, value, is_plural, phrase = row
                    rules = plural_rules if bool(int(is_plural)) else singular_rules
                    rules[(key, value)].append(phrase)

        # Freeze into plain dicts so later lookups never create entries.
        self.language_categories_singular = {
            language: dict(rules) for language, rules in six.iteritems(singular)
        }
        self.language_categories_plural = {
            language: dict(rules) for language, rules in six.iteritems(plural)
        }

    def get_phrase(self, language, key, value, is_plural=False):
        """Return a randomly chosen phrase for ``(key, value)`` in *language*.

        Args:
            language: base language code used as the top-level lookup key.
            key: first element of the rule key.
            value: second element of the rule key.
            is_plural: look in the plural rules instead of the singular ones.

        Returns:
            One of the configured phrases, picked with ``random.choice``, or
            ``None`` if the language or the ``(key, value)`` pair is unknown.
        """
        if is_plural:
            config = self.language_categories_plural
        else:
            config = self.language_categories_singular

        choices = config.get(language, {}).get((key, value))
        if not choices:
            return None
        return random.choice(choices)


# Module-level instance loaded from the default categories directory.
category_config = CategoryConfig()