[categories/chains] Reusing category config in chain queries

This commit is contained in:
Al
2016-05-20 16:00:04 -04:00
parent b50120f45c
commit ce381134fb
4 changed files with 77 additions and 4 deletions

View File

@@ -20,6 +20,8 @@ class CategoryConfig(object):
self.language_categories_singular = {}
self.language_categories_plural = {}
self.property_names = set()
if not os.path.exists(base_dir):
raise RuntimeError('{} does not exist'.format(base_dir))
@@ -37,6 +39,7 @@ class CategoryConfig(object):
reader.next() # headers
for key, value, is_plural, phrase in reader:
self.property_names.add(key)
is_plural = bool(int(is_plural))
if is_plural:
plural_rules[(key, value)].append(phrase)

View File

@@ -0,0 +1,31 @@
from geodata.addresses.config import address_config
from geodata.categories.config import category_config
from geodata.math.sampling import weighted_choice, cdf
class CategoryPreposition(object):
    """Names of the prepositional phrase types usable with a category query.

    Each constant doubles as the prefix of the ``<type>_probability`` key
    that ``random`` looks up in the ``categories`` address config property.
    """

    NEAR = 'near'
    NEARBY = 'nearby'
    NEAR_ME = 'near_me'
    IN = 'in'
    NULL = 'null'

    @classmethod
    def random(cls, language, country=None):
        """Pick a phrase type at random, weighted by the configured values.

        Args:
            language: language forwarded to ``address_config.get_property``.
            country: optional country forwarded to the same lookup.

        Returns:
            One of the phrase type constants defined on this class, or
            ``None`` when there is no ``categories`` config for the given
            language/country or when that config sets none of the
            ``<type>_probability`` keys.
        """
        category_props = address_config.get_property('categories', language, country=country)
        if category_props is None:
            return None

        values = []
        probs = []

        # Only phrase types that have a configured probability take part
        # in the draw; the iteration order keeps values and probs aligned.
        for prep_phrase_type in (cls.NEAR, cls.NEARBY, cls.NEAR_ME, cls.IN, cls.NULL):
            prob = category_props.get('{}_probability'.format(prep_phrase_type))
            if prob is not None:
                values.append(prep_phrase_type)
                probs.append(prob)

        # Nothing configured means there is no distribution to sample from;
        # report it the same way as a missing config instead of handing
        # empty sequences to cdf/weighted_choice.
        if not values:
            return None

        return weighted_choice(values, cdf(probs))

View File

@@ -14,6 +14,12 @@ NULL_CATEGORY_QUERY = CategoryQuery(None, None, False)
class Category(object):
NEAR = 'near'
NEARBY = 'nearby'
NEAR_ME = 'near_me'
IN = 'in'
NULL = 'null'
@classmethod
def phrase(cls, language, key, value, is_plural=False, country=None):
category_phrase = category_config.get_phrase(language, key, value, is_plural=is_plural)
@@ -29,7 +35,7 @@ class Category(object):
values = []
probs = []
for prep_phrase_type in ('near', 'nearby', 'near_me', 'in', 'null'):
for prep_phrase_type in (cls.NEAR, cls.NEARBY, cls.NEAR_ME, cls.IN, cls.NULL):
k = '{}_probability'.format(prep_phrase_type)
prob = category_props.get(k, None)
if prob is not None:
@@ -40,7 +46,7 @@ class Category(object):
prep_phrase_type = weighted_choice(values, probs)
if prep_phrase_type == 'null':
if prep_phrase_type == cls.NULL:
return CategoryQuery(category_phrase, prep=None, add_place_name=True)
values, probs = address_config.alternative_probabilities('categories.{}'.format(prep_phrase_type), language, country=country)
@@ -50,6 +56,6 @@ class Category(object):
prep_phrase, prep_phrase_props = weighted_choice(values, probs)
prep_phrase = safe_decode(prep_phrase)
add_place_name = prep_phrase_type not in ('nearby', 'near_me')
add_place_name = prep_phrase_type not in (cls.NEARBY, cls.NEAR_ME)
return CategoryQuery(category_phrase, prep=prep_phrase, add_place_name=add_place_name)