[config] Adding default/alternative probability distribution to config.utils

This commit is contained in:
Al
2016-05-05 18:28:26 -04:00
parent 88b25a2d22
commit 6f8e94b851
2 changed files with 46 additions and 32 deletions

View File

@@ -7,7 +7,7 @@ import yaml
from collections import Mapping from collections import Mapping
from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries
from geodata.configs.utils import nested_get, DoesNotExist, recursive_merge from geodata.configs.utils import nested_get, DoesNotExist, recursive_merge, alternative_probabilities
from geodata.math.sampling import cdf, check_probability_distribution from geodata.math.sampling import cdf, check_probability_distribution
@@ -77,21 +77,15 @@ class AddressConfig(object):
if properties is None: if properties is None:
return None, None return None, None
probs = [] alternatives, probs = alternative_probabilities(properties)
alternatives = []
if 'probability' in properties: forms = []
prob = properties['probability'] form_probs = []
props = properties['default']
for props, prob in zip(alternatives, probs):
phrases, phrase_probs = self.form_probabilities(props, language, dictionaries=dictionaries) phrases, phrase_probs = self.form_probabilities(props, language, dictionaries=dictionaries)
probs.extend([prob * p for p in phrase_probs]) forms.extend([(p, props) for p in phrases])
alternatives.extend([(p, props) for p in phrases]) form_probs.extend([prob * p for p in phrase_probs])
elif 'alternatives' not in properties:
prob = 1.0
props = properties['default']
phrases, phrase_probs = self.form_probabilities(props, language, dictionaries=dictionaries)
probs.extend([prob * p for p in phrase_probs])
alternatives.extend([(p, props) for p in phrases])
sample_probability = properties.get('sample_probability') sample_probability = properties.get('sample_probability')
if sample_probability is not None: if sample_probability is not None:
@@ -102,25 +96,17 @@ class AddressConfig(object):
sample_phrases.append(canonical) sample_phrases.append(canonical)
sample_phrases.extend(surface_forms) sample_phrases.extend(surface_forms)
# Note: use the outer properties dictionary e.g. units.alphanumeric # Note: use the outer properties dictionary e.g. units.alphanumeric
alternatives.extend([(p, properties) for p in sample_phrases]) forms.extend([(p, properties) for p in sample_phrases])
probs.extend([float(sample_probability) / len(sample_phrases)] * len(sample_phrases)) form_probs.extend([float(sample_probability) / len(sample_phrases)] * len(sample_phrases))
alts = properties.get('alternatives', [])
for alt in alts:
prob = alt.get('probability', 1.0 / len(alts))
props = alt['alternative']
phrases, phrase_probs = self.form_probabilities(props, language, dictionaries=dictionaries)
probs.extend([prob * p for p in phrase_probs])
alternatives.extend([(p, props) for p in phrases])
try: try:
check_probability_distribution(probs) check_probability_distribution(form_probs)
except AssertionError: except AssertionError:
print 'values were: {}'.format(alternatives) print 'values were: {}'.format(forms)
raise raise
probs_cdf = cdf(probs) form_probs_cdf = cdf(form_probs)
self.cache[key] = (alternatives, probs_cdf) self.cache[key] = (forms, form_probs_cdf)
return self.cache[key] return self.cache[key]
def form_probabilities(self, properties, language, dictionaries=()): def form_probabilities(self, properties, language, dictionaries=()):

View File

@@ -17,15 +17,43 @@ class DoesNotExist:
pass pass
def nested_get(obj, keys): def nested_get(obj, keys, default=DoesNotExist):
if len(keys) == 0: if len(keys) == 0:
return obj return obj
try: try:
for key in keys[:-1]: for key in keys[:-1]:
obj = obj.get(key, {}) obj = obj.get(key, {})
if not hasattr(obj, 'items'): if not hasattr(obj, 'items'):
return DoesNotExist return default
key = keys[-1] key = keys[-1]
return obj.get(key, DoesNotExist) return obj.get(key, default)
except AttributeError: except AttributeError:
return DoesNotExist return default
def alternative_probabilities(properties):
    """Flatten a config entry into parallel lists of alternatives and probabilities.

    ``properties`` is a mapping that may contain:

    * ``'default'``: the default value, optionally weighted by a sibling
      ``'probability'`` key.
    * ``'alternatives'``: a list of mappings, each holding an
      ``'alternative'`` value and an optional ``'probability'``.

    Returns a pair ``(alternatives, probs)`` of equal-length lists. The
    default (when included) comes first, followed by the alternatives in
    config order. When ``properties`` is None the result is ``(None, None)``
    so that callers can always unpack the return value into two names.

    Raises ``KeyError`` if the default is needed but ``'default'`` is absent,
    or if an item of ``'alternatives'`` has no ``'alternative'`` key.

    The probabilities are returned as configured; they are not normalised or
    validated here.
    """
    if properties is None:
        # Keep the arity consistent with the normal return so that
        # ``alternatives, probs = alternative_probabilities(...)`` never
        # fails with a TypeError on a missing config entry.
        return None, None

    alternatives = []
    probs = []

    if 'probability' in properties:
        # Explicitly weighted default.
        prob = properties['probability']
        props = properties['default']
        probs.append(prob)
        alternatives.append(props)
    elif 'alternatives' not in properties:
        # No alternatives at all: the default takes the whole mass.
        prob = 1.0
        props = properties['default']
        probs.append(prob)
        alternatives.append(props)

    alts = properties.get('alternatives', [])
    for alt in alts:
        # An alternative without its own probability gets an equal share
        # among the listed alternatives.
        prob = alt.get('probability', 1.0 / len(alts))
        props = alt['alternative']
        probs.append(prob)
        alternatives.append(props)

    return alternatives, probs