[places] adding probability distributions on added place components so we can have West Indies, W.I., etc.

This commit is contained in:
Al
2016-09-22 17:45:14 -04:00
parent 373708b595
commit 996a38d017
2 changed files with 23 additions and 3 deletions

View File

@@ -32,7 +32,13 @@ add_west_indies: &add_west_indies
components:
world_region: &add_west_indies_world_region
probability: 0.1
value: West Indies
values:
- value: West Indies
probability: 0.6
- value: W.I.
probability: 0.2
- value: WI
probability: 0.2
countries:
# Anguilla

View File

@@ -9,8 +9,8 @@ from collections import defaultdict
from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries
from geodata.address_formatting.formatter import AddressFormatter
from geodata.configs.utils import nested_get, DoesNotExist, recursive_merge
from geodata.math.sampling import cdf, check_probability_distribution
from geodata.configs.utils import nested_get, recursive_merge
from geodata.math.sampling import cdf, weighted_choice
from geodata.encoding import safe_encode
@@ -46,6 +46,8 @@ class PlaceConfig(object):
self.global_config = place_config['global']
self.country_configs = {}
self.cdf_cache = {}
countries = place_config.pop('countries', {})
for k, v in six.iteritems(countries):
@@ -144,6 +146,18 @@ class PlaceConfig(object):
for component in self.ADMIN_COMPONENTS:
value = self.get_property(('components', component, 'value'), country=country, default=None)
if not value:
values, probs = self.cdf_cache.get((country, component), (None, None))
if values is None:
values = self.get_property(('components', component, 'values'), country=country, default=None)
if values is not None:
values, probs = zip(*[(v['value'], float(v['probability'])) for v in values])
probs = cdf(probs)
self.cdf_cache[(country, component)] = (values, probs)
if values is not None:
value = weighted_choice(values, probs)
if value is not None and component not in components and self.include_component(component, containing_ids, country=country, population=population):
new_components[component] = value