From 44f0054170e7c215fd047d55c799e82d09a73a1d Mon Sep 17 00:00:00 2001 From: Al Date: Sun, 8 May 2016 17:56:26 -0400 Subject: [PATCH] [boundaries] Adding component-specific admin name probabilities to config (e.g. choose the ISO alpha-2 code 20% of the time, etc.) --- resources/boundaries/names/global.yaml | 19 +++++++++++++++++++ scripts/geodata/boundaries/names.py | 13 +++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/resources/boundaries/names/global.yaml b/resources/boundaries/names/global.yaml index 58096496..60212b3e 100644 --- a/resources/boundaries/names/global.yaml +++ b/resources/boundaries/names/global.yaml @@ -10,6 +10,25 @@ names: - alternative: official_name # e.g. United Kingdom of Great Britain and Northern Ireland probability: 0.01 + components: + country: + keys: + default: name:simple # e.g. United States vs. United States of America (defaults to name if not found) + probability: 0.4 + alternatives: + - alternative: name + probability: 0.25 + - alternative: ISO3166-1:alpha2 + probability: 0.15 + - alternative: ISO3166-1:alpha3 + probability: 0.1 + - alternative: short_name + probability: 0.05 + - alternative: alt_name + probability: 0.04 + - alternative: official_name # e.g. United Kingdom of Great Britain and Northern Ireland + probability: 0.01 + # This section overrides place names exceptions: diff --git a/scripts/geodata/boundaries/names.py b/scripts/geodata/boundaries/names.py index 937f1930..3ae7e187 100644 --- a/scripts/geodata/boundaries/names.py +++ b/scripts/geodata/boundaries/names.py @@ -1,4 +1,5 @@ import os +import six import yaml from collections import defaultdict @@ -28,6 +29,13 @@ class BoundaryNames(object): self.name_keys = name_keys self.name_key_probs = cdf(probs) + self.component_name_keys = {} + + for component, component_config in six.iteritems(nested_get(config, ('names', 'components'), default={})): + component_names = component_config.get('keys') + component_name_keys, component_probs = alternative_probabilities(component_names) + self.component_name_keys[component] = (component_name_keys, cdf(component_probs)) + self.exceptions = {} for props in nested_get(config, ('names', 'exceptions'), default=[]): @@ -58,7 +66,7 @@ class BoundaryNames(object): condition_object_type = condition['type'] self.omit_conditions[(object_type, object_id)].add((condition_object_type, condition_object_id)) - def name_key(self, props): + def name_key(self, props, component): object_type = props.get('type') object_id = safe_encode(props.get('id', '')) @@ -66,7 +74,8 @@ class BoundaryNames(object): values, probs = self.exceptions[(object_type, object_id)] return weighted_choice(values, probs) - return weighted_choice(self.name_keys, self.name_key_probs) + name_keys, probs = self.component_name_keys.get(component, (self.name_keys, self.name_key_probs)) + return weighted_choice(name_keys, probs) def remove_excluded_components(self, components): all_ids = set()