[places] adding dependencies to admin components, e.g. so that in some countries a city_district must be accompanied by a city, etc.

This commit is contained in:
Al
2016-11-15 02:31:15 -05:00
parent 96fb725e54
commit 67f409cdf6
2 changed files with 111 additions and 1 deletions

View File

@@ -7,6 +7,9 @@ global:
components:
suburb:
probability: 0.2
dependencies:
- city
- city_district
metro_station:
probability: 0.05
city_district:
@@ -18,15 +21,24 @@ global:
probability: 0.0
state_district:
probability: 0.05
dependencies:
- city
state:
probability: 0.1
dependencies:
- city
- country
country_region:
probability: 0.05
dependencies:
- country
country:
probability: 0.8
world_region:
probability: 0.001
dependencies:
- country
add_west_indies: &add_west_indies
components:
@@ -50,8 +62,17 @@ countries:
# Australia
au:
components:
suburb:
dependencies:
- city_district
- city
- state_district
- state
state:
probability: 0.6
dependencies:
- city
- city_district
country:
probability: 0.5
@@ -64,8 +85,17 @@ countries:
# Brazil
br:
components:
suburb:
dependencies:
- city_district
- city
- state_district
- state
state:
probability: 0.6
dependencies:
- city
- city_district
# Bahamas
bs: *add_west_indies
@@ -73,8 +103,17 @@ countries:
# Canada
ca:
components:
suburb:
dependencies:
- city_district
- city
- state_district
- state
state:
probability: 0.7
dependencies:
- city
- city_district
country:
probability: 0.2
@@ -86,12 +125,17 @@ countries:
components:
city_district:
probability: 0.4
dependencies:
- city
# United Kingdom
gb:
components:
state_district:
probability: 0.15
dependencies:
- city_district
- city
suburb:
probability: 0.5
country:
@@ -111,10 +155,15 @@ countries:
# Hong Kong
hk:
components:
suburb:
dependencies:
- country
country:
probability: 0.85
state:
probability: 0.2
dependencies:
- country
island:
probability: 0.4
containing:
@@ -149,6 +198,9 @@ countries:
# Saint Kitts and Nevis
kn:
components:
suburb:
dependencies:
- country
island:
probability: 0.8
world_region: *add_west_indies_world_region
@@ -165,8 +217,18 @@ countries:
# Mexico
mx:
components:
suburb:
dependencies:
- city_district
- city
- state_district
- state
state:
probability: 0.5
dependencies:
- city_district
- city
# Malaysia
my:
components:
@@ -224,6 +286,11 @@ countries:
components:
suburb:
probability: 0.4
dependencies:
- city_district
- city
- state_district
- state
city_district:
probability: 0.2
containing:
@@ -264,6 +331,9 @@ countries:
probability: 0.9
- lt: 10000
probability: 1.0
dependencies:
- city_district
- city
state_district:
probability: 0.1
@@ -284,9 +354,16 @@ countries:
probability: 0.1
- <<: *richmond_county
probability: 0.1
dependencies:
- city_district
- city
- state
country:
probability: 0.1
dependencies:
- city
- city_district
# Tuvalu
tv:

View File

@@ -7,6 +7,7 @@ import yaml
from collections import defaultdict
from geodata.addresses.dependencies import ComponentDependencies
from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries
from geodata.address_formatting.formatter import AddressFormatter
from geodata.configs.utils import nested_get, recursive_merge
@@ -55,6 +56,19 @@ class PlaceConfig(object):
global_config_copy = copy.deepcopy(self.global_config)
self.country_configs[k] = recursive_merge(global_config_copy, country_config)
self.country_configs[None] = self.global_config
self.setup_component_dependencies()
def setup_component_dependencies(self):
    """Build a ComponentDependencies object for every configured country.

    For each entry of ``self.country_configs`` the ``dependencies`` lists
    declared under its ``components`` section are gathered into a graph
    keyed by component name. Every name in ``self.ADMIN_COMPONENTS`` that
    did not declare a list is added with an empty one. The resulting
    object is stored in ``self.component_dependencies`` under the same key
    as the config it was built from.
    """
    self.component_dependencies = {}

    for country_code, country_conf in six.iteritems(self.country_configs):
        dependency_graph = {}

        # Only components that explicitly declare dependencies are copied over.
        for name, settings in six.iteritems(country_conf['components']):
            if 'dependencies' in settings:
                dependency_graph[name] = settings['dependencies']

        # Remaining admin components get an empty dependency list so that
        # each of them is present in the graph.
        for name in self.ADMIN_COMPONENTS:
            dependency_graph.setdefault(name, [])

        self.component_dependencies[country_code] = ComponentDependencies(dependency_graph)
def get_property(self, key, country=None, default=None):
if isinstance(key, six.string_types):
key = key.split('.')
@@ -106,6 +120,21 @@ class PlaceConfig(object):
return random.random() < probability
def drop_invalid_components(self, address_components, country, original_bitset=None):
    """Remove, in place, components whose dependencies are not satisfied.

    Components are visited in the ``dependency_order`` of the dependency
    object registered for ``country`` (falling back to the one stored
    under ``None``). A visited component is popped from
    ``address_components`` when the bitset of the components currently
    present shares no bit with its dependency bitset and, if
    ``original_bitset`` is given, that bitset does share a bit with it.

    :param address_components: mutable mapping keyed by component name;
        nothing happens when it is empty or None.
    :param country: key used to look up the country's dependencies.
    :param original_bitset: optional bitset; when supplied, a component is
        only dropped if this bitset overlaps the component's dependencies.
        NOTE(review): presumably the bitset of the components before any
        dropout was applied -- confirm against the caller.
    :return: None; ``address_components`` is modified directly.
    """
    if not address_components:
        return

    present_bits = ComponentDependencies.component_bitset(address_components)

    fallback_deps = self.component_dependencies[None]
    country_deps = self.component_dependencies.get(country, fallback_deps)

    for name in country_deps.dependency_order:
        if name not in address_components or name not in country_deps:
            continue

        required_bits = country_deps[name]

        # At least one dependency is still present: the component is valid.
        if present_bits & required_bits:
            continue

        # The original bitset never overlapped the dependencies either, so
        # the component is left alone.
        if original_bitset is not None and not original_bitset & required_bits:
            continue

        address_components.pop(name)
        # Clear this component's bit so later checks see it as absent.
        present_bits ^= ComponentDependencies.component_bit_values[name]
def dropout_components(self, components, boundaries=(), country=None, population=None):
containing_ids = set()
@@ -116,6 +145,8 @@ class PlaceConfig(object):
continue
containing_ids.add((object_type, object_id))
original_bitset = ComponentDependencies.component_bitset(components)
names = defaultdict(list)
admin_components = [c for c in components if c in self.ADMIN_COMPONENTS]
for c in admin_components:
@@ -161,6 +192,8 @@ class PlaceConfig(object):
if value is not None and component not in components and self.include_component(component, containing_ids, country=country, population=population):
new_components[component] = value
self.drop_invalid_components(new_components, country, original_bitset=original_bitset)
return new_components