[places] Adding dependencies between admin components, e.g. so that in some countries a city_district must be accompanied by a city, etc.
This commit is contained in:
@@ -7,6 +7,9 @@ global:
|
|||||||
components:
|
components:
|
||||||
suburb:
|
suburb:
|
||||||
probability: 0.2
|
probability: 0.2
|
||||||
|
dependencies:
|
||||||
|
- city
|
||||||
|
- city_district
|
||||||
metro_station:
|
metro_station:
|
||||||
probability: 0.05
|
probability: 0.05
|
||||||
city_district:
|
city_district:
|
||||||
@@ -18,15 +21,24 @@ global:
|
|||||||
probability: 0.0
|
probability: 0.0
|
||||||
state_district:
|
state_district:
|
||||||
probability: 0.05
|
probability: 0.05
|
||||||
|
dependencies:
|
||||||
|
- city
|
||||||
state:
|
state:
|
||||||
probability: 0.1
|
probability: 0.1
|
||||||
|
dependencies:
|
||||||
|
- city
|
||||||
|
- country
|
||||||
country_region:
|
country_region:
|
||||||
probability: 0.05
|
probability: 0.05
|
||||||
|
dependencies:
|
||||||
|
- country
|
||||||
country:
|
country:
|
||||||
probability: 0.8
|
probability: 0.8
|
||||||
|
|
||||||
world_region:
|
world_region:
|
||||||
probability: 0.001
|
probability: 0.001
|
||||||
|
dependencies:
|
||||||
|
- country
|
||||||
|
|
||||||
add_west_indies: &add_west_indies
|
add_west_indies: &add_west_indies
|
||||||
components:
|
components:
|
||||||
@@ -50,8 +62,17 @@ countries:
|
|||||||
# Australia
|
# Australia
|
||||||
au:
|
au:
|
||||||
components:
|
components:
|
||||||
|
suburb:
|
||||||
|
dependencies:
|
||||||
|
- city_district
|
||||||
|
- city
|
||||||
|
- state_district
|
||||||
|
- state
|
||||||
state:
|
state:
|
||||||
probability: 0.6
|
probability: 0.6
|
||||||
|
dependencies:
|
||||||
|
- city
|
||||||
|
- city_district
|
||||||
country:
|
country:
|
||||||
probability: 0.5
|
probability: 0.5
|
||||||
|
|
||||||
@@ -64,8 +85,17 @@ countries:
|
|||||||
# Brazil
|
# Brazil
|
||||||
br:
|
br:
|
||||||
components:
|
components:
|
||||||
|
suburb:
|
||||||
|
dependencies:
|
||||||
|
- city_district
|
||||||
|
- city
|
||||||
|
- state_district
|
||||||
|
- state
|
||||||
state:
|
state:
|
||||||
probability: 0.6
|
probability: 0.6
|
||||||
|
dependencies:
|
||||||
|
- city
|
||||||
|
- city_district
|
||||||
|
|
||||||
# Bahamas
|
# Bahamas
|
||||||
bs: *add_west_indies
|
bs: *add_west_indies
|
||||||
@@ -73,8 +103,17 @@ countries:
|
|||||||
# Canada
|
# Canada
|
||||||
ca:
|
ca:
|
||||||
components:
|
components:
|
||||||
|
suburb:
|
||||||
|
dependencies:
|
||||||
|
- city_district
|
||||||
|
- city
|
||||||
|
- state_district
|
||||||
|
- state
|
||||||
state:
|
state:
|
||||||
probability: 0.7
|
probability: 0.7
|
||||||
|
dependencies:
|
||||||
|
- city
|
||||||
|
- city_district
|
||||||
country:
|
country:
|
||||||
probability: 0.2
|
probability: 0.2
|
||||||
|
|
||||||
@@ -86,12 +125,17 @@ countries:
|
|||||||
components:
|
components:
|
||||||
city_district:
|
city_district:
|
||||||
probability: 0.4
|
probability: 0.4
|
||||||
|
dependencies:
|
||||||
|
- city
|
||||||
|
|
||||||
# United Kingdom
|
# United Kingdom
|
||||||
gb:
|
gb:
|
||||||
components:
|
components:
|
||||||
state_district:
|
state_district:
|
||||||
probability: 0.15
|
probability: 0.15
|
||||||
|
dependencies:
|
||||||
|
- city_district
|
||||||
|
- city
|
||||||
suburb:
|
suburb:
|
||||||
probability: 0.5
|
probability: 0.5
|
||||||
country:
|
country:
|
||||||
@@ -111,10 +155,15 @@ countries:
|
|||||||
# Hong Kong
|
# Hong Kong
|
||||||
hk:
|
hk:
|
||||||
components:
|
components:
|
||||||
|
suburb:
|
||||||
|
dependencies:
|
||||||
|
- country
|
||||||
country:
|
country:
|
||||||
probability: 0.85
|
probability: 0.85
|
||||||
state:
|
state:
|
||||||
probability: 0.2
|
probability: 0.2
|
||||||
|
dependencies:
|
||||||
|
- country
|
||||||
island:
|
island:
|
||||||
probability: 0.4
|
probability: 0.4
|
||||||
containing:
|
containing:
|
||||||
@@ -149,6 +198,9 @@ countries:
|
|||||||
# Saint Kitts and Nevis
|
# Saint Kitts and Nevis
|
||||||
kn:
|
kn:
|
||||||
components:
|
components:
|
||||||
|
suburb:
|
||||||
|
dependencies:
|
||||||
|
- country
|
||||||
island:
|
island:
|
||||||
probability: 0.8
|
probability: 0.8
|
||||||
world_region: *add_west_indies_world_region
|
world_region: *add_west_indies_world_region
|
||||||
@@ -165,8 +217,18 @@ countries:
|
|||||||
# Mexico
|
# Mexico
|
||||||
mx:
|
mx:
|
||||||
components:
|
components:
|
||||||
|
suburb:
|
||||||
|
dependencies:
|
||||||
|
- city_district
|
||||||
|
- city
|
||||||
|
- state_district
|
||||||
|
- state
|
||||||
state:
|
state:
|
||||||
probability: 0.5
|
probability: 0.5
|
||||||
|
dependencies:
|
||||||
|
- city_district
|
||||||
|
- city
|
||||||
|
|
||||||
# Malaysia
|
# Malaysia
|
||||||
my:
|
my:
|
||||||
components:
|
components:
|
||||||
@@ -224,6 +286,11 @@ countries:
|
|||||||
components:
|
components:
|
||||||
suburb:
|
suburb:
|
||||||
probability: 0.4
|
probability: 0.4
|
||||||
|
dependencies:
|
||||||
|
- city_district
|
||||||
|
- city
|
||||||
|
- state_district
|
||||||
|
- state
|
||||||
city_district:
|
city_district:
|
||||||
probability: 0.2
|
probability: 0.2
|
||||||
containing:
|
containing:
|
||||||
@@ -264,6 +331,9 @@ countries:
|
|||||||
probability: 0.9
|
probability: 0.9
|
||||||
- lt: 10000
|
- lt: 10000
|
||||||
probability: 1.0
|
probability: 1.0
|
||||||
|
dependencies:
|
||||||
|
- city_district
|
||||||
|
- city
|
||||||
|
|
||||||
state_district:
|
state_district:
|
||||||
probability: 0.1
|
probability: 0.1
|
||||||
@@ -284,9 +354,16 @@ countries:
|
|||||||
probability: 0.1
|
probability: 0.1
|
||||||
- <<: *richmond_county
|
- <<: *richmond_county
|
||||||
probability: 0.1
|
probability: 0.1
|
||||||
|
dependencies:
|
||||||
|
- city_district
|
||||||
|
- city
|
||||||
|
- state
|
||||||
|
|
||||||
country:
|
country:
|
||||||
probability: 0.1
|
probability: 0.1
|
||||||
|
dependencies:
|
||||||
|
- city
|
||||||
|
- city_district
|
||||||
|
|
||||||
# Tuvalu
|
# Tuvalu
|
||||||
tv:
|
tv:
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import yaml
|
|||||||
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
|
from geodata.addresses.dependencies import ComponentDependencies
|
||||||
from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries
|
from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries
|
||||||
from geodata.address_formatting.formatter import AddressFormatter
|
from geodata.address_formatting.formatter import AddressFormatter
|
||||||
from geodata.configs.utils import nested_get, recursive_merge
|
from geodata.configs.utils import nested_get, recursive_merge
|
||||||
@@ -55,6 +56,19 @@ class PlaceConfig(object):
|
|||||||
global_config_copy = copy.deepcopy(self.global_config)
|
global_config_copy = copy.deepcopy(self.global_config)
|
||||||
self.country_configs[k] = recursive_merge(global_config_copy, country_config)
|
self.country_configs[k] = recursive_merge(global_config_copy, country_config)
|
||||||
|
|
||||||
|
self.country_configs[None] = self.global_config
|
||||||
|
|
||||||
|
self.setup_component_dependencies()
|
||||||
|
|
||||||
|
def setup_component_dependencies(self):
    """Build a ComponentDependencies object for every entry in country_configs.

    For each (country, config) pair in ``self.country_configs`` a graph is
    assembled that maps a component name to the list found under that
    component's ``dependencies`` key. Components without a ``dependencies``
    key are left out at first; every name in ``self.ADMIN_COMPONENTS`` that
    is still missing is then added with an empty dependency list. The
    resulting objects are stored in ``self.component_dependencies``, keyed
    by the same keys as ``self.country_configs``.
    """
    self.component_dependencies = {}
    by_country = self.component_dependencies

    for country_key, config in six.iteritems(self.country_configs):
        dependency_graph = {}

        # Only components that explicitly declare dependencies are seeded.
        for name, component_config in six.iteritems(config['components']):
            if 'dependencies' in component_config:
                dependency_graph[name] = component_config['dependencies']

        # Every admin component gets a node, even when it depends on nothing.
        for admin_component in self.ADMIN_COMPONENTS:
            dependency_graph.setdefault(admin_component, [])

        by_country[country_key] = ComponentDependencies(dependency_graph)
|
||||||
|
|
||||||
def get_property(self, key, country=None, default=None):
|
def get_property(self, key, country=None, default=None):
|
||||||
if isinstance(key, six.string_types):
|
if isinstance(key, six.string_types):
|
||||||
key = key.split('.')
|
key = key.split('.')
|
||||||
@@ -106,6 +120,21 @@ class PlaceConfig(object):
|
|||||||
|
|
||||||
return random.random() < probability
|
return random.random() < probability
|
||||||
|
|
||||||
|
def drop_invalid_components(self, address_components, country, original_bitset=None):
    """Remove, in place, components whose dependencies are not satisfied.

    :param address_components: mapping keyed by component name; entries are
        popped from it directly. Nothing happens when it is empty or None.
    :param country: key into ``self.component_dependencies``; when the key is
        absent the entry stored under ``None`` is used instead.
    :param original_bitset: optional bitset; when given, a component is only
        dropped if this bitset shares at least one bit with the component's
        dependency value. NOTE(review): presumably the bitset of the
        components present before dropout -- confirm against the caller.
    :return: None (the mapping is mutated).
    """
    if not address_components:
        return

    present_bits = ComponentDependencies.component_bitset(address_components)

    fallback = self.component_dependencies[None]
    dependencies = self.component_dependencies.get(country, fallback)

    # Walk components in the order supplied by the dependency object so that
    # a removal is reflected in present_bits before later components are checked.
    for name in dependencies.dependency_order:
        if name not in address_components or name not in dependencies:
            continue

        # NOTE(review): dependencies[name] appears to be a bitmask of the
        # components this one depends on -- confirm in ComponentDependencies.
        required_bits = dependencies[name]

        # At least one dependency is still present: keep the component.
        if present_bits & required_bits:
            continue

        # The original bitset shares no bit with the dependencies: keep it.
        if original_bitset is not None and not (original_bitset & required_bits):
            continue

        address_components.pop(name)
        # Toggle this component's bit in the running bitset.
        present_bits ^= ComponentDependencies.component_bit_values[name]
|
||||||
|
|
||||||
def dropout_components(self, components, boundaries=(), country=None, population=None):
|
def dropout_components(self, components, boundaries=(), country=None, population=None):
|
||||||
containing_ids = set()
|
containing_ids = set()
|
||||||
|
|
||||||
@@ -116,6 +145,8 @@ class PlaceConfig(object):
|
|||||||
continue
|
continue
|
||||||
containing_ids.add((object_type, object_id))
|
containing_ids.add((object_type, object_id))
|
||||||
|
|
||||||
|
original_bitset = ComponentDependencies.component_bitset(components)
|
||||||
|
|
||||||
names = defaultdict(list)
|
names = defaultdict(list)
|
||||||
admin_components = [c for c in components if c in self.ADMIN_COMPONENTS]
|
admin_components = [c for c in components if c in self.ADMIN_COMPONENTS]
|
||||||
for c in admin_components:
|
for c in admin_components:
|
||||||
@@ -161,6 +192,8 @@ class PlaceConfig(object):
|
|||||||
if value is not None and component not in components and self.include_component(component, containing_ids, country=country, population=population):
|
if value is not None and component not in components and self.include_component(component, containing_ids, country=country, population=population):
|
||||||
new_components[component] = value
|
new_components[component] = value
|
||||||
|
|
||||||
|
self.drop_invalid_components(new_components, country, original_bitset=original_bitset)
|
||||||
|
|
||||||
return new_components
|
return new_components
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user