diff --git a/resources/places/countries/global.yaml b/resources/places/countries/global.yaml index 2cdd4134..eb862918 100644 --- a/resources/places/countries/global.yaml +++ b/resources/places/countries/global.yaml @@ -7,6 +7,9 @@ global: components: suburb: probability: 0.2 + dependencies: + - city + - city_district metro_station: probability: 0.05 city_district: @@ -18,15 +21,24 @@ global: probability: 0.0 state_district: probability: 0.05 + dependencies: + - city state: probability: 0.1 + dependencies: + - city + - country country_region: probability: 0.05 + dependencies: + - country country: probability: 0.8 + world_region: probability: 0.001 - + dependencies: + - country add_west_indies: &add_west_indies components: @@ -50,8 +62,17 @@ countries: # Australia au: components: + suburb: + dependencies: + - city_district + - city + - state_district + - state state: probability: 0.6 + dependencies: + - city + - city_district country: probability: 0.5 @@ -64,8 +85,17 @@ countries: # Brazil br: components: + suburb: + dependencies: + - city_district + - city + - state_district + - state state: probability: 0.6 + dependencies: + - city + - city_district # Bahamas bs: *add_west_indies @@ -73,8 +103,17 @@ countries: # Canada ca: components: + suburb: + dependencies: + - city_district + - city + - state_district + - state state: probability: 0.7 + dependencies: + - city + - city_district country: probability: 0.2 @@ -86,12 +125,17 @@ countries: components: city_district: probability: 0.4 + dependencies: + - city # United Kingdom gb: components: state_district: probability: 0.15 + dependencies: + - city_district + - city suburb: probability: 0.5 country: @@ -111,10 +155,15 @@ countries: # Hong Kong hk: components: + suburb: + dependencies: + - country country: probability: 0.85 state: probability: 0.2 + dependencies: + - country island: probability: 0.4 containing: @@ -149,6 +198,9 @@ countries: # Saint Kitts and Nevis kn: components: + suburb: + dependencies: + - country island: probability: 0.8 world_region: *add_west_indies_world_region @@ -165,8 +217,18 @@ countries: # Mexico mx: components: + suburb: + dependencies: + - city_district + - city + - state_district + - state state: probability: 0.5 + dependencies: + - city_district + - city + # Malaysia my: components: @@ -224,6 +286,11 @@ countries: components: suburb: probability: 0.4 + dependencies: + - city_district + - city + - state_district + - state city_district: probability: 0.2 containing: @@ -264,6 +331,9 @@ countries: probability: 0.9 - lt: 10000 probability: 1.0 + dependencies: + - city_district + - city state_district: probability: 0.1 @@ -284,9 +354,16 @@ countries: probability: 0.1 - <<: *richmond_county probability: 0.1 + dependencies: + - city_district + - city + - state country: probability: 0.1 + dependencies: + - city + - city_district # Tuvalu tv: diff --git a/scripts/geodata/places/config.py b/scripts/geodata/places/config.py index 8fba4301..afcada2c 100644 --- a/scripts/geodata/places/config.py +++ b/scripts/geodata/places/config.py @@ -7,6 +7,7 @@ import yaml from collections import defaultdict +from geodata.addresses.dependencies import ComponentDependencies from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries from geodata.address_formatting.formatter import AddressFormatter from geodata.configs.utils import nested_get, recursive_merge @@ -55,6 +56,19 @@ class PlaceConfig(object): global_config_copy = copy.deepcopy(self.global_config) self.country_configs[k] = recursive_merge(global_config_copy, country_config) + self.country_configs[None] = self.global_config + + self.setup_component_dependencies() + + def setup_component_dependencies(self): + self.component_dependencies = {} + + for country, conf in six.iteritems(self.country_configs): + graph = {k: c['dependencies'] for k, c in six.iteritems(conf['components']) if 'dependencies' in c} + graph.update({c: [] for c in self.ADMIN_COMPONENTS if c not in graph}) + + self.component_dependencies[country] = ComponentDependencies(graph) + def get_property(self, key, country=None, default=None): if isinstance(key, six.string_types): key = key.split('.') @@ -106,6 +120,21 @@ class PlaceConfig(object): return random.random() < probability + def drop_invalid_components(self, address_components, country, original_bitset=None): + if not address_components: + return + component_bitset = ComponentDependencies.component_bitset(address_components) + + deps = self.component_dependencies.get(country, self.component_dependencies[None]) + dep_order = deps.dependency_order + + for c in dep_order: + if c not in address_components: + continue + if c in deps and not component_bitset & deps[c] and (original_bitset is None or original_bitset & deps[c]): + address_components.pop(c) + component_bitset ^= ComponentDependencies.component_bit_values[c] + def dropout_components(self, components, boundaries=(), country=None, population=None): containing_ids = set() @@ -116,6 +145,8 @@ class PlaceConfig(object): continue containing_ids.add((object_type, object_id)) + original_bitset = ComponentDependencies.component_bitset(components) + names = defaultdict(list) admin_components = [c for c in components if c in self.ADMIN_COMPONENTS] for c in admin_components: @@ -161,6 +192,8 @@ class PlaceConfig(object): if value is not None and component not in components and self.include_component(component, containing_ids, country=country, population=population): new_components[component] = value + self.drop_invalid_components(new_components, country, original_bitset=original_bitset) + return new_components