def topsort(graph):
    '''
    Topological sort for a dependency graph.

    ``graph`` maps each node to a list of the nodes it depends on. Every
    node is emitted after all of its dependencies. Nodes that are not
    ordered relative to each other come out in the iteration order of
    ``graph``, so the result is reproducible for a given input mapping
    rather than depending on set/hash ordering.

    Usage:

    >>> graph = {
    ...     'a': ['b'],
    ...     'b': ['d'],
    ...     'c': ['d', 'a'],
    ...     'd': [],
    ... }
    >>> topsort(graph)
    ['d', 'b', 'a', 'c']

    Raises:
        ValueError: if a node lists a dependency that is not itself a key
            of ``graph`` (it could never be satisfied).
        Exception: if the remaining nodes form a dependency cycle.
    '''
    # Report dangling dependencies explicitly; otherwise they would never be
    # satisfied and would be misreported as a cycle below.
    for node in graph:
        for dep in graph[node]:
            if dep not in graph:
                raise ValueError(
                    'Unknown dependency {!r} for {!r}'.format(dep, node))

    pending = list(graph)
    done = set()
    ordered = []

    while pending:
        blocked = []
        for node in pending:
            # A node is ready once every one of its dependencies was emitted.
            if all(dep in done for dep in graph[node]):
                ordered.append(node)
                done.add(node)
            else:
                blocked.append(node)

        # A full pass without progress means the leftovers wait on each other.
        if len(blocked) == len(pending):
            raise Exception('Cycle: {}'.format(blocked))
        pending = blocked

    return ordered