[addresses] Topological sort of address component dependencies so they get checked/removed in order
This commit is contained in:
@@ -22,6 +22,7 @@ from geodata.boundaries.names import boundary_names
|
||||
from geodata.configs.utils import nested_get
|
||||
from geodata.coordinates.conversion import latlon_to_decimal
|
||||
from geodata.countries.names import *
|
||||
from geodata.graph.topsort import topsort
|
||||
from geodata.language_id.disambiguation import *
|
||||
from geodata.language_id.sample import sample_random_language
|
||||
from geodata.math.floats import isclose
|
||||
@@ -155,6 +156,10 @@ class AddressComponents(object):
|
||||
|
||||
all_values = self.component_bitset(forward_deps)
|
||||
|
||||
graph = {k: c['dependencies'] for k, c in six.iteritems(forward_deps)}
|
||||
graph.update({c: [] for c in AddressFormatter.address_formatter_fields if c not in graph})
|
||||
self.component_dependency_order = [c for c in topsort(graph) if graph[c]]
|
||||
|
||||
for component, conf in six.iteritems(forward_deps):
|
||||
deps = conf['dependencies']
|
||||
self.component_dependencies[component] = self.component_bitset(deps) if deps else all_values
|
||||
@@ -990,7 +995,10 @@ class AddressComponents(object):
|
||||
if not address_components:
|
||||
return
|
||||
component_bitset = self.component_bitset(address_components)
|
||||
for c in list(address_components):
|
||||
|
||||
for c in self.component_dependency_order:
|
||||
if c not in address_components:
|
||||
continue
|
||||
if c in self.component_dependencies and not component_bitset & self.component_dependencies[c]:
|
||||
address_components.pop(c)
|
||||
component_bitset ^= self.component_bit_values[c]
|
||||
|
||||
32
scripts/geodata/graph/topsort.py
Normal file
32
scripts/geodata/graph/topsort.py
Normal file
@@ -0,0 +1,32 @@
|
||||
|
||||
def topsort(graph):
    '''
    Topological sort for a dependency graph.

    graph maps each node to the list of nodes it depends on. Every
    dependency must itself be a key of graph (use an empty list for
    nodes that depend on nothing).

    Returns a list of all the nodes in which each node appears after
    all of its dependencies. When several nodes are ready at the same
    time, they are emitted in the iteration order of graph, so the
    result is reproducible for a given input mapping.

    Raises ValueError if a dependency is not a key of graph or if the
    graph contains a cycle.

    Usage:

    >>> graph = {
    ...     'a': ['b'],
    ...     'b': ['d'],
    ...     'c': ['d', 'a'],
    ...     'd': [],
    ... }
    >>> topsort(graph)
    ['d', 'b', 'a', 'c']
    '''
    # A dependency with no entry of its own can never be satisfied. Report
    # it explicitly rather than letting it surface below as a bogus cycle.
    for key in graph:
        for dep in graph[key]:
            if dep not in graph:
                raise ValueError(
                    'Unknown dependency {!r} for {!r}'.format(dep, key)
                )

    # A list (not a set) so that ties are broken by the order of graph
    # instead of by hash order, which varies between interpreter runs.
    todos = list(graph)
    seen = set()
    result = []

    while todos:
        # Pick the first remaining node whose dependencies are all emitted.
        for index, key in enumerate(todos):
            if all(dep in seen for dep in graph[key]):
                break
        else:
            # Nothing is ready although nodes remain: they form a cycle.
            raise ValueError('Cycle: {}'.format(todos))

        del todos[index]
        result.append(key)
        seen.add(key)

    return result
|
||||
Reference in New Issue
Block a user