[osm] Moving parse_osm to a separate module, adding option to list dependencies
This commit is contained in:
63
scripts/geodata/osm/extract.py
Normal file
63
scripts/geodata/osm/extract.py
Normal file
@@ -0,0 +1,63 @@
|
||||
'''
|
||||
geodata.osm.extract
|
||||
-------------------
|
||||
|
||||
Extracts nodes/ways/relations, their metadata and dependencies
|
||||
from .osm XML files.
|
||||
'''
|
||||
|
||||
from collections import OrderedDict
|
||||
from lxml import etree
|
||||
|
||||
|
||||
WAY_OFFSET = 10 ** 15
|
||||
RELATION_OFFSET = 2 * 10 ** 15
|
||||
|
||||
ALL_OSM_TAGS = set(['node', 'way', 'relation'])
|
||||
WAYS_RELATIONS = set(['way', 'relation'])
|
||||
|
||||
|
||||
def parse_osm(filename, allowed_types=ALL_OSM_TAGS, dependencies=False):
|
||||
'''
|
||||
Parse a file in .osm format iteratively, generating tuples like:
|
||||
('node:1', OrderedDict([('lat', '12.34'), ('lon', '23.45')])),
|
||||
('node:2', OrderedDict([('lat', '12.34'), ('lon', '23.45')])),
|
||||
('node:3', OrderedDict([('lat', '12.34'), ('lon', '23.45')])),
|
||||
('node:4', OrderedDict([('lat', '12.34'), ('lon', '23.45')])),
|
||||
('way:4444', OrderedDict([('name', 'Main Street')]), [1,2,3,4])
|
||||
'''
|
||||
f = open(filename)
|
||||
parser = etree.iterparse(f)
|
||||
|
||||
single_type = len(allowed_types) == 1
|
||||
|
||||
for (_, elem) in parser:
|
||||
elem_id = long(elem.attrib.pop('id', 0))
|
||||
item_type = elem.tag
|
||||
if elem_id >= WAY_OFFSET and elem_id < RELATION_OFFSET:
|
||||
elem_id -= WAY_OFFSET
|
||||
item_type = 'way'
|
||||
elif elem_id >= RELATION_OFFSET:
|
||||
elem_id -= RELATION_OFFSET
|
||||
item_type = 'relation'
|
||||
|
||||
if item_type in allowed_types:
|
||||
attrs = OrderedDict(elem.attrib)
|
||||
deps = [] if dependencies else None
|
||||
|
||||
for e in elem.getchildren():
|
||||
if e.tag == 'tag':
|
||||
attrs[e.attrib['k']] = e.attrib['v']
|
||||
elif dependencies and item_type == 'way' and e.tag == 'nd':
|
||||
deps.append(long(e.attrib['ref']))
|
||||
elif dependencies and item_type == 'relation' and e.tag == 'member' and \
|
||||
e.attrib.get('type') in ('way', 'relation'):
|
||||
deps.append((long(e.attrib['ref']), e.attrib.get('role')))
|
||||
|
||||
key = elem_id if single_type else '{}:{}'.format(item_type, elem_id)
|
||||
yield key, attrs, deps
|
||||
|
||||
if elem.tag in ALL_OSM_TAGS:
|
||||
elem.clear()
|
||||
while elem.getprevious() is not None:
|
||||
del elem.getparent()[0]
|
||||
Reference in New Issue
Block a user