[subdivisions/buildings] Adding subdivisions and buildings rtree to training data for getting building height, zone
This commit is contained in:
@@ -98,11 +98,27 @@ class OSMAddressFormatter(object):
|
|||||||
])
|
])
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, components):
|
# Lookup table consulted by zone(): the outer key is a property name read
# from a subdivision record, the inner key is that property's value, and the
# result is the matching AddressComponents.zones constant.
zones = {
    # Land-use classifications; 'retail' and 'commercial' share one zone.
    'landuse': {
        'retail': AddressComponents.zones.COMMERCIAL,
        'commercial': AddressComponents.zones.COMMERCIAL,
        'industrial': AddressComponents.zones.INDUSTRIAL,
        'residential': AddressComponents.zones.RESIDENTIAL,
    },
    # Amenities; colleges are treated the same as universities.
    'amenity': {
        'university': AddressComponents.zones.UNIVERSITY,
        'college': AddressComponents.zones.UNIVERSITY,
    },
}
|
||||||
|
|
||||||
|
def __init__(self, components, subdivisions_rtree, buildings_rtree):
|
||||||
# Instance of AddressComponents, contains structures for reverse geocoding, etc.
|
# Instance of AddressComponents, contains structures for reverse geocoding, etc.
|
||||||
self.components = components
|
self.components = components
|
||||||
self.language_rtree = components.language_rtree
|
self.language_rtree = components.language_rtree
|
||||||
|
|
||||||
|
self.subdivisions_rtree = subdivisions_rtree
|
||||||
|
self.buildings_rtree = buildings_rtree
|
||||||
|
|
||||||
self.config = yaml.load(open(OSM_PARSER_DATA_DEFAULT_CONFIG))
|
self.config = yaml.load(open(OSM_PARSER_DATA_DEFAULT_CONFIG))
|
||||||
self.formatter = AddressFormatter()
|
self.formatter = AddressFormatter()
|
||||||
|
|
||||||
@@ -137,6 +153,37 @@ class OSMAddressFormatter(object):
|
|||||||
address_components = {k: v for k, v in six.iteritems(address_components) if k in AddressFormatter.address_formatter_fields}
|
address_components = {k: v for k, v in six.iteritems(address_components) if k in AddressFormatter.address_formatter_fields}
|
||||||
return address_components
|
return address_components
|
||||||
|
|
||||||
|
def subdivision_components(self, latitude, longitude):
    '''
    Look up every subdivision polygon containing the given point.

    Returns whatever self.subdivisions_rtree.point_in_poly yields with
    return_all=True, or an empty list when no subdivisions R-tree was
    supplied (the command-line option for it defaults to None), so callers
    can simply truth-test the result.
    '''
    rtree = self.subdivisions_rtree
    if rtree is None:
        # The R-tree is optional; without it there is nothing to match.
        return []
    return rtree.point_in_poly(latitude, longitude, return_all=True)
|
||||||
|
|
||||||
|
def zone(self, subdivisions):
    '''
    Return the first zone matched by any of the given subdivisions.

    Each subdivision is checked, in order, against every property listed
    in self.zones; the first truthy mapped value wins. Returns None when
    nothing matches.
    '''
    for subdivision in subdivisions:
        for prop_name, zone_by_value in self.zones.items():
            matched = zone_by_value.get(subdivision.get(prop_name))
            if not matched:
                continue
            return matched
    return None
|
||||||
|
|
||||||
|
def building_components(self, latitude, longitude):
    '''
    Look up every building polygon containing the given point.

    Returns whatever self.buildings_rtree.point_in_poly yields with
    return_all=True, or an empty list when no buildings R-tree was
    supplied (the command-line option for it defaults to None), so callers
    can simply truth-test the result.
    '''
    rtree = self.buildings_rtree
    if rtree is None:
        # The R-tree is optional; without it there is nothing to match.
        return []
    return rtree.point_in_poly(latitude, longitude, return_all=True)
|
||||||
|
|
||||||
|
def num_floors(self, buildings, key='building:levels'):
    '''
    Return the largest floor count found among the given buildings.

    buildings: iterable of objects supporting .get(key).
    key: property holding the floor count (defaults to 'building:levels').

    Values are parsed as integers; values such as '2.5' are truncated
    via float. Missing or unparseable values are skipped. Returns None
    when no building yields a usable count.
    '''
    max_floors = None
    for building in buildings:
        raw_value = building.get(key)
        if raw_value is None:
            continue

        try:
            floors = int(raw_value)
        except (ValueError, TypeError):
            try:
                floors = int(float(raw_value))
            except (ValueError, TypeError, OverflowError):
                # OverflowError covers values that parse to infinity
                continue

        # The first valid value must seed the maximum; a plain
        # "is not None and >" test would never assign anything.
        if max_floors is None or floors > max_floors:
            max_floors = floors
    return max_floors
|
||||||
|
|
||||||
def abbreviated_street(self, street, language):
|
def abbreviated_street(self, street, language):
|
||||||
'''
|
'''
|
||||||
Street abbreviations
|
Street abbreviations
|
||||||
@@ -330,7 +377,22 @@ class OSMAddressFormatter(object):
|
|||||||
|
|
||||||
revised_tags = self.normalize_address_components(tags)
|
revised_tags = self.normalize_address_components(tags)
|
||||||
|
|
||||||
address_components, country, language = self.components.expanded(revised_tags, latitude, longitude)
|
num_floors = None
|
||||||
|
num_basements = None
|
||||||
|
zone = None
|
||||||
|
|
||||||
|
building_components = self.building_components(latitude, longitude)
|
||||||
|
if building_components:
|
||||||
|
num_floors = self.num_floors(building_components)
|
||||||
|
num_basements = self.num_floors(building_components, key='building:levels:underground')
|
||||||
|
|
||||||
|
subdivision_components = self.subdivision_components(latitude, longitude)
|
||||||
|
if subdivision_components:
|
||||||
|
zone = self.zone(subdivision_components)
|
||||||
|
|
||||||
|
address_components, country, language = self.components.expanded(revised_tags, latitude, longitude,
|
||||||
|
num_floors=num_floors, num_basements=num_basements,
|
||||||
|
zone=zone)
|
||||||
|
|
||||||
if not address_components:
|
if not address_components:
|
||||||
return None, None, None
|
return None, None, None
|
||||||
|
|||||||
@@ -447,23 +447,31 @@ if __name__ == '__main__':
|
|||||||
default=tempfile.gettempdir(),
|
default=tempfile.gettempdir(),
|
||||||
help='Temp directory to use')
|
help='Temp directory to use')
|
||||||
|
|
||||||
parser.add_argument('-g', '--language-rtree-dir',
|
parser.add_argument('--language-rtree-dir',
|
||||||
required=True,
|
required=True,
|
||||||
help='Language RTree directory')
|
help='Language RTree directory')
|
||||||
|
|
||||||
parser.add_argument('-r', '--rtree-dir',
|
parser.add_argument('--rtree-dir',
|
||||||
default=None,
|
default=None,
|
||||||
help='OSM reverse geocoder RTree directory')
|
help='OSM reverse geocoder RTree directory')
|
||||||
|
|
||||||
parser.add_argument('-q', '--quattroshapes-rtree-dir',
|
parser.add_argument('--quattroshapes-rtree-dir',
|
||||||
default=None,
|
default=None,
|
||||||
help='Quattroshapes reverse geocoder RTree directory')
|
help='Quattroshapes reverse geocoder RTree directory')
|
||||||
|
|
||||||
parser.add_argument('-d', '--geonames-db',
|
parser.add_argument('--subdivisions-rtree-dir',
|
||||||
|
default=None,
|
||||||
|
help='Subdivisions reverse geocoder RTree directory')
|
||||||
|
|
||||||
|
parser.add_argument('--buildings-rtree-dir',
|
||||||
|
default=None,
|
||||||
|
help='Buildings reverse geocoder RTree directory')
|
||||||
|
|
||||||
|
parser.add_argument('--geonames-db',
|
||||||
default=None,
|
default=None,
|
||||||
help='GeoNames db file')
|
help='GeoNames db file')
|
||||||
|
|
||||||
parser.add_argument('-n', '--neighborhoods-rtree-dir',
|
parser.add_argument('--neighborhoods-rtree-dir',
|
||||||
default=None,
|
default=None,
|
||||||
help='Neighborhoods reverse geocoder RTree directory')
|
help='Neighborhoods reverse geocoder RTree directory')
|
||||||
|
|
||||||
@@ -486,6 +494,14 @@ if __name__ == '__main__':
|
|||||||
if args.quattroshapes_rtree_dir:
|
if args.quattroshapes_rtree_dir:
|
||||||
quattroshapes_rtree = QuattroshapesReverseGeocoder.load(args.quattroshapes_rtree_dir)
|
quattroshapes_rtree = QuattroshapesReverseGeocoder.load(args.quattroshapes_rtree_dir)
|
||||||
|
|
||||||
|
subdivisions_rtree = None
|
||||||
|
if args.subdivisions_rtree_dir:
|
||||||
|
subdivisions_rtree = OSMSubdivisionReverseGeocoder.load(args.subdivisions_rtree_dir)
|
||||||
|
|
||||||
|
# Load the buildings R-tree only when its own directory option was given.
# The option is declared as --buildings-rtree-dir, so argparse stores it
# on args.buildings_rtree_dir.
buildings_rtree = None
if args.buildings_rtree_dir:
    buildings_rtree = OSMBuildingReverseGeocoder.load(args.buildings_rtree_dir)
|
||||||
|
|
||||||
geonames = None
|
geonames = None
|
||||||
|
|
||||||
if args.geonames_db:
|
if args.geonames_db:
|
||||||
@@ -509,11 +525,11 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
if args.address_file and args.format:
|
if args.address_file and args.format:
|
||||||
components = AddressComponents(osm_rtree, language_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
|
components = AddressComponents(osm_rtree, language_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
|
||||||
osm_formatter = OSMAddressFormatter(components)
|
osm_formatter = OSMAddressFormatter(components, subdivisions_rtree, buildings_rtree)
|
||||||
osm_formatter.build_training_data(args.address_file, args.out_dir, tag_components=not args.untagged)
|
osm_formatter.build_training_data(args.address_file, args.out_dir, tag_components=not args.untagged)
|
||||||
if args.address_file and args.limited_addresses:
|
if args.address_file and args.limited_addresses:
|
||||||
components = AddressComponents(osm_rtree, language_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
|
components = AddressComponents(osm_rtree, language_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
|
||||||
osm_formatter = OSMAddressFormatter(components, splitter=u' ')
|
osm_formatter = OSMAddressFormatter(components, subdivisions_rtree, buildings_rtree, splitter=u' ')
|
||||||
osm_formatter.build_limited_training_data(args.address_file, args.out_dir)
|
osm_formatter.build_limited_training_data(args.address_file, args.out_dir)
|
||||||
if args.venues_file:
|
if args.venues_file:
|
||||||
build_venue_training_data(language_rtree, args.venues_file, args.out_dir)
|
build_venue_training_data(language_rtree, args.venues_file, args.out_dir)
|
||||||
|
|||||||
Reference in New Issue
Block a user