[subdivisions/buildings] Adding the subdivisions and buildings rtrees to training data generation, used to look up building height (number of floors) and zone

This commit is contained in:
Al
2016-05-23 14:51:44 -04:00
parent 52aa95c213
commit 2e4ba6e6cc
2 changed files with 87 additions and 9 deletions

View File

@@ -98,11 +98,27 @@ class OSMAddressFormatter(object):
])
)
def __init__(self, components):
# Two-level lookup table: outer key is a tag name, inner key is that tag's
# value, and the result is an AddressComponents.zones constant.
# Consulted by zone(), which reads it as self.zones and probes each
# subdivision's properties with the outer keys.
# NOTE(review): the keys look like OSM tag names/values ('landuse', 'amenity');
# confirm against the subdivisions index schema.
zones = {
# Land-use classes; 'retail' and 'commercial' both map to COMMERCIAL.
'landuse': {
'retail': AddressComponents.zones.COMMERCIAL,
'commercial': AddressComponents.zones.COMMERCIAL,
'industrial': AddressComponents.zones.INDUSTRIAL,
'residential': AddressComponents.zones.RESIDENTIAL,
},
# Amenity classes; 'college' is treated the same as 'university'.
'amenity': {
'university': AddressComponents.zones.UNIVERSITY,
'college': AddressComponents.zones.UNIVERSITY,
}
}
def __init__(self, components, subdivisions_rtree, buildings_rtree):
# Instance of AddressComponents, contains structures for reverse geocoding, etc.
self.components = components
self.language_rtree = components.language_rtree
self.subdivisions_rtree = subdivisions_rtree
self.buildings_rtree = buildings_rtree
self.config = yaml.load(open(OSM_PARSER_DATA_DEFAULT_CONFIG))
self.formatter = AddressFormatter()
@@ -137,6 +153,37 @@ class OSMAddressFormatter(object):
address_components = {k: v for k, v in six.iteritems(address_components) if k in AddressFormatter.address_formatter_fields}
return address_components
def subdivision_components(self, latitude, longitude):
    '''
    Find all subdivision polygons containing the given point.

    Delegates to self.subdivisions_rtree.point_in_poly with return_all=True
    and returns whatever that call returns.

    The subdivisions index is optional (the command-line entry point passes
    None when no subdivisions directory is given), so a missing index yields
    an empty list instead of raising AttributeError. Callers only test the
    result for truthiness before using it.
    '''
    index = self.subdivisions_rtree
    if index is None:
        return []
    return index.point_in_poly(latitude, longitude, return_all=True)
def zone(self, subdivisions):
    '''
    Return the zone for the first subdivision that matches the zones table.

    For each subdivision (anything exposing a dict-style .get), every tag
    name in self.zones is looked up on the subdivision and the resulting
    value is mapped through that tag's sub-table. The first truthy mapping
    wins; None is returned when nothing matches.
    '''
    for subdivision in subdivisions:
        for tag_name, zones_by_value in six.iteritems(self.zones):
            tag_value = subdivision.get(tag_name)
            matched = zones_by_value.get(tag_value)
            if not matched:
                continue
            return matched
    return None
def building_components(self, latitude, longitude):
    '''
    Find all building polygons containing the given point.

    Delegates to self.buildings_rtree.point_in_poly with return_all=True
    and returns whatever that call returns.

    The buildings index is optional (the command-line entry point passes
    None when no buildings directory is given), so a missing index yields
    an empty list instead of raising AttributeError. Callers only test the
    result for truthiness before using it.
    '''
    index = self.buildings_rtree
    if index is None:
        return []
    return index.point_in_poly(latitude, longitude, return_all=True)
def num_floors(self, buildings, key='building:levels'):
    '''
    Return the largest floor count found under `key` among the buildings.

    Each building is anything exposing a dict-style .get. Values are parsed
    as an int first, then as a float truncated to int (so '5.5' counts as 5).
    Buildings with a missing or unparseable value are ignored.

    Returns None when no building has a usable value.
    '''
    max_floors = None
    for building in buildings:
        raw_value = building.get(key)
        if raw_value is None:
            continue

        try:
            floors = int(raw_value)
        except (ValueError, TypeError, OverflowError):
            try:
                floors = int(float(raw_value))
            except (ValueError, TypeError, OverflowError):
                # Not numeric at all (or infinite): skip this building
                continue

        # The first usable value must seed the maximum; comparing only when
        # max_floors was already set meant it could never be assigned.
        if max_floors is None or floors > max_floors:
            max_floors = floors

    return max_floors
def abbreviated_street(self, street, language):
'''
Street abbreviations
@@ -330,7 +377,22 @@ class OSMAddressFormatter(object):
revised_tags = self.normalize_address_components(tags)
address_components, country, language = self.components.expanded(revised_tags, latitude, longitude)
num_floors = None
num_basements = None
zone = None
building_components = self.building_components(latitude, longitude)
if building_components:
num_floors = self.num_floors(building_components)
num_basements = self.num_floors(building_components, key='building:levels:underground')
subdivision_components = self.subdivision_components(latitude, longitude)
if subdivision_components:
zone = self.zone(subdivision_components)
address_components, country, language = self.components.expanded(revised_tags, latitude, longitude,
num_floors=num_floors, num_basements=num_basements,
zone=zone)
if not address_components:
return None, None, None

View File

@@ -447,23 +447,31 @@ if __name__ == '__main__':
default=tempfile.gettempdir(),
help='Temp directory to use')
parser.add_argument('-g', '--language-rtree-dir',
parser.add_argument('--language-rtree-dir',
required=True,
help='Language RTree directory')
parser.add_argument('-r', '--rtree-dir',
parser.add_argument('--rtree-dir',
default=None,
help='OSM reverse geocoder RTree directory')
parser.add_argument('-q', '--quattroshapes-rtree-dir',
parser.add_argument('--quattroshapes-rtree-dir',
default=None,
help='Quattroshapes reverse geocoder RTree directory')
parser.add_argument('-d', '--geonames-db',
parser.add_argument('--subdivisions-rtree-dir',
default=None,
help='Subdivisions reverse geocoder RTree directory')
parser.add_argument('--buildings-rtree-dir',
default=None,
help='Buildings reverse geocoder RTree directory')
parser.add_argument('--geonames-db',
default=None,
help='GeoNames db file')
parser.add_argument('-n', '--neighborhoods-rtree-dir',
parser.add_argument('--neighborhoods-rtree-dir',
default=None,
help='Neighborhoods reverse geocoder RTree directory')
@@ -486,6 +494,14 @@ if __name__ == '__main__':
if args.quattroshapes_rtree_dir:
quattroshapes_rtree = QuattroshapesReverseGeocoder.load(args.quattroshapes_rtree_dir)
# Both polygon indexes are optional: each stays None unless its directory
# was given on the command line.
subdivisions_rtree = None
if args.subdivisions_rtree_dir:
    subdivisions_rtree = OSMSubdivisionReverseGeocoder.load(args.subdivisions_rtree_dir)

buildings_rtree = None
# Gate on the buildings option itself (not the subdivisions one) and read
# args.buildings_rtree_dir, the attribute argparse derives from
# --buildings-rtree-dir; args.buildings_rtree does not exist.
if args.buildings_rtree_dir:
    buildings_rtree = OSMBuildingReverseGeocoder.load(args.buildings_rtree_dir)
geonames = None
if args.geonames_db:
@@ -509,11 +525,11 @@ if __name__ == '__main__':
if args.address_file and args.format:
components = AddressComponents(osm_rtree, language_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
osm_formatter = OSMAddressFormatter(components)
osm_formatter = OSMAddressFormatter(components, subdivisions_rtree, buildings_rtree)
osm_formatter.build_training_data(args.address_file, args.out_dir, tag_components=not args.untagged)
if args.address_file and args.limited_addresses:
components = AddressComponents(osm_rtree, language_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
osm_formatter = OSMAddressFormatter(components, splitter=u' ')
osm_formatter = OSMAddressFormatter(components, subdivisions_rtree, buildings_rtree, splitter=u' ')
osm_formatter.build_limited_training_data(args.address_file, args.out_dir)
if args.venues_file:
build_venue_training_data(language_rtree, args.venues_file, args.out_dir)