[subdivisions/buildings] Adding subdivisions and buildings rtree to training data for getting building height, zone
This commit is contained in:
@@ -98,11 +98,27 @@ class OSMAddressFormatter(object):
|
||||
])
|
||||
)
|
||||
|
||||
def __init__(self, components):
|
||||
zones = {
|
||||
'landuse': {
|
||||
'retail': AddressComponents.zones.COMMERCIAL,
|
||||
'commercial': AddressComponents.zones.COMMERCIAL,
|
||||
'industrial': AddressComponents.zones.INDUSTRIAL,
|
||||
'residential': AddressComponents.zones.RESIDENTIAL,
|
||||
},
|
||||
'amenity': {
|
||||
'university': AddressComponents.zones.UNIVERSITY,
|
||||
'college': AddressComponents.zones.UNIVERSITY,
|
||||
}
|
||||
}
|
||||
|
||||
def __init__(self, components, subdivisions_rtree, buildings_rtree):
|
||||
# Instance of AddressComponents, contains structures for reverse geocoding, etc.
|
||||
self.components = components
|
||||
self.language_rtree = components.language_rtree
|
||||
|
||||
self.subdivisions_rtree = subdivisions_rtree
|
||||
self.buildings_rtree = buildings_rtree
|
||||
|
||||
self.config = yaml.load(open(OSM_PARSER_DATA_DEFAULT_CONFIG))
|
||||
self.formatter = AddressFormatter()
|
||||
|
||||
@@ -137,6 +153,37 @@ class OSMAddressFormatter(object):
|
||||
address_components = {k: v for k, v in six.iteritems(address_components) if k in AddressFormatter.address_formatter_fields}
|
||||
return address_components
|
||||
|
||||
def subdivision_components(self, latitude, longitude):
    '''
    Subdivision lookup for a point

    Queries self.subdivisions_rtree.point_in_poly with return_all=True
    and returns whatever it yields for the given coordinates.

    The subdivisions index is optional (it is None when no subdivisions
    rtree directory was given), so an empty list is returned in that case
    rather than failing on a None attribute access.
    '''
    rtree = self.subdivisions_rtree
    if rtree is None:
        return []
    return rtree.point_in_poly(latitude, longitude, return_all=True)
|
||||
|
||||
def zone(self, subdivisions):
    '''
    Zone for a set of subdivisions

    For each subdivision in order, looks up each tag key of self.zones
    (e.g. 'landuse', 'amenity') in the subdivision's properties and maps
    the tag value through the corresponding value => zone dictionary.
    Returns the first truthy zone found, or None if nothing matches.
    '''
    for properties in subdivisions:
        for tag_key, zones_by_value in six.iteritems(self.zones):
            tag_value = properties.get(tag_key)
            matched = zones_by_value.get(tag_value)
            if not matched:
                continue
            return matched
    return None
|
||||
|
||||
def building_components(self, latitude, longitude):
    '''
    Building lookup for a point

    Queries self.buildings_rtree.point_in_poly with return_all=True
    and returns whatever it yields for the given coordinates.

    The buildings index is optional (it is None when no buildings
    rtree directory was given), so an empty list is returned in that case
    rather than failing on a None attribute access.
    '''
    rtree = self.buildings_rtree
    if rtree is None:
        return []
    return rtree.point_in_poly(latitude, longitude, return_all=True)
|
||||
|
||||
def num_floors(self, buildings, key='building:levels'):
    '''
    Largest floor count among a set of buildings

    Reads the tag named by key from each building's properties, coerces
    it to an int (decimal strings such as "2.5" are truncated to 2) and
    returns the maximum over all buildings. Buildings that lack the tag
    or carry a value that cannot be parsed as a number are ignored.

    Returns None when no building yields a usable value.
    '''
    max_floors = None
    for b in buildings:
        value = b.get(key)
        if value is None:
            continue

        try:
            floors = int(value)
        except (ValueError, TypeError):
            try:
                # Values like "2.5" are not valid int literals, go through float
                floors = int(float(value))
            except (ValueError, TypeError, OverflowError):
                # OverflowError covers "inf", which float() accepts but int() rejects
                continue

        # The first parsed value must seed the maximum, otherwise
        # max_floors would stay None forever
        if max_floors is None or floors > max_floors:
            max_floors = floors
    return max_floors
|
||||
|
||||
def abbreviated_street(self, street, language):
|
||||
'''
|
||||
Street abbreviations
|
||||
@@ -330,7 +377,22 @@ class OSMAddressFormatter(object):
|
||||
|
||||
revised_tags = self.normalize_address_components(tags)
|
||||
|
||||
address_components, country, language = self.components.expanded(revised_tags, latitude, longitude)
|
||||
num_floors = None
|
||||
num_basements = None
|
||||
zone = None
|
||||
|
||||
building_components = self.building_components(latitude, longitude)
|
||||
if building_components:
|
||||
num_floors = self.num_floors(building_components)
|
||||
num_basements = self.num_floors(building_components, key='building:levels:underground')
|
||||
|
||||
subdivision_components = self.subdivision_components(latitude, longitude)
|
||||
if subdivision_components:
|
||||
zone = self.zone(subdivision_components)
|
||||
|
||||
address_components, country, language = self.components.expanded(revised_tags, latitude, longitude,
|
||||
num_floors=num_floors, num_basements=num_basements,
|
||||
zone=zone)
|
||||
|
||||
if not address_components:
|
||||
return None, None, None
|
||||
|
||||
@@ -447,23 +447,31 @@ if __name__ == '__main__':
|
||||
default=tempfile.gettempdir(),
|
||||
help='Temp directory to use')
|
||||
|
||||
parser.add_argument('-g', '--language-rtree-dir',
|
||||
parser.add_argument('--language-rtree-dir',
|
||||
required=True,
|
||||
help='Language RTree directory')
|
||||
|
||||
parser.add_argument('-r', '--rtree-dir',
|
||||
parser.add_argument('--rtree-dir',
|
||||
default=None,
|
||||
help='OSM reverse geocoder RTree directory')
|
||||
|
||||
parser.add_argument('-q', '--quattroshapes-rtree-dir',
|
||||
parser.add_argument('--quattroshapes-rtree-dir',
|
||||
default=None,
|
||||
help='Quattroshapes reverse geocoder RTree directory')
|
||||
|
||||
parser.add_argument('-d', '--geonames-db',
|
||||
parser.add_argument('--subdivisions-rtree-dir',
|
||||
default=None,
|
||||
help='Subdivisions reverse geocoder RTree directory')
|
||||
|
||||
parser.add_argument('--buildings-rtree-dir',
|
||||
default=None,
|
||||
help='Buildings reverse geocoder RTree directory')
|
||||
|
||||
parser.add_argument('--geonames-db',
|
||||
default=None,
|
||||
help='GeoNames db file')
|
||||
|
||||
parser.add_argument('-n', '--neighborhoods-rtree-dir',
|
||||
parser.add_argument('--neighborhoods-rtree-dir',
|
||||
default=None,
|
||||
help='Neighborhoods reverse geocoder RTree directory')
|
||||
|
||||
@@ -486,6 +494,14 @@ if __name__ == '__main__':
|
||||
if args.quattroshapes_rtree_dir:
|
||||
quattroshapes_rtree = QuattroshapesReverseGeocoder.load(args.quattroshapes_rtree_dir)
|
||||
|
||||
# Both indexes are optional: each one stays None unless its own
# directory option was supplied on the command line
subdivisions_rtree = None
if args.subdivisions_rtree_dir:
    subdivisions_rtree = OSMSubdivisionReverseGeocoder.load(args.subdivisions_rtree_dir)

buildings_rtree = None
# Gate on and load from --buildings-rtree-dir (argparse dest: buildings_rtree_dir),
# not the subdivisions option
if args.buildings_rtree_dir:
    buildings_rtree = OSMBuildingReverseGeocoder.load(args.buildings_rtree_dir)
|
||||
|
||||
geonames = None
|
||||
|
||||
if args.geonames_db:
|
||||
@@ -509,11 +525,11 @@ if __name__ == '__main__':
|
||||
|
||||
if args.address_file and args.format:
|
||||
components = AddressComponents(osm_rtree, language_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
|
||||
osm_formatter = OSMAddressFormatter(components)
|
||||
osm_formatter = OSMAddressFormatter(components, subdivisions_rtree, buildings_rtree)
|
||||
osm_formatter.build_training_data(args.address_file, args.out_dir, tag_components=not args.untagged)
|
||||
if args.address_file and args.limited_addresses:
|
||||
components = AddressComponents(osm_rtree, language_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
|
||||
osm_formatter = OSMAddressFormatter(components, splitter=u' ')
|
||||
osm_formatter = OSMAddressFormatter(components, subdivisions_rtree, buildings_rtree, splitter=u' ')
|
||||
osm_formatter.build_limited_training_data(args.address_file, args.out_dir)
|
||||
if args.venues_file:
|
||||
build_venue_training_data(language_rtree, args.venues_file, args.out_dir)
|
||||
|
||||
Reference in New Issue
Block a user