[addresses] adding new places index in OSM and OpenAddresses training data
This commit is contained in:
@@ -62,7 +62,7 @@ class AddressComponents(object):
|
||||
prefixes like "London Borough of", pruning duplicates like "Antwerpen, Antwerpen, Antwerpen".
|
||||
|
||||
Usage:
|
||||
>>> components = AddressComponents(osm_admin_rtree, neighborhoods_rtree, buildings_rtree, subdivisions_rtree, quattroshapes_rtree, geonames)
|
||||
>>> components = AddressComponents(osm_admin_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
|
||||
>>> components.expand({'name': 'Hackney Empire'}, 51.54559, -0.05567)
|
||||
|
||||
Returns (results vary because of randomness):
|
||||
|
||||
@@ -430,7 +430,7 @@ class OpenAddressesFormatter(object):
|
||||
unambiguous_city = False
|
||||
if add_osm_boundaries or AddressFormatter.CITY not in components:
|
||||
osm_components = self.components.osm_reverse_geocoded_components(latitude, longitude)
|
||||
self.components.add_admin_boundaries(components, osm_components, country, language)
|
||||
self.components.add_admin_boundaries(components, osm_components, country, language, latitude, longitude)
|
||||
categorized = self.components.categorized_osm_components(country, osm_components)
|
||||
for component, label in categorized:
|
||||
if label == AddressFormatter.CITY:
|
||||
|
||||
@@ -15,6 +15,7 @@ from geodata.addresses.components import AddressComponents
|
||||
from geodata.geonames.db import GeoNamesDB
|
||||
from geodata.polygons.language_polys import LanguagePolygonIndex
|
||||
from geodata.neighborhoods.reverse_geocode import NeighborhoodReverseGeocoder
|
||||
from geodata.places.reverse_geocode import PlaceReverseGeocoder
|
||||
from geodata.polygons.reverse_geocode import OSMReverseGeocoder, OSMCountryReverseGeocoder, QuattroshapesReverseGeocoder
|
||||
|
||||
|
||||
@@ -47,6 +48,10 @@ if __name__ == '__main__':
|
||||
default=None,
|
||||
help='Quattroshapes reverse geocoder RTree directory')
|
||||
|
||||
parser.add_argument('--places-index-dir',
|
||||
default=None,
|
||||
help='Places index directory')
|
||||
|
||||
parser.add_argument('--geonames-db',
|
||||
default=None,
|
||||
help='GeoNames db file')
|
||||
@@ -76,6 +81,10 @@ if __name__ == '__main__':
|
||||
if args.neighborhoods_rtree_dir:
|
||||
neighborhoods_rtree = NeighborhoodReverseGeocoder.load(args.neighborhoods_rtree_dir)
|
||||
|
||||
places_index = None
|
||||
if args.places_index_dir:
|
||||
places_index = PlaceReverseGeocoder.load(args.places_index_dir)
|
||||
|
||||
quattroshapes_rtree = None
|
||||
if args.quattroshapes_rtree_dir:
|
||||
quattroshapes_rtree = QuattroshapesReverseGeocoder.load(args.quattroshapes_rtree_dir)
|
||||
@@ -86,7 +95,7 @@ if __name__ == '__main__':
|
||||
geonames = GeoNamesDB(args.geonames_db)
|
||||
|
||||
if args.openaddresses_dir and args.format:
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
|
||||
|
||||
oa_formatter = OpenAddressesFormatter(components, country_rtree, debug=args.debug)
|
||||
oa_formatter.build_training_data(args.openaddresses_dir, args.out_dir, tag_components=not args.untagged)
|
||||
|
||||
@@ -646,6 +646,7 @@ class OSMAddressFormatter(object):
|
||||
address_components = {component_name: name}
|
||||
|
||||
self.components.add_admin_boundaries(address_components, osm_components, country, UNKNOWN_LANGUAGE,
|
||||
latitude, longitude,
|
||||
random_key=num_references > 1,
|
||||
language_suffix=language_suffix,
|
||||
drop_duplicate_city_names=False)
|
||||
@@ -689,6 +690,7 @@ class OSMAddressFormatter(object):
|
||||
for i in xrange(n):
|
||||
address_components = {component_name: name}
|
||||
self.components.add_admin_boundaries(address_components, osm_components, country, language,
|
||||
latitude, longitude,
|
||||
random_key=is_default,
|
||||
language_suffix=language_suffix,
|
||||
drop_duplicate_city_names=False)
|
||||
@@ -725,6 +727,7 @@ class OSMAddressFormatter(object):
|
||||
for i in xrange(num_references / 2 if language == ENGLISH else min_references / 2):
|
||||
address_components = {component_name: name}
|
||||
self.components.add_admin_boundaries(address_components, osm_components, country, language,
|
||||
latitude, longitude,
|
||||
random_key=False,
|
||||
non_local_language=language,
|
||||
language_suffix=language_suffix,
|
||||
|
||||
@@ -65,6 +65,7 @@ from geodata.metro_stations.reverse_geocode import MetroStationReverseGeocoder
|
||||
from geodata.neighborhoods.reverse_geocode import NeighborhoodReverseGeocoder
|
||||
from geodata.osm.extract import *
|
||||
from geodata.osm.formatter import OSMAddressFormatter
|
||||
from geodata.places.reverse_geocode import PlaceReverseGeocoder
|
||||
from geodata.polygons.language_polys import *
|
||||
from geodata.polygons.reverse_geocode import *
|
||||
from geodata.i18n.unicode_paths import DATA_DIR
|
||||
@@ -471,6 +472,10 @@ if __name__ == '__main__':
|
||||
default=None,
|
||||
help='Quattroshapes reverse geocoder RTree directory')
|
||||
|
||||
parser.add_argument('--places-index-dir',
|
||||
default=None,
|
||||
help='Places index directory')
|
||||
|
||||
parser.add_argument('--metro-stations-index-dir',
|
||||
default=None,
|
||||
help='Metro stations reverse geocoder directory')
|
||||
@@ -507,6 +512,10 @@ if __name__ == '__main__':
|
||||
if args.neighborhoods_rtree_dir:
|
||||
neighborhoods_rtree = NeighborhoodReverseGeocoder.load(args.neighborhoods_rtree_dir)
|
||||
|
||||
places_index = None
|
||||
if args.places_index_dir:
|
||||
places_index = PlaceReverseGeocoder.load(args.places_index_dir)
|
||||
|
||||
quattroshapes_rtree = None
|
||||
if args.quattroshapes_rtree_dir:
|
||||
quattroshapes_rtree = QuattroshapesReverseGeocoder.load(args.quattroshapes_rtree_dir)
|
||||
@@ -545,22 +554,24 @@ if __name__ == '__main__':
|
||||
parser.error('--quattroshapes-rtree-dir required for formatted addresses')
|
||||
elif geonames is None:
|
||||
parser.error('--geonames-db required for formatted addresses')
|
||||
elif places_index is None:
|
||||
parser.error('--places-index-dir required for formatted addresses')
|
||||
|
||||
if args.address_file and args.format:
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
|
||||
osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index)
|
||||
osm_formatter.build_training_data(args.address_file, args.out_dir, tag_components=not args.untagged)
|
||||
if args.address_file and args.limited_addresses:
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
|
||||
osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index, splitter=u' ')
|
||||
osm_formatter.build_limited_training_data(args.address_file, args.out_dir)
|
||||
|
||||
if args.place_nodes_file and args.format:
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
|
||||
osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index)
|
||||
osm_formatter.build_place_training_data(args.place_nodes_file, args.out_dir, tag_components=not args.untagged)
|
||||
|
||||
if args.intersections_file and args.format:
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
|
||||
osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index)
|
||||
osm_formatter.build_intersections_training_data(args.intersections_file, args.out_dir, tag_components=not args.untagged)
|
||||
|
||||
@@ -94,7 +94,7 @@ if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
parser.add_argument('-i', '--osm-places-file',
|
||||
help='Path to OSM metro stations file')
|
||||
help='Path to OSM places file')
|
||||
|
||||
parser.add_argument('-p', '--precision',
|
||||
type=int,
|
||||
|
||||
Reference in New Issue
Block a user