[addresses] adding the new places index to OSM and OpenAddresses training data

This commit is contained in:
Al
2016-12-05 18:34:09 -05:00
parent 628fecea59
commit da36b71829
6 changed files with 31 additions and 8 deletions

View File

@@ -646,6 +646,7 @@ class OSMAddressFormatter(object):
address_components = {component_name: name}
self.components.add_admin_boundaries(address_components, osm_components, country, UNKNOWN_LANGUAGE,
latitude, longitude,
random_key=num_references > 1,
language_suffix=language_suffix,
drop_duplicate_city_names=False)
@@ -689,6 +690,7 @@ class OSMAddressFormatter(object):
for i in xrange(n):
address_components = {component_name: name}
self.components.add_admin_boundaries(address_components, osm_components, country, language,
latitude, longitude,
random_key=is_default,
language_suffix=language_suffix,
drop_duplicate_city_names=False)
@@ -725,6 +727,7 @@ class OSMAddressFormatter(object):
for i in xrange(num_references / 2 if language == ENGLISH else min_references / 2):
address_components = {component_name: name}
self.components.add_admin_boundaries(address_components, osm_components, country, language,
latitude, longitude,
random_key=False,
non_local_language=language,
language_suffix=language_suffix,

View File

@@ -65,6 +65,7 @@ from geodata.metro_stations.reverse_geocode import MetroStationReverseGeocoder
from geodata.neighborhoods.reverse_geocode import NeighborhoodReverseGeocoder
from geodata.osm.extract import *
from geodata.osm.formatter import OSMAddressFormatter
from geodata.places.reverse_geocode import PlaceReverseGeocoder
from geodata.polygons.language_polys import *
from geodata.polygons.reverse_geocode import *
from geodata.i18n.unicode_paths import DATA_DIR
@@ -471,6 +472,10 @@ if __name__ == '__main__':
default=None,
help='Quattroshapes reverse geocoder RTree directory')
parser.add_argument('--places-index-dir',
default=None,
help='Places index directory')
parser.add_argument('--metro-stations-index-dir',
default=None,
help='Metro stations reverse geocoder directory')
@@ -507,6 +512,10 @@ if __name__ == '__main__':
if args.neighborhoods_rtree_dir:
neighborhoods_rtree = NeighborhoodReverseGeocoder.load(args.neighborhoods_rtree_dir)
places_index = None
if args.places_index_dir:
places_index = PlaceReverseGeocoder.load(args.places_index_dir)
quattroshapes_rtree = None
if args.quattroshapes_rtree_dir:
quattroshapes_rtree = QuattroshapesReverseGeocoder.load(args.quattroshapes_rtree_dir)
@@ -545,22 +554,24 @@ if __name__ == '__main__':
parser.error('--quattroshapes-rtree-dir required for formatted addresses')
elif geonames is None:
parser.error('--geonames-db required for formatted addresses')
elif places_index is None:
parser.error('--places-index-dir required for formatted addresses')
if args.address_file and args.format:
components = AddressComponents(osm_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index)
osm_formatter.build_training_data(args.address_file, args.out_dir, tag_components=not args.untagged)
if args.address_file and args.limited_addresses:
components = AddressComponents(osm_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index, splitter=u' ')
osm_formatter.build_limited_training_data(args.address_file, args.out_dir)
if args.place_nodes_file and args.format:
components = AddressComponents(osm_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index)
osm_formatter.build_place_training_data(args.place_nodes_file, args.out_dir, tag_components=not args.untagged)
if args.intersections_file and args.format:
components = AddressComponents(osm_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index)
osm_formatter.build_intersections_training_data(args.intersections_file, args.out_dir, tag_components=not args.untagged)