[addresses] remove Quattroshapes/GeoNames cities as they may have problematic names, and in any case we have point-based cities from OSM now
This commit is contained in:
@@ -63,7 +63,7 @@ class AddressComponents(object):
|
||||
prefixes like "London Borough of", pruning duplicates like "Antwerpen, Antwerpen, Antwerpen".
|
||||
|
||||
Usage:
|
||||
>>> components = AddressComponents(osm_admin_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
|
||||
>>> components = AddressComponents(osm_admin_rtree, neighborhoods_rtree, places_index)
|
||||
>>> components.expand({'name': 'Hackney Empire'}, 51.54559, -0.05567)
|
||||
|
||||
Returns (results vary because of randomness):
|
||||
@@ -142,7 +142,7 @@ class AddressComponents(object):
|
||||
AddressFormatter.UNIT: Unit,
|
||||
}
|
||||
|
||||
def __init__(self, osm_admin_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames):
|
||||
def __init__(self, osm_admin_rtree, neighborhoods_rtree, places_index):
|
||||
self.config = yaml.load(open(PARSER_DEFAULT_CONFIG))
|
||||
|
||||
self.setup_component_dependencies()
|
||||
@@ -152,8 +152,6 @@ class AddressComponents(object):
|
||||
self.osm_admin_rtree = osm_admin_rtree
|
||||
self.neighborhoods_rtree = neighborhoods_rtree
|
||||
self.places_index = places_index
|
||||
self.quattroshapes_rtree = quattroshapes_rtree
|
||||
self.geonames = geonames
|
||||
|
||||
def setup_component_dependencies(self):
|
||||
self.component_dependencies = {}
|
||||
@@ -880,57 +878,6 @@ class AddressComponents(object):
|
||||
|
||||
address_components.update(new_admin_components)
|
||||
|
||||
def quattroshapes_city(self, address_components,
                       latitude, longitude,
                       language, non_local_language=None,
                       always_use_full_names=False):
    '''
    Quattroshapes/GeoNames cities
    -----------------------------

    Quattroshapes isn't great for everything, but it has decent city boundaries
    in places where OSM sometimes does not (or at least in places where we aren't
    currently able to create valid polygons). While Quattroshapes itself doesn't
    reliably use local names, which we'll want for consistency, Quattroshapes cities
    are linked with GeoNames, which has per-language localized names for most places.

    A lookup is only attempted when address_components has no
    AddressFormatter.CITY entry and a random draw falls below the configured
    city.quattroshapes_geonames_backup_city_probability.

    :param address_components: mapping of components built so far; checked for
        (and possibly stripped of) AddressFormatter.CITY
    :param latitude: latitude passed to the Quattroshapes point-in-polygon query
    :param longitude: longitude passed to the Quattroshapes point-in-polygon query
    :param language: language key used to pick the GeoNames name
    :param non_local_language: if set, used instead of language and
        abbreviations are never chosen
    :param always_use_full_names: if True, abbreviations are never chosen
    :return: the selected city name, or None if no usable name was found
    '''

    city = None

    qs_add_city_prob = float(nested_get(self.config, ('city', 'quattroshapes_geonames_backup_city_probability')))
    abbreviated_name_prob = float(nested_get(self.config, ('city', 'quattroshapes_geonames_abbreviated_probability')))

    if AddressFormatter.CITY not in address_components and random.random() < qs_add_city_prob:
        lang = non_local_language or language
        rtree = self.quattroshapes_rtree
        quattroshapes_cities = rtree.point_in_poly(latitude, longitude, return_all=True)
        for result in quattroshapes_cities:
            # Only locality-level polygons that carry a GeoNames link are usable
            if result.get(rtree.LEVEL) == rtree.LOCALITY and rtree.GEONAMES_ID in result:
                # The id field may be a comma-separated list; the first id is used
                geonames_id = int(result[rtree.GEONAMES_ID].split(',')[0])
                names = self.geonames.get_alternate_names(geonames_id)

                if not names or lang not in names:
                    continue

                # Reset per candidate so a rejected name from an earlier
                # polygon can never leak into the final return value
                city = None
                if 'abbr' not in names or non_local_language:
                    # Use the common city name in the target language
                    city = names[lang][0][0]
                elif not always_use_full_names and random.random() < abbreviated_name_prob:
                    # Use an abbreviation: NYC, BK, SF, etc.
                    city = random.choice(names['abbr'])[0]
                # NOTE(review): when 'abbr' is present and neither branch above
                # fires, city stays None and this candidate is skipped rather
                # than falling back to the full name -- confirm this is intended.

                if not city or not city.strip():
                    continue

                # First usable name wins (an unreachable break used to follow)
                return city
        else:
            # NOTE(review): the source this was rebuilt from had lost its
            # indentation, so attaching this else to the for loop is an
            # assumption -- confirm. As a for/else it runs whenever no polygon
            # yielded a name, but AddressFormatter.CITY cannot be present on
            # this path, so the pop below would never trigger.
            if non_local_language and AddressFormatter.CITY in address_components and (
                    AddressFormatter.CITY_DISTRICT in address_components or
                    AddressFormatter.SUBURB in address_components):
                address_components.pop(AddressFormatter.CITY)

    return city
|
||||
|
||||
generic_wiki_name_regex = re.compile('^[a-z]{2,3}:')
|
||||
|
||||
@classmethod
|
||||
@@ -1484,12 +1431,6 @@ class AddressComponents(object):
|
||||
non_local_language=non_local_language,
|
||||
language_suffix=language_suffix)
|
||||
|
||||
city = self.quattroshapes_city(address_components, latitude, longitude, language, non_local_language=non_local_language)
|
||||
if city:
|
||||
city = self.normalized_place_name(city, AddressFormatter.CITY, all_osm_components, country=country, languages=all_languages)
|
||||
if city:
|
||||
address_components[AddressFormatter.CITY] = city
|
||||
|
||||
self.add_neighborhoods(address_components, neighborhoods,
|
||||
language_suffix=language_suffix)
|
||||
|
||||
@@ -1596,14 +1537,6 @@ class AddressComponents(object):
|
||||
random_key=False,
|
||||
always_use_full_names=True)
|
||||
|
||||
city = self.quattroshapes_city(address_components, latitude, longitude, language, non_local_language=non_local_language,
|
||||
always_use_full_names=True)
|
||||
|
||||
if city:
|
||||
city = self.normalized_place_name(city, AddressFormatter.CITY, all_osm_components, country=country, languages=all_languages)
|
||||
if city:
|
||||
address_components[AddressFormatter.CITY] = city
|
||||
|
||||
self.add_neighborhoods(address_components, neighborhoods,
|
||||
language_suffix=language_suffix)
|
||||
|
||||
|
||||
@@ -12,11 +12,10 @@ import os
|
||||
from geodata.openaddresses.formatter import OpenAddressesFormatter
|
||||
|
||||
from geodata.addresses.components import AddressComponents
|
||||
from geodata.geonames.db import GeoNamesDB
|
||||
from geodata.polygons.language_polys import LanguagePolygonIndex
|
||||
from geodata.neighborhoods.reverse_geocode import NeighborhoodReverseGeocoder
|
||||
from geodata.places.reverse_geocode import PlaceReverseGeocoder
|
||||
from geodata.polygons.reverse_geocode import OSMReverseGeocoder, OSMCountryReverseGeocoder, QuattroshapesReverseGeocoder
|
||||
from geodata.polygons.reverse_geocode import OSMReverseGeocoder, OSMCountryReverseGeocoder
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
@@ -44,18 +43,10 @@ if __name__ == '__main__':
|
||||
default=None,
|
||||
help='OSM reverse geocoder RTree directory')
|
||||
|
||||
parser.add_argument('--quattroshapes-rtree-dir',
|
||||
default=None,
|
||||
help='Quattroshapes reverse geocoder RTree directory')
|
||||
|
||||
parser.add_argument('--places-index-dir',
|
||||
default=None,
|
||||
help='Places index directory')
|
||||
|
||||
parser.add_argument('--geonames-db',
|
||||
default=None,
|
||||
help='GeoNames db file')
|
||||
|
||||
parser.add_argument('--neighborhoods-rtree-dir',
|
||||
default=None,
|
||||
help='Neighborhoods reverse geocoder RTree directory')
|
||||
@@ -85,17 +76,8 @@ if __name__ == '__main__':
|
||||
if args.places_index_dir:
|
||||
places_index = PlaceReverseGeocoder.load(args.places_index_dir)
|
||||
|
||||
quattroshapes_rtree = None
|
||||
if args.quattroshapes_rtree_dir:
|
||||
quattroshapes_rtree = QuattroshapesReverseGeocoder.load(args.quattroshapes_rtree_dir)
|
||||
|
||||
geonames = None
|
||||
|
||||
if args.geonames_db:
|
||||
geonames = GeoNamesDB(args.geonames_db)
|
||||
|
||||
if args.openaddresses_dir and args.format:
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index)
|
||||
|
||||
oa_formatter = OpenAddressesFormatter(components, country_rtree, debug=args.debug)
|
||||
oa_formatter.build_training_data(args.openaddresses_dir, args.out_dir, tag_components=not args.untagged)
|
||||
|
||||
@@ -516,10 +516,6 @@ if __name__ == '__main__':
|
||||
if args.places_index_dir:
|
||||
places_index = PlaceReverseGeocoder.load(args.places_index_dir)
|
||||
|
||||
quattroshapes_rtree = None
|
||||
if args.quattroshapes_rtree_dir:
|
||||
quattroshapes_rtree = QuattroshapesReverseGeocoder.load(args.quattroshapes_rtree_dir)
|
||||
|
||||
metro_stations_index = None
|
||||
if args.metro_stations_index_dir:
|
||||
metro_stations_index = MetroStationReverseGeocoder.load(args.metro_stations_index_dir)
|
||||
@@ -532,11 +528,6 @@ if __name__ == '__main__':
|
||||
if args.buildings_rtree_dir:
|
||||
buildings_rtree = OSMBuildingReverseGeocoder.load(args.buildings_rtree_dir)
|
||||
|
||||
geonames = None
|
||||
|
||||
if args.geonames_db:
|
||||
geonames = GeoNamesDB(args.geonames_db)
|
||||
|
||||
# Can parallelize
|
||||
if args.streets_file:
|
||||
build_ways_training_data(country_rtree, args.streets_file, args.out_dir, abbreviate_streets=not args.unabbreviated)
|
||||
@@ -550,28 +541,24 @@ if __name__ == '__main__':
|
||||
parser.error('--rtree-dir required for formatted addresses')
|
||||
elif neighborhoods_rtree is None:
|
||||
parser.error('--neighborhoods-rtree-dir required for formatted addresses')
|
||||
elif quattroshapes_rtree is None:
|
||||
parser.error('--quattroshapes-rtree-dir required for formatted addresses')
|
||||
elif geonames is None:
|
||||
parser.error('--geonames-db required for formatted addresses')
|
||||
elif places_index is None:
|
||||
parser.error('--places-index-dir required for formatted addresses')
|
||||
|
||||
if args.address_file and args.format:
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index)
|
||||
osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index)
|
||||
osm_formatter.build_training_data(args.address_file, args.out_dir, tag_components=not args.untagged)
|
||||
if args.address_file and args.limited_addresses:
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index)
|
||||
osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index, splitter=u' ')
|
||||
osm_formatter.build_limited_training_data(args.address_file, args.out_dir)
|
||||
|
||||
if args.place_nodes_file and args.format:
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index)
|
||||
osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index)
|
||||
osm_formatter.build_place_training_data(args.place_nodes_file, args.out_dir, tag_components=not args.untagged)
|
||||
|
||||
if args.intersections_file and args.format:
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames)
|
||||
components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index)
|
||||
osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index)
|
||||
osm_formatter.build_intersections_training_data(args.intersections_file, args.out_dir, tag_components=not args.untagged)
|
||||
|
||||
Reference in New Issue
Block a user