diff --git a/scripts/geodata/addresses/components.py b/scripts/geodata/addresses/components.py index 8aa19369..786d5b41 100644 --- a/scripts/geodata/addresses/components.py +++ b/scripts/geodata/addresses/components.py @@ -63,7 +63,7 @@ class AddressComponents(object): prefixes like "London Borough of", pruning duplicates like "Antwerpen, Antwerpen, Antwerpen". Usage: - >>> components = AddressComponents(osm_admin_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames) + >>> components = AddressComponents(osm_admin_rtree, neighborhoods_rtree, places_index) >>> components.expand({'name': 'Hackney Empire'}, 51.54559, -0.05567) Returns (results vary because of randomness): @@ -142,7 +142,7 @@ class AddressComponents(object): AddressFormatter.UNIT: Unit, } - def __init__(self, osm_admin_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames): + def __init__(self, osm_admin_rtree, neighborhoods_rtree, places_index): self.config = yaml.load(open(PARSER_DEFAULT_CONFIG)) self.setup_component_dependencies() @@ -152,8 +152,6 @@ class AddressComponents(object): self.osm_admin_rtree = osm_admin_rtree self.neighborhoods_rtree = neighborhoods_rtree self.places_index = places_index - self.quattroshapes_rtree = quattroshapes_rtree - self.geonames = geonames def setup_component_dependencies(self): self.component_dependencies = {} @@ -880,57 +878,6 @@ class AddressComponents(object): address_components.update(new_admin_components) - def quattroshapes_city(self, address_components, - latitude, longitude, - language, non_local_language=None, - always_use_full_names=False): - ''' - Quattroshapes/GeoNames cities - ----------------------------- - - Quattroshapes isn't great for everything, but it has decent city boundaries - in places where OSM sometimes does not (or at least in places where we aren't - currently able to create valid polygons). While Quattroshapes itself doesn't - reliably use local names, which we'll want for consistency, Quattroshapes cities - are linked with GeoNames, which has per-language localized names for most places. - ''' - - city = None - - qs_add_city_prob = float(nested_get(self.config, ('city', 'quattroshapes_geonames_backup_city_probability'))) - abbreviated_name_prob = float(nested_get(self.config, ('city', 'quattroshapes_geonames_abbreviated_probability'))) - - if AddressFormatter.CITY not in address_components and random.random() < qs_add_city_prob: - lang = non_local_language or language - quattroshapes_cities = self.quattroshapes_rtree.point_in_poly(latitude, longitude, return_all=True) - for result in quattroshapes_cities: - if result.get(self.quattroshapes_rtree.LEVEL) == self.quattroshapes_rtree.LOCALITY and self.quattroshapes_rtree.GEONAMES_ID in result: - geonames_id = int(result[self.quattroshapes_rtree.GEONAMES_ID].split(',')[0]) - names = self.geonames.get_alternate_names(geonames_id) - - if not names or lang not in names: - continue - - city = None - if 'abbr' not in names or non_local_language: - # Use the common city name in the target language - city = names[lang][0][0] - elif not always_use_full_names and random.random() < abbreviated_name_prob: - # Use an abbreviation: NYC, BK, SF, etc. - city = random.choice(names['abbr'])[0] - - if not city or not city.strip(): - continue - return city - break - else: - if non_local_language and AddressFormatter.CITY in address_components and ( - AddressFormatter.CITY_DISTRICT in address_components or - AddressFormatter.SUBURB in address_components): - address_components.pop(AddressFormatter.CITY) - - return city - generic_wiki_name_regex = re.compile('^[a-z]{2,3}:') @classmethod @@ -1484,12 +1431,6 @@ class AddressComponents(object): non_local_language=non_local_language, language_suffix=language_suffix) - city = self.quattroshapes_city(address_components, latitude, longitude, language, non_local_language=non_local_language) - if city: - city = self.normalized_place_name(city, AddressFormatter.CITY, all_osm_components, country=country, languages=all_languages) - if city: - address_components[AddressFormatter.CITY] = city - self.add_neighborhoods(address_components, neighborhoods, language_suffix=language_suffix) @@ -1596,14 +1537,6 @@ class AddressComponents(object): random_key=False, always_use_full_names=True) - city = self.quattroshapes_city(address_components, latitude, longitude, language, non_local_language=non_local_language, - always_use_full_names=True) - - if city: - city = self.normalized_place_name(city, AddressFormatter.CITY, all_osm_components, country=country, languages=all_languages) - if city: - address_components[AddressFormatter.CITY] = city - self.add_neighborhoods(address_components, neighborhoods, language_suffix=language_suffix) diff --git a/scripts/geodata/openaddresses/openaddresses_training_data.py b/scripts/geodata/openaddresses/openaddresses_training_data.py index 334b8c71..576862cb 100644 --- a/scripts/geodata/openaddresses/openaddresses_training_data.py +++ b/scripts/geodata/openaddresses/openaddresses_training_data.py @@ -12,11 +12,10 @@ import os from geodata.openaddresses.formatter import OpenAddressesFormatter from geodata.addresses.components import AddressComponents -from geodata.geonames.db import GeoNamesDB from geodata.polygons.language_polys import LanguagePolygonIndex from geodata.neighborhoods.reverse_geocode import NeighborhoodReverseGeocoder from geodata.places.reverse_geocode import PlaceReverseGeocoder -from geodata.polygons.reverse_geocode import OSMReverseGeocoder, OSMCountryReverseGeocoder, QuattroshapesReverseGeocoder +from geodata.polygons.reverse_geocode import OSMReverseGeocoder, OSMCountryReverseGeocoder if __name__ == '__main__': @@ -44,18 +43,10 @@ if __name__ == '__main__': default=None, help='OSM reverse geocoder RTree directory') - parser.add_argument('--quattroshapes-rtree-dir', - default=None, - help='Quattroshapes reverse geocoder RTree directory') - parser.add_argument('--places-index-dir', default=None, help='Places index directory') - parser.add_argument('--geonames-db', - default=None, - help='GeoNames db file') - parser.add_argument('--neighborhoods-rtree-dir', default=None, help='Neighborhoods reverse geocoder RTree directory') @@ -85,17 +76,8 @@ if __name__ == '__main__': if args.places_index_dir: places_index = PlaceReverseGeocoder.load(args.places_index_dir) - quattroshapes_rtree = None - if args.quattroshapes_rtree_dir: - quattroshapes_rtree = QuattroshapesReverseGeocoder.load(args.quattroshapes_rtree_dir) - - geonames = None - - if args.geonames_db: - geonames = GeoNamesDB(args.geonames_db) - if args.openaddresses_dir and args.format: - components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames) + components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index) oa_formatter = OpenAddressesFormatter(components, country_rtree, debug=args.debug) oa_formatter.build_training_data(args.openaddresses_dir, args.out_dir, tag_components=not args.untagged) diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py index 64fca501..d5dc6ecd 100644 --- a/scripts/geodata/osm/osm_address_training_data.py +++ b/scripts/geodata/osm/osm_address_training_data.py @@ -516,10 +516,6 @@ if __name__ == '__main__': if args.places_index_dir: places_index = PlaceReverseGeocoder.load(args.places_index_dir) - quattroshapes_rtree = None - if args.quattroshapes_rtree_dir: - quattroshapes_rtree = QuattroshapesReverseGeocoder.load(args.quattroshapes_rtree_dir) - metro_stations_index = None if args.metro_stations_index_dir: metro_stations_index = MetroStationReverseGeocoder.load(args.metro_stations_index_dir) @@ -532,11 +528,6 @@ if __name__ == '__main__': if args.buildings_rtree_dir: buildings_rtree = OSMBuildingReverseGeocoder.load(args.buildings_rtree_dir) - geonames = None - - if args.geonames_db: - geonames = GeoNamesDB(args.geonames_db) - # Can parallelize if args.streets_file: build_ways_training_data(country_rtree, args.streets_file, args.out_dir, abbreviate_streets=not args.unabbreviated) @@ -550,28 +541,24 @@ if __name__ == '__main__': parser.error('--rtree-dir required for formatted addresses') elif neighborhoods_rtree is None: parser.error('--neighborhoods-rtree-dir required for formatted addresses') - elif quattroshapes_rtree is None: - parser.error('--quattroshapes-rtree-dir required for formatted addresses') - elif geonames is None: - parser.error('--geonames-db required for formatted addresses') elif places_index is None: parser.error('--places-index-dir required for formatted addresses') if args.address_file and args.format: - components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames) + components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index) osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index) osm_formatter.build_training_data(args.address_file, args.out_dir, tag_components=not args.untagged) if args.address_file and args.limited_addresses: - components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames) + components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index) osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index, splitter=u' ') osm_formatter.build_limited_training_data(args.address_file, args.out_dir) if args.place_nodes_file and args.format: - components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames) + components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index) osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index) osm_formatter.build_place_training_data(args.place_nodes_file, args.out_dir, tag_components=not args.untagged) if args.intersections_file and args.format: - components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index, quattroshapes_rtree, geonames) + components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index) osm_formatter = OSMAddressFormatter(components, country_rtree, subdivisions_rtree, buildings_rtree, metro_stations_index) osm_formatter.build_intersections_training_data(args.intersections_file, args.out_dir, tag_components=not args.untagged)