[osm] Reverse geocoding to metro station only for addresses in Japan
This commit is contained in:
@@ -32,3 +32,4 @@ countries:
|
||||
combine_block_house_number_probability: 1.0
|
||||
block_phrase_probability: 0.4
|
||||
romaji_probability: 0.2
|
||||
add_metro_probability: 0.6
|
||||
@@ -143,7 +143,7 @@ class OSMAddressFormatter(object):
|
||||
|
||||
boundary_component_priorities = {k: i for i, k in enumerate(AddressFormatter.BOUNDARY_COMPONENTS_ORDERED)}
|
||||
|
||||
def __init__(self, components, subdivisions_rtree=None, buildings_rtree=None):
|
||||
def __init__(self, components, subdivisions_rtree=None, buildings_rtree=None, metro_stations_index=None):
|
||||
# Instance of AddressComponents, contains structures for reverse geocoding, etc.
|
||||
self.components = components
|
||||
self.language_rtree = components.language_rtree
|
||||
@@ -151,6 +151,8 @@ class OSMAddressFormatter(object):
|
||||
self.subdivisions_rtree = subdivisions_rtree
|
||||
self.buildings_rtree = buildings_rtree
|
||||
|
||||
self.metro_stations_index = metro_stations_index
|
||||
|
||||
self.config = yaml.load(open(OSM_PARSER_DATA_DEFAULT_CONFIG))
|
||||
self.formatter = AddressFormatter()
|
||||
|
||||
@@ -326,6 +328,37 @@ class OSMAddressFormatter(object):
|
||||
house_number = separator.join([block, house_number])
|
||||
address_components['addr:housenumber'] = house_number
|
||||
|
||||
def add_metro_station(self, address_components, latitude, longitude, language=None, default_language=None):
    '''
    Metro stations
    --------------

    Particularly in Japan, where there are rarely named streets, metro stations are
    often used to help locate an address (landmarks may be used as well). Unlike in the
    rest of the world, metro stations in Japan are a semi-official component and used
    almost as frequently as street names or house number in other countries, so we would
    want libpostal's address parser to recognize Japanese train stations in both Kanji and Romaji.

    It's possible at some point to extend this to generate the sorts of natural language
    directions we sometimes see in NYC and other large cities where a subway stop might be
    included parenthetically after the address e.g. 61 Wythe Ave (L train to Bedford).
    The subway stations in OSM are in a variety of formats, so this would need some massaging
    and a slightly more sophisticated phrase generator than what we employ for numeric components
    like apartment numbers.

    Behavior
    --------

    Asks self.metro_stations_index for the station nearest to (latitude, longitude)
    and, when a usable name is found, stores it in address_components (mutated in
    place) under the AddressFormatter.METRO_STATION key. Always returns None.

    The name is chosen as follows:

    - language is None or equal to default_language: the station's 'name' value
    - any other language: the station's 'name:<language, lowercased>' value,
      with no fallback to 'name'

    Nothing is added when no index is configured, when the index returns no
    station, or when the selected name is missing or empty.
    '''
    # The constructor allows metro_stations_index to be None; without an index
    # there is nothing to look up, so leave the components untouched.
    if self.metro_stations_index is None:
        return

    # NOTE(review): the result is assumed to be a dict-like record of station
    # tags, since only .get() is called on it -- confirm against the index class.
    nearest_metro = self.metro_stations_index.nearest_point(latitude, longitude)
    if not nearest_metro:
        return

    if language is None or language == default_language:
        # In the default language the plain 'name' value is used rather
        # than the language-suffixed one.
        name = nearest_metro.get('name')
    else:
        name = nearest_metro.get('name:{}'.format(language.lower()))

    if name:
        address_components[AddressFormatter.METRO_STATION] = name
|
||||
|
||||
def venue_names(self, props, languages):
|
||||
'''
|
||||
Venue names
|
||||
@@ -681,6 +714,11 @@ class OSMAddressFormatter(object):
|
||||
sub_building_tags = self.normalize_sub_building_components(tags)
|
||||
revised_tags.update(sub_building_tags)
|
||||
|
||||
# Only including nearest metro station in Japan
|
||||
if country == JAPAN:
|
||||
if random.random() < float(nested_get(self.config, ('countries', 'jp', 'add_metro_probability'), default=0.0)):
|
||||
self.add_metro_station(revised_tags, latitude, longitude, language, default_language=JAPANESE)
|
||||
|
||||
num_floors = None
|
||||
num_basements = None
|
||||
zone = None
|
||||
|
||||
Reference in New Issue
Block a user