[osm] moving osm_address_components to its own module
This commit is contained in:
@@ -14,7 +14,7 @@ from geodata.countries.country_names import *
|
|||||||
from geodata.language_id.disambiguation import *
|
from geodata.language_id.disambiguation import *
|
||||||
from geodata.language_id.sample import sample_random_language
|
from geodata.language_id.sample import sample_random_language
|
||||||
from geodata.names.normalization import name_affixes
|
from geodata.names.normalization import name_affixes
|
||||||
from geodata.osm.extract import osm_address_components
|
from geodata.osm.components import osm_address_components
|
||||||
from geodata.states.state_abbreviations import state_abbreviations
|
from geodata.states.state_abbreviations import state_abbreviations
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
67
scripts/geodata/osm/components.py
Normal file
67
scripts/geodata/osm/components.py
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
import os
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from geodata.address_formatting.formatter import AddressFormatter
|
||||||
|
|
||||||
|
# Directory containing this module, with symlinks resolved.
this_dir = os.path.realpath(os.path.dirname(__file__))

# Location of the OSM boundary configs relative to this module:
# three directory levels up, then resources/boundaries/osm.
_OSM_BOUNDARIES_PARTS = (os.pardir, os.pardir, os.pardir,
                         'resources', 'boundaries', 'osm')
OSM_BOUNDARIES_DIR = os.path.join(this_dir, *_OSM_BOUNDARIES_PARTS)
|
||||||
|
|
||||||
|
|
||||||
|
class OSMAddressComponents(object):
    '''
    Keeps a map of OSM keys and values to the standard components
    of an address like city, state, etc. used for address formatting.
    When we reverse geocode a point, it will fall into a number of
    polygons, and we simply need to assign the names of said polygons
    to an address field.

    Country-specific mappings are loaded from "<country_code>.yaml" files
    at construction time; global_keys is the country-independent fallback.
    '''

    ADMIN_LEVEL = 'admin_level'

    # These keys are country-independent
    global_keys = {
        'place': {
            'country': AddressFormatter.COUNTRY,
            'state': AddressFormatter.STATE,
            'region': AddressFormatter.STATE,
            'province': AddressFormatter.STATE,
            'county': AddressFormatter.STATE_DISTRICT,
            'island': AddressFormatter.ISLAND,
            'islet': AddressFormatter.ISLAND,
            'municipality': AddressFormatter.CITY,
            'city': AddressFormatter.CITY,
            'town': AddressFormatter.CITY,
            'township': AddressFormatter.CITY,
            'village': AddressFormatter.CITY,
            'hamlet': AddressFormatter.CITY,
            'borough': AddressFormatter.CITY_DISTRICT,
            'suburb': AddressFormatter.SUBURB,
            'quarter': AddressFormatter.SUBURB,
            'neighbourhood': AddressFormatter.SUBURB
        }
    }

    def __init__(self, boundaries_dir=OSM_BOUNDARIES_DIR):
        '''
        Load and validate every "*.yaml" file found in boundaries_dir.

        Each file's name (minus the ".yaml" suffix) is used as the key
        into self.config, and its parsed content is stored as the value.

        :param boundaries_dir: directory to scan for YAML configs.
        :raises ValueError: if any mapped value is not one of
            AddressFormatter.address_formatter_fields.
        '''
        self.config = {}

        for filename in os.listdir(boundaries_dir):
            if not filename.endswith('.yaml'):
                continue

            country_code = filename.rsplit('.yaml', 1)[0]

            # Use a context manager so the handle is closed promptly
            # instead of being left to the garbage collector.
            # NOTE(review): yaml.load can construct arbitrary objects. That
            # is acceptable for repository-controlled configs; switch to
            # yaml.safe_load if these files can ever come from elsewhere.
            with open(os.path.join(boundaries_dir, filename)) as f:
                data = yaml.load(f)

            # An empty YAML document parses to None; treat it as "no
            # country-specific mappings" rather than failing on iteration.
            if data is None:
                data = {}

            # .items() (rather than .iteritems()) works on Python 2 and 3.
            for prop, values in data.items():
                for k, v in values.items():
                    if v not in AddressFormatter.address_formatter_fields:
                        raise ValueError(u'Invalid value in {} for prop={}, key={}: {}'.format(filename, prop, k, v))

            self.config[country_code] = data

    def get_component(self, country, prop, value):
        '''
        Look up the address component for an OSM property/value pair.

        The country-specific mapping for prop is used when one exists and
        is non-empty. Otherwise the global_keys mapping for prop is used,
        if there is one. The two mappings are never merged: a non-empty
        country mapping fully replaces the global one for that prop.

        :param country: key into self.config (the YAML file's base name).
        :param prop: OSM property name, e.g. 'place'.
        :param value: OSM value for that property.
        :returns: the mapped component, or None when nothing matches.
        '''
        props = self.config.get(country, {}).get(prop, {})
        if not props and prop in self.global_keys:
            props = self.global_keys[prop]
        return props.get(value, None)
|
||||||
|
|
||||||
|
# Shared module-level instance. It is built with the default boundaries_dir,
# so importing this module lists and parses the YAML files in
# OSM_BOUNDARIES_DIR as a side effect.
osm_address_components = OSMAddressComponents()
|
||||||
@@ -6,30 +6,20 @@ Extracts nodes/ways/relations, their metadata and dependencies
|
|||||||
from .osm XML files.
|
from .osm XML files.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import os
|
|
||||||
import re
|
import re
|
||||||
import six
|
import six
|
||||||
import sys
|
|
||||||
import urllib
|
import urllib
|
||||||
import ujson as json
|
|
||||||
import yaml
|
|
||||||
import HTMLParser
|
import HTMLParser
|
||||||
|
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
this_dir = os.path.realpath(os.path.dirname(__file__))
|
|
||||||
sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
|
|
||||||
|
|
||||||
from geodata.address_formatting.formatter import AddressFormatter
|
|
||||||
from geodata.csv_utils import unicode_csv_reader
|
from geodata.csv_utils import unicode_csv_reader
|
||||||
from geodata.text.normalize import normalize_string, NORMALIZE_STRING_DECOMPOSE, NORMALIZE_STRING_LATIN_ASCII
|
from geodata.text.normalize import normalize_string, NORMALIZE_STRING_DECOMPOSE, NORMALIZE_STRING_LATIN_ASCII
|
||||||
|
|
||||||
OSM_BOUNDARIES_DIR = os.path.join(this_dir, os.pardir, os.pardir, os.pardir,
|
|
||||||
'resources', 'boundaries', 'osm')
|
|
||||||
|
|
||||||
from geodata.encoding import safe_decode
|
from geodata.encoding import safe_decode
|
||||||
|
|
||||||
|
|
||||||
WAY_OFFSET = 10 ** 15
|
WAY_OFFSET = 10 ** 15
|
||||||
RELATION_OFFSET = 2 * 10 ** 15
|
RELATION_OFFSET = 2 * 10 ** 15
|
||||||
|
|
||||||
@@ -178,61 +168,3 @@ def parse_osm_number_range(value):
|
|||||||
else:
|
else:
|
||||||
numbers.extend(non_breaking_dash_regex.split(safe_decode(val)))
|
numbers.extend(non_breaking_dash_regex.split(safe_decode(val)))
|
||||||
return numbers
|
return numbers
|
||||||
|
|
||||||
|
|
||||||
class OSMAddressComponents(object):
    '''
    Keeps a map of OSM keys and values to the standard components
    of an address like city, state, etc. used for address formatting.
    When we reverse geocode a point, it will fall into a number of
    polygons, and we simply need to assign the names of said polygons
    to an address field.

    Country-specific mappings are loaded from "<country_code>.yaml" files
    at construction time; global_keys is the country-independent fallback.
    '''

    ADMIN_LEVEL = 'admin_level'

    # These keys are country-independent
    global_keys = {
        'place': {
            'country': AddressFormatter.COUNTRY,
            'state': AddressFormatter.STATE,
            'region': AddressFormatter.STATE,
            'province': AddressFormatter.STATE,
            'county': AddressFormatter.STATE_DISTRICT,
            'island': AddressFormatter.ISLAND,
            'islet': AddressFormatter.ISLAND,
            'municipality': AddressFormatter.CITY,
            'city': AddressFormatter.CITY,
            'town': AddressFormatter.CITY,
            'township': AddressFormatter.CITY,
            'village': AddressFormatter.CITY,
            'hamlet': AddressFormatter.CITY,
            'borough': AddressFormatter.CITY_DISTRICT,
            'suburb': AddressFormatter.SUBURB,
            'quarter': AddressFormatter.SUBURB,
            'neighbourhood': AddressFormatter.SUBURB
        }
    }

    def __init__(self, boundaries_dir=OSM_BOUNDARIES_DIR):
        '''
        Load and validate every "*.yaml" file found in boundaries_dir.

        Each file's name (minus the ".yaml" suffix) is used as the key
        into self.config, and its parsed content is stored as the value.

        :param boundaries_dir: directory to scan for YAML configs.
        :raises ValueError: if any mapped value is not one of
            AddressFormatter.address_formatter_fields.
        '''
        self.config = {}

        for filename in os.listdir(boundaries_dir):
            if not filename.endswith('.yaml'):
                continue

            country_code = filename.rsplit('.yaml', 1)[0]

            # Use a context manager so the handle is closed promptly
            # instead of being left to the garbage collector.
            # NOTE(review): yaml.load can construct arbitrary objects. That
            # is acceptable for repository-controlled configs; switch to
            # yaml.safe_load if these files can ever come from elsewhere.
            with open(os.path.join(boundaries_dir, filename)) as f:
                data = yaml.load(f)

            # An empty YAML document parses to None; treat it as "no
            # country-specific mappings" rather than failing on iteration.
            if data is None:
                data = {}

            # .items() (rather than .iteritems()) works on Python 2 and 3.
            for prop, values in data.items():
                for k, v in values.items():
                    if v not in AddressFormatter.address_formatter_fields:
                        raise ValueError(u'Invalid value in {} for prop={}, key={}: {}'.format(filename, prop, k, v))

            self.config[country_code] = data

    def get_component(self, country, prop, value):
        '''
        Look up the address component for an OSM property/value pair.

        The country-specific mapping for prop is used when one exists and
        is non-empty. Otherwise the global_keys mapping for prop is used,
        if there is one. The two mappings are never merged: a non-empty
        country mapping fully replaces the global one for that prop.

        :param country: key into self.config (the YAML file's base name).
        :param prop: OSM property name, e.g. 'place'.
        :param value: OSM value for that property.
        :returns: the mapped component, or None when nothing matches.
        '''
        props = self.config.get(country, {}).get(prop, {})
        if not props and prop in self.global_keys:
            props = self.global_keys[prop]
        return props.get(value, None)
|
|
||||||
|
|
||||||
# Shared module-level instance. It is built with the default boundaries_dir,
# so importing this module lists and parses the YAML files in
# OSM_BOUNDARIES_DIR as a side effect.
osm_address_components = OSMAddressComponents()
|
|
||||||
|
|||||||
Reference in New Issue
Block a user