[osm] moving osm_address_components to its own module
This commit is contained in:
@@ -14,7 +14,7 @@ from geodata.countries.country_names import *
|
||||
from geodata.language_id.disambiguation import *
|
||||
from geodata.language_id.sample import sample_random_language
|
||||
from geodata.names.normalization import name_affixes
|
||||
from geodata.osm.extract import osm_address_components
|
||||
from geodata.osm.components import osm_address_components
|
||||
from geodata.states.state_abbreviations import state_abbreviations
|
||||
|
||||
|
||||
|
||||
67
scripts/geodata/osm/components.py
Normal file
67
scripts/geodata/osm/components.py
Normal file
@@ -0,0 +1,67 @@
|
||||
import os
|
||||
import yaml
|
||||
|
||||
from geodata.address_formatting.formatter import AddressFormatter
|
||||
|
||||
# Symlink-resolved directory that contains this module.
this_dir = os.path.realpath(os.path.dirname(__file__))

# Location of the OSM boundary configs: three directory levels above this
# module, then resources/boundaries/osm. The path is deliberately left
# un-normalized (the parent-directory components stay in the string).
OSM_BOUNDARIES_DIR = os.path.join(
    this_dir,
    *([os.pardir] * 3 + ['resources', 'boundaries', 'osm'])
)
|
||||
|
||||
|
||||
class OSMAddressComponents(object):
    '''
    Keeps a map of OSM keys and values to the standard components
    of an address like city, state, etc. used for address formatting.
    When we reverse geocode a point, it will fall into a number of
    polygons, and we simply need to assign the names of said polygons
    to an address field.

    Country-specific mappings are loaded from "<country_code>.yaml" files
    in the boundaries directory; global_keys is the country-independent
    fallback consulted by get_component().
    '''

    ADMIN_LEVEL = 'admin_level'

    # These keys are country-independent
    global_keys = {
        'place': {
            'country': AddressFormatter.COUNTRY,
            'state': AddressFormatter.STATE,
            'region': AddressFormatter.STATE,
            'province': AddressFormatter.STATE,
            'county': AddressFormatter.STATE_DISTRICT,
            'island': AddressFormatter.ISLAND,
            'islet': AddressFormatter.ISLAND,
            'municipality': AddressFormatter.CITY,
            'city': AddressFormatter.CITY,
            'town': AddressFormatter.CITY,
            'township': AddressFormatter.CITY,
            'village': AddressFormatter.CITY,
            'hamlet': AddressFormatter.CITY,
            'borough': AddressFormatter.CITY_DISTRICT,
            'suburb': AddressFormatter.SUBURB,
            'quarter': AddressFormatter.SUBURB,
            'neighbourhood': AddressFormatter.SUBURB
        }
    }

    def __init__(self, boundaries_dir=OSM_BOUNDARIES_DIR):
        '''
        Load and validate every "*.yaml" file found in boundaries_dir.

        The file name minus its ".yaml" suffix is used as the country key
        in self.config; the parsed document (a mapping of OSM property ->
        {OSM value -> address component}) is stored as-is.

        Raises ValueError if any mapped component is not one of
        AddressFormatter.address_formatter_fields.
        '''
        self.config = {}

        for filename in os.listdir(boundaries_dir):
            if not filename.endswith('.yaml'):
                continue

            country_code = filename.rsplit('.yaml', 1)[0]

            # Use a context manager so the handle is closed deterministically
            # instead of being left for the garbage collector.
            with open(os.path.join(boundaries_dir, filename)) as f:
                # NOTE(review): yaml.load without an explicit Loader can build
                # arbitrary Python objects and is rejected by recent PyYAML
                # releases; consider yaml.safe_load if these files only hold
                # plain mappings.
                # An empty document parses to None; treat it as "no overrides".
                data = yaml.load(f) or {}

            # .items() behaves the same on Python 2 and 3 for this read-only walk.
            for prop, values in data.items():
                for k, v in values.items():
                    if v not in AddressFormatter.address_formatter_fields:
                        raise ValueError(u'Invalid value in {} for prop={}, key={}: {}'.format(filename, prop, k, v))

            self.config[country_code] = data

    def get_component(self, country, prop, value):
        '''
        Return the address component mapped to an OSM (prop, value) pair
        for the given country, or None when there is no mapping.

        The global_keys table is only consulted when the country has no
        (or an empty) mapping for prop; a country that defines prop but
        lacks this particular value does not fall back per-value.
        '''
        props = self.config.get(country, {}).get(prop, {})
        if not props and prop in self.global_keys:
            props = self.global_keys[prop]
        return props.get(value, None)
|
||||
|
||||
# Shared module-level instance. Constructing it lists OSM_BOUNDARIES_DIR and
# parses every ".yaml" file there, so this I/O happens at import time.
osm_address_components = OSMAddressComponents()
|
||||
@@ -6,30 +6,20 @@ Extracts nodes/ways/relations, their metadata and dependencies
|
||||
from .osm XML files.
|
||||
'''
|
||||
|
||||
import os
|
||||
import re
|
||||
import six
|
||||
import sys
|
||||
import urllib
|
||||
import ujson as json
|
||||
import yaml
|
||||
import HTMLParser
|
||||
|
||||
from collections import OrderedDict
|
||||
from lxml import etree
|
||||
|
||||
this_dir = os.path.realpath(os.path.dirname(__file__))
|
||||
sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
|
||||
|
||||
from geodata.address_formatting.formatter import AddressFormatter
|
||||
from geodata.csv_utils import unicode_csv_reader
|
||||
from geodata.text.normalize import normalize_string, NORMALIZE_STRING_DECOMPOSE, NORMALIZE_STRING_LATIN_ASCII
|
||||
|
||||
OSM_BOUNDARIES_DIR = os.path.join(this_dir, os.pardir, os.pardir, os.pardir,
|
||||
'resources', 'boundaries', 'osm')
|
||||
|
||||
from geodata.encoding import safe_decode
|
||||
|
||||
|
||||
# NOTE(review): WAY_OFFSET and RELATION_OFFSET are presumably added to OSM
# way / relation ids so they cannot collide with node ids when all element
# types share one id space -- usage is not visible here; confirm with callers.
WAY_OFFSET = 10 ** 15
|
||||
RELATION_OFFSET = 2 * 10 ** 15
|
||||
|
||||
@@ -178,61 +168,3 @@ def parse_osm_number_range(value):
|
||||
else:
|
||||
numbers.extend(non_breaking_dash_regex.split(safe_decode(val)))
|
||||
return numbers
|
||||
|
||||
|
||||
class OSMAddressComponents(object):
    '''
    Keeps a map of OSM keys and values to the standard components
    of an address like city, state, etc. used for address formatting.
    When we reverse geocode a point, it will fall into a number of
    polygons, and we simply need to assign the names of said polygons
    to an address field.

    Country-specific mappings are loaded from "<country_code>.yaml" files
    in the boundaries directory; global_keys is the country-independent
    fallback consulted by get_component().
    '''

    ADMIN_LEVEL = 'admin_level'

    # These keys are country-independent
    global_keys = {
        'place': {
            'country': AddressFormatter.COUNTRY,
            'state': AddressFormatter.STATE,
            'region': AddressFormatter.STATE,
            'province': AddressFormatter.STATE,
            'county': AddressFormatter.STATE_DISTRICT,
            'island': AddressFormatter.ISLAND,
            'islet': AddressFormatter.ISLAND,
            'municipality': AddressFormatter.CITY,
            'city': AddressFormatter.CITY,
            'town': AddressFormatter.CITY,
            'township': AddressFormatter.CITY,
            'village': AddressFormatter.CITY,
            'hamlet': AddressFormatter.CITY,
            'borough': AddressFormatter.CITY_DISTRICT,
            'suburb': AddressFormatter.SUBURB,
            'quarter': AddressFormatter.SUBURB,
            'neighbourhood': AddressFormatter.SUBURB
        }
    }

    def __init__(self, boundaries_dir=OSM_BOUNDARIES_DIR):
        '''
        Load and validate every "*.yaml" file found in boundaries_dir.

        The file name minus its ".yaml" suffix is used as the country key
        in self.config; the parsed document (a mapping of OSM property ->
        {OSM value -> address component}) is stored as-is.

        Raises ValueError if any mapped component is not one of
        AddressFormatter.address_formatter_fields.
        '''
        self.config = {}

        for filename in os.listdir(boundaries_dir):
            if not filename.endswith('.yaml'):
                continue

            country_code = filename.rsplit('.yaml', 1)[0]

            # Use a context manager so the handle is closed deterministically
            # instead of being left for the garbage collector.
            with open(os.path.join(boundaries_dir, filename)) as f:
                # NOTE(review): yaml.load without an explicit Loader can build
                # arbitrary Python objects and is rejected by recent PyYAML
                # releases; consider yaml.safe_load if these files only hold
                # plain mappings.
                # An empty document parses to None; treat it as "no overrides".
                data = yaml.load(f) or {}

            # .items() behaves the same on Python 2 and 3 for this read-only walk.
            for prop, values in data.items():
                for k, v in values.items():
                    if v not in AddressFormatter.address_formatter_fields:
                        raise ValueError(u'Invalid value in {} for prop={}, key={}: {}'.format(filename, prop, k, v))

            self.config[country_code] = data

    def get_component(self, country, prop, value):
        '''
        Return the address component mapped to an OSM (prop, value) pair
        for the given country, or None when there is no mapping.

        The global_keys table is only consulted when the country has no
        (or an empty) mapping for prop; a country that defines prop but
        lacks this particular value does not fall back per-value.
        '''
        props = self.config.get(country, {}).get(prop, {})
        if not props and prop in self.global_keys:
            props = self.global_keys[prop]
        return props.get(value, None)
|
||||
|
||||
# Shared module-level instance. Constructing it lists OSM_BOUNDARIES_DIR and
# parses every ".yaml" file there, so this I/O happens at import time.
osm_address_components = OSMAddressComponents()
|
||||
|
||||
Reference in New Issue
Block a user