[osm] moving osm_address_components to its own module

This commit is contained in:
Al
2016-05-05 19:00:27 -04:00
parent 4271f7f84b
commit ad81095879
3 changed files with 69 additions and 70 deletions

View File

@@ -14,7 +14,7 @@ from geodata.countries.country_names import *
from geodata.language_id.disambiguation import *
from geodata.language_id.sample import sample_random_language
from geodata.names.normalization import name_affixes
from geodata.osm.extract import osm_address_components
from geodata.osm.components import osm_address_components
from geodata.states.state_abbreviations import state_abbreviations

View File

@@ -0,0 +1,67 @@
import os
import yaml
from geodata.address_formatting.formatter import AddressFormatter
# Absolute, symlink-resolved directory that contains this module.
this_dir = os.path.realpath(os.path.dirname(__file__))

# Directory holding the OSM boundary configs: three levels above this
# module, then resources/boundaries/osm. The path is joined but not
# normalized, so the parent-directory segments remain in the string.
OSM_BOUNDARIES_DIR = os.path.join(
    this_dir,
    *([os.pardir] * 3 + ['resources', 'boundaries', 'osm'])
)
class OSMAddressComponents(object):
    '''
    Keeps a map of OSM keys and values to the standard components
    of an address like city, state, etc. used for address formatting.
    When we reverse geocode a point, it will fall into a number of
    polygons, and we simply need to assign the names of said polygons
    to an address field.

    Country-specific mappings are read from "<country_code>.yaml" files
    in the boundaries directory. global_keys is consulted only when a
    country has no mapping at all for the requested OSM property.
    '''

    ADMIN_LEVEL = 'admin_level'

    # These keys are country-independent
    global_keys = {
        'place': {
            'country': AddressFormatter.COUNTRY,
            'state': AddressFormatter.STATE,
            'region': AddressFormatter.STATE,
            'province': AddressFormatter.STATE,
            'county': AddressFormatter.STATE_DISTRICT,
            'island': AddressFormatter.ISLAND,
            'islet': AddressFormatter.ISLAND,
            'municipality': AddressFormatter.CITY,
            'city': AddressFormatter.CITY,
            'town': AddressFormatter.CITY,
            'township': AddressFormatter.CITY,
            'village': AddressFormatter.CITY,
            'hamlet': AddressFormatter.CITY,
            'borough': AddressFormatter.CITY_DISTRICT,
            'suburb': AddressFormatter.SUBURB,
            'quarter': AddressFormatter.SUBURB,
            'neighbourhood': AddressFormatter.SUBURB
        }
    }

    def __init__(self, boundaries_dir=OSM_BOUNDARIES_DIR):
        '''
        Load and validate every "*.yaml" file found in boundaries_dir.

        The file name minus its ".yaml" suffix is used as the key into
        self.config. Each file is expected to map an OSM property name to
        a {value: address component} dictionary.

        :param boundaries_dir: directory containing the per-country files
        :raises ValueError: if any mapped component is not one of
            AddressFormatter.address_formatter_fields
        '''
        self.config = {}

        for filename in os.listdir(boundaries_dir):
            if not filename.endswith('.yaml'):
                continue

            country_code = filename.rsplit('.yaml', 1)[0]

            # Use a context manager so the handle is closed as soon as the
            # file has been parsed instead of whenever it gets collected.
            with open(os.path.join(boundaries_dir, filename)) as f:
                # NOTE(review): yaml.load without an explicit Loader can
                # construct arbitrary objects and is rejected by newer
                # PyYAML releases; consider yaml.safe_load if these files
                # only contain plain mappings.
                # An empty file parses to None; treat it as "no overrides".
                data = yaml.load(f) or {}

            # .items() behaves the same as iteritems() here and also works
            # on Python 3.
            for prop, values in data.items():
                for k, v in values.items():
                    if v not in AddressFormatter.address_formatter_fields:
                        raise ValueError(u'Invalid value in {} for prop={}, key={}: {}'.format(filename, prop, k, v))

            self.config[country_code] = data

    def get_component(self, country, prop, value):
        '''
        Return the address component for an OSM property/value pair.

        :param country: key into self.config (the config file's base name)
        :param prop: OSM property name, e.g. 'place'
        :param value: value of that property, e.g. 'city'
        :return: the mapped component, or None when there is no mapping
        '''
        props = self.config.get(country, {}).get(prop, {})
        # Fall back to the country-independent table only when the country
        # defines nothing for this property; a country-level mapping that
        # merely lacks this value does not fall through.
        if not props and prop in self.global_keys:
            props = self.global_keys[prop]
        return props.get(value)
# Shared module-level instance. Constructing it with the default argument
# lists OSM_BOUNDARIES_DIR and loads/validates every .yaml config there,
# so this work happens when the module is imported.
osm_address_components = OSMAddressComponents()

View File

@@ -6,30 +6,20 @@ Extracts nodes/ways/relations, their metadata and dependencies
from .osm XML files.
'''
import os
import re
import six
import sys
import urllib
import ujson as json
import yaml
import HTMLParser
from collections import OrderedDict
from lxml import etree
# Absolute, symlink-resolved directory that contains this module.
this_dir = os.path.realpath(os.path.dirname(__file__))

# Original entry, kept unchanged for backward compatibility. Note that it
# is resolved against the current working directory, not this file.
sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))

# Also add the directory two levels above this file so the absolute
# package imports that follow do not depend on where the process was
# started. Appended last so existing entries keep their precedence.
_package_root = os.path.realpath(os.path.join(this_dir, os.pardir, os.pardir))
if _package_root not in sys.path:
    sys.path.append(_package_root)
from geodata.address_formatting.formatter import AddressFormatter
from geodata.csv_utils import unicode_csv_reader
from geodata.text.normalize import normalize_string, NORMALIZE_STRING_DECOMPOSE, NORMALIZE_STRING_LATIN_ASCII
# Directory holding the OSM boundary configs: three levels above this
# module's directory, then resources/boundaries/osm. The path is joined
# but not normalized.
OSM_BOUNDARIES_DIR = os.path.join(
    this_dir,
    *([os.pardir] * 3 + ['resources', 'boundaries', 'osm'])
)
from geodata.encoding import safe_decode
# Large integer offsets for ways and relations.
# NOTE(review): presumably added to way/relation ids to keep them distinct
# from node ids - confirm against the code that uses them.
WAY_OFFSET = 10 ** 15
RELATION_OFFSET = 2 * WAY_OFFSET
@@ -178,61 +168,3 @@ def parse_osm_number_range(value):
else:
numbers.extend(non_breaking_dash_regex.split(safe_decode(val)))
return numbers
class OSMAddressComponents(object):
    '''
    Keeps a map of OSM keys and values to the standard components
    of an address like city, state, etc. used for address formatting.
    When we reverse geocode a point, it will fall into a number of
    polygons, and we simply need to assign the names of said polygons
    to an address field.

    Country-specific mappings are read from "<country_code>.yaml" files
    in the boundaries directory. global_keys is consulted only when a
    country has no mapping at all for the requested OSM property.
    '''

    ADMIN_LEVEL = 'admin_level'

    # These keys are country-independent
    global_keys = {
        'place': {
            'country': AddressFormatter.COUNTRY,
            'state': AddressFormatter.STATE,
            'region': AddressFormatter.STATE,
            'province': AddressFormatter.STATE,
            'county': AddressFormatter.STATE_DISTRICT,
            'island': AddressFormatter.ISLAND,
            'islet': AddressFormatter.ISLAND,
            'municipality': AddressFormatter.CITY,
            'city': AddressFormatter.CITY,
            'town': AddressFormatter.CITY,
            'township': AddressFormatter.CITY,
            'village': AddressFormatter.CITY,
            'hamlet': AddressFormatter.CITY,
            'borough': AddressFormatter.CITY_DISTRICT,
            'suburb': AddressFormatter.SUBURB,
            'quarter': AddressFormatter.SUBURB,
            'neighbourhood': AddressFormatter.SUBURB
        }
    }

    def __init__(self, boundaries_dir=OSM_BOUNDARIES_DIR):
        '''
        Load and validate every "*.yaml" file found in boundaries_dir.

        The file name minus its ".yaml" suffix is used as the key into
        self.config. Each file is expected to map an OSM property name to
        a {value: address component} dictionary.

        :param boundaries_dir: directory containing the per-country files
        :raises ValueError: if any mapped component is not one of
            AddressFormatter.address_formatter_fields
        '''
        self.config = {}

        for filename in os.listdir(boundaries_dir):
            if not filename.endswith('.yaml'):
                continue

            country_code = filename.rsplit('.yaml', 1)[0]

            # Use a context manager so the handle is closed as soon as the
            # file has been parsed instead of whenever it gets collected.
            with open(os.path.join(boundaries_dir, filename)) as f:
                # NOTE(review): yaml.load without an explicit Loader can
                # construct arbitrary objects and is rejected by newer
                # PyYAML releases; consider yaml.safe_load if these files
                # only contain plain mappings.
                # An empty file parses to None; treat it as "no overrides".
                data = yaml.load(f) or {}

            # .items() behaves the same as iteritems() here and also works
            # on Python 3.
            for prop, values in data.items():
                for k, v in values.items():
                    if v not in AddressFormatter.address_formatter_fields:
                        raise ValueError(u'Invalid value in {} for prop={}, key={}: {}'.format(filename, prop, k, v))

            self.config[country_code] = data

    def get_component(self, country, prop, value):
        '''
        Return the address component for an OSM property/value pair.

        :param country: key into self.config (the config file's base name)
        :param prop: OSM property name, e.g. 'place'
        :param value: value of that property, e.g. 'city'
        :return: the mapped component, or None when there is no mapping
        '''
        props = self.config.get(country, {}).get(prop, {})
        # Fall back to the country-independent table only when the country
        # defines nothing for this property; a country-level mapping that
        # merely lacks this value does not fall through.
        if not props and prop in self.global_keys:
            props = self.global_keys[prop]
        return props.get(value)
# Shared module-level instance. Constructing it with the default argument
# lists OSM_BOUNDARIES_DIR and loads/validates every .yaml config there,
# so this work happens when the module is imported.
osm_address_components = OSMAddressComponents()