[formatting] Adding OSM address components lookup by country

This commit is contained in:
Al
2015-11-17 11:39:34 -05:00
parent 0b74039a6a
commit b3ef8ded12

View File

@@ -6,13 +6,26 @@ Extracts nodes/ways/relations, their metadata and dependencies
from .osm XML files.
'''
import os
import re
import sys
import urllib
import ujson as json
import HTMLParser
from collections import OrderedDict
from lxml import etree
this_dir = os.path.realpath(os.path.dirname(__file__))
sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
from geodata.address_formatting.formatter import AddressFormatter
from geodata.csv_utils import unicode_csv_reader
OSM_BOUNDARIES_DIR = os.path.join(this_dir, os.pardir, os.pardir, os.pardir,
'resources', 'boundaries', 'osm')
from geodata.encoding import safe_decode
WAY_OFFSET = 10 ** 15
@@ -111,3 +124,37 @@ def osm_wikipedia_title_and_language(key, value):
value = value.rsplit(u':', 1)[-1]
return normalize_wikipedia_title(value), language
class OSMAddressComponents(object):
ADMIN_LEVEL = 'admin_level'
global_keys = {
'place': {
'city': AddressFormatter.CITY,
'suburb': AddressFormatter.SUBURB
}
}
def __init__(self):
self.config = {}
def configure(self, d=OSM_BOUNDARIES_DIR):
for filename in os.listdir(d):
if not filename.endswith('.json'):
continue
country_code = filename.rsplit('.json', 1)[0]
data = json.load(open(os.path.join(d, filename)))
for prop, values in data.iteritems():
for k, v in values.iteritems():
if v not in AddressFormatter.address_formatter_fields:
raise ValueError(u'Invalid value in {} for prop={}, key={}: {}'.format(filename, prop, k, v))
self.config[country_code] = data
self.config[None] = self.global_keys
def get_component(self, country, prop, value):
return self.config.get(country, {}).get(prop, {}).get(value, None)
osm_address_components = OSMAddressComponents()