[addresses] Adding a new config for postal codes around the world. It allows prepending the ISO alpha-2 country code to the postcode, e.g. SI-1000 (only used if the postcode begins with a digit). This system was used for postal codes in continental Europe on the recommendation of the CEPT. Seven member states still use it, so in those countries the country code is added with higher probability. The config also contains the license plate codes for countries where e.g. L-1234 might be used instead of LU-1234. It also allows configuring in which countries postcodes should be validated using Google's per-country validation regexes (with the ability to override them with a custom regex), and in which countries other admin component names should be stripped.

This commit is contained in:
Al
2017-02-10 18:38:32 -05:00
parent 109aa76718
commit 293587bae9
8 changed files with 447 additions and 29 deletions

View File

@@ -34,7 +34,6 @@ from geodata.configs.utils import nested_get
from geodata.countries.country_names import *
from geodata.language_id.disambiguation import *
from geodata.language_id.sample import INTERNET_LANGUAGE_DISTRIBUTION
from geodata.i18n.google import postcode_regexes
from geodata.i18n.languages import *
from geodata.intersections.query import Intersection, IntersectionQuery
from geodata.address_formatting.formatter import AddressFormatter
@@ -45,6 +44,7 @@ from geodata.osm.intersections import OSMIntersectionReader
from geodata.places.config import place_config
from geodata.polygons.language_polys import *
from geodata.polygons.reverse_geocode import *
from geodata.postal_codes.validation import postcode_regexes
from geodata.i18n.unicode_paths import DATA_DIR
from geodata.text.tokenize import tokenize, token_types
from geodata.text.utils import is_numeric
@@ -1101,10 +1101,11 @@ class OSMAddressFormatter(object):
if u';' in v:
v = random.choice(v.split(u';'))
for p in v.split(','):
if self.valid_postal_code(country, p):
revised_tags[AddressFormatter.POSTCODE] = p.strip()
break
if u',' in v:
for p in v.split(','):
if self.valid_postal_code(country, p):
revised_tags[AddressFormatter.POSTCODE] = p.strip()
break
elif k == AddressFormatter.HOUSE:
building_venue_names.append((v, building_is_generic_place, building_is_known_venue_type))