[osm] add new method in OSM formatting to extract one or more expanded postal codes from an addr:postcode tag, using the new country-specific rules

This commit is contained in:
Al
2017-02-11 00:53:52 -05:00
parent bbcb6444c8
commit ffc12ec5ab

View File

@@ -566,7 +566,7 @@ class OSMAddressFormatter(object):
def valid_postal_code(self, country, postal_code):
return PostalCodes.is_valid(postal_code, country)
def extract_valid_postal_codes(self, country, postal_code, validate=True):
def parse_valid_postal_codes(self, country, postal_code, validate=True):
'''
"Valid" postal codes
--------------------
@@ -606,6 +606,49 @@ class OSMAddressFormatter(object):
return postal_codes
def expand_postal_codes(self, postal_code, country, languages, osm_components):
'''
Expanded postal codes
---------------------
Clean up OSM addr:postcode tag. Sometimes it will be a full address
e.g. addr:postcode="750 Park Pl, Brooklyn, NY 11216", sometimes
just "NY 11216", etc.
'''
match = self.number_split_regex.search(postal_code)
valid = []
should_strip_components = PostalCodes.should_strip_components(country)
needs_validation = PostalCodes.needs_validation(country)
if not match:
if not should_strip_components and not needs_validation:
valid.append(postal_code)
return valid
if should_strip_components:
postal_code = self.components.strip_components(postal_code, osm_components, country, languages)
if not needs_validation or PostalCodes.is_valid(postal_code, country):
valid.append(PostalCodes.add_country_code(postal_code, country))
else:
candidates = self.number_split_regex.split(postal_code)
if not should_strip_components and not needs_validation:
return [c.strip() for c in candidates]
for candidate in candidates:
if should_strip_components:
candidate = self.components.strip_components(candidate, osm_components, country, languages)
if not candidate:
continue
# If we're splitting, validate every delimited phrase
if PostalCodes.is_valid(candidate, country):
valid.append(PostalCodes.add_country_code(candidate, country))
return valid
def cleanup_place_components(self, address_components, osm_components, country, language, containing_ids, population=None, keep_component=None, population_from_city=False):
revised_address_components = self.components.dropout_places(address_components, osm_components, country, language, population=population, population_from_city=population_from_city)
@@ -738,7 +781,7 @@ class OSMAddressFormatter(object):
postal_codes = []
if postal_code:
postal_codes = self.extract_valid_postal_codes(country, postal_code)
postal_codes = self.parse_valid_postal_codes(country, postal_code)
try:
population = int(tags.get('population', 0))
@@ -1012,6 +1055,9 @@ class OSMAddressFormatter(object):
if not (country and candidate_languages):
return None, None, None
all_local_languages = set([l for l, d in candidate_languages])
random_languages = set(INTERNET_LANGUAGE_DISTRIBUTION)
combined_street = self.combine_street_name(tags)
namespaced_language = self.namespaced_language(tags, candidate_languages)
@@ -1051,21 +1097,14 @@ class OSMAddressFormatter(object):
zone = None
postal_code = revised_tags.get(AddressFormatter.POSTCODE, None)
expanded_postal_codes = []
postcode_needs_validation = PostalCodes.needs_validation(country)
postcode_strip_components = PostalCodes.should_strip_components(country)
if postal_code:
expanded_postal_codes = self.expand_postal_codes(postal_code, osm_components, country, all_local_languages | random_languages)
if postal_code and u';' in postal_code:
postal_code = random.choice(postal_code.split(u';'))
if postal_code and u',' in postal_code:
for p in postal_code.split(u','):
if PostalCodes.is_valid(p, country):
revised_tags[AddressFormatter.POSTCODE] = postal_code = p.strip()
break
elif postcode_strip_components:
else:
if len(expanded_postal_codes) == 1:
revised_tags[AddressFormatter.POSTCODE] = expanded_postal_codes[0]
elif not expanded_postal_codes:
revised_tags.pop(AddressFormatter.POSTCODE)
postal_code = None
@@ -1091,6 +1130,14 @@ class OSMAddressFormatter(object):
if k not in revised_tags and k in (AddressFormatter.HOUSE_NUMBER, AddressFormatter.ROAD):
revised_tags[k] = v
elif k not in revised_tags and k == AddressFormatter.POSTCODE:
expanded_postal_codes = self.expand_postal_codes(v, osm_components, country, all_local_languages | random_languages)
if not expanded_postal_codes:
revised_tags.pop(AddressFormatter.POSTCODE)
postal_code = None
elif len(expanded_postal_codes) == 1:
revised_tags[AddressFormatter.POSTCODE] = expanded_postal_codes[0]
m = number_split_regex.search(v)
if not m:
@@ -1175,11 +1222,18 @@ class OSMAddressFormatter(object):
if alternate_house_number is not None:
original_house_number = address_components.get(AddressFormatter.HOUSE_NUMBER)
address_components[AddressFormatter.HOUSE_NUMBER] = alternate_house_number
formatted_addresses = self.formatted_addresses_with_venue_names(address_components, reduced_venue_names, country, language=language,
tag_components=tag_components, minimal_only=not tag_components)
formatted_addresses.extend(self.formatted_addresses_with_venue_names(address_components, reduced_venue_names, country, language=language,
tag_components=tag_components, minimal_only=not tag_components))
if original_house_number:
address_components[AddressFormatter.HOUSE_NUMBER] = original_house_number
if len(expanded_postal_codes) > 1:
for postal_code in expanded_postal_codes:
address_components[AddressFormatter.POSTCODE] = postal_code
self.components.add_postcode_phrase(address_components)
formatted_addresses.extend(self.formatted_addresses_with_venue_names(address_components, reduced_venue_names, country, language=language,
tag_components=tag_components, minimal_only=not tag_components))
if expanded_only_venue_names:
formatted_addresses.extend(self.formatted_addresses_with_venue_names(expanded_components, expanded_only_venue_names, country, language=language,
tag_components=tag_components, minimal_only=not tag_components))