[osm] addr:postcode can be all over the place in OSM. Start with postcodes containing commas or semicolons. If addr:postcode (on the address or on its building) contains either, iterate over the values and pick the first one that matches the postcode validation regex for that country.
This commit is contained in:
@@ -562,6 +562,17 @@ class OSMAddressFormatter(object):
|
|||||||
formatted_addresses.append(formatted_address)
|
formatted_addresses.append(formatted_address)
|
||||||
return formatted_addresses
|
return formatted_addresses
|
||||||
|
|
||||||
|
@classmethod
def valid_postal_code(cls, country, postal_code):
    '''
    Check a single postal code against a country's validation regex

    The regex is looked up in postcode_regexes by country. Surrounding
    whitespace on the postal code is ignored.

    Returns True only if a regex exists for the country and it matches
    the entire stripped postal code. Returns False otherwise, including
    when there is no regex for the country or postal_code is None.
    '''
    postcode_regex = postcode_regexes.get(country)
    if not postcode_regex or postal_code is None:
        return False

    postal_code = postal_code.strip()
    match = postcode_regex.match(postal_code)
    # match() only anchors at the start of the string, so additionally
    # require that the match runs all the way to the end
    return bool(match) and match.end() == len(postal_code)
|
||||||
|
|
||||||
def extract_valid_postal_codes(self, country, postal_code, validate=True):
|
def extract_valid_postal_codes(self, country, postal_code, validate=True):
|
||||||
'''
|
'''
|
||||||
"Valid" postal codes
|
"Valid" postal codes
|
||||||
@@ -583,15 +594,11 @@ class OSMAddressFormatter(object):
|
|||||||
if postal_code:
|
if postal_code:
|
||||||
valid_postcode = False
|
valid_postcode = False
|
||||||
if validate:
|
if validate:
|
||||||
postcode_regex = postcode_regexes.get(country)
|
|
||||||
values = number_split_regex.split(postal_code)
|
values = number_split_regex.split(postal_code)
|
||||||
|
for p in values:
|
||||||
if postcode_regex:
|
if self.valid_postal_code(country, p):
|
||||||
for p in values:
|
valid_postcode = True
|
||||||
match = postcode_regex.match(p)
|
postal_codes.append(p)
|
||||||
if match and match.end() == len(p):
|
|
||||||
valid_postcode = True
|
|
||||||
postal_codes.append(p)
|
|
||||||
else:
|
else:
|
||||||
valid_postcode = True
|
valid_postcode = True
|
||||||
|
|
||||||
@@ -599,12 +606,9 @@ class OSMAddressFormatter(object):
|
|||||||
postal_codes = parse_osm_number_range(postal_code, parse_letter_range=False, max_range=1000)
|
postal_codes = parse_osm_number_range(postal_code, parse_letter_range=False, max_range=1000)
|
||||||
if validate:
|
if validate:
|
||||||
valid_postal_codes = []
|
valid_postal_codes = []
|
||||||
postcode_regex = postcode_regexes.get(country)
|
for pc in postal_codes:
|
||||||
if postcode_regex:
|
if self.valid_postal_code(country, pc):
|
||||||
for pc in postal_codes:
|
valid_postal_codes.append(pc)
|
||||||
match = postcode_regex.match(pc)
|
|
||||||
if match and match.end() == len(pc):
|
|
||||||
valid_postal_codes.append(pc)
|
|
||||||
postal_codes = valid_postal_codes
|
postal_codes = valid_postal_codes
|
||||||
|
|
||||||
return postal_codes
|
return postal_codes
|
||||||
@@ -776,13 +780,13 @@ class OSMAddressFormatter(object):
|
|||||||
language_suffix = ''
|
language_suffix = ''
|
||||||
|
|
||||||
if name and name.strip():
|
if name and name.strip():
|
||||||
if six.u(';') in name:
|
if u';' in name:
|
||||||
name = random.choice(name.split(six.u(';')))
|
name = random.choice(name.split(u';'))
|
||||||
elif six.u(',') in name:
|
elif u',' in name:
|
||||||
name = name.split(six.u(','), 1)[0]
|
name = name.split(u',', 1)[0]
|
||||||
|
|
||||||
if six.u('|') in name:
|
if u'|' in name:
|
||||||
name = name.replace(six.u('|'), six.u(''))
|
name = name.replace(u'|', u'')
|
||||||
|
|
||||||
name = self.components.strip_whitespace_and_hyphens(name)
|
name = self.components.strip_whitespace_and_hyphens(name)
|
||||||
|
|
||||||
@@ -1053,6 +1057,20 @@ class OSMAddressFormatter(object):
|
|||||||
num_basements = None
|
num_basements = None
|
||||||
zone = None
|
zone = None
|
||||||
|
|
||||||
|
postal_code = revised_tags.get(AddressFormatter.POSTCODE, None)
|
||||||
|
|
||||||
|
if postal_code and u';' in postal_code:
|
||||||
|
postal_code = random.choice(postal_code.split(u';'))
|
||||||
|
|
||||||
|
if postal_code and u',' in postal_code:
|
||||||
|
for p in postal_code.split(u','):
|
||||||
|
if self.valid_postal_code(country, p):
|
||||||
|
revised_tags[AddressFormatter.POSTCODE] = postal_code = p.strip()
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
revised_tags.pop(AddressFormatter.POSTCODE)
|
||||||
|
postal_code = None
|
||||||
|
|
||||||
building_venue_names = []
|
building_venue_names = []
|
||||||
|
|
||||||
building_components = self.building_components(latitude, longitude)
|
building_components = self.building_components(latitude, longitude)
|
||||||
@@ -1072,8 +1090,21 @@ class OSMAddressFormatter(object):
|
|||||||
building_is_known_venue_type = building_is_known_venue_type or self.is_known_venue_type(building_tags)
|
building_is_known_venue_type = building_is_known_venue_type or self.is_known_venue_type(building_tags)
|
||||||
|
|
||||||
for k, v in six.iteritems(building_tags):
|
for k, v in six.iteritems(building_tags):
|
||||||
if k not in revised_tags and k in (AddressFormatter.HOUSE_NUMBER, AddressFormatter.ROAD, AddressFormatter.POSTCODE):
|
if k not in revised_tags and k in (AddressFormatter.HOUSE_NUMBER, AddressFormatter.ROAD):
|
||||||
revised_tags[k] = v
|
revised_tags[k] = v
|
||||||
|
elif k not in revised_tags and k == AddressFormatter.POSTCODE:
|
||||||
|
m = number_split_regex.search(v)
|
||||||
|
|
||||||
|
if not m:
|
||||||
|
revised_tags[k] = v
|
||||||
|
else:
|
||||||
|
if u';' in v:
|
||||||
|
v = random.choice(v.split(u';'))
|
||||||
|
|
||||||
|
for p in v.split(','):
|
||||||
|
if self.valid_postal_code(country, p):
|
||||||
|
revised_tags[AddressFormatter.POSTCODE] = p.strip()
|
||||||
|
break
|
||||||
elif k == AddressFormatter.HOUSE:
|
elif k == AddressFormatter.HOUSE:
|
||||||
building_venue_names.append((v, building_is_generic_place, building_is_known_venue_type))
|
building_venue_names.append((v, building_is_generic_place, building_is_known_venue_type))
|
||||||
|
|
||||||
@@ -1156,8 +1187,6 @@ class OSMAddressFormatter(object):
|
|||||||
|
|
||||||
formatted_addresses.extend(self.formatted_places(address_components, country, language))
|
formatted_addresses.extend(self.formatted_places(address_components, country, language))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# In Japan an address without places is basically just house_number + metro_station (if given)
|
# In Japan an address without places is basically just house_number + metro_station (if given)
|
||||||
# However, where there are streets, it's useful to have address-only queries as well
|
# However, where there are streets, it's useful to have address-only queries as well
|
||||||
if country != Countries.JAPAN:
|
if country != Countries.JAPAN:
|
||||||
|
|||||||
Reference in New Issue
Block a user