[osm] checking for valid street names in OSM street-only training data so e.g. the street name is not just a simple number like "831"

This commit is contained in:
Al
2017-01-19 02:34:29 -05:00
parent 7c04627f60
commit a931c5ddc9
2 changed files with 5 additions and 2 deletions

View File

@@ -1502,9 +1502,12 @@ class AddressComponents(object):
# Matches placeholder street values ("none", "null", "not applicable", "n/a"),
# case-insensitively and ignoring surrounding whitespace. Written as a raw
# string so that ``\s`` reaches the regex engine without relying on Python
# passing unknown string escapes through unchanged.
invalid_street_regex = re.compile(r'^\s*(?:none|null|not applicable|n\s*/\s*a)\s*$', re.I)
def street_name_is_valid(self, street):
    """Return True if ``street`` looks like a usable street name.

    A name is rejected when it is None, when it matches
    ``self.invalid_street_regex``, or when it contains no alphanumeric
    character at all (which includes the empty string).

    NOTE(review): digits count as alphanumeric, so a purely numeric name
    such as "831" is accepted by this check.

    :param street: candidate street name, or None.
    :return: bool
    """
    if street is None:
        return False
    if self.invalid_street_regex.match(street):
        return False
    # At least one letter or digit is required; punctuation-only or
    # whitespace-only values are not street names.
    return any(ch.isalnum() for ch in street)
def cleanup_street(self, address_components):
    """Remove the road component when its value is not a usable street name.

    Looks up ``AddressFormatter.ROAD`` in ``address_components`` and deletes
    that key, in place, if ``street_name_is_valid`` rejects the value.

    :param address_components: mutable mapping of component label to value.
    :return: None; the mapping is modified in place.
    """
    street = address_components.get(AddressFormatter.ROAD)
    if not self.street_name_is_valid(street):
        # ``street`` is also None when the key is absent, so pop with a
        # default: a bare pop() would raise KeyError for addresses that
        # have no road component at all.
        address_components.pop(AddressFormatter.ROAD, None)
newline_regex = re.compile('[\n]+') newline_regex = re.compile('[\n]+')

View File

@@ -1597,7 +1597,7 @@ class OSMAddressFormatter(object):
for v, is_base in vals: for v, is_base in vals:
for street_name in v.split(';'): for street_name in v.split(';'):
street_name = street_name.strip() street_name = street_name.strip()
if street_name: if street_name and self.components.street_name_is_valid(street_name):
address_components = {AddressFormatter.ROAD: street_name} address_components = {AddressFormatter.ROAD: street_name}
self.components.add_admin_boundaries(address_components, osm_components, self.components.add_admin_boundaries(address_components, osm_components,