[openaddresses] Adding quick-and-dirty regex-based exclusion list for fields containing various patterns in OpenAddresses, to be used sparingly
This commit is contained in:
@@ -93,6 +93,9 @@ countries:
|
||||
us:
|
||||
cldr_country_probability: 0.05
|
||||
add_osm_neighborhoods: true
|
||||
ignore_fields_containing:
|
||||
city:
|
||||
# Single-quoted on purpose: in a double-quoted YAML scalar "\b" is the
# backspace escape (U+0008), not the regex word-boundary assertion.
- '\bcounty\b'
|
||||
subdirs:
|
||||
al:
|
||||
add:
|
||||
|
||||
@@ -128,6 +128,9 @@ class OpenAddressesFormatter(object):
|
||||
numeric_postcodes_only = bool(self.get_property('numeric_postcodes_only', *configs) or False)
|
||||
postcode_strip_non_digit_chars = bool(self.get_property('postcode_strip_non_digit_chars', *configs) or False)
|
||||
|
||||
ignore_fields_containing = {field: re.compile(six.u('|').join([six.u('(?:{})').format(safe_decode(v)) for v in value]), re.I | re.UNICODE)
|
||||
for field, value in six.iteritems(dict(self.get_property('ignore_fields_containing', *configs) or {}))}
|
||||
|
||||
language = self.get_property('language', *configs)
|
||||
|
||||
add_components = self.get_property('add', *configs)
|
||||
@@ -174,6 +177,10 @@ class OpenAddressesFormatter(object):
|
||||
value = multiple_spaces_regex.sub(six.u(' '), value)
|
||||
|
||||
value = value.strip(', ')
|
||||
|
||||
if key in ignore_fields_containing and ignore_fields_containing[key].search(value):
|
||||
continue
|
||||
|
||||
if value:
|
||||
components[key] = value
|
||||
|
||||
|
||||
Reference in New Issue
Block a user