[openaddresses] Adding quick-and-dirty regex-based exclusion list for fields containing various patterns in OpenAddresses, to be used sparingly
This commit is contained in:
@@ -93,6 +93,9 @@ countries:
|
|||||||
us:
|
us:
|
||||||
cldr_country_probability: 0.05
|
cldr_country_probability: 0.05
|
||||||
add_osm_neighborhoods: true
|
add_osm_neighborhoods: true
|
||||||
|
ignore_fields_containing:
|
||||||
|
city:
|
||||||
|
- '\bcounty\b'
|
||||||
subdirs:
|
subdirs:
|
||||||
al:
|
al:
|
||||||
add:
|
add:
|
||||||
|
|||||||
@@ -128,6 +128,9 @@ class OpenAddressesFormatter(object):
|
|||||||
numeric_postcodes_only = bool(self.get_property('numeric_postcodes_only', *configs) or False)
|
numeric_postcodes_only = bool(self.get_property('numeric_postcodes_only', *configs) or False)
|
||||||
postcode_strip_non_digit_chars = bool(self.get_property('postcode_strip_non_digit_chars', *configs) or False)
|
postcode_strip_non_digit_chars = bool(self.get_property('postcode_strip_non_digit_chars', *configs) or False)
|
||||||
|
|
||||||
|
ignore_fields_containing = {field: re.compile(six.u('|').join([six.u('(?:{})').format(safe_decode(v)) for v in value]), re.I | re.UNICODE)
|
||||||
|
for field, value in six.iteritems(dict(self.get_property('ignore_fields_containing', *configs) or {}))}
|
||||||
|
|
||||||
language = self.get_property('language', *configs)
|
language = self.get_property('language', *configs)
|
||||||
|
|
||||||
add_components = self.get_property('add', *configs)
|
add_components = self.get_property('add', *configs)
|
||||||
@@ -174,6 +177,10 @@ class OpenAddressesFormatter(object):
|
|||||||
value = multiple_spaces_regex.sub(six.u(' '), value)
|
value = multiple_spaces_regex.sub(six.u(' '), value)
|
||||||
|
|
||||||
value = value.strip(', ')
|
value = value.strip(', ')
|
||||||
|
|
||||||
|
if key in ignore_fields_containing and ignore_fields_containing[key].search(value):
|
||||||
|
continue
|
||||||
|
|
||||||
if value:
|
if value:
|
||||||
components[key] = value
|
components[key] = value
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user