diff --git a/resources/parser/data_sets/openaddresses.yaml b/resources/parser/data_sets/openaddresses.yaml index d8f1a9c9..a0ca6fb8 100644 --- a/resources/parser/data_sets/openaddresses.yaml +++ b/resources/parser/data_sets/openaddresses.yaml @@ -54,7 +54,7 @@ global: component: unit ignore_simple_units: &ignore_simple_units - - &simple_unit_regex "^(?:[A-Z]?[0-9]*|[0-9]*[A-Z]?)$" + - &simple_unit_regex "^(?:[A-Z]?-?[0-9]*(?:[\\s]*/[\\s]*[0-9]*)?|[0-9]*(?:[\\s]*/[\\s]*[0-9]*)?-?[A-Z]?|[A-Z]*[0-9]+[A-Z]?/[0-9]*[A-Z][0-9]*|[0-9]*[A-Z][0-9]*/[0-9]+[A-Z]?)$" # Turned off by default to avoid performance penalty for reverse geocoding add_osm_boundaries: false @@ -3389,6 +3389,13 @@ countries: ignore_fields_containing: unit: *ignore_simple_units - filename: city_of_columbia.csv + fields: *fields_with_unit + non_numeric_units: true + ignore_fields_containing: + unit: + - *simple_unit_regex + # Too ambiguous + - "^park$" - filename: city_of_kansas_city.csv - filename: city_of_perryville.csv add_osm_boundaries: true