diff --git a/resources/parser/data_sets/openaddresses.yaml b/resources/parser/data_sets/openaddresses.yaml index fdfa1aa2..faa4a795 100644 --- a/resources/parser/data_sets/openaddresses.yaml +++ b/resources/parser/data_sets/openaddresses.yaml @@ -52,8 +52,8 @@ global: UNIT: component: unit - simple_unit_regex: &simple_unit_regex - - "^(?:[A-Z]?[0-9]*|[0-9]*[A-Z]?)$" + ignore_simple_units: &ignore_simple_units + - &simple_unit_regex "^(?:[A-Z]?[0-9]*|[0-9]*[A-Z]?)$" # Turned off by default to avoid performance penalty for reverse geocoding add_osm_boundaries: false @@ -1824,7 +1824,7 @@ countries: fields: *fields_with_unit non_numeric_units: true ignore_fields_containing: - unit: *simple_unit_regex + unit: *ignore_simple_units add_osm_boundaries: true city_replacements: city: Scotsdale @@ -2013,7 +2013,7 @@ countries: fields: *fields_with_unit non_numeric_units: true ignore_fields_containing: - unit: *simple_unit_regex + unit: *ignore_simple_units - filename: city_of_erie.csv - filename: city_of_fort_collins.csv add_osm_boundaries: true @@ -2321,6 +2321,8 @@ countries: state_district: DeKalb County ignore_fields_containing: unit: + # Ignore unit number without a type e.g. "123" or "A" + - *simple_unit_regex # Ignore units like "Fl 2" as that's a level in our parlance - "\\bfl\\b" - filename: dooly.csv @@ -2354,6 +2356,14 @@ countries: non_numeric_units: true city_replacements: state_district: Fayette County + - filename: forsyth.csv + add_osm_boundaries: false + fields: *fields_with_unit + non_numeric_units: true + city_replacements: + state_district: Forsyth County + ignore_fields_containing: + unit: *ignore_simple_units - filename: fulton.csv add_osm_boundaries: false fields: *fields_with_state_and_unit @@ -2366,10 +2376,15 @@ countries: non_numeric_units: true city_replacements: state_district: Gwinnett County + ignore_fields_containing: + unit: *ignore_simple_units - filename: habersham.csv add_osm_boundaries: false city_replacements: state_district: Habersham County + - filename: hall.csv + city_replacements: + state_district: Hall County - filename: harris.csv city_replacements: state_district: Harris County @@ -2385,6 +2400,9 @@ countries: - filename: irwin.csv city_replacements: state_district: Irwin County + - filename: jackson.csv + city_replacements: + state_district: Jackson County - filename: jefferson.csv city_replacements: state_district: Jefferson County @@ -2882,7 +2900,7 @@ countries: fields: *fields_with_unit non_numeric_units: true ignore_fields_containing: - unit: *simple_unit_regex + unit: *ignore_simple_units add_osm_boundaries: true city_replacements: state_district: Campbell County @@ -3159,7 +3177,7 @@ countries: fields: *fields_with_unit non_numeric_units: true ignore_fields_containing: - unit: *simple_unit_regex + unit: *ignore_simple_units - filename: city_of_columbia.csv - filename: city_of_kansas_city.csv - filename: city_of_perryville.csv @@ -3181,7 +3199,7 @@ countries: fields: *fields_with_unit non_numeric_units: true ignore_fields_containing: - unit: *simple_unit_regex + unit: *ignore_simple_units - filename: st_charles.csv city_replacements: state_district: Saint Charles County @@ -3612,7 +3630,7 @@ countries: fields: *fields_with_unit non_numeric_units: true ignore_fields_containing: - unit: *simple_unit_regex + unit: *ignore_simple_units - filename: broome.csv - filename: city_of_new_york.csv add_osm_boundaries: true @@ -3788,7 +3806,7 @@ countries: fields: *fields_with_unit non_numeric_units: true ignore_fields_containing: - unit: *simple_unit_regex + unit: *ignore_simple_units - filename: city_of_corvallis.csv - filename: city_of_salem.csv - filename: clatsop.csv @@ -3852,7 +3870,7 @@ countries: fields: *fields_with_unit non_numeric_units: true ignore_fields_containing: - unit: *simple_unit_regex + unit: *ignore_simple_units - filename: allegheny.csv - filename: armstrong.csv - filename: bedford.csv @@ -3875,7 +3893,7 @@ countries: add_osm_boundaries: true non_numeric_units: true ignore_fields_containing: - unit: *simple_unit_regex + unit: *ignore_simple_units city_replacements: state_district: Chester County - filename: clearfield.csv @@ -3932,7 +3950,7 @@ countries: fields: *fields_with_unit non_numeric_units: true ignore_fields_containing: - unit: *simple_unit_regex + unit: *ignore_simple_units add_osm_boundaries: true city_replacements: state_district: York County @@ -4351,7 +4369,7 @@ countries: fields: *fields_with_unit non_numeric_units: true ignore_fields_containing: - unit: *simple_unit_regex + unit: *ignore_simple_units - filename: walla_walla.csv wi: @@ -4493,7 +4511,7 @@ countries: add_osm_boundaries: true fields: *fields_with_unit ignore_fields_containing: - unit: *simple_unit_regex + unit: *ignore_simple_units city_replacements: state_district: Teton County