[openaddresses] Changes to the OA config utilizing some of the new cleanup options: adding language to brussels-fr and brussels-nl, and adding New York and New Jersey statewide files, with the understanding that OSM components will be added in NJ and postcodes will be stripped of letters in NY.

This commit is contained in:
Al
2016-08-23 00:38:43 -04:00
parent 8ec288d8f8
commit ed0b49884e
2 changed files with 31 additions and 16 deletions

View File

@@ -8,21 +8,25 @@ global:
separate_street_probability: 0.2
abbreviate_unit_probability: 0.3
separate_unit_probability: 0.2
field_map:
fields:
- field_name: NUMBER
component: house_number
- field_name: STREET
component: road
- field_name: UNIT
component: unit
- field_name: CITY
component: city
- field_name: REGION
component: state
- field_name: POSTCODE
component: postcode
# Units have strong restrictions: each must be a number or a hyphenated number
non_numeric_units: false
countries:
au:
field_map:
fields:
- field_name: NUMBER
component: house_number
- field_name: STREET
@@ -38,24 +42,20 @@ countries:
files:
- filename: countrywide.csv
non_numeric_units: true
nz:
field_map:
- field_name: NUMBER
component: house_number
- field_name: STREET
component: road
- field_name: POSTCODE
component: postcode
files:
- filename: countrywide.csv
- filename: city_of_palmerston_north.csv
be:
subdirs:
wa:
files:
- filename: brussels-fr.csv
language: fr
- filename: brussels-nl.csv
language: nl
us:
cldr_country_probability: 0.05
subdirs:
@@ -115,3 +115,18 @@ countries:
- filename: ventura.csv
- filename: yolo.csv
- filename: yuba.csv
nj:
add:
state: NJ
files:
- filename: statewide.csv
add_osm_boundaries: true
ny:
add:
state: NY
files:
- filename: statewide.csv
strip_alpha_from_postcode: true

View File

@@ -111,17 +111,17 @@ class OpenAddressesFormatter(object):
add_components = self.get_property('add', *configs)
field_map = self.get_property('field_map', *configs)
if not field_map:
fields = self.get_property('fields', *configs)
if not fields:
return
field_map = {f['field_name']: f['component'] for f in field_map}
fields = {f['field_name']: f['component'] for f in fields}
f = open(path)
reader = unicode_csv_reader(f)
headers = reader.next()
header_indices = {i: field_map[k] for i, k in enumerate(headers) if k in field_map}
header_indices = {i: fields[k] for i, k in enumerate(headers) if k in fields}
latitude_index = headers.index('LAT')
longitude_index = headers.index('LON')