[osm] Removing postal code keys in formatted language training data
This commit is contained in:
@@ -491,6 +491,12 @@ COUNTRY_KEYS = (
|
|||||||
'country_name',
|
'country_name',
|
||||||
'addr:country',
|
'addr:country',
|
||||||
)
|
)
|
||||||
|
POSTAL_KEYS = (
|
||||||
|
'postcode',
|
||||||
|
'postal_code',
|
||||||
|
'addr:postcode',
|
||||||
|
'addr:postal_code',
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def build_address_format_training_data_limited(language_rtree, infile, out_dir):
|
def build_address_format_training_data_limited(language_rtree, infile, out_dir):
|
||||||
@@ -501,13 +507,15 @@ def build_address_format_training_data_limited(language_rtree, infile, out_dir):
|
|||||||
f = open(os.path.join(out_dir, ADDRESS_FORMAT_DATA_LANGUAGE_FILENAME), 'w')
|
f = open(os.path.join(out_dir, ADDRESS_FORMAT_DATA_LANGUAGE_FILENAME), 'w')
|
||||||
writer = csv.writer(f, 'tsv_no_quote')
|
writer = csv.writer(f, 'tsv_no_quote')
|
||||||
|
|
||||||
|
remove_keys = NAME_KEYS + COUNTRY_KEYS + POSTAL_KEYS
|
||||||
|
|
||||||
for key, value in parse_osm(infile):
|
for key, value in parse_osm(infile):
|
||||||
try:
|
try:
|
||||||
latitude, longitude = latlon_to_floats(value['lat'], value['lon'])
|
latitude, longitude = latlon_to_floats(value['lat'], value['lon'])
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for k in NAME_KEYS + COUNTRY_KEYS:
|
for k in remove_keys:
|
||||||
_ = value.pop(k, None)
|
_ = value.pop(k, None)
|
||||||
|
|
||||||
if not value:
|
if not value:
|
||||||
|
|||||||
Reference in New Issue
Block a user