[osm] Untagged formatted OSM addresses now use the new language-labeling scheme

This commit is contained in:
Al
2015-08-18 15:12:54 -04:00
parent 3daba2ddcd
commit c09cb4dd82

View File

@@ -619,8 +619,8 @@ def build_address_format_training_data_limited(language_rtree, infile, out_dir):
formatter = AddressFormatter()
formatted_file = open(os.path.join(out_dir, ADDRESS_FORMAT_DATA_LANGUAGE_FILENAME), 'w')
formatted_writer = csv.writer(formatted_file, 'tsv_no_quote')
f = open(os.path.join(out_dir, ADDRESS_FORMAT_DATA_LANGUAGE_FILENAME), 'w')
writer = csv.writer(f, 'tsv_no_quote')
for key, value in parse_osm(infile):
try:
@@ -628,20 +628,24 @@ def build_address_format_training_data_limited(language_rtree, infile, out_dir):
except Exception:
continue
country, default_languages, language_props = country_and_languages(language_rtree, latitude, longitude)
if not (country and default_languages):
continue
for key in NAME_KEYS + COUNTRY_KEYS:
_ = value.pop(key, None)
for k in NAME_KEYS + COUNTRY_KEYS:
_ = value.pop(k, None)
if not value:
continue
country, name_language = get_language_names(language_rtree, key, value, tag_prefix='addr:street')
if not name_language:
continue
formatted_address_untagged = formatter.format_address(country, value, tag_components=False)
if formatted_address_untagged is not None:
formatted_address_untagged = tsv_string(formatted_address_untagged)
formatted_writer.writerow((default_languages[0]['lang'], country, formatted_address_untagged))
for k, v in name_language.iteritems():
for s in v:
if k in languages:
writer.writerow((k, country, formatted_address_untagged))
i += 1
if i % 1000 == 0 and i > 0: