[osm] Untagged formatted OSM addresses try to use language-namespaced tags
This commit is contained in:
@@ -638,18 +638,31 @@ def build_address_format_training_data_limited(language_rtree, infile, out_dir):
|
|||||||
if not name_language:
|
if not name_language:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
formatted_address_untagged = formatter.format_address(country, value, tag_components=False)
|
single_language = len(name_language) == 1
|
||||||
if formatted_address_untagged is not None:
|
for lang, val in name_language.iteritems():
|
||||||
formatted_address_untagged = tsv_string(formatted_address_untagged)
|
if lang not in languages:
|
||||||
|
continue
|
||||||
|
|
||||||
for k, v in name_language.iteritems():
|
address_dict = value.copy()
|
||||||
for s in v:
|
for k in address_dict.keys():
|
||||||
if k in languages:
|
namespaced_val = u'{}:{}'.format(k, lang)
|
||||||
writer.writerow((k, country, formatted_address_untagged))
|
if namespaced_val in address_dict:
|
||||||
|
address_dict[k] = d[namespaced_val]
|
||||||
|
elif not single_language:
|
||||||
|
address_dict.pop(k)
|
||||||
|
|
||||||
i += 1
|
if not address_dict:
|
||||||
if i % 1000 == 0 and i > 0:
|
continue
|
||||||
print 'did', i, 'formatted addresses'
|
|
||||||
|
formatted_address_untagged = formatter.format_address(country, address_dict, tag_components=False)
|
||||||
|
if formatted_address_untagged is not None:
|
||||||
|
formatted_address_untagged = tsv_string(formatted_address_untagged)
|
||||||
|
|
||||||
|
writer.writerow((lang, country, formatted_address_untagged))
|
||||||
|
|
||||||
|
i += 1
|
||||||
|
if i % 1000 == 0 and i > 0:
|
||||||
|
print 'did', i, 'formatted addresses'
|
||||||
|
|
||||||
|
|
||||||
def build_address_training_data(langauge_rtree, infile, out_dir, format=False):
|
def build_address_training_data(langauge_rtree, infile, out_dir, format=False):
|
||||||
|
|||||||
Reference in New Issue
Block a user