[fix] untagged formatted addresses

This commit is contained in:
Al
2015-10-04 02:02:59 -04:00
parent 89d0fd5718
commit cfa57c96a3

View File

@@ -578,17 +578,16 @@ def build_address_format_training_data(language_rtree, infile, out_dir, tag_comp
formatted_addresses.append(formatted_address) formatted_addresses.append(formatted_address)
for formatted_address in formatted_addresses: for formatted_address in formatted_addresses:
if formatted_address is not None: if formatted_address and formatted_address.strip():
formatted_address = tsv_string(formatted_address) formatted_address = tsv_string(formatted_address)
if not formatted_address or not formatted_address.strip(): if not formatted_address or not formatted_address.strip():
continue continue
row = (language, country, formatted_address) row = (language, country, formatted_address)
writer.writerow(row) writer.writerow(row)
else: elif formatted_address and formatted_address.strip():
if formatted_address is not None: formatted_address = tsv_string(formatted_address)
formatted_address = tsv_string(formatted_address) writer.writerow([formatted_address])
writer.writerow([formatted_address])
i += 1 i += 1
if i % 1000 == 0 and i > 0: if i % 1000 == 0 and i > 0: