diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py index fd03e1e3..6f6dde60 100644 --- a/scripts/geodata/osm/osm_address_training_data.py +++ b/scripts/geodata/osm/osm_address_training_data.py @@ -498,12 +498,13 @@ def build_ways_training_data(language_rtree, infile, out_dir): continue for lang, val in name_language.iteritems(): - for s in val: - if lang in languages: - writer.writerow((lang, country, tsv_string(s))) - abbrev = osm_abbreviate(street_types_gazetteer, s, lang, abbreviate_prob=1.0, separate_prob=0.5) - if abbrev != s: - writer.writerow((lang, country, tsv_string(abbrev))) + for v in val: + for s in v.split(';'): + if lang in languages: + writer.writerow((lang, country, tsv_string(s))) + abbrev = osm_abbreviate(street_types_gazetteer, s, lang, abbreviate_prob=1.0, separate_prob=0.5) + if abbrev != s: + writer.writerow((lang, country, tsv_string(abbrev))) if i % 1000 == 0 and i > 0: print('did {} ways'.format(i)) i += 1