From bab7a0f961ce90f86e43aa9bdb31f46d49fce2cd Mon Sep 17 00:00:00 2001 From: Al Date: Sun, 24 Jan 2016 00:42:25 -0500 Subject: [PATCH] [osm] splitting streets (way names) on semicolons --- scripts/geodata/osm/osm_address_training_data.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py index fd03e1e3..6f6dde60 100644 --- a/scripts/geodata/osm/osm_address_training_data.py +++ b/scripts/geodata/osm/osm_address_training_data.py @@ -498,12 +498,13 @@ def build_ways_training_data(language_rtree, infile, out_dir): continue for lang, val in name_language.iteritems(): - for s in val: - if lang in languages: - writer.writerow((lang, country, tsv_string(s))) - abbrev = osm_abbreviate(street_types_gazetteer, s, lang, abbreviate_prob=1.0, separate_prob=0.5) - if abbrev != s: - writer.writerow((lang, country, tsv_string(abbrev))) + for v in val: + for s in v.split(';'): + if lang in languages: + writer.writerow((lang, country, tsv_string(s))) + abbrev = osm_abbreviate(street_types_gazetteer, s, lang, abbreviate_prob=1.0, separate_prob=0.5) + if abbrev != s: + writer.writerow((lang, country, tsv_string(abbrev))) if i % 1000 == 0 and i > 0: print('did {} ways'.format(i)) i += 1