diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py index 95e02707..50d94826 100644 --- a/scripts/geodata/osm/osm_address_training_data.py +++ b/scripts/geodata/osm/osm_address_training_data.py @@ -414,6 +414,9 @@ def build_address_training_data(langauge_rtree, infile, out_dir, format=False): for k, v in street_language.iteritems(): for s in v: + s = s.strip() + if not s: + continue if k in languages: writer.writerow((k, country, safe_encode(s))) if i % 1000 == 0 and i > 0: