diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py
index 8eba1cfd..79ac7034 100644
--- a/scripts/geodata/osm/osm_address_training_data.py
+++ b/scripts/geodata/osm/osm_address_training_data.py
@@ -382,6 +382,14 @@ def osm_abbreviate(gazetteer, s, language, abbreviate_prob=0.3, separate_prob=0.
 
             added = False
 
+            if random.random() > abbreviate_prob:
+                for j, (t_i, c_i) in enumerate(t):
+                    abbreviated.append(tokens[i + j][0])
+                    if c_i != token_types.IDEOGRAPHIC_CHAR:
+                        abbreviated.append(u' ')
+                i += len(t)
+                continue
+
             for lang, dictionary, is_canonical, canonical in data:
                 if lang not in (language, 'all'):
                     continue
@@ -395,7 +403,7 @@ def osm_abbreviate(gazetteer, s, language, abbreviate_prob=0.3, separate_prob=0.
                 suffix = None
                 prefix = None
 
-                if not is_canonical or random.random() > abbreviate_prob:
+                if not is_canonical:
                     continue
 
                 if not is_prefix and not is_suffix:
@@ -405,7 +413,7 @@ def osm_abbreviate(gazetteer, s, language, abbreviate_prob=0.3, separate_prob=0.
                     abbreviated.append(token)
                     if t[-1][1] != token_types.IDEOGRAPHIC_CHAR:
                         abbreviated.append(u' ')
-                    added = True
+                    break
                 elif is_prefix:
                     token = tokens[i][0]
                     prefix, token = token[:length], token[length:]
@@ -417,7 +425,7 @@ def osm_abbreviate(gazetteer, s, language, abbreviate_prob=0.3, separate_prob=0.
                     else:
                         abbreviated.append(token)
                     abbreviated.append(u' ')
-                    added = True
+                    break
                 elif is_suffix:
                     token = tokens[i][0]
 
@@ -451,9 +459,8 @@ def osm_abbreviate(gazetteer, s, language, abbreviate_prob=0.3, separate_prob=0.
                     else:
                         abbreviated.append(abbreviation)
                     abbreviated.append(u' ')
-                    added = True
-
-            if not added:
+                    break
+            else:
                 for j, (t_i, c_i) in enumerate(t):
                     abbreviated.append(tokens[i + j][0])
                     if c_i != token_types.IDEOGRAPHIC_CHAR:
@@ -1392,7 +1399,7 @@ def build_toponym_training_data(language_rtree, infile, out_dir):
     writer = csv.writer(f, 'tsv_no_quote')
 
     for key, value, deps in parse_osm(infile):
-        if not sum((1 for k, v in value.iteritems() if k.startswith('name'))) > 0:
+        if not any((k.startswith('name') for k, v in value.iteritems())):
             continue
 
         try: