diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py
index 8e1d69a2..b1733407 100644
--- a/scripts/geodata/osm/osm_address_training_data.py
+++ b/scripts/geodata/osm/osm_address_training_data.py
@@ -644,6 +644,13 @@ NAME_KEYS = (
     'name',
     'addr:housename',
 )
+
+HOUSE_NUMBER_KEYS = (  # spelling variants of the house-number tag key (presumably OSM tags; confirm)
+    'addr:house_number',
+    'addr:housenumber',
+    'house_number'
+)  # concatenated into remove_keys in build_address_format_training_data_limited
+
 COUNTRY_KEYS = (
     'country',
     'country_name',
@@ -673,16 +680,17 @@ def build_address_format_training_data_limited(language_rtree, infile, out_dir):
 
     Example:
 
-    nb      no      Olaf Ryes Plass 8 | Oslo
+    nb      no      Olaf Ryes Plass Oslo
     '''
     i = 0
 
-    formatter = AddressFormatter()
+    # Simple whitespace splitter is all that's necessary
+    formatter = AddressFormatter(splitter=' ')
 
     f = open(os.path.join(out_dir, ADDRESS_FORMAT_DATA_LANGUAGE_FILENAME), 'w')
     writer = csv.writer(f, 'tsv_no_quote')
 
-    remove_keys = NAME_KEYS + COUNTRY_KEYS + POSTAL_KEYS + OSM_IGNORE_KEYS
+    remove_keys = NAME_KEYS + HOUSE_NUMBER_KEYS + COUNTRY_KEYS + POSTAL_KEYS + OSM_IGNORE_KEYS
 
     for key, value, deps in parse_osm(infile):
         try: