diff --git a/scripts/geodata/names/normalization.py b/scripts/geodata/names/normalization.py
new file mode 100644
index 00000000..a93521ed
--- /dev/null
+++ b/scripts/geodata/names/normalization.py
@@ -0,0 +1,31 @@
+from __future__ import unicode_literals
+import re
+
+from geodata.encoding import safe_decode
+
+name_prefixes = ['{} '.format(s) for s in (  # trailing space: descriptor must be a separate word
+    'city of',
+    'township of',
+    'municipality of',
+    'borough of',
+    'london borough of',
+    'town of',
+)]
+
+name_suffixes = [' {}'.format(s) for s in (  # leading space: descriptor must be a separate word
+    'township',
+    'municipality',
+)]
+# Alternatives are grouped so that ^ / $ anchor every one of them, not just the first / last
+name_prefix_regex = re.compile('^(?:{})'.format('|'.join(name_prefixes)), re.I | re.UNICODE)
+name_suffix_regex = re.compile('(?:{})$'.format('|'.join(name_suffixes)), re.I | re.UNICODE)
+
+
+def replace_name_prefixes(name):
+    '''Return name, passed through safe_decode, with any name_prefix_regex match removed'''
+    return name_prefix_regex.sub('', safe_decode(name))
+
+
+def replace_name_suffixes(name):
+    '''Return name, passed through safe_decode, with any name_suffix_regex match removed'''
+    return name_suffix_regex.sub('', safe_decode(name))
diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py
index 89e4e5ed..f0789f61 100644
--- a/scripts/geodata/osm/osm_address_training_data.py
+++ b/scripts/geodata/osm/osm_address_training_data.py
@@ -63,6 +63,7 @@ from geodata.states.state_abbreviations import STATE_ABBREVIATIONS, STATE_EXPANS
 from geodata.language_id.polygon_lookup import country_and_languages
 from geodata.i18n.languages import *
 from geodata.address_formatting.formatter import AddressFormatter
+from geodata.names.normalization import replace_name_prefixes, replace_name_suffixes
 from geodata.osm.extract import *
 from geodata.polygons.language_polys import *
 from geodata.polygons.reverse_geocode import *
@@ -192,6 +193,15 @@ osm_fields = [
 ]
 
 
+REPLACE_COMPONENTS = (  # address components whose names may have standard prefixes/suffixes stripped
+    AddressFormatter.SUBURB,
+    AddressFormatter.CITY_DISTRICT,
+    AddressFormatter.CITY,
+    AddressFormatter.STATE_DISTRICT,
+    AddressFormatter.STATE
+)
+
+
 def write_osm_json(filename, out_filename):
     out = open(out_filename, 'w')
     writer = csv.writer(out, 'tsv_no_quote')
@@ -709,6 +719,18 @@ def build_address_format_training_data(admin_rtree, language_rtree, neighborhood
             if component not in address_components and random.random() < 0.5:
                 address_components[component] = neighborhoods[0]
 
+        '''
+        Name normalization
+        ------------------
+        Probabilistically (60% of the time) strip standard prefixes/suffixes
+        e.g. "London Borough of". Absent or empty components are left alone.
+        '''
+        for component in REPLACE_COMPONENTS:
+            name = address_components.get(component)
+            replacement = replace_name_prefixes(replace_name_suffixes(name)) if name else name
+            if replacement and replacement != name and random.random() < 0.6:
+                address_components[component] = replacement
+
         # Version with all components
         formatted_address = formatter.format_address(country, address_components, tag_components=tag_components, minimal_only=not tag_components)