diff --git a/resources/boundaries/names/global.yaml b/resources/boundaries/names/global.yaml
index a741f1f2..2b24e209 100644
--- a/resources/boundaries/names/global.yaml
+++ b/resources/boundaries/names/global.yaml
@@ -29,6 +29,12 @@ names:
                     - alternative: official_name # e.g. United Kingdom of Great Britain and Northern Ireland
                       probability: 0.01
 
+    regex_replacements:
+        - country: fr
+          pattern: "(?:lyon|paris|marseilles?) ([\\d]+er? arrondissement)" # e.g. Marseille 1er Arrondissement -> 1er Arrondissement
+          replace_with_group: 1
+          replace_probability: 0.5
+
     # This section overrides place names
     exceptions:
         # Boroughs of New York City
diff --git a/scripts/geodata/addresses/components.py b/scripts/geodata/addresses/components.py
index a27f8e7e..18d135e4 100644
--- a/scripts/geodata/addresses/components.py
+++ b/scripts/geodata/addresses/components.py
@@ -694,9 +694,6 @@ class AddressComponents(object):
         include these qualifiers in the training data.
         '''
 
-        simple_name_key = 'name:simple'
-        international_name_key = 'int_name'
-
         if osm_components:
             name_key = ''.join((boundary_names.DEFAULT_NAME_KEY, language_suffix))
             raw_name_key = boundary_names.DEFAULT_NAME_KEY
@@ -721,6 +718,9 @@ class AddressComponents(object):
                     for k in (key, name_key, raw_key, raw_name_key):
                         name = component_value.get(k)
 
+                        if name:
+                            name = boundary_names.name(country, name)
+
                         if name and not (name == existing_city_name and component != AddressFormatter.CITY and drop_duplicate_city_names):
                             if six.u(';') in name:
                                 name = random.choice(name.split(six.u(';'))).strip()
diff --git a/scripts/geodata/boundaries/names.py b/scripts/geodata/boundaries/names.py
index f337b246..e7401594 100644
--- a/scripts/geodata/boundaries/names.py
+++ b/scripts/geodata/boundaries/names.py
@@ -1,4 +1,6 @@
 import os
+import random
+import re
 import six
 import yaml
 
@@ -36,6 +38,20 @@ class BoundaryNames(object):
             component_name_keys, component_probs = alternative_probabilities(component_names)
             self.component_name_keys[component] = (component_name_keys, cdf(component_probs))
 
+        # Regex rewrites keyed by country code. Entries without a 'country' are
+        # stored under None and name() applies them to every country.
+        self.country_regex_replacements = {}
+        for props in nested_get(config, ('names', 'regex_replacements',), default=[]):
+            # Case-insensitive unless the entry sets case_insensitive to a false value.
+            re_flags = re.UNICODE
+            if props.get('case_insensitive', True):
+                re_flags |= re.I
+
+            pattern = re.compile(props['pattern'], re_flags)
+            rule = (pattern, props['replace_with_group'], props['replace_probability'])
+            # setdefault keeps this a plain dict, so lookups never create keys.
+            self.country_regex_replacements.setdefault(props.get('country'), []).append(rule)
+
         self.exceptions = {}
 
         for props in nested_get(config, ('names', 'exceptions'), default=[]):
@@ -61,4 +77,17 @@ class BoundaryNames(object):
         name_keys, probs = self.component_name_keys.get(component, (self.name_keys, self.name_key_probs))
         return weighted_choice(name_keys, probs)
 
+    def name(self, country, name):
+        '''Randomly shorten a place name via the regex rules for country plus the global (None) rules.'''
+        rules = self.country_regex_replacements.get(country, []) + self.country_regex_replacements.get(None, [])
+        for regex, group, prob in rules:
+            match = regex.match(name)
+            if match and random.random() < prob:
+                # An optional group that did not participate yields None;
+                # keep the current name rather than passing None to the next rule.
+                name = match.group(group) or name
+        return name
+
+
+
 boundary_names = BoundaryNames()