From 51afc2619bbcc3951074e498e425e6dfcf2e58fd Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 19 Oct 2016 01:23:58 -0400 Subject: [PATCH] [fix] only replace whitespace between words (not, for instance, whitespace around an existing hyphen), and reduce spaced hyphens to a single space --- scripts/geodata/addresses/components.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/geodata/addresses/components.py b/scripts/geodata/addresses/components.py index 8ba68413..8914d49d 100644 --- a/scripts/geodata/addresses/components.py +++ b/scripts/geodata/addresses/components.py @@ -1224,8 +1224,8 @@ class AddressComponents(object): address_components.pop(AddressFormatter.HOUSE_NUMBER, None) name_regex = re.compile('^[\s\-]*(.*?)[\s\-]*$') - whitespace_regex = re.compile('[\s]+') - hyphen_regex = re.compile('[\-]+') + whitespace_regex = re.compile('(?<=[\w])[\s]+(?=[\w])') + hyphen_regex = re.compile('[\s]*[\-]+[\s]*') def dehyphenate_multiword_name(self, name): return self.hyphen_regex.sub(six.u(' '), name)