From e9106698d26f37258c647df88425d01136422d48 Mon Sep 17 00:00:00 2001
From: Al
Date: Thu, 27 Oct 2016 12:01:48 -0400
Subject: [PATCH] [fix] convert newlines

---
 scripts/geodata/addresses/components.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/geodata/addresses/components.py b/scripts/geodata/addresses/components.py
index ef0fdb08..22000a11 100644
--- a/scripts/geodata/addresses/components.py
+++ b/scripts/geodata/addresses/components.py
@@ -1223,6 +1223,7 @@ class AddressComponents(object):
         else:
             address_components.pop(AddressFormatter.HOUSE_NUMBER, None)
 
+    newline_regex = re.compile('[\n]+')
     name_regex = re.compile('^[\s\-]*(.*?)[\s\-]*$')
     whitespace_regex = re.compile('(?<=[\w])[\s]+(?=[\w])')
     hyphen_regex = re.compile('[\s]*[\-]+[\s]*')
@@ -1237,6 +1238,7 @@ class AddressComponents(object):
 
     @classmethod
     def strip_whitespace_and_hyphens(cls, name):
+        name = cls.newline_regex.sub(six.u(' '), name)
         return cls.name_regex.match(name).group(1)
 
     def name_hyphens(self, name, hyphenate_multiword_probability=None, remove_hyphen_probability=None):