From 8c6a4c763c0a0c68b6c818424ad9e7315e8e49e2 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 26 Aug 2016 23:43:53 -0400 Subject: [PATCH] [openaddresses] Increasing minimum length to 3 characters for unit abbreviations in case anything clashes (not a huge issue if a few units are tacked on, but this seems more common in OpenAddresses than OSM) --- scripts/geodata/openaddresses/formatter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/geodata/openaddresses/formatter.py b/scripts/geodata/openaddresses/formatter.py index b7081c61..085b75bd 100644 --- a/scripts/geodata/openaddresses/formatter.py +++ b/scripts/geodata/openaddresses/formatter.py @@ -42,7 +42,7 @@ class OpenAddressesFormatter(object): for (lang, dictionary_type), values in six.iteritems(address_phrase_dictionaries.phrases): if dictionary_type == 'unit_types_numbered': - unit_phrases = itertools.chain(*[safe_encode(p) for p in values if len(p) > 1]) + unit_phrases = itertools.chain(*[safe_encode(p) for p in values if len(p) > 2]) pattern = re.compile(r'\b(?:{})\s+(?:#?\s*)(?:[\d]+|[a-z]|[a-z][\d]+|[\d]+[a-z])\s*$'.format(six.u('|').join(unit_phrases)), re.I | re.UNICODE) unit_type_regexes[lang] = pattern