From 5b829cd5a789b39b88e641f2ef482494d9f1f703 Mon Sep 17 00:00:00 2001 From: Al Date: Sat, 26 Sep 2015 21:49:28 -0400 Subject: [PATCH] [fix] blank values containing punctuation in formatting --- scripts/geodata/address_formatting/formatter.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/geodata/address_formatting/formatter.py b/scripts/geodata/address_formatting/formatter.py index fb4e0b18..7908ee03 100644 --- a/scripts/geodata/address_formatting/formatter.py +++ b/scripts/geodata/address_formatting/formatter.py @@ -167,16 +167,20 @@ class AddressFormatter(object): def strip_component(self, value, tagged=False): if not tagged: start = end = 0 - tokens = tokenize_raw(value) + tokens = tokenize_raw(value.strip()) for token_start, token_length, token_type in tokens: start = token_start if token_type < token_types.PERIOD.value: break + else: + start = token_start + token_length for token_start, token_length, token_type in reversed(tokens): end = token_start + token_length if token_type < token_types.PERIOD.value: break + else: + end = token_start return value[start:end] else: