From 93b3110a491be2f3b9ca2073a180c16af9b36c6a Mon Sep 17 00:00:00 2001 From: Al Date: Sun, 27 Sep 2015 19:25:28 -0400 Subject: [PATCH] [fix] only commas and hyphens need to be eliminated at the end of phrases in untagged address formatting --- scripts/geodata/address_formatting/formatter.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/geodata/address_formatting/formatter.py b/scripts/geodata/address_formatting/formatter.py index 5901a59f..b13c7234 100644 --- a/scripts/geodata/address_formatting/formatter.py +++ b/scripts/geodata/address_formatting/formatter.py @@ -167,18 +167,21 @@ class AddressFormatter(object): def strip_component(self, value, tagged=False): if not tagged: + comma = token_types.COMMA.value + hyphen = token_types.HYPHEN.value + start = end = 0 tokens = tokenize_raw(value.strip()) for token_start, token_length, token_type in tokens: start = token_start - if token_type < token_types.PERIOD.value: + if token_type in (comma, hyphen): break else: start = token_start + token_length for token_start, token_length, token_type in reversed(tokens): end = token_start + token_length - if token_type < token_types.PERIOD.value: + if token_type in (comma, hyphen): break else: end = token_start