[addresses] strip phrases like "# 123" off of English street names if they follow a thoroughfare/post-directional phrase whose expansion does not contain highway/route
This commit is contained in:
@@ -978,8 +978,6 @@ class AddressComponents(object):
|
|||||||
address_components[AddressFormatter.CITY_DISTRICT] = address_components.pop(AddressFormatter.CITY)
|
address_components[AddressFormatter.CITY_DISTRICT] = address_components.pop(AddressFormatter.CITY)
|
||||||
address_components[AddressFormatter.CITY] = match.group(1)
|
address_components[AddressFormatter.CITY] = match.group(1)
|
||||||
|
|
||||||
street_unit_suffix_regex = re.compile("^(.+?)(?:\\s+\(?\\s*(?:unit|apartment|apt\.?|suite|ste\.?|bldg\.?|lot)\\b(?:(?:\\s*#|\\s+(?:number|no|no.)\\b)?)).*$", re.I)
|
|
||||||
|
|
||||||
unit_type_regexes = {}
|
unit_type_regexes = {}
|
||||||
|
|
||||||
lang_phrase_dictionaries = [lang for lang, dictionary_type in six.iterkeys(address_phrase_dictionaries.phrases)]
|
lang_phrase_dictionaries = [lang for lang, dictionary_type in six.iterkeys(address_phrase_dictionaries.phrases)]
|
||||||
@@ -994,10 +992,26 @@ class AddressComponents(object):
|
|||||||
re.I | re.UNICODE)
|
re.I | re.UNICODE)
|
||||||
unit_type_regexes[lang] = pattern
|
unit_type_regexes[lang] = pattern
|
||||||
|
|
||||||
|
english_streets = address_phrase_dictionaries.phrases.get((ENGLISH, 'street_types'), [])
|
||||||
|
english_directionals = address_phrase_dictionaries.phrases.get((ENGLISH, 'directionals'), [])
|
||||||
|
english_numbered_route_regex = re.compile('highway|route')
|
||||||
|
english_numbered_route_phrases = set([safe_encode(p) for p in itertools.chain(*[streets for streets in english_streets if english_numbered_route_regex.search(streets[0])])])
|
||||||
|
english_street_phrases = [safe_encode(p) for p in itertools.chain(*(english_streets + english_directionals)) if safe_encode(p) not in english_numbered_route_phrases]
|
||||||
|
english_numbered_unit_regex = re.compile('^(.+ (?:{}))\s*#\s*(?:[\d]+|[a-z]|[a-z][\d]*\-?[\d]+|[\d]+\-?[\d]*[a-z])\s*$'.format(safe_encode('|').join(english_street_phrases)), re.I)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def strip_english_unit_number_suffix(cls, value):
|
||||||
|
match = cls.english_numbered_unit_regex.match(value)
|
||||||
|
if match:
|
||||||
|
return match.group(1)
|
||||||
|
return value
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def strip_unit_phrases_for_language(cls, value, language):
|
def strip_unit_phrases_for_language(cls, value, language):
|
||||||
if language in cls.unit_type_regexes:
|
if language in cls.unit_type_regexes:
|
||||||
return cls.unit_type_regexes[language].sub(six.u(''), value)
|
value = cls.unit_type_regexes[language].sub(six.u(''), value)
|
||||||
|
if language == ENGLISH:
|
||||||
|
value = cls.strip_english_unit_number_suffix(value)
|
||||||
return value
|
return value
|
||||||
|
|
||||||
def abbreviated_state(self, state, country, language):
|
def abbreviated_state(self, state, country, language):
|
||||||
|
|||||||
Reference in New Issue
Block a user