Files
libpostal/scripts/geodata/names/normalization.py
2015-12-05 18:41:22 -05:00

33 lines
772 B
Python

from __future__ import unicode_literals
import re
from geodata.encoding import safe_decode
# Administrative designations that can precede a place name
# (e.g. "City of <name>"). Each entry carries a trailing space so that the
# separator between the designation and the name is part of the match.
name_prefixes = [
    s + ' '
    for s in (
        'city of',
        'township of',
        'regional municipality of',
        'municipality of',
        'borough of',
        'london borough of',
        'town of',
    )
]

# Administrative designations that can follow a place name
# (e.g. "<name> Township"). Each entry carries a leading space so that the
# separator between the name and the designation is part of the match.
name_suffixes = [
    ' ' + s
    for s in (
        'township',
        'municipality',
    )
]

# One alternation over all prefixes, anchored to the start of the string;
# matching is case-insensitive.
name_prefix_regex = re.compile(
    '^(?:' + '|'.join(name_prefixes) + ')',
    flags=re.IGNORECASE | re.UNICODE,
)

# One alternation over all suffixes, anchored to the end of the string;
# matching is case-insensitive.
name_suffix_regex = re.compile(
    '(?:' + '|'.join(name_suffixes) + ')$',
    flags=re.IGNORECASE | re.UNICODE,
)
def replace_name_prefixes(name):
    """Return *name* with a leading administrative prefix removed.

    The input is first passed through ``safe_decode``
    (NOTE(review): presumably this normalises byte strings to unicode
    text -- confirm against ``geodata.encoding``). Whatever
    ``name_prefix_regex`` matches is then replaced with the empty string.
    That pattern is anchored to the start of the string, so at most one
    prefix is stripped; a name with no matching prefix comes back as the
    decoded value, unchanged.
    """
    return name_prefix_regex.sub('', safe_decode(name))
def replace_name_suffixes(name):
    """Return *name* with a trailing administrative suffix removed.

    The input is first passed through ``safe_decode``
    (NOTE(review): presumably this normalises byte strings to unicode
    text -- confirm against ``geodata.encoding``). Whatever
    ``name_suffix_regex`` matches is then replaced with the empty string.
    That pattern is anchored to the end of the string, so at most one
    suffix is stripped; a name with no matching suffix comes back as the
    decoded value, unchanged.
    """
    return name_suffix_regex.sub('', safe_decode(name))