diff --git a/scripts/geodata/address_formatting/formatter.py b/scripts/geodata/address_formatting/formatter.py
index 30a59725..71de8420 100644
--- a/scripts/geodata/address_formatting/formatter.py
+++ b/scripts/geodata/address_formatting/formatter.py
@@ -806,7 +806,8 @@ class AddressFormatter(object):
         return self.get_template_from_config(self.templates_place_only, country, language=language)
 
     def tagged_tokens(self, name, label):
-        return six.u(' ').join([six.u('{}/{}').format(t.replace(' ', ''), label if t != ',' else self.separator_tag) for t, c in tokenize(name)])
+        name = ftfy.fix_encoding(name)
+        return six.u(' ').join([six.u('{}/{}').format(t.replace(' ', ''), label if t not in (',', '-') else self.separator_tag) for t, c in tokenize(name)])
 
     def template_language_matters(self, country, language):
         return '{}_{}'.format(country.upper(), language) in self.country_formats or '{}_{}'.format(country, language) in self.country_formats
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
index cf9c0b95..aa7b734e 100644
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -12,6 +12,7 @@ cssselect==0.9.1
 distribute==0.7.3
 future==0.15.2
 futures==3.0.5
+ftfy==4.2.0
 gevent==1.1.2
 greenlet==0.4.10
 jmespath==0.9.0
@@ -30,4 +31,4 @@ s3transfer==0.1.3
 six==1.10.0
 ujson==1.35
 urlnorm==1.1.3
-wsgiref==0.1.2
\ No newline at end of file
+wsgiref==0.1.2