[osm] adding the excellent ftfy (https://github.com/LuminosoInsight/python-ftfy) to fix Mojibake, etc. in address components
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import csv
|
||||
import ftfy
|
||||
import itertools
|
||||
import os
|
||||
import random
|
||||
@@ -217,6 +218,9 @@ class OpenAddressesFormatter(object):
|
||||
return self.unit_type_regexes[language].sub(six.u(''), value)
|
||||
return value
|
||||
|
||||
def fix_component_encodings(self, components):
    """Return a new dict with every component value encoding-repaired.

    Each value is first passed through ``safe_decode`` and the result is
    handed to ``ftfy.fix_encoding`` (used here to repair mojibake in
    address components). Keys are carried over unchanged and the input
    mapping itself is not modified.
    """
    repaired = {}
    for name, raw_value in six.iteritems(components):
        decoded = safe_decode(raw_value)
        repaired[name] = ftfy.fix_encoding(decoded)
    return repaired
|
||||
|
||||
def formatted_addresses(self, country_dir, path, configs, tag_components=True):
|
||||
abbreviate_street_prob = float(self.get_property('abbreviate_street_probability', *configs))
|
||||
separate_street_prob = float(self.get_property('separate_street_probability', *configs) or 0.0)
|
||||
@@ -344,6 +348,8 @@ class OpenAddressesFormatter(object):
|
||||
continue
|
||||
candidate_languages = candidate_languages.items()
|
||||
|
||||
components = self.fix_component_encodings(components)
|
||||
|
||||
if language is None:
|
||||
language = AddressComponents.address_language(components, candidate_languages)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user