[openaddresses] Cleaning up house numbers as well, which can sometimes be stored as floats

This commit is contained in:
Al
2016-08-27 01:50:05 -04:00
parent 1a62d67be7
commit 01ac1371b5

View File

@@ -14,6 +14,7 @@ from geodata.address_formatting.formatter import AddressFormatter
from geodata.addresses.components import AddressComponents from geodata.addresses.components import AddressComponents
from geodata.countries.names import country_names from geodata.countries.names import country_names
from geodata.encoding import safe_decode, safe_encode from geodata.encoding import safe_decode, safe_encode
from geodata.language_id.disambiguation import UNKNOWN_LANGUAGE
from geodata.math.sampling import cdf, weighted_choice from geodata.math.sampling import cdf, weighted_choice
from geodata.text.utils import is_numeric, is_numeric_strict from geodata.text.utils import is_numeric, is_numeric_strict
@@ -133,25 +134,25 @@ class OpenAddressesFormatter(object):
return country_name return country_name
def cleanup_number(self, num):
    """Normalize a numeric field that may have been serialized as a float.

    Used for both house numbers and postcodes. A value such as
    ``"123.0"`` becomes ``"123"``; leading zeros written in the input
    (``"00501.0"``) are kept. Anything that is already an integer string,
    is not numeric at all, or is a float with a real fractional part is
    returned stripped but otherwise unchanged.

    :param num: the raw field value as a string
    :return: the cleaned-up string
    """
    num = num.strip()

    # Already a plain integer: nothing to repair.
    try:
        int(num)
    except (ValueError, TypeError):
        pass
    else:
        return num

    try:
        num_float = float(num)
    except (ValueError, TypeError):
        # Not numeric (e.g. "12A"): leave as-is.
        return num

    # is_integer() is False for inf/nan and for values like 12.5. Bailing
    # out here avoids the OverflowError that int(inf) would raise and
    # avoids silently truncating a genuine fractional value.
    if not num_float.is_integer():
        return num

    num_int = int(num_float)

    zero = six.u('0')
    leading_zeros = len(num) - len(num.lstrip(zero))
    # When the value itself is zero, int() already renders one of the
    # zeros that was just counted; don't emit it twice ("0.0" -> "0").
    if num_int == 0 and leading_zeros:
        leading_zeros -= 1

    num = safe_decode(num_int)
    if leading_zeros:
        num = six.u('{}{}').format(zero * leading_zeros, num)

    return num
def strip_unit_phrases_for_language(self, value, language): def strip_unit_phrases_for_language(self, value, language):
if language in self.unit_type_regexes: if language in self.unit_type_regexes:
@@ -255,10 +256,11 @@ class OpenAddressesFormatter(object):
house_number = components.get(AddressFormatter.HOUSE_NUMBER, None) house_number = components.get(AddressFormatter.HOUSE_NUMBER, None)
if house_number: if house_number:
house_number = numeric_range_regex.replace(six.u('-'), house_number).strip() house_number = numeric_range_regex.replace(six.u('-'), house_number).strip()
house_number = self.cleanup_number(house_number)
postcode = components.get(AddressFormatter.POSTCODE, None) postcode = components.get(AddressFormatter.POSTCODE, None)
if postcode: if postcode:
postcode = self.cleanup_postcode(postcode) postcode = self.cleanup_number(postcode)
if postcode_strip_non_digit_chars: if postcode_strip_non_digit_chars:
postcode = six.u('').join((c for c in postcode if c.isdigit())) postcode = six.u('').join((c for c in postcode if c.isdigit()))