[openaddresses] Cleaning up house numbers as well, which can sometimes be stored as floats
This commit is contained in:
@@ -14,6 +14,7 @@ from geodata.address_formatting.formatter import AddressFormatter
|
|||||||
from geodata.addresses.components import AddressComponents
|
from geodata.addresses.components import AddressComponents
|
||||||
from geodata.countries.names import country_names
|
from geodata.countries.names import country_names
|
||||||
from geodata.encoding import safe_decode, safe_encode
|
from geodata.encoding import safe_decode, safe_encode
|
||||||
|
from geodata.language_id.disambiguation import UNKNOWN_LANGUAGE
|
||||||
from geodata.math.sampling import cdf, weighted_choice
|
from geodata.math.sampling import cdf, weighted_choice
|
||||||
from geodata.text.utils import is_numeric, is_numeric_strict
|
from geodata.text.utils import is_numeric, is_numeric_strict
|
||||||
|
|
||||||
@@ -133,25 +134,25 @@ class OpenAddressesFormatter(object):
|
|||||||
|
|
||||||
return country_name
|
return country_name
|
||||||
|
|
||||||
def cleanup_number(self, num):
    """Normalize a numeric-looking string such as a house number or postcode.

    Surrounding whitespace is always stripped. A value that parses
    directly as an integer is returned as the stripped string. A value
    that only parses as a float (e.g. ``"123.0"``) is converted to its
    integer part, and any zeros that prefixed the original string are
    put back so the digit width is kept (``"0123.0"`` -> ``"0123"``).
    Anything that is not numeric is returned stripped but otherwise
    unchanged.

    :param num: the raw string value; must support ``.strip()``
    :return: the cleaned-up string
    """
    num = num.strip()

    try:
        int(num)
    except (ValueError, TypeError):
        pass
    else:
        # Already a plain integer string, nothing to clean up.
        return num

    try:
        # int() raises OverflowError (not ValueError) for infinities such
        # as "inf" or "1e999", and ValueError for "nan"; in every such
        # case the value is not a usable number and is left alone.
        whole = int(float(num))
    except (ValueError, TypeError, OverflowError):
        return num

    # NOTE(review): a non-integral value such as "12.5" is truncated to
    # "12" here, as before - confirm this is what callers want.
    zero = six.u('0')
    leading_zeros = len(num) - len(num.lstrip(zero))
    if whole == 0 and leading_zeros:
        # The last "leading" zero is the integer digit itself
        # ("0.0", "00.5"); counting it as padding would duplicate it.
        leading_zeros -= 1

    num = safe_decode(whole)
    if leading_zeros:
        num = six.u('{}{}').format(zero * leading_zeros, num)
    return num
|
||||||
|
|
||||||
def strip_unit_phrases_for_language(self, value, language):
|
def strip_unit_phrases_for_language(self, value, language):
|
||||||
if language in self.unit_type_regexes:
|
if language in self.unit_type_regexes:
|
||||||
@@ -255,10 +256,11 @@ class OpenAddressesFormatter(object):
|
|||||||
house_number = components.get(AddressFormatter.HOUSE_NUMBER, None)
|
house_number = components.get(AddressFormatter.HOUSE_NUMBER, None)
|
||||||
if house_number:
|
if house_number:
|
||||||
house_number = numeric_range_regex.replace(six.u('-'), house_number).strip()
|
house_number = numeric_range_regex.replace(six.u('-'), house_number).strip()
|
||||||
|
house_number = self.cleanup_number(house_number)
|
||||||
|
|
||||||
postcode = components.get(AddressFormatter.POSTCODE, None)
|
postcode = components.get(AddressFormatter.POSTCODE, None)
|
||||||
if postcode:
|
if postcode:
|
||||||
postcode = self.cleanup_postcode(postcode)
|
postcode = self.cleanup_number(postcode)
|
||||||
|
|
||||||
if postcode_strip_non_digit_chars:
|
if postcode_strip_non_digit_chars:
|
||||||
postcode = six.u('').join((c for c in postcode if c.isdigit()))
|
postcode = six.u('').join((c for c in postcode if c.isdigit()))
|
||||||
|
|||||||
Reference in New Issue
Block a user