[openaddresses] In some OpenAddresses data sets the house number field is just a copy of the street name, so to be safe, drop the house number component whenever it is not numeric
This commit is contained in:
@@ -10,6 +10,7 @@ from geodata.address_formatting.formatter import AddressFormatter
|
||||
from geodata.addresses.components import AddressComponents
|
||||
from geodata.countries.names import country_names
|
||||
from geodata.math.sampling import cdf, weighted_choice
|
||||
from geodata.text.utils import is_numeric
|
||||
|
||||
from geodata.csv_utils import tsv_string, unicode_csv_reader
|
||||
|
||||
@@ -125,6 +126,10 @@ class OpenAddressesFormatter(object):
|
||||
separate_prob=separate_street_prob)
|
||||
components[AddressFormatter.ROAD] = street
|
||||
|
||||
house_number = components.get(AddressFormatter.HOUSE_NUMBER, None)
|
||||
if house_number and not is_numeric(house_number):
|
||||
components.pop(AddressFormatter.HOUSE_NUMBER)
|
||||
|
||||
unit = components.get(AddressFormatter.UNIT, None)
|
||||
if unit is not None:
|
||||
unit = abbreviate(unit_types_gazetteer, unit, language,
|
||||
|
||||
Reference in New Issue
Block a user