[openaddresses] removing spaces in numeric ranges in OpenAddresses, sometimes see things like '12 -23'
This commit is contained in:
@@ -5,7 +5,7 @@ import re
|
|||||||
import six
|
import six
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from geodata.addresses.unit import Unit
|
from geodata.addresses.units import Unit
|
||||||
from geodata.address_expansions.abbreviations import abbreviate
|
from geodata.address_expansions.abbreviations import abbreviate
|
||||||
from geodata.address_expansions.gazetteers import street_types_gazetteer, unit_types_gazetteer
|
from geodata.address_expansions.gazetteers import street_types_gazetteer, unit_types_gazetteer
|
||||||
from geodata.address_formatting.formatter import AddressFormatter
|
from geodata.address_formatting.formatter import AddressFormatter
|
||||||
@@ -25,6 +25,7 @@ OPENADDRESSES_PARSER_DATA_CONFIG = os.path.join(this_dir, os.pardir, os.pardir,
|
|||||||
OPENADDRESS_FORMAT_DATA_TAGGED_FILENAME = 'openaddresses_formatted_addresses_tagged.tsv'
|
OPENADDRESS_FORMAT_DATA_TAGGED_FILENAME = 'openaddresses_formatted_addresses_tagged.tsv'
|
||||||
OPENADDRESS_FORMAT_DATA_FILENAME = 'openaddresses_formatted_addresses.tsv'
|
OPENADDRESS_FORMAT_DATA_FILENAME = 'openaddresses_formatted_addresses.tsv'
|
||||||
|
|
||||||
|
numeric_range_regex = re.compile(six.u('[\s]*\-[\s]*'))
|
||||||
not_applicable_regex = re.compile('^\s*n\.?\s*/?\s*a\.?\s*$', re.I)
|
not_applicable_regex = re.compile('^\s*n\.?\s*/?\s*a\.?\s*$', re.I)
|
||||||
|
|
||||||
|
|
||||||
@@ -192,7 +193,7 @@ class OpenAddressesFormatter(object):
|
|||||||
|
|
||||||
house_number = components.get(AddressFormatter.HOUSE_NUMBER, None)
|
house_number = components.get(AddressFormatter.HOUSE_NUMBER, None)
|
||||||
if house_number:
|
if house_number:
|
||||||
house_number = house_number.strip()
|
house_number = numeric_range_regex.sub(six.u('-'), house_number).strip()  # compiled patterns expose sub(), not replace(); collapses '12 -23' to '12-23'
|
||||||
|
|
||||||
postcode = components.get(AddressFormatter.POSTCODE, None)
|
postcode = components.get(AddressFormatter.POSTCODE, None)
|
||||||
if postcode:
|
if postcode:
|
||||||
|
|||||||
Reference in New Issue
Block a user