[openaddresses] moving postcode fixes before validation. Adding regex for validating Russian house numbers in the Ukraine
This commit is contained in:
@@ -38,8 +38,9 @@ unknown_regex = re.compile('\bunknown\b', re.I)
|
|||||||
not_applicable_regex = re.compile('^\s*n\.?\s*/?\s*a\.?\s*$', re.I)
|
not_applicable_regex = re.compile('^\s*n\.?\s*/?\s*a\.?\s*$', re.I)
|
||||||
sin_numero_regex = re.compile('^\s*s\s*/\s*n\s*$', re.I)
|
sin_numero_regex = re.compile('^\s*s\s*/\s*n\s*$', re.I)
|
||||||
|
|
||||||
russian_number_regex_str = safe_decode(r'(?:(?:[\d]+\w?(?:[\-/](?:(?:[\d]+\w?)|\w))*)|(?:[\d]+\s*\w?)|(?:\b\w\b))')
|
russian_number_regex_str = safe_decode(r'(?:№\s*)?(?:(?:[\d]+\w?(?:[\-/](?:(?:[\d]+\w?)|\w))*)|(?:[\d]+\s*\w?)|(?:\b\w\b))')
|
||||||
dom_korpus_stroyeniye_regex = re.compile(safe_decode('(?:(?:дом(?=\s)|д\.?)\s*)?{}(?:(?:\s*,|\s+)\s*(?:(?:корпус(?=\s)|к\.?)\s*)?{})?(?:(?:\s*,|\s+)\s*(?:(?:строение(?=\s)|с\.?)\s*)?{})?\s*$').format(russian_number_regex_str, russian_number_regex_str, russian_number_regex_str), re.I | re.U)
|
dom_korpus_stroyeniye_regex = re.compile(safe_decode('(?:(?:дом(?=\s)|д\.?)\s*)?{}(?:(?:\s*,|\s+)\s*(?:(?:корпус(?=\s)|к\.?)\s*){})?(?:(?:\s*,|\s+)\s*(?:(?:строение(?=\s)|с\.?)\s*){})?\s*$').format(russian_number_regex_str, russian_number_regex_str, russian_number_regex_str), re.I | re.U)
|
||||||
|
uchastok_regex = re.compile(safe_decode('{}\s*(?:,?\s*участок\s+{}\s*)?$').format(russian_number_regex_str, russian_number_regex_str), re.I | re.U)
|
||||||
bea_nomera_regex = re.compile(safe_decode('^\s*б\s*/\s*н\s*$'), re.I)
|
bea_nomera_regex = re.compile(safe_decode('^\s*б\s*/\s*н\s*$'), re.I)
|
||||||
fraction_regex = re.compile('^\s*[\d]+[\s]*/[\s]*(?:[\d]+|[a-z]|[\d]+[a-z]|[a-z][\d]+)[\s]*$', re.I)
|
fraction_regex = re.compile('^\s*[\d]+[\s]*/[\s]*(?:[\d]+|[a-z]|[\d]+[a-z]|[a-z][\d]+)[\s]*$', re.I)
|
||||||
number_space_letter_regex = re.compile('^[\d]+\s+[a-z]$', re.I)
|
number_space_letter_regex = re.compile('^[\d]+\s+[a-z]$', re.I)
|
||||||
@@ -141,7 +142,9 @@ class OpenAddressesFormatter(object):
|
|||||||
def validate_russian_house_number(cls, house_number):
|
def validate_russian_house_number(cls, house_number):
|
||||||
if dom_korpus_stroyeniye_regex.match(house_number):
|
if dom_korpus_stroyeniye_regex.match(house_number):
|
||||||
return True
|
return True
|
||||||
if bea_nomera_regex.match(house_number):
|
elif uchastok_regex.match(house_number):
|
||||||
|
return True
|
||||||
|
elif bea_nomera_regex.match(house_number):
|
||||||
return True
|
return True
|
||||||
return cls.validate_house_number(house_number)
|
return cls.validate_house_number(house_number)
|
||||||
|
|
||||||
@@ -323,6 +326,18 @@ class OpenAddressesFormatter(object):
|
|||||||
if key == AddressFormatter.ROAD and language == SPANISH:
|
if key == AddressFormatter.ROAD and language == SPANISH:
|
||||||
value = self.components.spanish_street_name(value)
|
value = self.components.spanish_street_name(value)
|
||||||
|
|
||||||
|
if key == AddressFormatter.POSTCODE:
|
||||||
|
value = self.cleanup_number(value)
|
||||||
|
|
||||||
|
if postcode_strip_non_digit_chars:
|
||||||
|
value = six.u('').join((c for c in value if c.isdigit()))
|
||||||
|
|
||||||
|
if value and not is_numeric(value) and numeric_postcodes_only:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
if postcode_length:
|
||||||
|
value = value.zfill(postcode_length)[:postcode_length]
|
||||||
|
|
||||||
if key in AddressFormatter.BOUNDARY_COMPONENTS and key != AddressFormatter.POSTCODE:
|
if key in AddressFormatter.BOUNDARY_COMPONENTS and key != AddressFormatter.POSTCODE:
|
||||||
if add_osm_boundaries:
|
if add_osm_boundaries:
|
||||||
continue
|
continue
|
||||||
@@ -401,29 +416,12 @@ class OpenAddressesFormatter(object):
|
|||||||
if house_number is not None:
|
if house_number is not None:
|
||||||
components[AddressFormatter.HOUSE_NUMBER] = house_number
|
components[AddressFormatter.HOUSE_NUMBER] = house_number
|
||||||
|
|
||||||
postcode = components.get(AddressFormatter.POSTCODE, None)
|
|
||||||
if postcode:
|
|
||||||
postcode = self.cleanup_number(postcode)
|
|
||||||
|
|
||||||
if postcode_strip_non_digit_chars:
|
|
||||||
postcode = six.u('').join((c for c in postcode if c.isdigit()))
|
|
||||||
|
|
||||||
if postcode and not is_numeric(postcode) and numeric_postcodes_only:
|
|
||||||
components.pop(AddressFormatter.POSTCODE)
|
|
||||||
postcode = None
|
|
||||||
else:
|
|
||||||
if postcode_length:
|
|
||||||
postcode = postcode.zfill(postcode_length)[:postcode_length]
|
|
||||||
|
|
||||||
if postcode:
|
|
||||||
components[AddressFormatter.POSTCODE] = postcode
|
|
||||||
elif AddressFormatter.POSTCODE in components:
|
|
||||||
components.pop(AddressFormatter.POSTCODE)
|
|
||||||
|
|
||||||
unit = components.get(AddressFormatter.UNIT, None)
|
unit = components.get(AddressFormatter.UNIT, None)
|
||||||
|
|
||||||
street_required = country != Countries.JAPAN and country not in Countries.FORMER_SOVIET_UNION_COUNTRIES
|
street_required = country != Countries.JAPAN and country not in Countries.FORMER_SOVIET_UNION_COUNTRIES
|
||||||
|
|
||||||
|
postcode = components.get(AddressFormatter.POSTCODE, None)
|
||||||
|
|
||||||
# If there's a postcode, we can still use just the city/state/postcode, otherwise discard
|
# If there's a postcode, we can still use just the city/state/postcode, otherwise discard
|
||||||
if (not street and street_required and not house_number) or (street and house_number and (street.lower() == house_number.lower())) or (unit and street and street.lower() == unit.lower()):
|
if (not street and street_required and not house_number) or (street and house_number and (street.lower() == house_number.lower())) or (unit and street and street.lower() == unit.lower()):
|
||||||
if not postcode:
|
if not postcode:
|
||||||
|
|||||||
Reference in New Issue
Block a user