From 4ecd6c23c6045a8e5842383711335f2d5e158405 Mon Sep 17 00:00:00 2001 From: Al Date: Sat, 8 Apr 2017 15:42:59 -0400 Subject: [PATCH] [formatting] removing the ability to insert city between house number and road in France from discussion in #27 --- resources/formatting/global.yaml | 6 --- .../geodata/address_formatting/formatter.py | 54 +++++++++---------- 2 files changed, 25 insertions(+), 35 deletions(-) diff --git a/resources/formatting/global.yaml b/resources/formatting/global.yaml index a6e45d25..4e8152d7 100644 --- a/resources/formatting/global.yaml +++ b/resources/formatting/global.yaml @@ -698,12 +698,6 @@ countries: unit_before_level: before: level probability: 0.005 - # libpostal issue #27 - city: - city_before_road: - before: road - probability: 0.001 - allow_between_house_number_and_road: true # Andorra, uses same template as France ad_ca: diff --git a/scripts/geodata/address_formatting/formatter.py b/scripts/geodata/address_formatting/formatter.py index 5077eaaa..e1bf2018 100644 --- a/scripts/geodata/address_formatting/formatter.py +++ b/scripts/geodata/address_formatting/formatter.py @@ -286,16 +286,14 @@ class AddressFormatter(object): if k == 'conditional' or not v: continue - allow_between_house_number_and_road = v.get('allow_between_house_number_and_road', False) - if 'before' in v: - val = (self.BEFORE, v['before'], allow_between_house_number_and_road) + val = (self.BEFORE, v['before']) elif 'after' in v: - val = (self.AFTER, v['after'], allow_between_house_number_and_road) + val = (self.AFTER, v['after']) elif 'last' in v: - val = (self.LAST, None, False) + val = (self.LAST, None) elif 'first' in v: - val = (self.FIRST, None, False) + val = (self.FIRST, None) else: raise ValueError('Insertions must contain one of {{first, before, after, last}}. Value was: {}'.format(v)) @@ -670,17 +668,17 @@ class AddressFormatter(object): conditional_insertions = v break - order, other, allow_between_house_number_and_road = None, None, False + order, other = None, None # Check the conditional probabilities first if conditional_insertions is not None: values, probs = conditional_insertions - order, other, allow_between_house_number_and_road = weighted_choice(values, probs) + order, other = weighted_choice(values, probs) # If there are no conditional probabilites or the "default" value was chosen, sample from the marginals if other is None: values, probs = insertions - order, other, allow_between_house_number_and_road = weighted_choice(values, probs) + order, other = weighted_choice(values, probs) # Even though we may change the value of "other" below, use # the original cache key because changes from here on are @@ -705,30 +703,28 @@ class AddressFormatter(object): # house_number, unit, road, which we don't want. So effectively # treat house_number and road as an atomic unit. - if not allow_between_house_number_and_road: + if other == self.HOUSE_NUMBER and component != self.ROAD: + road_tag = self.tag_token(self.ROAD) + house_number_tag = other_token - if other == self.HOUSE_NUMBER and component != self.ROAD: - road_tag = self.tag_token(self.ROAD) - house_number_tag = other_token + if house_number_tag in template and road_tag in template: + road_after_house_number = template.index(road_tag) > template.index(house_number_tag) - if house_number_tag in template and road_tag in template: - road_after_house_number = template.index(road_tag) > template.index(house_number_tag) + if road_after_house_number and order == self.AFTER: + other = self.ROAD + elif not road_after_house_number and order == self.BEFORE: + other = self.ROAD + elif other == self.ROAD and component != self.HOUSE_NUMBER: + house_number_tag = self.tag_token(self.HOUSE_NUMBER) + road_tag = other_token - if road_after_house_number and order == self.AFTER: - other = self.ROAD - elif not road_after_house_number and order == self.BEFORE: - other = self.ROAD - elif other == self.ROAD and component != self.HOUSE_NUMBER: - house_number_tag = self.tag_token(self.HOUSE_NUMBER) - road_tag = other_token + if house_number_tag in template and road_tag in template: + road_before_house_number = template.index(road_tag) < template.index(house_number_tag) - if house_number_tag in template and road_tag in template: - road_before_house_number = template.index(road_tag) < template.index(house_number_tag) - - if road_before_house_number and order == self.AFTER: - other = self.HOUSE_NUMBER - elif not road_before_house_number and order == self.BEFORE: - other = self.HOUSE_NUMBER + if road_before_house_number and order == self.AFTER: + other = self.HOUSE_NUMBER + elif not road_before_house_number and order == self.BEFORE: + other = self.HOUSE_NUMBER if order == self.BEFORE and other_token in template: template = self.insert_component(template, component, before=other)