From 85ad3bf0f415a0be077aa2a54653c464b7291f57 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 9 Sep 2016 01:38:39 -0400 Subject: [PATCH] [formatting] allowing a non-default option for components that can be inserted between road and house number --- resources/formatting/global.yaml | 1 + .../geodata/address_formatting/formatter.py | 55 ++++++++++--------- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/resources/formatting/global.yaml b/resources/formatting/global.yaml index 5da8a6c2..9bde86ab 100644 --- a/resources/formatting/global.yaml +++ b/resources/formatting/global.yaml @@ -674,6 +674,7 @@ countries: city_before_road: before: road probability: 0.001 + allow_between_house_number_and_road: true # Andorra, uses same template as France ad_ca: diff --git a/scripts/geodata/address_formatting/formatter.py b/scripts/geodata/address_formatting/formatter.py index cc8a15d5..3f02e07c 100644 --- a/scripts/geodata/address_formatting/formatter.py +++ b/scripts/geodata/address_formatting/formatter.py @@ -265,14 +265,16 @@ class AddressFormatter(object): if k == 'conditional': continue + allow_between_house_number_and_road = v.get('allow_between_house_number_and_road', False) + if 'before' in v: - val = (self.BEFORE, v['before']) + val = (self.BEFORE, v['before'], allow_between_house_number_and_road) elif 'after' in v: - val = (self.AFTER, v['after']) + val = (self.AFTER, v['after'], allow_between_house_number_and_road) elif 'last' in v: - val = (self.LAST, None) + val = (self.LAST, None, False) elif 'first' in v: - val = (self.FIRST, None) + val = (self.FIRST, None, False) else: raise ValueError('Insertions must contain one of {{first, before, after, last}}. Value was: {}'.format(v)) @@ -597,17 +599,17 @@ class AddressFormatter(object): conditional_insertions = v break - order, other = None, None + order, other, allow_between_house_number_and_road = None, None, False # Check the conditional probabilities first if conditional_insertions is not None: values, probs = conditional_insertions - order, other = weighted_choice(values, probs) + order, other, allow_between_house_number_and_road = weighted_choice(values, probs) # If there are no conditional probabilites or the "default" value was chosen, sample from the marginals if other is None: values, probs = insertions - order, other = weighted_choice(values, probs) + order, other, allow_between_house_number_and_road = weighted_choice(values, probs) # Even though we may change the value of "other" below, use # the original cache key because changes from here on are @@ -630,30 +632,31 @@ class AddressFormatter(object): # house_number, unit, road, which we don't want. So effectively # treat house_number and road as an atomic unit. - other_token = self.tag_token(other) + if not allow_between_house_number_and_road: + other_token = self.tag_token(other) - if other == self.HOUSE_NUMBER and component != self.ROAD: - road_tag = self.tag_token(self.ROAD) - house_number_tag = other_token + if other == self.HOUSE_NUMBER and component != self.ROAD: + road_tag = self.tag_token(self.ROAD) + house_number_tag = other_token - if house_number_tag in template and road_tag in template: - road_after_house_number = template.index(road_tag) > template.index(house_number_tag) + if house_number_tag in template and road_tag in template: + road_after_house_number = template.index(road_tag) > template.index(house_number_tag) - if road_after_house_number and order == self.AFTER: - other = self.ROAD - elif not road_after_house_number and order == self.BEFORE: - other = self.ROAD - elif other == self.ROAD and component != self.HOUSE_NUMBER: - house_number_tag = self.tag_token(self.HOUSE_NUMBER) - road_tag = other_token + if road_after_house_number and order == self.AFTER: + other = self.ROAD + elif not road_after_house_number and order == self.BEFORE: + other = self.ROAD + elif other == self.ROAD and component != self.HOUSE_NUMBER: + house_number_tag = self.tag_token(self.HOUSE_NUMBER) + road_tag = other_token - if house_number_tag in template and road_tag in template: - road_before_house_number = template.index(road_tag) < template.index(house_number_tag) + if house_number_tag in template and road_tag in template: + road_before_house_number = template.index(road_tag) < template.index(house_number_tag) - if road_before_house_number and order == self.AFTER: - other = self.HOUSE_NUMBER - elif not road_before_house_number and order == self.BEFORE: - other = self.HOUSE_NUMBER + if road_before_house_number and order == self.AFTER: + other = self.HOUSE_NUMBER + elif not road_before_house_number and order == self.BEFORE: + other = self.HOUSE_NUMBER if order == self.BEFORE and other_token in template: template = self.insert_component(template, component, before=other)