From aeffac10478cc159a18aed259fc734cc5fbc9e51 Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 17 May 2016 21:20:36 -0400 Subject: [PATCH] [formatting] conditional probabilities of template swaps in formatter config --- resources/formatting/global.yaml | 275 ++++++++++++++++++++++++++----- 1 file changed, 233 insertions(+), 42 deletions(-) diff --git a/resources/formatting/global.yaml b/resources/formatting/global.yaml index 655662ef..4afa75d4 100644 --- a/resources/formatting/global.yaml +++ b/resources/formatting/global.yaml @@ -1,22 +1,9 @@ global: # Add these components to templates that don't have them admin_components: - subdivision: - after: - - road - before: - - suburb - - city_district - - city - - island - - state_district - - state - - postcode - - country suburb: after: - road - - subdivision before: - city_district - city @@ -36,7 +23,7 @@ global: - state - postcode - country - # This is added to all the templates but only makes it in + # This is added to all the templates but only makes it into island nations, Hawaii, etc. island: after: - road @@ -134,10 +121,87 @@ global: first: true probability: 0.1 + care_of: + care_of_after_attention: + after: attention + probability: 0.9 + care_of_after_house: + after: house + probability: 0.1 + + subdivision: + subdivision_before_suburb: + before: suburb + probability: 1.0 + # Overrides for languages (better for e.g. covering all French-speaking countries) languages: en: insertions: + building: + building_after_house: + after: house + probability: 0.6 + building_after_road: + after: road + probability: 0.3 + building_before_suburb: + before: suburb + probability: 0.1 + conditional: + - component: subdivision + probabilities: + building_after_house: + after: house + probability: 0.6 + building_after_road: + after: road + probability: 0.2 + building_before_subdivision: + before: subdivision + probability: 0.2 + entrance: + # e.g. 123 East 45th St, 6th Floor, NYC + entrance_after_road: + after: road + probability: 0.75 + entrance_before_house: + before: house + probability: 0.1 + # e.g. Da Vinci House, 1st Floor, 44 Saffron Hill, London + entrance_before_house_number: + before: house_number + probability: 0.15 + conditional: + - component: building + probabilities: + # default: 0.8 + entrance_after_building: + after: building + probability: 0.2 + + + staircase: + # e.g. 123 East 45th St, Staircase C, NYC + staircase_after_road: + after: road + probability: 0.5 + # e.g. Floor 1, Da Vinci House, 44 Saffron Hill, London + staircase_before_house: + before: house + probability: 0.1 + # e.g. Da Vinci House, 1st Floor, 44 Saffron Hill, London + staircase_before_house_number: + before: house_number + probability: 0.4 + conditional: + - component: entrance + probabilities: + # default: 0.1 + staircase_after_entrance: + after: entrance + probability: 0.9 + level: # e.g. 123 East 45th St, 6th Floor, NYC level_after_road: @@ -148,10 +212,24 @@ languages: before: house probability: 0.25 # e.g. Da Vinci House, 1st Floor, 44 Saffron Hill, London - level_before_road: - before: road + level_before_house_number: + before: house_number probability: 0.25 + conditional: + - component: staircase + probabilities: + # default: 0.4 + level_after_staircase: + after: staircase + probability: 0.6 + - component: entrance + probabilities: + # default: 0.4 + level_after_entrance: + after: entrance + probability: 0.6 + unit: # e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London unit_before_house: @@ -166,20 +244,87 @@ languages: # e.g. Da Vinci House, 44 Saffron Hill, Flat 18, London (not as common in UK) unit_after_road: after: road - probability: 0.1 + probability: 0.2 - # e.g. Floor 5, Apt 6 - unit_after_level: - after: level - probability: 0.09 - - # e.g. Apt. 6, 5/F (less common) - unit_before_level: - before: level - probability: 0.01 + conditional: + - component: level + probabilities: + unit_before_house: + before: house + probability: 0.1 + # e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London + unit_before_house_number: + before: house_number + probability: 0.1 + # e.g. Floor 5, Apt 6 + unit_after_level: + after: level + probability: 0.79 + # e.g. Apt. 6, 5/F (less common) + unit_before_level: + before: level + probability: 0.01 es: insertions: + building: + building_after_house_number: + after: house_number + probability: 0.8 + building_after_house: + after: house + probability: 0.05 + building_before_suburb: + before: suburb + probability: 0.15 + conditional: + - component: subdivision + probabilities: + building_after_house_number: + after: house_number + probability: 0.8 + building_before_subdivision: + before: subdivision + probability: 0.2 + + entrance: + # e.g. 123 East 45th St, 6th Floor, NYC + entrance_after_house_number: + after: house_number + probability: 0.8 + entrance_before_house: + before: house + probability: 0.2 + conditional: + - component: building + probabilities: + entrance_after_building: + after: building + probability: 0.9 + + staircase: + # e.g. 123 East 45th St, Staircase C, NYC + staircase_after_house_number: + after: house_number + probability: 0.8 + # e.g. Floor 1, Da Vinci House, 44 Saffron Hill, London + staircase_before_house: + before: house + probability: 0.2 + conditional: + - component: entrance + probabilities: + # default: 0.1 + staircase_after_entrance: + after: entrance + probability: 0.9 + - component: building + probabilities: + # default: 0.1 + staircase_after_building: + after: building + probability: 0.9 + level: # e.g. Calle Ruiz de Alarcón 23 piso 3 level_after_house_number: @@ -194,21 +339,67 @@ languages: before: road probability: 0.1 + conditional: + - component: staircase + probabilities: + # default: 0.1 + level_after_staircase: + after: staircase + probability: 0.9 + - component: entrance + probabilities: + # default: 0.1 + level_after_entrance: + after: entrance + probability: 0.9 + - component: building + probabilities: + # default: 0.1 + level_after_building: + after: building + probability: 0.9 + unit: unit_before_house: before: house - probability: 0.05 - unit_before_house_number: - before: house_number - probability: 0.05 - # e.g. Piso 3 Dpto 12 (most common) - unit_after_level: - after: level - probability: 0.8 - # e.g. Apto 6, 2o piso (less common) - unit_before_level: - before: level + probability: 0.2 + unit_after_house_number: + after: house_number + probability: 0.7 + unit_before_road: + before: road probability: 0.1 + conditional: + - component: level + probabilities: + # default: 0.1 + # e.g. Piso 3 Dpto 12 (most common) + unit_after_level: + after: level + probability: 0.85 + # e.g. Apto 6, 2o piso (less common) + unit_before_level: + before: level + probability: 0.05 + - component: staircase + probabilities: + # default: 0.1 + unit_after_staircase: + after: staircase + probability: 0.9 + - component: entrance + probabilities: + # default: 0.1 + unit_after_entrance: + after: entrance + probability: 0.9 + - component: building + probabilities: + # default: 0.1 + unit_after_building: + after: building + probability: 0.9 + fr: # libpostal issue #27 @@ -247,15 +438,15 @@ countries: # e.g. 123 East 45th St, 6th Floor, NYC level_after_road: after: road - probability: 0.75 + probability: 0.875 # e.g. Floor 1, Da Vinci House, 44 Saffron Hill, London level_before_house: before: house - probability: 0.125 + probability: 0.005 # e.g. Da Vinci House, 1st Floor, 44 Saffron Hill, London - level_before_road: - before: road - probability: 0.125 + level_before_house_number: + before: house_number + probability: 0.12 unit: # e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London