From eb7488ab556a4dcb4145b0ce4d4bf77dfba5cda8 Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 23 Nov 2015 13:46:14 -0500 Subject: [PATCH] [fix] Making country replacement probability independent of the probability used for local vs non-local languages --- scripts/geodata/osm/osm_address_training_data.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py index 651f7bee..ee3c22b2 100644 --- a/scripts/geodata/osm/osm_address_training_data.py +++ b/scripts/geodata/osm/osm_address_training_data.py @@ -447,11 +447,12 @@ def build_address_format_training_data(admin_rtree, language_rtree, neighborhood non_local_language = None - r = random.random() - if r < 0.2: - # 20% of the time: add Quattroshapes country + if random.random() < 0.3: + # 30% of the time: add Quattroshapes country address_country = country.upper() + r = random.random() + # 1. 60% of the time: use the country name in the current language or the country's local language if address_country and r < 0.6: localized = None