From 824c779107ab31b27c2bfd7be543c96910bdd803 Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 24 Nov 2015 23:22:57 -0500 Subject: [PATCH] [fix] Cutting down training repeatedly on country names --- scripts/geodata/osm/osm_address_training_data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py index bee89c27..68138070 100644 --- a/scripts/geodata/osm/osm_address_training_data.py +++ b/scripts/geodata/osm/osm_address_training_data.py @@ -707,6 +707,9 @@ def build_address_format_training_data_limited(language_rtree, infile, out_dir): have_country = True _ = value.pop(k, None) + if have_country and random.random() < 0.8: + have_country = False + if not value: continue