From 4344c5fdf37f508d90f68331eec22fdfa665a2fb Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 27 Dec 2016 23:10:36 -0500 Subject: [PATCH] [formatting] adding non-zero invert probabilities to all the former Soviet states. Other template insertions can still apply afterward for #125 --- resources/formatting/global.yaml | 63 +++++++++++++++++++ .../geodata/address_formatting/formatter.py | 7 ++- 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/resources/formatting/global.yaml b/resources/formatting/global.yaml index 4e9e0b77..525d3105 100644 --- a/resources/formatting/global.yaml +++ b/resources/formatting/global.yaml @@ -8,6 +8,9 @@ global: ko_rm: en zh_pinyin: en + # Former Soviet states sometimes write addresses general-to-specific + invert_probability: 0.0 + insertions: # For each component, insertions are mutually exclusive # They don't have to sum to 1 (especially for components @@ -857,3 +860,63 @@ countries: kr_en: insertions: *us_template_insertions + + # Russia + ru: + invert_probability: 0.2 + + # Ukraine + uk: + invert_probability: 0.2 + + # Belarus (using inverted by default) + by: + invert_probability: 0.4 + + # Kazakhstan (using inverted by default) + kz: + invert_probability: 0.4 + + # Kyrgyzstan (using inverted by default) + kg: + invert_probability: 0.4 + + # Latvia + lv: + invert_probability: 0.2 + + # Lithuania + lt: + invert_probability: 0.2 + + # Estonia + ee: + invert_probability: 0.2 + + # Armenia + am: + invert_probability: 0.2 + + # Azerbaijan + az: + invert_probability: 0.2 + + # Georgia + ge: + invert_probability: 0.2 + + # Moldova + md: + invert_probability: 0.2 + + # Uzbekistan + uz: + invert_probability: 0.2 + + # Tajikistan + tj: + invert_probability: 0.2 + + # Turkmenistan + tm: + invert_probability: 0.2 diff --git a/scripts/geodata/address_formatting/formatter.py b/scripts/geodata/address_formatting/formatter.py index 64334ae9..52e2d0b2 100644 --- a/scripts/geodata/address_formatting/formatter.py +++ 
b/scripts/geodata/address_formatting/formatter.py @@ -630,8 +630,13 @@ class AddressFormatter(object): invert_probability = self.country_invert_probabilities.get(country, self.global_invert_probability) if random.random() < invert_probability: - template = self.inverted(template) cache_keys.append('inverted') + cache_key = tuple(sorted(cache_keys)) + if cache_key in self.template_cache: + template = self.template_cache[cache_key] + else: + template = self.inverted(template) + self.template_cache[cache_key] = template for component in sorted(components, key=self.component_order.get): scope = country