From 973466bb1370fc20f54b6acb8209c46b2994b8a3 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 9 Dec 2016 19:48:59 -0500 Subject: [PATCH] [states] adding multiple state abbreviations for states that can have periods in the name like D.C., D.F. in Mexico and Brasil, etc. --- resources/states/au.yaml | 28 +++++++++++++++++++ resources/states/br.yaml | 4 +++ resources/states/mx.yaml | 4 +++ resources/states/us.yaml | 4 +++ scripts/geodata/states/state_abbreviations.py | 25 ++++++++++------- 5 files changed, 55 insertions(+), 10 deletions(-) diff --git a/resources/states/au.yaml b/resources/states/au.yaml index 68b02f0a..c8019f13 100644 --- a/resources/states/au.yaml +++ b/resources/states/au.yaml @@ -1,20 +1,48 @@ +"A.A.T": + en: Australian Antarctic Territory +"A.A.T.": + en: Australian Antarctic Territory AAT: en: Australian Antarctic Territory +"A.C.T": + en: Australian Capital Territory +"A.C.T.": + en: Australian Capital Territory ACT: en: Australian Capital Territory +"J.B.T.": + en: Jervis Bay Territory +"J.B.T": + en: Jervis Bay Territory JBT: en: Jervis Bay Territory +"N.S.W": + en: New South Wales +"N.S.W.": + en: New South Wales NSW: en: New South Wales +"N.T": + en: Northern Territory +"N.T.": + en: Northern Territory NT: en: Northern Territory QLD: en: Queensland +"S.A": + en: South Australia +"S.A.": + en: South Australia SA: en: South Australia TAS: en: Tasmania VIC: en: Victoria +"W.A": + en: Western Australia +"W.A.": + en: Western Australia WA: en: Western Australia \ No newline at end of file diff --git a/resources/states/br.yaml b/resources/states/br.yaml index 752c875f..f64c0445 100644 --- a/resources/states/br.yaml +++ b/resources/states/br.yaml @@ -10,6 +10,10 @@ BA: pt: Bahia CE: pt: Ceará +"D.F": + pt: Distrito Federal +"D.F.": + pt: Distrito Federal DF: pt: Distrito Federal ES: diff --git a/resources/states/mx.yaml b/resources/states/mx.yaml index 7412db6c..dfd69811 100644 --- a/resources/states/mx.yaml +++ b/resources/states/mx.yaml @@ -14,6 
+14,10 @@ CA: es: Coahuila CL: es: Colima +"D.F.": + es: Distrito Federal +"D.F": + es: Distrito Federal DF: es: Distrito Federal DU: diff --git a/resources/states/us.yaml b/resources/states/us.yaml index 5e3862e0..7e971448 100644 --- a/resources/states/us.yaml +++ b/resources/states/us.yaml @@ -12,6 +12,10 @@ CO: en: Colorado CT: en: Connecticut +"D.C": + en: District of Columbia +"D.C.": + en: District of Columbia DC: en: District of Columbia DE: diff --git a/scripts/geodata/states/state_abbreviations.py b/scripts/geodata/states/state_abbreviations.py index 07d980be..e3ad15d1 100644 --- a/scripts/geodata/states/state_abbreviations.py +++ b/scripts/geodata/states/state_abbreviations.py @@ -1,4 +1,5 @@ import os +import random import six import sys import yaml @@ -24,30 +25,34 @@ class StateAbbreviations(object): country = filename.split('.yaml')[0] country_config = yaml.load(open(os.path.join(base_dir, filename))) - country_abbreviations = defaultdict(dict) - country_full_names = defaultdict(dict) + country_abbreviations = defaultdict(list) + country_full_names = defaultdict(list) for abbreviation, vals in six.iteritems(country_config): for language, full_name in six.iteritems(vals): full_name = safe_decode(full_name) abbreviation = safe_decode(abbreviation) - country_abbreviations[full_name.lower()][language] = abbreviation - country_full_names[abbreviation.lower()][language] = full_name + country_abbreviations[(full_name.lower(), language)].append(abbreviation) + country_full_names[(abbreviation.lower(), language)].append(full_name) self.abbreviations[country] = dict(country_abbreviations) self.full_names[country] = dict(country_full_names) def get_abbreviation(self, country, language, state, default=None): - value = nested_get(self.abbreviations, (country.lower(), state.lower(), language.lower())) - if value is DoesNotExist: + values = nested_get(self.abbreviations, (country.lower(), (state.lower(), language.lower()))) + if values is DoesNotExist: return 
default - return value + if len(values) == 1: + return values[0] + return random.choice(values) def get_full_name(self, country, language, state, default=None): - value = nested_get(self.full_names, (country.lower(), state.lower(), language.lower())) - if value is DoesNotExist: + values = nested_get(self.full_names, (country.lower(), (state.lower(), language.lower()))) + if values is DoesNotExist: return default - return value + if len(values) == 1: + return values[0] + return random.choice(values) state_abbreviations = StateAbbreviations()