# Provenance: extracted from git patch 1ef57ee7d217d7b6f1192a34d723919a72175d1b
# From: Al
# Date: Tue, 26 Jul 2016 17:42:29 -0400
# Subject: [PATCH] [i18n/postcodes] Fetching postcode regexes from the data
#     source used by Google's libaddressinput, caches requests for the length
#     of the running program (e.g. generating parser data, so the regexes will
#     get updated over time).
# File: scripts/geodata/i18n/google.py (new file)

import re
import requests
import six.moves.urllib_parse as urlparse
import ujson

# Rebind the ``json`` name inside requests.models to ujson (presumably so that
# Response.json() decodes with ujson -- confirm against the installed
# requests version).
requests.models.json = ujson


GOOGLE_I18N_API = 'http://i18napis.appspot.com'
GOOGLE_ADDRESS_DATA_API = urlparse.urljoin(GOOGLE_I18N_API, 'address/data/')


class GoogleI18N(object):
    '''
    Fetches data from e.g. http://i18napis.appspot.com/address/data/GB
    and caches it in a dictionary for each country. These requests are
    lightweight, so for a given run of a program, max 250 requests
    will be made.
    '''
    def __init__(self):
        # Maps lowercased country code -> decoded JSON response
        # ({} when the request did not succeed).
        self.responses = {}

    def get(self, country_code):
        '''
        Return the decoded address data for country_code.

        The lookup is case-insensitive: the cache key is the lowercased
        code and the URL uses the uppercased code. An unsuccessful HTTP
        response is cached as an empty dict and returned as an empty dict,
        so every call for the same country yields the same value and the
        request is not repeated.
        '''
        key = country_code.lower()
        cached = self.responses.get(key)
        if cached is not None:
            return cached

        url = urlparse.urljoin(GOOGLE_ADDRESS_DATA_API, country_code.upper())
        response = requests.get(url)
        # Previously a failed request cached {} but returned None on the
        # first call only; return the cached value consistently instead.
        data = response.json() if response.ok else {}
        self.responses[key] = data
        return data


google_i18n = GoogleI18N()


class PostcodeRegexes(object):
    '''
    Compiles and caches, per country, the postcode regex found under the
    'zip' key of the data returned by google_i18n.
    '''
    def __init__(self):
        # Not read by this class; kept so the attribute stays available to
        # any external code that touches it.
        self.responses = {}
        # Maps lowercased country code -> compiled regex, or None when the
        # country has no usable 'zip' expression.
        self.postcode_regexes = {}

    def get(self, country_code):
        '''
        Return a case-insensitive compiled postcode regex for country_code,
        or None when no 'zip' expression is available for that country.

        Both outcomes are cached. A membership test is used rather than
        dict.get() so that a cached None is honoured instead of triggering
        another lookup.
        '''
        key = country_code.lower()
        if key in self.postcode_regexes:
            return self.postcode_regexes[key]

        response = google_i18n.get(country_code)
        postcode_expression = response.get('zip') if response else None

        regex = None
        if postcode_expression:
            regex = re.compile(postcode_expression, re.I)

        # google_i18n caches failures for the life of the process, so the
        # answer cannot change later; caching None here is safe.
        self.postcode_regexes[key] = regex
        return regex


postcode_regexes = PostcodeRegexes()