From 0c1b12b65cb2a04560b8bb82c5f98bb1d12e6cac Mon Sep 17 00:00:00 2001
From: Al
Date: Tue, 26 Jul 2016 18:00:38 -0400
Subject: [PATCH] [fix] Use local language with script e.g. ja_rm in place training data

When a tag key ends in a language code with a suffix (e.g. ja_rm) whose
base code is one of the local languages, add that suffixed code to the
local languages with the same default flag as its base language.

---
 scripts/geodata/osm/formatter.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/scripts/geodata/osm/formatter.py b/scripts/geodata/osm/formatter.py
index f473e84a..870ce4c7 100644
--- a/scripts/geodata/osm/formatter.py
+++ b/scripts/geodata/osm/formatter.py
@@ -392,6 +392,18 @@ class OSMAddressFormatter(object):
         all_local_languages = set([l for l, d in local_languages])
         random_languages = set(INTERNET_LANGUAGE_DISTRIBUTION)
 
+        language_defaults = OrderedDict(local_languages)
+
+        # A tag suffix such as ja_rm inherits the default flag of its base language code
+        for tag in tags:
+            if ':' in tag:
+                tag, lang = tag.rsplit(':', 1)
+                normalized = lang.lower()
+                base_lang = normalized.split('_', 1)[0]
+                if normalized not in all_local_languages and base_lang in all_local_languages:
+                    local_languages.append((lang, language_defaults[base_lang]))
+                    all_local_languages.add(lang)
+
         more_than_one_official_language = len([lang for lang, default in local_languages if default]) > 1
 
         containing_ids = [(b['type'], b['id']) for b in osm_components]