From 6ac4ff6021b918f1b793a617bd4e5126384638ad Mon Sep 17 00:00:00 2001 From: Al Date: Sun, 31 May 2015 02:07:36 -0400 Subject: [PATCH] [transliteration] Adding reverse/bidirectional transforms e.g. for Katakana-Latin --- scripts/geodata/i18n/transliteration_rules.py | 129 +++++++++++++----- 1 file changed, 96 insertions(+), 33 deletions(-) diff --git a/scripts/geodata/i18n/transliteration_rules.py b/scripts/geodata/i18n/transliteration_rules.py index 74497ce1..a8eae57a 100644 --- a/scripts/geodata/i18n/transliteration_rules.py +++ b/scripts/geodata/i18n/transliteration_rules.py @@ -74,6 +74,13 @@ GROUP_INDICATOR_CHAR = u"\x06" BEGIN_SET_CHAR = u"\x0e" END_SET_CHAR = u"\x0f" +BIDIRECTIONAL_TRANSLITERATORS = { + 'fullwidth-halfwidth': 'halfwidth-fullwidth' +} + +REVERSE_TRANSLITERATORS = { + 'latin-katakana': 'katakana-latin', +} EXCLUDE_TRANSLITERATORS = set([ 'hangul-latin', @@ -206,7 +213,7 @@ CONTEXT_TYPE_REGEX = 'CONTEXT_TYPE_REGEX' all_transforms = set() pre_transform_full_regex = re.compile('::[\s]*(.*)[\s]*', re.UNICODE) -pre_transform_regex = re.compile('[\s]*([^\s\(\)]*)[\s]*(?:\(.*\)[\s]*)?', re.UNICODE) +pre_transform_regex = re.compile('[\s]*([^\s\(\)]*)[\s]*(?:\((.*)\)[\s]*)?', re.UNICODE) assignment_regex = re.compile(u"(?:[\s]*(\$[^\s\=]+)[\s]*\=[\s]*(?!=[\s])(.*)(?)|[←<→>↔])(?:[\s]*(?!=[\s])(.*)(?