[cldr] simple Python scanner for creating dynamic scanners for CLDR rule parsing

2015-04-14 15:49:24 -04:00
parent efdcbc9eef
commit 5fa03587fb
1 changed files with 37 additions and 0 deletions
--- a/scripts/geodata/i18n/scanner.py
+++ b/scripts/geodata/i18n/scanner.py
@@ -0,0 +1,37 @@
 import re
 import os
 import sys
 this_dir = os.path.realpath(os.path.dirname(__file__))
 sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
 from geodata.encoding import safe_decode
 class Scanner(object):
    '''
    Simple scanner implementation in Python using regular expression groups.
    Used to create dynamic lexicons for parsing various CLDR files
    without compiling a C scanner. Only C scanners are used at runtime
    '''
    def __init__(self, lexicon, flags=re.VERBOSE | re.I | re.UNICODE):
        self.lexicon = lexicon
        regexes, responses = zip(*lexicon)
        self.regex = re.compile(u'|'.join([u'({})'.format(safe_decode(r)) for r in regexes]), flags)
        self.responses = responses
    def scan(self, s):
        for match in self.regex.finditer(safe_decode(s)):
            i = match.lastindex
            response = self.responses[i - 1]
            token = match.group(i)
            if not callable(response):
                yield (token, response)
            else:
                responses = response(match, token)
                if responses is not None:
                    for response, token in responses:
                        yield (token, response)