[cldr] simple Python scanner for creating dynamic scanners for CLDR rule parsing

This commit is contained in:
Al
2015-04-14 15:49:24 -04:00
parent efdcbc9eef
commit 5fa03587fb

View File

@@ -0,0 +1,37 @@
import re
import os
import sys
this_dir = os.path.realpath(os.path.dirname(__file__))
sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
from geodata.encoding import safe_decode
class Scanner(object):
'''
Simple scanner implementation in Python using regular expression groups.
Used to create dynamic lexicons for parsing various CLDR files
without compiling a C scanner. Only C scanners are used at runtime
'''
def __init__(self, lexicon, flags=re.VERBOSE | re.I | re.UNICODE):
self.lexicon = lexicon
regexes, responses = zip(*lexicon)
self.regex = re.compile(u'|'.join([u'({})'.format(safe_decode(r)) for r in regexes]), flags)
self.responses = responses
def scan(self, s):
for match in self.regex.finditer(safe_decode(s)):
i = match.lastindex
response = self.responses[i - 1]
token = match.group(i)
if not callable(response):
yield (token, response)
else:
responses = response(match, token)
if responses is not None:
for response, token in responses:
yield (token, response)