38 lines
1.2 KiB
Python
38 lines
1.2 KiB
Python
import re
|
|
import os
|
|
import sys
|
|
|
|
# Absolute, symlink-resolved directory that contains this file.
this_dir = os.path.realpath(os.path.dirname(__file__))

# Make the directory two levels above this file importable. The path is
# anchored at this_dir rather than at the current working directory, so the
# result does not depend on where the interpreter was launched from.
# NOTE(review): presumably this is what lets the `geodata` import below
# resolve -- confirm against the repository layout.
sys.path.append(os.path.realpath(os.path.join(this_dir, os.pardir, os.pardir)))
|
|
|
|
from geodata.encoding import safe_decode
|
|
|
|
class Scanner(object):
    '''
    Simple scanner implementation in Python using regular expression groups.
    Used to create dynamic lexicons for parsing various CLDR files
    without compiling a C scanner. Only C scanners are used at runtime

    A lexicon is an iterable of (regex, response) pairs. All regexes are
    combined into one alternation, each wrapped in its own capturing group,
    and the group that matched selects the response.
    '''

    def __init__(self, lexicon, flags=re.VERBOSE | re.I | re.UNICODE):
        '''
        Compile the lexicon into a single regular expression.

        lexicon: iterable of (regex, response) pairs. A response is either
                 a plain value or a callable taking (match, token).
        flags:   flags passed to re.compile for the combined pattern.

        Raises ValueError if the lexicon is empty or an entry is not a pair.
        '''
        self.lexicon = lexicon

        wrapped_patterns = []
        responses = []
        # Maps the absolute group number of each entry's wrapper group to the
        # position of that entry in self.responses.
        response_index = {}
        next_group = 1

        for regex, response in lexicon:
            wrapped = u'({})'.format(safe_decode(regex))
            response_index[next_group] = len(responses)
            wrapped_patterns.append(wrapped)
            responses.append(response)
            # A lexicon regex may contain capturing groups of its own, which
            # shift the group number of every later wrapper. Count this
            # entry's groups (wrapper included) to find where the next
            # wrapper lands.
            next_group += re.compile(wrapped, flags).groups

        if not wrapped_patterns:
            raise ValueError('lexicon must contain at least one (regex, response) pair')

        self.regex = re.compile(u'|'.join(wrapped_patterns), flags)
        self.responses = tuple(responses)
        self._response_index = response_index

    def scan(self, s):
        '''
        Generate (token, response) tuples for every lexicon match in s.

        For a non-callable response, the matched text is yielded with that
        response. A callable response is invoked as response(match, token)
        and may return None (nothing is yielded) or an iterable of
        (response, token) pairs, each yielded as (token, response).
        Text that matches no lexicon entry is skipped.
        '''
        for match in self.regex.finditer(safe_decode(s)):
            # The wrapper group closes last, so lastindex is always the
            # absolute number of the wrapper group of the entry that matched.
            group = match.lastindex
            response = self.responses[self._response_index[group]]
            token = match.group(group)

            if not callable(response):
                yield (token, response)
                continue

            results = response(match, token)
            if results is not None:
                for result_response, result_token in results:
                    yield (result_token, result_response)
|