[cldr] simple Python scanner for creating dynamic scanners for CLDR rule parsing
This commit is contained in:
37
scripts/geodata/i18n/scanner.py
Normal file
37
scripts/geodata/i18n/scanner.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
import re
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Absolute directory containing this file, with symlinks resolved.
this_dir = os.path.realpath(os.path.dirname(__file__))

# Make the directory two levels above this file importable. The path is
# anchored on this_dir rather than on the current working directory, so the
# project-local imports that follow resolve no matter where the script is
# launched from.
sys.path.append(os.path.realpath(os.path.join(this_dir, os.pardir, os.pardir)))
|
||||||
|
|
||||||
|
from geodata.encoding import safe_decode
|
||||||
|
|
||||||
|
class Scanner(object):
    '''
    Simple scanner implementation in Python using regular expression groups.
    Used to create dynamic lexicons for parsing various CLDR files
    without compiling a C scanner. Only C scanners are used at runtime.

    A lexicon is a sequence of (regex, response) pairs. Every regex is
    wrapped in its own capturing group and all of them are joined into a
    single alternation, so earlier entries take precedence over later ones
    when several could match at the same position.

    A response is either:

    * a non-callable value, which is yielded together with the matched text
    * a callable invoked as response(match, token). It may return None to
      emit nothing, or an iterable of (response, token) pairs, each of which
      is yielded as (token, response).
    '''

    def __init__(self, lexicon, flags=re.VERBOSE | re.I | re.UNICODE):
        '''
        Compile the lexicon into one combined regular expression.

        lexicon: iterable of (regex, response) pairs
        flags: flags passed to re.compile. Note that the default includes
               re.VERBOSE, so unescaped whitespace in the patterns is ignored.
        '''
        self.lexicon = lexicon

        entries = list(lexicon)
        # safe_decode is a project helper; presumably it normalizes byte
        # strings to unicode text so the patterns can be joined safely.
        wrapped = [u'({})'.format(safe_decode(regex)) for regex, _ in entries]
        responses = tuple(response for _, response in entries)

        self.regex = re.compile(u'|'.join(wrapped), flags)
        self.responses = responses

        # Map the group number of each alternative's outer group to its
        # response. A pattern that contains capturing groups of its own
        # shifts the numbering of every alternative after it, so a group
        # number cannot simply be used as an index into self.responses.
        self._group_responses = {}
        group_number = 1
        for pattern, response in zip(wrapped, responses):
            self._group_responses[group_number] = response
            # .groups counts the wrapping group plus any nested groups
            group_number += re.compile(pattern, flags).groups

    def scan(self, s):
        '''
        Lazily tokenize s, yielding (token, response) tuples.

        Text that matches none of the lexicon patterns is skipped silently.
        '''
        for match in self.regex.finditer(safe_decode(s)):
            # For nested groups lastindex is the group that closed last,
            # i.e. the outer group of whichever alternative matched.
            group_number = match.lastindex
            if group_number is None:
                # No group participated: only possible for an empty lexicon
                continue

            response = self._group_responses[group_number]
            token = match.group(group_number)

            if not callable(response):
                yield (token, response)
                continue

            results = response(match, token)
            if results is not None:
                # Callables return (response, token) pairs, whereas this
                # generator yields (token, response)
                for sub_response, sub_token in results:
                    yield (sub_token, sub_response)
|
||||||
Reference in New Issue
Block a user