[cldr] simple Python scanner for creating dynamic scanners for CLDR rule parsing
This commit is contained in:
37
scripts/geodata/i18n/scanner.py
Normal file
37
scripts/geodata/i18n/scanner.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
|
||||
this_dir = os.path.realpath(os.path.dirname(__file__))
|
||||
sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
|
||||
|
||||
from geodata.encoding import safe_decode
|
||||
|
||||
class Scanner(object):
|
||||
'''
|
||||
Simple scanner implementation in Python using regular expression groups.
|
||||
Used to create dynamic lexicons for parsing various CLDR files
|
||||
without compiling a C scanner. Only C scanners are used at runtime
|
||||
'''
|
||||
|
||||
def __init__(self, lexicon, flags=re.VERBOSE | re.I | re.UNICODE):
|
||||
self.lexicon = lexicon
|
||||
|
||||
regexes, responses = zip(*lexicon)
|
||||
|
||||
self.regex = re.compile(u'|'.join([u'({})'.format(safe_decode(r)) for r in regexes]), flags)
|
||||
self.responses = responses
|
||||
|
||||
def scan(self, s):
|
||||
|
||||
for match in self.regex.finditer(safe_decode(s)):
|
||||
i = match.lastindex
|
||||
response = self.responses[i - 1]
|
||||
token = match.group(i)
|
||||
if not callable(response):
|
||||
yield (token, response)
|
||||
else:
|
||||
responses = response(match, token)
|
||||
if responses is not None:
|
||||
for response, token in responses:
|
||||
yield (token, response)
|
||||
Reference in New Issue
Block a user