[fix] string lengths on the various transliteration rules
This commit is contained in:
@@ -55,7 +55,7 @@ WORD_BOUNDARY_VAR = '${}'.format(WORD_BOUNDARY_VAR_NAME)
|
|||||||
|
|
||||||
word_boundary_var_regex = re.compile(WORD_BOUNDARY_VAR.replace('$', '\$'))
|
word_boundary_var_regex = re.compile(WORD_BOUNDARY_VAR.replace('$', '\$'))
|
||||||
|
|
||||||
EMPTY_TRANSITION = u'\u007f'
|
EMPTY_TRANSITION = u'\u0000'
|
||||||
|
|
||||||
EXCLUDE_TRANSLITERATORS = set([
|
EXCLUDE_TRANSLITERATORS = set([
|
||||||
'Hangul-Latin',
|
'Hangul-Latin',
|
||||||
@@ -509,6 +509,10 @@ def char_permutations(s):
|
|||||||
string_replacements = {
|
string_replacements = {
|
||||||
u'[': u'\[',
|
u'[': u'\[',
|
||||||
u']': u'\]',
|
u']': u'\]',
|
||||||
|
u'(': u'\(',
|
||||||
|
u')': u'\)',
|
||||||
|
u'\\': u'\\\\',
|
||||||
|
u'\u0000': '',
|
||||||
u'': EMPTY_TRANSITION,
|
u'': EMPTY_TRANSITION,
|
||||||
u'*': u'\*',
|
u'*': u'\*',
|
||||||
u'+': u'\+',
|
u'+': u'\+',
|
||||||
@@ -566,15 +570,16 @@ def format_groups(char_types, groups):
|
|||||||
charset_regex = re.compile(r'(?<!\\)\[')
|
charset_regex = re.compile(r'(?<!\\)\[')
|
||||||
|
|
||||||
|
|
||||||
def encode_string(s):
|
def escape_string(s):
|
||||||
return safe_encode(s).encode('string-escape')
|
return s.encode('string-escape')
|
||||||
|
|
||||||
|
|
||||||
def format_rule(rule):
|
def format_rule(rule):
|
||||||
'''
|
'''
|
||||||
Creates the C literal for a given transliteration rule
|
Creates the C literal for a given transliteration rule
|
||||||
'''
|
'''
|
||||||
key = rule[0]
|
key = safe_encode(rule[0])
|
||||||
|
key_len = len(key)
|
||||||
|
|
||||||
pre_context_type = rule[1]
|
pre_context_type = rule[1]
|
||||||
pre_context = rule[2]
|
pre_context = rule[2]
|
||||||
@@ -582,8 +587,9 @@ def format_rule(rule):
|
|||||||
pre_context = 'NULL'
|
pre_context = 'NULL'
|
||||||
pre_context_len = 0
|
pre_context_len = 0
|
||||||
else:
|
else:
|
||||||
|
pre_context = safe_encode(pre_context)
|
||||||
pre_context_len = len(pre_context)
|
pre_context_len = len(pre_context)
|
||||||
pre_context = quote_string(encode_string(pre_context))
|
pre_context = quote_string(escape_string(pre_context))
|
||||||
|
|
||||||
pre_context_max_len = rule[3]
|
pre_context_max_len = rule[3]
|
||||||
|
|
||||||
@@ -594,8 +600,9 @@ def format_rule(rule):
|
|||||||
post_context = 'NULL'
|
post_context = 'NULL'
|
||||||
post_context_len = 0
|
post_context_len = 0
|
||||||
else:
|
else:
|
||||||
|
post_context = safe_encode(post_context)
|
||||||
post_context_len = len(post_context)
|
post_context_len = len(post_context)
|
||||||
post_context = quote_string(encode_string(post_context))
|
post_context = quote_string(escape_string(post_context))
|
||||||
|
|
||||||
post_context_max_len = rule[6]
|
post_context_max_len = rule[6]
|
||||||
|
|
||||||
@@ -604,15 +611,17 @@ def format_rule(rule):
|
|||||||
groups = 'NULL'
|
groups = 'NULL'
|
||||||
groups_len = 0
|
groups_len = 0
|
||||||
else:
|
else:
|
||||||
|
groups = safe_encode(groups)
|
||||||
groups_len = len(groups)
|
groups_len = len(groups)
|
||||||
groups = quote_string(encode_string(groups))
|
groups = quote_string(escape_string(groups))
|
||||||
|
|
||||||
replacement = rule[8]
|
replacement = safe_encode(rule[8])
|
||||||
|
replacement_len = len(replacement)
|
||||||
move = rule[9]
|
move = rule[9]
|
||||||
|
|
||||||
output_rule = (
|
output_rule = (
|
||||||
quote_string(encode_string(key)),
|
quote_string(escape_string(key)),
|
||||||
str(len(key)),
|
str(key_len),
|
||||||
pre_context_type,
|
pre_context_type,
|
||||||
str(pre_context_max_len),
|
str(pre_context_max_len),
|
||||||
pre_context,
|
pre_context,
|
||||||
@@ -623,8 +632,8 @@ def format_rule(rule):
|
|||||||
post_context,
|
post_context,
|
||||||
str(post_context_len),
|
str(post_context_len),
|
||||||
|
|
||||||
quote_string(encode_string(replacement)),
|
quote_string(escape_string(replacement)),
|
||||||
str(len(replacement)),
|
str(replacement_len),
|
||||||
str(move),
|
str(move),
|
||||||
groups,
|
groups,
|
||||||
str(groups_len),
|
str(groups_len),
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user