[transliteration] Resolve HTML escapes first in the Latin-ASCII transliteration, since the characters they decode to may need to be resolved further in subsequent steps
This commit is contained in:
@@ -1185,11 +1185,11 @@ EXISTING_STEP = 'EXISTING_STEP'
|
|||||||
PREPEND_STEP = 'PREPEND_STEP'
|
PREPEND_STEP = 'PREPEND_STEP'
|
||||||
|
|
||||||
|
|
||||||
html_escapes = {'&{};'.format(name): escape_string(safe_encode(unichr(value)))
|
html_escapes = {'&{};'.format(name): safe_encode(unichr(value))
|
||||||
for name, value in htmlentitydefs.name2codepoint.iteritems()
|
for name, value in htmlentitydefs.name2codepoint.iteritems()
|
||||||
}
|
}
|
||||||
|
|
||||||
html_escapes.update({'&#{};'.format(i): escape_string(safe_encode(unichr(i)))
|
html_escapes.update({'&#{};'.format(i): safe_encode(unichr(i))
|
||||||
for i in xrange(NUM_CHARS)
|
for i in xrange(NUM_CHARS)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -1204,6 +1204,10 @@ latin_lower_rule = quote_string(latin_lower_rule)
|
|||||||
supplemental_transliterations = {
|
supplemental_transliterations = {
|
||||||
'latin-ascii': [
|
'latin-ascii': [
|
||||||
# Prepend transformations get applied in the reverse order of their appearance here
|
# Prepend transformations get applied in the reverse order of their appearance here
|
||||||
|
(PREPEND_STEP, [(quote_string(name), str(len(name)), CONTEXT_TYPE_NONE, '0', 'NULL', '0', CONTEXT_TYPE_NONE, '0', 'NULL', '0', quote_string(escape_string(value)), str(len(value)), 'NULL', '0', 'NULL', '0')
|
||||||
|
for name, value in html_escapes.iteritems()
|
||||||
|
]
|
||||||
|
),
|
||||||
(PREPEND_STEP, [
|
(PREPEND_STEP, [
|
||||||
# German transliterations not handled by standard NFD normalization
|
# German transliterations not handled by standard NFD normalization
|
||||||
# ä => ae
|
# ä => ae
|
||||||
@@ -1241,10 +1245,7 @@ supplemental_transliterations = {
|
|||||||
|
|
||||||
|
|
||||||
]),
|
]),
|
||||||
(PREPEND_STEP, [(quote_string(name), str(len(name)), CONTEXT_TYPE_NONE, '0', 'NULL', '0', CONTEXT_TYPE_NONE, '0', 'NULL', '0', quote_string(value), '0', 'NULL', '0', 'NULL', '0')
|
|
||||||
for name, value in html_escapes.iteritems()
|
|
||||||
]
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user