From cf706158508bff5c155fab471858589921c96269 Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 11 Aug 2015 23:10:55 -0400 Subject: [PATCH] [transliteration] Doing HTML escapes first in Latin-ASCII transliteration as they may need to be resolved further in subsequent steps --- scripts/geodata/i18n/transliteration_rules.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/geodata/i18n/transliteration_rules.py b/scripts/geodata/i18n/transliteration_rules.py index 70c452e7..5518a7a3 100644 --- a/scripts/geodata/i18n/transliteration_rules.py +++ b/scripts/geodata/i18n/transliteration_rules.py @@ -1185,11 +1185,11 @@ EXISTING_STEP = 'EXISTING_STEP' PREPEND_STEP = 'PREPEND_STEP' -html_escapes = {'&{};'.format(name): escape_string(safe_encode(unichr(value))) +html_escapes = {'&{};'.format(name): safe_encode(unichr(value)) for name, value in htmlentitydefs.name2codepoint.iteritems() } -html_escapes.update({'&#{};'.format(i): escape_string(safe_encode(unichr(i))) +html_escapes.update({'&#{};'.format(i): safe_encode(unichr(i)) for i in xrange(NUM_CHARS) }) @@ -1204,6 +1204,10 @@ latin_lower_rule = quote_string(latin_lower_rule) supplemental_transliterations = { 'latin-ascii': [ # Prepend transformations get applied in the reverse order of their appearance here + (PREPEND_STEP, [(quote_string(name), str(len(name)), CONTEXT_TYPE_NONE, '0', 'NULL', '0', CONTEXT_TYPE_NONE, '0', 'NULL', '0', quote_string(escape_string(value)), str(len(value)), 'NULL', '0', 'NULL', '0') + for name, value in html_escapes.iteritems() + ] + ), (PREPEND_STEP, [ # German transliterations not handled by standard NFD normalization # ä => ae @@ -1241,10 +1245,7 @@ supplemental_transliterations = { ]), - (PREPEND_STEP, [(quote_string(name), str(len(name)), CONTEXT_TYPE_NONE, '0', 'NULL', '0', CONTEXT_TYPE_NONE, '0', 'NULL', '0', quote_string(value), '0', 'NULL', '0', 'NULL', '0') - for name, value in html_escapes.iteritems() - ] - ), + ], }