[scripts] Better script code aliasing

This commit is contained in:
Al
2015-08-13 18:25:55 -04:00
parent b54ff95ecc
commit 03febc7e20

View File

@@ -248,6 +248,7 @@ def get_word_break_properties():
return dict(props)
def build_master_scripts_list(chars):
all_scripts = OrderedDict.fromkeys(filter(bool, chars))
@@ -260,6 +261,12 @@ def build_master_scripts_list(chars):
return all_scripts
# Extra script code -> script name entries. get_script_codes applies these on
# top of the mapping it builds (via dict.update), so an entry here overrides
# any earlier entry for the same code.
SCRIPT_ALIASES_SUPPLEMENTAL = {
    'Hant': 'Han',  # ISO 15924: Han, Traditional variant
    'Hans': 'Han',  # ISO 15924: Han, Simplified variant
}
def get_script_codes(all_scripts):
temp_dir = tempfile.gettempdir()
script_codes_filename = os.path.join(temp_dir, ISO_15924_URL.rsplit('/')[-1])
@@ -289,9 +296,24 @@ def get_script_codes(all_scripts):
script_codes[code] = normalized_name
seen_scripts.add(normalized_name)
value_aliases = get_property_value_aliases()
script_aliases = value_aliases['sc']
for code, script in script_aliases.iteritems():
if code not in script_codes and script in all_scripts:
script_codes[code] = script
script_codes.update(SCRIPT_ALIASES_SUPPLEMENTAL)
return script_codes
# Composite script codes mapped to the component script codes they stand for.
# extract_language_scripts substitutes the listed components whenever a
# language's script list contains one of these keys.
#
# Values must be script *codes* (e.g. 'Hani'), not script names (e.g. 'Han'):
# the expanded entries are later matched against script codes, so a name
# would never match.
SCRIPT_CODE_ALIASES = {
    'Jpan': ['Hani', 'Hira', 'Kana'],  # Japanese: Han + Hiragana + Katakana
    'Kore': ['Hang', 'Hani'],          # Korean: Hangul + Han
}
def extract_language_scripts(xml):
language_scripts = defaultdict(list)
@@ -301,7 +323,11 @@ def extract_language_scripts(xml):
if not scripts:
continue
for script in scripts.split():
language_scripts[language_code].append(script)
script_aliases = SCRIPT_CODE_ALIASES.get(script)
if not script_aliases:
language_scripts[language_code].append(script)
else:
language_scripts[language_code].extend(script_aliases)
return language_scripts
@@ -332,11 +358,11 @@ def get_script_languages(script_codes):
for script in scripts:
script_code_languages[script].append(language)
script_languages = {}
script_languages = defaultdict(list)
for script_code, script_name in script_codes.iteritems():
langs = script_code_languages.get(script_code, [])
script_languages[script_name] = langs
script_languages[script_name].extend(langs)
return script_languages