[scripts] Better script code aliasing
This commit is contained in:
@@ -248,6 +248,7 @@ def get_word_break_properties():
|
|||||||
|
|
||||||
return dict(props)
|
return dict(props)
|
||||||
|
|
||||||
|
|
||||||
def build_master_scripts_list(chars):
|
def build_master_scripts_list(chars):
|
||||||
all_scripts = OrderedDict.fromkeys(filter(bool, chars))
|
all_scripts = OrderedDict.fromkeys(filter(bool, chars))
|
||||||
|
|
||||||
@@ -260,6 +261,12 @@ def build_master_scripts_list(chars):
|
|||||||
return all_scripts
|
return all_scripts
|
||||||
|
|
||||||
|
|
||||||
|
# Extra script-code -> script-name entries that get_script_codes() merges into
# its result with dict.update() as the last step, so these entries override
# any mapping for the same code collected earlier in that function.
# Both codes here resolve to the single script name 'Han'.
SCRIPT_ALIASES_SUPPLEMENTAL = {
|
||||||
|
'Hant': 'Han',
|
||||||
|
'Hans': 'Han'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_script_codes(all_scripts):
|
def get_script_codes(all_scripts):
|
||||||
temp_dir = tempfile.gettempdir()
|
temp_dir = tempfile.gettempdir()
|
||||||
script_codes_filename = os.path.join(temp_dir, ISO_15924_URL.rsplit('/')[-1])
|
script_codes_filename = os.path.join(temp_dir, ISO_15924_URL.rsplit('/')[-1])
|
||||||
@@ -289,9 +296,24 @@ def get_script_codes(all_scripts):
|
|||||||
script_codes[code] = normalized_name
|
script_codes[code] = normalized_name
|
||||||
seen_scripts.add(normalized_name)
|
seen_scripts.add(normalized_name)
|
||||||
|
|
||||||
|
value_aliases = get_property_value_aliases()
|
||||||
|
script_aliases = value_aliases['sc']
|
||||||
|
|
||||||
|
for code, script in script_aliases.iteritems():
|
||||||
|
if code not in script_codes and script in all_scripts:
|
||||||
|
script_codes[code] = script
|
||||||
|
|
||||||
|
script_codes.update(SCRIPT_ALIASES_SUPPLEMENTAL)
|
||||||
|
|
||||||
return script_codes
|
return script_codes
|
||||||
|
|
||||||
|
|
||||||
|
# Composite script codes mapped to the individual script codes they stand for.
# extract_language_scripts() substitutes the listed codes for the composite
# code, so a language is recorded under each component script.
SCRIPT_CODE_ALIASES = {
    'Jpan': ['Hani', 'Hira', 'Kana'],
    # Use the four-letter code 'Hani', not the script name 'Han': these values
    # are later looked up as script codes (as the 'Jpan' entry already does),
    # and a bare 'Han' would not match any code.
    'Kore': ['Hang', 'Hani'],
}
|
||||||
|
|
||||||
|
|
||||||
def extract_language_scripts(xml):
|
def extract_language_scripts(xml):
|
||||||
language_scripts = defaultdict(list)
|
language_scripts = defaultdict(list)
|
||||||
|
|
||||||
@@ -301,7 +323,11 @@ def extract_language_scripts(xml):
|
|||||||
if not scripts:
|
if not scripts:
|
||||||
continue
|
continue
|
||||||
for script in scripts.split():
|
for script in scripts.split():
|
||||||
language_scripts[language_code].append(script)
|
script_aliases = SCRIPT_CODE_ALIASES.get(script)
|
||||||
|
if not script_aliases:
|
||||||
|
language_scripts[language_code].append(script)
|
||||||
|
else:
|
||||||
|
language_scripts[language_code].extend(script_aliases)
|
||||||
|
|
||||||
return language_scripts
|
return language_scripts
|
||||||
|
|
||||||
@@ -332,11 +358,11 @@ def get_script_languages(script_codes):
|
|||||||
for script in scripts:
|
for script in scripts:
|
||||||
script_code_languages[script].append(language)
|
script_code_languages[script].append(language)
|
||||||
|
|
||||||
script_languages = {}
|
script_languages = defaultdict(list)
|
||||||
|
|
||||||
for script_code, script_name in script_codes.iteritems():
|
for script_code, script_name in script_codes.iteritems():
|
||||||
langs = script_code_languages.get(script_code, [])
|
langs = script_code_languages.get(script_code, [])
|
||||||
script_languages[script_name] = langs
|
script_languages[script_name].extend(langs)
|
||||||
|
|
||||||
return script_languages
|
return script_languages
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user