diff --git a/scripts/geodata/i18n/unicode_properties.py b/scripts/geodata/i18n/unicode_properties.py index 565421af..ff393779 100644 --- a/scripts/geodata/i18n/unicode_properties.py +++ b/scripts/geodata/i18n/unicode_properties.py @@ -38,6 +38,7 @@ from word_breaks import script_regex, regex_char_range SCRIPTS_DATA_DIR = os.path.join(UNICODE_DATA_DIR, 'scripts') LOCAL_SCRIPTS_FILE = os.path.join(SCRIPTS_DATA_DIR, 'Scripts.txt') +LOCAL_ISO_15924_FILE = os.path.join(SCRIPTS_DATA_DIR, 'iso15924.txt') BLOCKS_DATA_DIR = os.path.join(UNICODE_DATA_DIR, 'blocks') LOCAL_BLOCKS_FILE = os.path.join(BLOCKS_DATA_DIR, 'Blocks.txt') @@ -268,19 +269,26 @@ SCRIPT_ALIASES_SUPPLEMENTAL = { def get_script_codes(all_scripts): - temp_dir = tempfile.gettempdir() - script_codes_filename = os.path.join(temp_dir, ISO_15924_URL.rsplit('/')[-1]) - # This comes as a .zip - script_codes_response = requests.get(ISO_15924_URL) - zf = ZipFile(StringIO(script_codes_response.content)) - iso15924_filename = [name for name in zf.namelist() if name.startswith('iso15924')][0] + if not os.path.exists(LOCAL_ISO_15924_FILE): + temp_dir = tempfile.gettempdir() - # Strip out the comments, etc. - temp_iso15924_file = u'\n'.join([line.rstrip() for line in safe_decode(zf.read(iso15924_filename)).split('\n') - if line.strip() and not line.strip().startswith('#')]) + script_codes_filename = os.path.join(temp_dir, ISO_15924_URL.rsplit('/')[-1]) - script_codes_file = StringIO(safe_encode(temp_iso15924_file)) + # This comes as a .zip + script_codes_response = requests.get(ISO_15924_URL) + zf = ZipFile(StringIO(script_codes_response.content)) + iso15924_filename = [name for name in zf.namelist() if name.startswith('iso15924')][0] + + # Strip out the comments, etc. + temp_iso15924_file = u'\n'.join([line.rstrip() for line in safe_decode(zf.read(iso15924_filename)).split('\n') + if line.strip() and not line.strip().startswith('#')]) + + f = open(LOCAL_ISO_15924_FILE, 'w') + f.write(safe_encode(temp_iso15924_file)) + f.close() + + script_codes_file = open(LOCAL_ISO_15924_FILE) script_codes = {} seen_scripts = set()