[geonames] Set LC_ALL=C in the environment of the sort subprocess so that UTF-8 data sorts consistently regardless of the caller's locale
This commit is contained in:
@@ -500,7 +500,11 @@ def create_geonames_tsv(db, out_dir=DEFAULT_DATA_DIR):
|
|||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
logging.info('Sorting...')
|
logging.info('Sorting...')
|
||||||
subprocess.check_call(['sort', '-t\t', '-u', '--ignore-case',
|
|
||||||
|
env = os.environ.copy()
|
||||||
|
env['LC_ALL'] = 'C'
|
||||||
|
|
||||||
|
command = ['sort', '-t\t', '-u', '--ignore-case',
|
||||||
'-k{0},{0}'.format(NAME_INDEX + 1),
|
'-k{0},{0}'.format(NAME_INDEX + 1),
|
||||||
# If there's a Wikipedia link to this name for the given id, sort first
|
# If there's a Wikipedia link to this name for the given id, sort first
|
||||||
'-k{0},{0}nr'.format(DUMMY_HAS_WIKIPEDIA_ENTRY_INDEX + 1),
|
'-k{0},{0}nr'.format(DUMMY_HAS_WIKIPEDIA_ENTRY_INDEX + 1),
|
||||||
@@ -514,7 +518,14 @@ def create_geonames_tsv(db, out_dir=DEFAULT_DATA_DIR):
|
|||||||
'-k{0},{0}nr'.format(PREFERRED_INDEX + 1),
|
'-k{0},{0}nr'.format(PREFERRED_INDEX + 1),
|
||||||
# since uniquing is done on the sort key, add language
|
# since uniquing is done on the sort key, add language
|
||||||
'-k{0},{0}'.format(LANGUAGE_INDEX + 1),
|
'-k{0},{0}'.format(LANGUAGE_INDEX + 1),
|
||||||
'-o', filename, temp_filename])
|
'-o', filename, temp_filename]
|
||||||
|
|
||||||
|
p = subprocess.Popen(command, env=env)
|
||||||
|
|
||||||
|
return_code = p.wait()
|
||||||
|
if return_code != 0:
|
||||||
|
raise subprocess.CalledProcessError(return_code, command)
|
||||||
|
|
||||||
os.unlink(temp_filename)
|
os.unlink(temp_filename)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user