[geonames] Only take alternate names that differ from the canonical name; sort by name, then population (descending), then geonames_id
This commit is contained in:
@@ -103,6 +103,9 @@ geonames_fields = [
|
|||||||
DUMMY_BOUNDARY_TYPE_INDEX = [i for i, f in enumerate(geonames_fields)
|
DUMMY_BOUNDARY_TYPE_INDEX = [i for i, f in enumerate(geonames_fields)
|
||||||
if f.is_dummy][0]
|
if f.is_dummy][0]
|
||||||
|
|
||||||
|
GEONAMES_ID_INDEX = [i for i, f in enumerate(geonames_fields)
|
||||||
|
if f.c_constant == 'GEONAMES_ID'][0]
|
||||||
|
|
||||||
CANONICAL_NAME_INDEX = [i for i, f in enumerate(geonames_fields)
|
CANONICAL_NAME_INDEX = [i for i, f in enumerate(geonames_fields)
|
||||||
if f.c_constant == 'GEONAMES_CANONICAL'][0]
|
if f.c_constant == 'GEONAMES_CANONICAL'][0]
|
||||||
|
|
||||||
@@ -112,6 +115,9 @@ NAME_INDEX = [i for i, f in enumerate(geonames_fields)
|
|||||||
COUNTRY_CODE_INDEX = [i for i, f in enumerate(geonames_fields)
|
COUNTRY_CODE_INDEX = [i for i, f in enumerate(geonames_fields)
|
||||||
if f.c_constant == 'GEONAMES_COUNTRY_CODE'][0]
|
if f.c_constant == 'GEONAMES_COUNTRY_CODE'][0]
|
||||||
|
|
||||||
|
POPULATION_INDEX = [i for i, f in enumerate(geonames_fields)
|
||||||
|
if f.c_constant == 'GEONAMES_POPULATION'][0]
|
||||||
|
|
||||||
|
|
||||||
geonames_admin_joins = '''
|
geonames_admin_joins = '''
|
||||||
left join admin1_codes a1
|
left join admin1_codes a1
|
||||||
@@ -149,6 +155,7 @@ join alternate_names an
|
|||||||
on an.geonames_id = gn.geonames_id
|
on an.geonames_id = gn.geonames_id
|
||||||
and iso_language not in ('doi','faac','iata',
|
and iso_language not in ('doi','faac','iata',
|
||||||
'icao','link','post','tcid')
|
'icao','link','post','tcid')
|
||||||
|
and an.alternate_name != gn.name
|
||||||
{admin_joins}
|
{admin_joins}
|
||||||
{{predicate}}
|
{{predicate}}
|
||||||
'''.format(
|
'''.format(
|
||||||
@@ -301,7 +308,11 @@ def create_geonames_tsv(db, out_dir=DEFAULT_DATA_DIR):
|
|||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
logging.info('Sorting...')
|
logging.info('Sorting...')
|
||||||
subprocess.check_call(['sort', '-t\t', '-k1,1', '-k2,2', '-o', filename, temp_filename])
|
subprocess.check_call(['sort', '-t\t', '-u',
|
||||||
|
'-k{0},{0}'.format(NAME_INDEX + 1),
|
||||||
|
'-k{0},{0}nr'.format(POPULATION_INDEX + 1),
|
||||||
|
'-k{0},{0}'.format(GEONAMES_ID_INDEX + 1),
|
||||||
|
'-o', filename, temp_filename])
|
||||||
os.unlink(temp_filename)
|
os.unlink(temp_filename)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user