[geonames] Removing digits-only alternate names from the geonames data set (most are not legitimate)
This commit is contained in:
@@ -458,6 +458,11 @@ def create_geonames_tsv(db, out_dir=DEFAULT_DATA_DIR):
|
|||||||
geonames_id = row[GEONAMES_ID_INDEX]
|
geonames_id = row[GEONAMES_ID_INDEX]
|
||||||
|
|
||||||
name = utf8_normalize(safe_decode(row[NAME_INDEX]))
|
name = utf8_normalize(safe_decode(row[NAME_INDEX]))
|
||||||
|
|
||||||
|
# For non-postal codes, don't count digits-only names (most are not legitimate)
|
||||||
|
if name.isdigit():
|
||||||
|
continue
|
||||||
|
|
||||||
canonical = utf8_normalize(safe_decode(row[CANONICAL_NAME_INDEX]))
|
canonical = utf8_normalize(safe_decode(row[CANONICAL_NAME_INDEX]))
|
||||||
row[POPULATION_INDEX] = int(row[POPULATION_INDEX] or 0)
|
row[POPULATION_INDEX] = int(row[POPULATION_INDEX] or 0)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user