From daad1a13136224458be8d42a06c8c218007b52bc Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 28 Sep 2015 17:46:53 -0400 Subject: [PATCH] [geonames] Removing alternate names from geonames data set which are digits-only (most are not legitimate) --- scripts/geodata/geonames/create_geonames_tsv.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/geodata/geonames/create_geonames_tsv.py b/scripts/geodata/geonames/create_geonames_tsv.py index 03c8c89a..c2e78dda 100644 --- a/scripts/geodata/geonames/create_geonames_tsv.py +++ b/scripts/geodata/geonames/create_geonames_tsv.py @@ -458,6 +458,11 @@ def create_geonames_tsv(db, out_dir=DEFAULT_DATA_DIR): geonames_id = row[GEONAMES_ID_INDEX] name = utf8_normalize(safe_decode(row[NAME_INDEX])) + + # Skip digits-only names: these are not postal codes, and most are not legitimate + if name.isdigit(): + continue + canonical = utf8_normalize(safe_decode(row[CANONICAL_NAME_INDEX])) row[POPULATION_INDEX] = int(row[POPULATION_INDEX] or 0)