From 9af0b0ab653e318caec56da751be667014f568e6 Mon Sep 17 00:00:00 2001
From: Al
Date: Wed, 8 Jul 2015 18:43:28 -0400
Subject: [PATCH] [geodisambig] adding a few more features to geonames disambiguation

---
Review notes (ignored by git am):
 * Line breaks, blank context lines and indentation were reconstructed from a
   whitespace-collapsed copy of this patch; verify them against the base file.
 * Added lines were corrected: boundary type feature used an out-of-scope
   variable and an undefined key, geoname was dereferenced before its NULL
   check, an empty language made the whole geoname fail, and callers still
   used the renamed geodisambig_add_country_feature / omitted the name argument.
 * Hunk headers and the diffstat were recomputed; the index line is unchanged.

 src/geo_disambiguation.c | 79 ++++++++++++++++++++++++++++++++++------
 1 file changed, 68 insertions(+), 11 deletions(-)

diff --git a/src/geo_disambiguation.c b/src/geo_disambiguation.c
index 1cc3c977..bf13a9a3 100644
--- a/src/geo_disambiguation.c
+++ b/src/geo_disambiguation.c
@@ -3,7 +3,10 @@
 #define GEONAME_GENERIC_KEY_NAME "n"
 #define GEONAME_KEY_NAME_ADMIN1_ID "na1"
 #define GEONAME_KEY_NAME_ADMIN2_ID "na2"
-#define GEONAME_KEY_NAME_COUNTRY "nc"
+#define GEONAME_GENERIC_KEY_NAME_COUNTRY_CODE "ncc"
+#define GEONAME_GENERIC_KEY_NAME_COUNTRY_ID "nci"
+#define GEONAME_KEY_NAME_BOUNDARY_TYPE "nb"
+#define GEONAME_KEY_NAME_LANGUAGE "nl"
 #define GEONAME_KEY_NAME_GEOHASH5 "nh5"
 #define GEONAME_KEY_NAME_GEOHASH6 "nh6"
 #define GEONAME_KEY_NAME_GEOHASH7 "nh7"
@@ -15,17 +18,56 @@ bool geodisambig_add_name_feature(cstring_array *features, char *name) {
 }
 
 
-bool geodisambig_add_country_feature(cstring_array *features, char *name, char *country) {
+bool geodisambig_add_country_code_feature(cstring_array *features, char *name, char *country) {
     if (name == NULL || strlen(name) == 0 || country == NULL || strlen(country) == 0) return false;
 
-    feature_array_add(features, 3, GEONAME_KEY_NAME_COUNTRY, name, country);
+    feature_array_add(features, 3, GEONAME_GENERIC_KEY_NAME_COUNTRY_CODE, name, country);
     return true;
 }
 
+/* Adds a feature combining name with the numeric country id (key
+ * GEONAME_GENERIC_KEY_NAME_COUNTRY_ID). Returns false without adding anything
+ * when name is NULL, country_id is 0, or the id does not fit the buffer. */
+bool geodisambig_add_country_id_feature(cstring_array *features, char *name, uint32_t country_id) {
+    char numeric_string[INT32_MAX_STRING_SIZE];
+
+    if (name == NULL || country_id == 0) return false;
+
+    /* country_id is unsigned: print with %u; snprintf bounds the write */
+    int len = snprintf(numeric_string, sizeof(numeric_string), "%u", (unsigned int)country_id);
+    if (len < 0 || (size_t)len >= sizeof(numeric_string)) return false;
+
+    feature_array_add(features, 3, GEONAME_GENERIC_KEY_NAME_COUNTRY_ID, name, numeric_string);
+    return true;
+}
+
+/* Adds a feature combining name with the numeric boundary type (key
+ * GEONAME_KEY_NAME_BOUNDARY_TYPE). Returns false when name is NULL or the
+ * value does not fit the buffer; a boundary_type of 0 is still emitted because
+ * the caller invokes this unconditionally. NOTE(review): confirm 0 is valid. */
+bool geodisambig_add_boundary_type_feature(cstring_array *features, char *name, uint8_t boundary_type) {
+    char numeric_string[INT8_MAX_STRING_SIZE];
+
+    if (name == NULL) return false;
+
+    int len = snprintf(numeric_string, sizeof(numeric_string), "%u", (unsigned int)boundary_type);
+    if (len < 0 || (size_t)len >= sizeof(numeric_string)) return false;
+
+    feature_array_add(features, 3, GEONAME_KEY_NAME_BOUNDARY_TYPE, name, numeric_string);
+    return true;
+}
+
+/* Adds a feature combining name with its language code. Returns false
+ * without adding anything when name is NULL or lang is NULL/empty. */
+bool geodisambig_add_language_feature(cstring_array *features, char *name, char *lang) {
+    if (name == NULL || lang == NULL || strlen(lang) == 0) return false;
+    feature_array_add(features, 3, GEONAME_KEY_NAME_LANGUAGE, name, lang);
+    return true;
+}
+
 
 bool geodisambig_add_admin1_feature(cstring_array *features, char *name, uint32_t admin1_id) {
     char numeric_string[INT32_MAX_STRING_SIZE];
-    printf("%d\n", admin1_id);
 
     if (admin1_id != 0 && name != NULL) {
         size_t n = sprintf(numeric_string, "%d", admin1_id);
@@ -101,14 +143,28 @@ bool geodisambig_add_geo_features(cstring_array *features, char *name, double la
 }
 
 
-
+/* Adds the features for one geoname: name, country code, country id,
+ * admin1/admin2 ids (when non-zero), boundary type, language (when present
+ * and not "abbr") and geo features. Returns false if geoname is NULL or as
+ * soon as one of the required steps returns false. */
 bool geodisambig_add_geoname_features(cstring_array *features, geoname_t *geoname) {
+    /* Check for NULL before reading any field of geoname */
+    if (geoname == NULL) return false;
+
+    char *name = char_array_get_string(geoname->name);
+    char *lang = char_array_get_string(geoname->iso_language);
+    /* Skip the language feature when there is no language code or it is "abbr" */
+    bool add_language = lang != NULL && strlen(lang) > 0 && strcmp(lang, "abbr") != 0;
+
     return (geoname != NULL
-            && geodisambig_add_name_feature(features, geoname->name)
-            && geodisambig_add_country_feature(features, geoname->name, geoname->country_code)
-            && (geoname->admin1_geonames_id == 0 || geodisambig_add_admin1_feature(features, geoname->name, geoname->admin1_geonames_id))
-            && (geoname->admin2_geonames_id == 0 || geodisambig_add_admin2_feature(features, geoname->name, geoname->admin2_geonames_id))
-            && (geodisambig_add_geo_features(features, geoname->name, geoname->latitude, geoname->longitude))
+            && geodisambig_add_name_feature(features, name)
+            && geodisambig_add_country_code_feature(features, name, char_array_get_string(geoname->country_code))
+            && geodisambig_add_country_id_feature(features, name, geoname->country_geonames_id)
+            && (geoname->admin1_geonames_id == 0 || geodisambig_add_admin1_feature(features, name, geoname->admin1_geonames_id))
+            && (geoname->admin2_geonames_id == 0 || geodisambig_add_admin2_feature(features, name, geoname->admin2_geonames_id))
+            && (geodisambig_add_boundary_type_feature(features, name, geoname->boundary_type))
+            && (!add_language || geodisambig_add_language_feature(features, name, lang))
+            && (geodisambig_add_geo_features(features, name, geoname->latitude, geoname->longitude))
     );
 }
 
@@ -116,6 +172,7 @@ bool geodisambig_add_postal_code_features(cstring_array *features, gn_postal_cod
     return (postal_code != NULL
             && geodisambig_add_name_feature(features, postal_code->code)
-            && geodisambig_add_country_feature(features, postal_code->country_code)
+            && geodisambig_add_country_code_feature(features, postal_code->code, postal_code->country_code)
+            && geodisambig_add_country_id_feature(features, postal_code->code, postal_code->country_geonames_id)
     );
 }
 