[geodisambig] feature functions for GeoNames disambiguation
This commit is contained in:
102
src/geonames_disambiguation.c
Normal file
102
src/geonames_disambiguation.c
Normal file
@@ -0,0 +1,102 @@
|
||||
#include "geonames_disambiguation.h"
|
||||
|
||||
#define GEONAME_GENERIC_KEY_NAME "n"
|
||||
#define GEONAME_KEY_NAME_ADMIN1_ID "na1"
|
||||
#define GEONAME_KEY_NAME_ADMIN2_ID "na2"
|
||||
#define GEONAME_KEY_NAME_COUNTRY "nc"
|
||||
#define GEONAME_KEY_NAME_GEOHASH5 "nh5"
|
||||
#define GEONAME_KEY_NAME_GEOHASH6 "nh6"
|
||||
#define GEONAME_KEY_NAME_GEOHASH7 "nh7"
|
||||
|
||||
// Adds the generic place-name feature for `name`.
//
// The key GEONAME_GENERIC_KEY_NAME ("n") and `name` are handed to
// feature_array_add as a 2-part feature; how the parts are joined is
// decided by feature_array_add (defined elsewhere).
//
// Returns true if a feature was added, false if `name` is NULL or empty.
bool geodisambig_add_name_feature(cstring_array *features, char *name) {
    // An empty string is treated the same as a missing name.
    bool has_name = (name != NULL && name[0] != '\0');

    if (has_name) {
        feature_array_add(features, 2, GEONAME_GENERIC_KEY_NAME, name);
    }

    return has_name;
}
|
||||
|
||||
|
||||
// Adds a (name, country) feature under the key GEONAME_KEY_NAME_COUNTRY ("nc").
//
// Both strings are required: if either `name` or `country` is NULL or
// empty, nothing is added and false is returned. Otherwise the three parts
// (key, name, country) are passed to feature_array_add and true is returned.
bool geodisambig_add_country_feature(cstring_array *features, char *name, char *country) {
    if (name == NULL || name[0] == '\0') {
        return false;
    }

    if (country == NULL || country[0] == '\0') {
        return false;
    }

    feature_array_add(features, 3, GEONAME_KEY_NAME_COUNTRY, name, country);
    return true;
}
|
||||
|
||||
// Adds a (name, admin1 id) feature under the key GEONAME_KEY_NAME_ADMIN1_ID ("na1").
//
// `admin1_id` is rendered as an unsigned decimal string and passed, together
// with the key and `name`, to feature_array_add as a 3-part feature.
//
// Returns false without adding anything when:
//   - `admin1_id` is 0 (treated as "no admin1"),
//   - `name` is NULL or empty (same rule as the other feature functions), or
//   - the id could not be formatted into the local buffer.
// Returns true once the feature has been added.
bool geodisambig_add_admin1_feature(cstring_array *features, char *name, uint32_t admin1_id) {
    char numeric_string[INT32_MAX_STRING_SIZE];

    if (admin1_id == 0 || name == NULL || name[0] == '\0') {
        return false;
    }

    // unsigned long is guaranteed to be at least 32 bits wide, so the cast is
    // lossless and "%lu" matches the argument type exactly. snprintf bounds
    // the write to the buffer regardless of how INT32_MAX_STRING_SIZE is defined.
    int written = snprintf(numeric_string, sizeof numeric_string, "%lu", (unsigned long)admin1_id);
    if (written < 0 || (size_t)written >= sizeof numeric_string) {
        return false;
    }

    feature_array_add(features, 3, GEONAME_KEY_NAME_ADMIN1_ID, name, numeric_string);
    return true;
}
|
||||
|
||||
// Adds a (name, admin2 id) feature under the key GEONAME_KEY_NAME_ADMIN2_ID ("na2").
//
// `admin2_id` is rendered as an unsigned decimal string and passed, together
// with the key and `name`, to feature_array_add as a 3-part feature.
//
// Returns false without adding anything when:
//   - `admin2_id` is 0 (treated as "no admin2"),
//   - `name` is NULL or empty (same rule as the other feature functions), or
//   - the id could not be formatted into the local buffer.
// Returns true once the feature has been added.
bool geodisambig_add_admin2_feature(cstring_array *features, char *name, uint32_t admin2_id) {
    char numeric_string[INT32_MAX_STRING_SIZE];

    if (admin2_id == 0 || name == NULL || name[0] == '\0') {
        return false;
    }

    // unsigned long is guaranteed to be at least 32 bits wide, so the cast is
    // lossless and "%lu" matches the argument type exactly. snprintf bounds
    // the write to the buffer regardless of how INT32_MAX_STRING_SIZE is defined.
    int written = snprintf(numeric_string, sizeof numeric_string, "%lu", (unsigned long)admin2_id);
    if (written < 0 || (size_t)written >= sizeof numeric_string) {
        return false;
    }

    feature_array_add(features, 3, GEONAME_KEY_NAME_ADMIN2_ID, name, numeric_string);
    return true;
}
|
||||
|
||||
// Adds one (feature_name, name, neighbor) feature for each of the 8 cells
// adjacent to `geohash`.
//
// `geohash_size` is the size of one geohash slot; the scratch buffer holds
// 8 such slots and neighbor k is read at offset k * geohash_size.
// NOTE(review): this assumes geohash_neighbors writes each neighbor as a
// NUL-terminated string into consecutive geohash_size-byte slots -- confirm
// against geohash/geohash.h.
//
// Nothing is added if geohash_neighbors does not return GEOHASH_OK or does
// not report exactly 8 neighbors.
static void geodisambig_add_geo_neighbors(cstring_array *features, char *geohash, size_t geohash_size, char *feature_name, char *name) {
    size_t buffer_len = geohash_size * 8;
    char buffer[buffer_len];
    int count = 0;

    if (geohash_neighbors(geohash, buffer, buffer_len, &count) != GEOHASH_OK) {
        return;
    }

    if (count != 8) {
        return;
    }

    // Walk the buffer one slot at a time instead of recomputing the offset.
    char *cell = buffer;
    for (int k = 0; k < count; k++, cell += geohash_size) {
        feature_array_add(features, 3, feature_name, name, cell);
    }
}
|
||||
|
||||
// Adds geohash-based features for `name` at (latitude, longitude).
//
// The coordinates are encoded once into an 8-byte buffer. For each of the
// keys GEONAME_KEY_NAME_GEOHASH7 ("nh7"), GEONAME_KEY_NAME_GEOHASH6 ("nh6")
// and GEONAME_KEY_NAME_GEOHASH5 ("nh5"), in that order, the function adds:
//   1. a (key, name, geohash) feature for the cell itself, then
//   2. one feature per neighboring cell via geodisambig_add_geo_neighbors.
// Between steps the geohash is shortened by one character in place.
// NOTE(review): the key names suggest the 8-byte buffer yields a 7-character
// geohash plus NUL -- confirm against geohash_encode.
//
// Returns false without adding anything if `name` is NULL or empty, or if
// geohash_encode does not return GEOHASH_OK; returns true otherwise.
bool geodisambig_add_geo_features(cstring_array *features, char *name, double latitude, double longitude) {
    enum { GEOHASH_BUFFER_SIZE = 8, NUM_PRECISIONS = 3 };

    if (name == NULL || name[0] == '\0') {
        return false;
    }

    char geohash[GEOHASH_BUFFER_SIZE];
    size_t geohash_size = GEOHASH_BUFFER_SIZE;

    if (geohash_encode(latitude, longitude, geohash, geohash_size) != GEOHASH_OK) {
        return false;
    }

    // Finest precision first; each later pass drops one trailing character.
    char *feature_keys[NUM_PRECISIONS] = {
        GEONAME_KEY_NAME_GEOHASH7,
        GEONAME_KEY_NAME_GEOHASH6,
        GEONAME_KEY_NAME_GEOHASH5
    };

    for (size_t i = 0; i < NUM_PRECISIONS; i++) {
        if (i > 0) {
            // Shrink the slot by one byte and re-terminate, which removes
            // the last geohash character.
            geohash_size--;
            geohash[geohash_size - 1] = '\0';
        }

        feature_array_add(features, 3, feature_keys[i], name, geohash);
        geodisambig_add_geo_neighbors(features, geohash, geohash_size, feature_keys[i], name);
    }

    return true;
}
|
||||
25
src/geonames_disambiguation.h
Normal file
25
src/geonames_disambiguation.h
Normal file
@@ -0,0 +1,25 @@
|
||||
#ifndef GEONAMES_DISAMBIGUATION_H
#define GEONAMES_DISAMBIGUATION_H

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>

#include "geohash/geohash.h"
#include "features.h"
#include "string_utils.h"

#define PLACE_NAME_FEATURES_DEFAULT_LENGTH 128
#define GEO_FEATURES_DEFAULT_LENGTH 720
#define POSTAL_CODE_FEATURES_DEFAULT_LENGTH 32

// Each function below adds zero or more features to `features` and returns
// true if it added any, false if its inputs were rejected.

// Both place names and postal codes

// Adds the generic name feature. Returns false if `name` is NULL or empty.
bool geodisambig_add_name_feature(cstring_array *features, char *name);

// Adds a (name, country) feature. Returns false if either string is NULL
// or empty.
bool geodisambig_add_country_feature(cstring_array *features, char *name, char *country);

// Only place names

// Adds a (name, admin1 id) feature. Returns false if `admin1_id` is 0 or
// `name` is NULL.
bool geodisambig_add_admin1_feature(cstring_array *features, char *name, uint32_t admin1_id);

// Adds a (name, admin2 id) feature. Returns false if `admin2_id` is 0 or
// `name` is NULL.
bool geodisambig_add_admin2_feature(cstring_array *features, char *name, uint32_t admin2_id);

// Adds geohash features (the cell and its neighbors, at three precisions)
// for the given coordinates. Returns false if `name` is NULL or empty, or
// if the coordinates could not be geohash-encoded.
bool geodisambig_add_geo_features(cstring_array *features, char *name, double latitude, double longitude);

#endif
|
||||
Reference in New Issue
Block a user