123 lines
4.1 KiB
C
123 lines
4.1 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "libpostal.h"
|
|
#include "string_utils.h"
|
|
|
|
int main(int argc, char **argv) {
|
|
if (argc < 3) {
|
|
printf("Usage: ./test_near_dupe label value [...]\n");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
if (!libpostal_setup() || !libpostal_setup_language_classifier()) {
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
libpostal_near_dupe_hash_options_t options = libpostal_near_dupe_hash_default_options();
|
|
|
|
cstring_array *labels_array = cstring_array_new();
|
|
cstring_array *values_array = cstring_array_new();
|
|
cstring_array *languages_array = NULL;
|
|
|
|
bool label = true;
|
|
bool next_is_latitude = false;
|
|
bool next_is_longitude = false;
|
|
bool next_is_geohash_precision = false;
|
|
bool have_latitude = false;
|
|
bool have_longitude = false;
|
|
bool next_is_language = false;
|
|
double longitude = 0.0;
|
|
double latitude = 0.0;
|
|
|
|
|
|
for (size_t i = 1; i < argc; i++) {
|
|
char *arg = argv[i];
|
|
|
|
if (string_equals(arg, "--with-unit")) {
|
|
options.with_unit = true;
|
|
} else if (string_equals(arg, "--latitude")) {
|
|
next_is_latitude = true;
|
|
} else if (string_equals(arg, "--longitude")) {
|
|
next_is_longitude = true;
|
|
} else if (string_equals(arg, "--geohash-precision")) {
|
|
next_is_geohash_precision = true;
|
|
} else if (string_equals(arg, "--name-only-keys")) {
|
|
options.name_only_keys = true;
|
|
} else if (string_equals(arg, "--address-only-keys")) {
|
|
options.address_only_keys = true;
|
|
} else if (string_equals(arg, "--language")) {
|
|
next_is_language = true;
|
|
} else if (next_is_latitude) {
|
|
sscanf(arg, "%lf", &latitude);
|
|
next_is_latitude = false;
|
|
have_latitude = true;
|
|
} else if (next_is_longitude) {
|
|
sscanf(arg, "%lf", &longitude);
|
|
next_is_longitude = false;
|
|
have_longitude = true;
|
|
} else if (next_is_geohash_precision) {
|
|
size_t geohash_precision = 0;
|
|
sscanf(arg, "%zu", &geohash_precision);
|
|
options.geohash_precision = geohash_precision;
|
|
next_is_geohash_precision = false;
|
|
} else if (next_is_language) {
|
|
if (languages_array == NULL) {
|
|
languages_array = cstring_array_new();
|
|
}
|
|
cstring_array_add_string(languages_array, arg);
|
|
} else if (label) {
|
|
cstring_array_add_string(labels_array, arg);
|
|
label = false;
|
|
} else {
|
|
cstring_array_add_string(values_array, arg);
|
|
label = true;
|
|
}
|
|
}
|
|
|
|
if (have_latitude && have_longitude) {
|
|
options.with_latlon = true;
|
|
options.latitude = latitude;
|
|
options.longitude = longitude;
|
|
}
|
|
|
|
size_t num_languages = 0;
|
|
char **languages = NULL;
|
|
if (languages_array != NULL) {
|
|
num_languages = cstring_array_num_strings(languages_array);
|
|
languages = cstring_array_to_strings(languages_array);
|
|
}
|
|
|
|
|
|
size_t num_components = cstring_array_num_strings(labels_array);
|
|
if (num_components != cstring_array_num_strings(values_array)) {
|
|
cstring_array_destroy(labels_array);
|
|
cstring_array_destroy(values_array);
|
|
printf("Must have same number of labels and values\n");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
char **labels = cstring_array_to_strings(labels_array);
|
|
char **values = cstring_array_to_strings(values_array);
|
|
|
|
size_t num_near_dupe_hashes = 0;
|
|
char **near_dupe_hashes = libpostal_near_dupe_hashes_languages(num_components, labels, values, options, num_languages, languages, &num_near_dupe_hashes);
|
|
|
|
for (size_t i = 0; i < num_near_dupe_hashes; i++) {
|
|
char *near_dupe_hash = near_dupe_hashes[i];
|
|
printf("%s\n", near_dupe_hash);
|
|
}
|
|
|
|
libpostal_expansion_array_destroy(near_dupe_hashes, num_near_dupe_hashes);
|
|
libpostal_expansion_array_destroy(labels, num_components);
|
|
libpostal_expansion_array_destroy(values, num_components);
|
|
|
|
if (languages != NULL) {
|
|
libpostal_expansion_array_destroy(languages, num_languages);
|
|
}
|
|
|
|
libpostal_teardown();
|
|
libpostal_teardown_language_classifier();
|
|
|
|
}
|