From f3a626463a77f5f506790c08584af7b350c753b3 Mon Sep 17 00:00:00 2001
From: Al
Date: Sun, 24 Dec 2017 12:43:28 -0500
Subject: [PATCH] [api] adding API functions for near dupe hashes to the public header

---
Review notes (amended relative to the original submission):
  - libpostal_expand_address, libpostal_expand_address_root and both
    near-dupe wrappers now return NULL instead of handing a NULL
    cstring_array to cstring_array_to_strings / cstring_array_num_strings.
  - The near-dupe wrappers share one static helper, which also tolerates a
    NULL num_hashes pointer and reports a count of 0 for a NULL array.
  - The defaults table is now static const; it is only ever copied out.
  - The new public declarations carry short comments.
  - Line breaks, indentation and blank context lines were restored from a
    whitespace-flattened copy of this patch, and the commit id / index
    hashes above and below predate these amendments. Confirm the hunks
    apply to the target tree before use.

 src/libpostal.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 src/libpostal.h | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/src/libpostal.c b/src/libpostal.c
index f12d4898..2825bab4 100644
--- a/src/libpostal.c
+++ b/src/libpostal.c
@@ -11,6 +11,7 @@
 #include "expand.h"
 
 #include "language_classifier.h"
+#include "near_dupe.h"
 #include "normalize.h"
 #include "scanner.h"
 #include "string_utils.h"
@@ -45,17 +46,64 @@ libpostal_normalize_options_t libpostal_get_default_options(void) {
 }
 
 char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n) {
-    return expand_address(input, options, n);
+    cstring_array *strings = expand_address(input, options, n);
+    if (strings == NULL) return NULL;
+    return cstring_array_to_strings(strings);
 }
 
 char **libpostal_expand_address_root(char *input, libpostal_normalize_options_t options, size_t *n) {
-    return expand_address_root(input, options, n);
+    cstring_array *strings = expand_address_root(input, options, n);
+    if (strings == NULL) return NULL;
+    return cstring_array_to_strings(strings);
 }
 
 void libpostal_expansion_array_destroy(char **expansions, size_t n) {
     expansion_array_destroy(expansions, n);
 }
 
+#define DEFAULT_NEAR_DUPE_GEOHASH_PRECISION 6
+
+static const libpostal_near_dupe_hash_options_t LIBPOSTAL_NEAR_DUPE_HASH_DEFAULT_OPTIONS = {
+    .with_name = true,
+    .with_address = true,
+    .with_unit = false,
+    .with_city_or_equivalent = true,
+    .with_small_containing_boundaries = true,
+    .with_postal_code = true,
+    .with_latlon = false,
+    .latitude = 0.0,
+    .longitude = 0.0,
+    .geohash_precision = DEFAULT_NEAR_DUPE_GEOHASH_PRECISION,
+    .name_and_address_keys = true,
+    .name_only_keys = false,
+    .address_only_keys = false
+};
+
+libpostal_near_dupe_hash_options_t libpostal_near_dupe_hash_default_options(void) {
+    return LIBPOSTAL_NEAR_DUPE_HASH_DEFAULT_OPTIONS;
+}
+
+// Shared tail of the near-dupe entry points: stores the hash count and
+// converts the array, treating a NULL array as "no hashes".
+static char **near_dupe_hashes_to_strings(cstring_array *strings, size_t *num_hashes) {
+    if (num_hashes != NULL) {
+        *num_hashes = (strings != NULL) ? cstring_array_num_strings(strings) : 0;
+    }
+    if (strings == NULL) return NULL;
+    return cstring_array_to_strings(strings);
+}
+
+char **libpostal_near_dupe_hashes(size_t num_components, char **labels, char **values, libpostal_near_dupe_hash_options_t options, size_t *num_hashes) {
+    cstring_array *strings = near_dupe_hashes(num_components, labels, values, options);
+    return near_dupe_hashes_to_strings(strings, num_hashes);
+}
+
+
+char **libpostal_near_dupe_hashes_languages(size_t num_components, char **labels, char **values, libpostal_near_dupe_hash_options_t options, size_t num_languages, char **languages, size_t *num_hashes) {
+    cstring_array *strings = near_dupe_hashes_languages(num_components, labels, values, options, num_languages, languages);
+    return near_dupe_hashes_to_strings(strings, num_hashes);
+}
+
 void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) {
     if (self == NULL) return;
 
diff --git a/src/libpostal.h b/src/libpostal.h
index f088db72..dc1c3c4e 100644
--- a/src/libpostal.h
+++ b/src/libpostal.h
@@ -152,6 +152,8 @@ typedef struct libpostal_address_parser_response {
     char **labels;
 } libpostal_address_parser_response_t;
 
+typedef libpostal_address_parser_response_t libpostal_parsed_address_components_t;
+
 typedef struct libpostal_address_parser_options {
     char *language;
     char *country;
@@ -165,6 +167,38 @@ LIBPOSTAL_EXPORT libpostal_address_parser_response_t *libpostal_parse_address(ch
 
 LIBPOSTAL_EXPORT bool libpostal_parser_print_features(bool print_features);
 
+
+/*
+Deduping
+*/
+
+typedef struct libpostal_near_dupe_hash_options {
+    bool with_name;
+    bool with_address;
+    bool with_unit;
+    bool with_city_or_equivalent;
+    bool with_small_containing_boundaries;
+    bool with_postal_code;
+    bool with_latlon;
+    double latitude;
+    double longitude;
+    size_t geohash_precision;
+    bool name_and_address_keys;
+    bool name_only_keys;
+    bool address_only_keys;
+} libpostal_near_dupe_hash_options_t;
+
+
+// Returns the library's default near-dupe hash options by value.
+LIBPOSTAL_EXPORT libpostal_near_dupe_hash_options_t libpostal_near_dupe_hash_default_options(void);
+// Near-dupe hashes for num_components parallel labels/values entries. The hash
+// count is stored in *num_hashes; returns NULL with a count of 0 if no hash
+// array was produced.
+LIBPOSTAL_EXPORT char **libpostal_near_dupe_hashes(size_t num_components, char **labels, char **values, libpostal_near_dupe_hash_options_t options, size_t *num_hashes);
+// Variant of libpostal_near_dupe_hashes that also passes num_languages language
+// strings to the hasher. Same result and NULL conventions.
+LIBPOSTAL_EXPORT char **libpostal_near_dupe_hashes_languages(size_t num_components, char **labels, char **values, libpostal_near_dupe_hash_options_t options, size_t num_languages, char **languages, size_t *num_hashes);
+
 // Setup/teardown methods
 
 LIBPOSTAL_EXPORT bool libpostal_setup(void);