From 26124ee72fdb6366736b753dbcbf87546a9cf973 Mon Sep 17 00:00:00 2001
From: Al
Date: Fri, 25 Mar 2022 14:04:26 -0400
Subject: [PATCH] [near_dupes] exposing name_word_hashes directly in the API

---
Review notes (commentary only: text placed between the three-dash line and
the first "diff --git" line is not part of the commit message or the diff).

* src/libpostal.c: adds libpostal_near_dupe_name_hashes(), a wrapper that
  calls name_word_hashes(name, normalize_options). When that call returns
  NULL it stores 0 in *num_hashes and returns NULL; otherwise it stores
  cstring_array_num_strings(strings) in *num_hashes and returns
  cstring_array_to_strings(strings).
* num_hashes is dereferenced on both paths without a NULL check, so callers
  must pass a valid pointer.
* Two added blank lines follow the new function.
* src/libpostal.h: exports the new prototype with LIBPOSTAL_EXPORT, and also
  removes one blank line above the declaration of
  libpostal_get_near_dupe_hash_default_options (whitespace-only change).
* src/near_dupe.h: declares name_word_hashes(). Its definition is not
  touched by this patch; presumably it already exists with external linkage
  in near_dupe.c. TODO: confirm it is not declared static there.
* Who owns the returned char ** (and what happens to `strings` inside
  cstring_array_to_strings) is not visible in this patch. TODO: confirm and
  document how callers release the result.

 src/libpostal.c | 11 +++++++++++
 src/libpostal.h |  2 +-
 src/near_dupe.h |  1 +
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/libpostal.c b/src/libpostal.c
index c1ac6a7a..066a3015 100644
--- a/src/libpostal.c
+++ b/src/libpostal.c
@@ -85,6 +85,17 @@ libpostal_near_dupe_hash_options_t libpostal_get_near_dupe_hash_default_options(
     return LIBPOSTAL_NEAR_DUPE_HASH_DEFAULT_OPTIONS;
 }
 
+char **libpostal_near_dupe_name_hashes(char *name, libpostal_normalize_options_t normalize_options, size_t *num_hashes) {
+    cstring_array *strings = name_word_hashes(name, normalize_options);
+    if (strings == NULL) {
+        *num_hashes = 0;
+        return NULL;
+    }
+    *num_hashes = cstring_array_num_strings(strings);
+    return cstring_array_to_strings(strings);
+}
+
+
 char **libpostal_near_dupe_hashes(size_t num_components, char **labels, char **values, libpostal_near_dupe_hash_options_t options, size_t *num_hashes) {
     cstring_array *strings = near_dupe_hashes(num_components, labels, values, options);
     if (strings == NULL) {
diff --git a/src/libpostal.h b/src/libpostal.h
index ddc07dff..42a13e3c 100644
--- a/src/libpostal.h
+++ b/src/libpostal.h
@@ -204,8 +204,8 @@ typedef struct libpostal_near_dupe_hash_options {
     bool address_only_keys;
 } libpostal_near_dupe_hash_options_t;
 
-
 LIBPOSTAL_EXPORT libpostal_near_dupe_hash_options_t libpostal_get_near_dupe_hash_default_options(void);
+LIBPOSTAL_EXPORT char **libpostal_near_dupe_name_hashes(char *name, libpostal_normalize_options_t normalize_options, size_t *num_hashes);
 LIBPOSTAL_EXPORT char **libpostal_near_dupe_hashes(size_t num_components, char **labels, char **values, libpostal_near_dupe_hash_options_t options, size_t *num_hashes);
 LIBPOSTAL_EXPORT char **libpostal_near_dupe_hashes_languages(size_t num_components, char **labels, char **values, libpostal_near_dupe_hash_options_t options, size_t num_languages, char **languages, size_t *num_hashes);
 
diff --git a/src/near_dupe.h b/src/near_dupe.h
index 9e3d33f8..a2d497de 100644
--- a/src/near_dupe.h
+++ b/src/near_dupe.h
@@ -8,6 +8,7 @@
 #include "libpostal.h"
 #include "string_utils.h"
 
+cstring_array *name_word_hashes(char *name, libpostal_normalize_options_t normalize_options);
 cstring_array *near_dupe_hashes(size_t num_components, char **labels, char **values, libpostal_near_dupe_hash_options_t options);
 cstring_array *near_dupe_hashes_languages(size_t num_components, char **labels, char **values, libpostal_near_dupe_hash_options_t options, size_t num_languages, char **languages);
 