[api] adding libpostal_place_languages method to the public API for classifying languages consistently from components (callers may need to make several calls using the same languages and don't necessarily want the language classifier to be run on house numbers when the languages are already known from e.g. the street name). This provides a simple window into the language classifier focused on the entire address/record.

This commit is contained in:
Al
2017-12-29 03:32:41 -05:00
parent 1d1ce10fad
commit 1f1412c120
2 changed files with 21 additions and 0 deletions

View File

@@ -13,6 +13,7 @@
#include "language_classifier.h"
#include "near_dupe.h"
#include "normalize.h"
#include "place.h"
#include "scanner.h"
#include "string_utils.h"
#include "token_types.h"
@@ -94,6 +95,19 @@ char **libpostal_near_dupe_hashes_languages(size_t num_components, char **labels
return cstring_array_to_strings(strings);
}
/*
 * Classify the language(s) of a place/address described by parallel
 * label/value component arrays.
 *
 * num_components: number of entries in labels and values
 * labels:         component labels, passed through to place_languages
 * values:         component values, passed through to place_languages
 * num_languages:  out-parameter; receives the number of entries in the
 *                 returned array (0 when no response is available)
 *
 * Returns the language array taken from the classifier response, or NULL
 * when place_languages produced no response. The array is detached from the
 * response before the response is destroyed, so it stays valid after this
 * call returns; the caller is presumably responsible for releasing it
 * (confirm against language_classifier_response_destroy).
 */
char **libpostal_place_languages(size_t num_components, char **labels, char **values, size_t *num_languages) {
    language_classifier_response_t *lang_response = place_languages(num_components, labels, values);
    if (lang_response == NULL) {
        // No classification result: report an empty result instead of dereferencing NULL.
        *num_languages = 0;
        return NULL;
    }

    // Steal the languages array so that destroying the response does not touch it.
    char **languages = lang_response->languages;
    lang_response->languages = NULL;
    *num_languages = lang_response->num_languages;
    lang_response->num_languages = 0;

    language_classifier_response_destroy(lang_response);
    return languages;
}
void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) {
if (self == NULL) return;

View File

@@ -172,6 +172,9 @@ LIBPOSTAL_EXPORT bool libpostal_parser_print_features(bool print_features);
Deduping
*/
// Near-dupe hashing methods
typedef struct libpostal_near_dupe_hash_options {
bool with_name;
bool with_address;
@@ -193,6 +196,10 @@ LIBPOSTAL_EXPORT libpostal_near_dupe_hash_options_t libpostal_near_dupe_hash_def
LIBPOSTAL_EXPORT char **libpostal_near_dupe_hashes(size_t num_components, char **labels, char **values, libpostal_near_dupe_hash_options_t options, size_t *num_hashes);
LIBPOSTAL_EXPORT char **libpostal_near_dupe_hashes_languages(size_t num_components, char **labels, char **values, libpostal_near_dupe_hash_options_t options, size_t num_languages, char **languages, size_t *num_hashes);
// Dupe language classification
//
// Runs the place-level language classifier over the given components
// (parallel labels/values arrays of length num_components) and returns the
// resulting array of languages, writing its length to *num_languages.
// The returned array is detached from the internal classifier response
// before that response is destroyed, so it remains valid after the call;
// presumably the caller must free it - confirm against the implementation
// of language_classifier_response_destroy.
LIBPOSTAL_EXPORT char **libpostal_place_languages(size_t num_components, char **labels, char **values, size_t *num_languages);
// Setup/teardown methods
LIBPOSTAL_EXPORT bool libpostal_setup(void);