[api] The libpostal expand API now detects the language automatically, when the caller does not specify any languages, using a high-accuracy language classifier trained on OSM streets/addresses/toponyms. Hooray batch geocoding!

This commit is contained in:
Al
2016-01-27 03:20:55 -05:00
parent 71c51f2e45
commit 42d169feee
2 changed files with 30 additions and 0 deletions

View File

@@ -11,6 +11,7 @@
#include "collections.h"
#include "constants.h"
#include "geodb.h"
#include "language_classifier.h"
#include "numex.h"
#include "normalize.h"
#include "scanner.h"
@@ -811,6 +812,16 @@ char **expand_address(char *input, normalize_options_t options, size_t *n) {
size_t len = strlen(input);
language_classifier_response_t *lang_response = NULL;
if (options.num_languages == 0) {
lang_response = classify_languages(input);
if (lang_response != NULL) {
options.num_languages = lang_response->num_languages;
options.languages = lang_response->languages;
}
}
string_tree_t *tree = normalize_string_languages(input, normalize_string_options, options.num_languages, options.languages);
cstring_array *strings = cstring_array_new_size(len * 2);
@@ -860,6 +871,10 @@ char **expand_address(char *input, normalize_options_t options, size_t *n) {
kh_destroy(str_set, unique_strings);
if (lang_response != NULL) {
language_classifier_response_destroy(lang_response);
}
char_array_destroy(temp_string);
string_tree_destroy(tree);
@@ -930,6 +945,14 @@ bool libpostal_setup(void) {
return true;
}
/*
 * Sets up the language classifier module.
 *
 * Calls language_classifier_module_setup(NULL) and reports the outcome.
 * NOTE(review): NULL presumably selects the module's default model
 * location -- confirm against language_classifier.h.
 *
 * Returns true when the module was set up, false (after logging an
 * error) when setup failed.
 */
bool libpostal_setup_language_classifier(void) {
    bool classifier_loaded = language_classifier_module_setup(NULL);

    if (classifier_loaded) {
        return true;
    }

    /* Setup failed: log it and let the caller decide how to proceed. */
    log_error("Error loading language classifier\n");
    return false;
}
bool libpostal_setup_parser(void) {
if (!geodb_module_setup(NULL)) {
log_error("Error loading geodb module\n");
@@ -952,6 +975,10 @@ void libpostal_teardown(void) {
address_dictionary_module_teardown();
}
/*
 * Tears down the language classifier by delegating to
 * language_classifier_module_teardown(). Counterpart of
 * libpostal_setup_language_classifier(); takes no arguments and
 * returns nothing.
 */
void libpostal_teardown_language_classifier(void) {
language_classifier_module_teardown();
}
void libpostal_teardown_parser(void) {
geodb_module_teardown();
address_parser_module_teardown();

View File

@@ -123,6 +123,9 @@ void libpostal_teardown(void);
bool libpostal_setup_parser(void);
void libpostal_teardown_parser(void);
/* Sets up the language classifier module. Returns true on success,
 * false if the module could not be loaded. */
bool libpostal_setup_language_classifier(void);
/* Tears down the language classifier module. */
void libpostal_teardown_language_classifier(void);
#ifdef __cplusplus
}
#endif