[build] adding libpostal_setup_datadir, libpostal_setup_parser_datadir, libpostal_setup_language_classifier_datadir functions for configuring the datadir at runtime
This commit is contained in:
@@ -21,7 +21,8 @@
|
||||
|
||||
#define ALL_LANGUAGES "all"
|
||||
|
||||
#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_expansions" PATH_SEPARATOR "address_dictionary.dat"
|
||||
#define ADDRESS_DICTIONARY_DATA_FILE "address_dictionary.dat"
|
||||
#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR PATH_SEPARATOR ADDRESS_DICTIONARY_DATA_FILE
|
||||
|
||||
#define NULL_CANONICAL_INDEX -1
|
||||
|
||||
|
||||
@@ -36,6 +36,21 @@ bool is_relative_path(struct dirent *ent) {
|
||||
return strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0;
|
||||
}
|
||||
|
||||
char *path_vjoin(int n, va_list args) {
|
||||
char_array *path = char_array_new();
|
||||
if (path == NULL) return NULL;
|
||||
char_array_add_vjoined(path, PATH_SEPARATOR, true, n, args);
|
||||
return char_array_to_string(path);
|
||||
}
|
||||
|
||||
/*
 * Variadic front end for path_vjoin: the n arguments following the count
 * are forwarded as a va_list and the joined string (or NULL, whatever
 * path_vjoin produced) is handed back to the caller.
 */
char *path_join(int n, ...) {
    char *joined;
    va_list components;

    va_start(components, n);
    joined = path_vjoin(n, components);
    va_end(components);

    return joined;
}
|
||||
|
||||
inline uint64_t file_deserialize_uint64(unsigned char *buf) {
|
||||
return ((uint64_t)buf[0] << 56) |
|
||||
((uint64_t)buf[1] << 48) |
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "libpostal_config.h"
|
||||
#include "string_utils.h"
|
||||
|
||||
#ifdef HAVE_DIRENT_H
|
||||
#include <dirent.h>
|
||||
@@ -55,6 +56,9 @@ char *file_getline(FILE * f);
|
||||
|
||||
bool is_relative_path(struct dirent *ent);
|
||||
|
||||
char *path_join(int n, ...);
|
||||
char *path_vjoin(int n, va_list args);
|
||||
|
||||
uint64_t file_deserialize_uint64(unsigned char *buf);
|
||||
bool file_read_uint64(FILE *file, uint64_t *value);
|
||||
bool file_write_uint64(FILE *file, uint64_t value);
|
||||
|
||||
107
src/libpostal.c
107
src/libpostal.c
@@ -1054,45 +1054,106 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t
|
||||
return parsed;
|
||||
}
|
||||
|
||||
bool libpostal_setup_datadir(char *datadir) {
|
||||
char *transliteration_path = NULL;
|
||||
char *numex_path = NULL;
|
||||
char *address_dictionary_path = NULL;
|
||||
|
||||
if (datadir != NULL) {
|
||||
transliteration_path = path_join(3, datadir, LIBPOSTAL_TRANSLITERATION_SUBDIR, TRANSLITERATION_DATA_FILE);
|
||||
numex_path = path_join(3, datadir, LIBPOSTAL_NUMEX_SUBDIR, NUMEX_DATA_FILE);
|
||||
address_dictionary_path = path_join(3, datadir, LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR, ADDRESS_DICTIONARY_DATA_FILE);
|
||||
}
|
||||
|
||||
if (!transliteration_module_setup(transliteration_path)) {
|
||||
log_error("Error loading transliteration module, dir=%s\n", transliteration_path);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!numex_module_setup(numex_path)) {
|
||||
log_error("Error loading numex module, dir=%s\n", numex_path);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!address_dictionary_module_setup(address_dictionary_path)) {
|
||||
log_error("Error loading dictionary module, dir=%s\n", address_dictionary_path);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (transliteration_path != NULL) {
|
||||
free(transliteration_path);
|
||||
}
|
||||
|
||||
if (numex_path != NULL) {
|
||||
free(numex_path);
|
||||
}
|
||||
|
||||
if (address_dictionary_path != NULL) {
|
||||
free(address_dictionary_path);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Set up the core modules without an explicit data directory.
 *
 * Delegates to libpostal_setup_datadir with a NULL datadir, so every module
 * setup function is called with a NULL path. Returns that call's result.
 */
bool libpostal_setup(void) {
    return libpostal_setup_datadir(NULL);
}
|
||||
|
||||
bool libpostal_setup_language_classifier_datadir(char *datadir) {
|
||||
char *language_classifier_dir = NULL;
|
||||
|
||||
if (datadir != NULL) {
|
||||
language_classifier_dir = path_join(2, datadir, LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR);
|
||||
}
|
||||
|
||||
if (!language_classifier_module_setup(language_classifier_dir)) {
|
||||
log_error("Error loading language classifier, dir=%s\n", language_classifier_dir);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!numex_module_setup(NULL)) {
|
||||
log_error("Error loading numex module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!address_dictionary_module_setup(NULL)) {
|
||||
log_error("Error loading dictionary module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
|
||||
return false;
|
||||
if (language_classifier_dir != NULL) {
|
||||
free(language_classifier_dir);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Set up the language classifier without an explicit data directory.
 *
 * Delegates to libpostal_setup_language_classifier_datadir with a NULL
 * datadir and returns that call's result.
 */
bool libpostal_setup_language_classifier(void) {
    return libpostal_setup_language_classifier_datadir(NULL);
}
|
||||
|
||||
bool libpostal_setup_parser_datadir(char *datadir) {
|
||||
char *parser_dir = NULL;
|
||||
char *geodb_dir = NULL;
|
||||
|
||||
if (datadir != NULL) {
|
||||
parser_dir = path_join(2, datadir, LIBPOSTAL_ADDRESS_PARSER_SUBDIR);
|
||||
geodb_dir = path_join(2, datadir, LIBPOSTAL_GEODB_SUBDIR);
|
||||
}
|
||||
|
||||
if (!geodb_module_setup(geodb_dir)) {
|
||||
log_error("Error loading geodb module, dir=%s\n", geodb_dir);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!address_parser_module_setup(parser_dir)) {
|
||||
log_error("Error loading address parser module, dir=%s\n", parser_dir);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (parser_dir != NULL) {
|
||||
free(parser_dir);
|
||||
}
|
||||
|
||||
if (geodb_dir != NULL) {
|
||||
free(geodb_dir);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Set up the geodb and address parser modules without an explicit data
 * directory.
 *
 * Delegates to libpostal_setup_parser_datadir with a NULL datadir, which
 * calls geodb_module_setup(NULL) and then address_parser_module_setup(NULL),
 * and returns that call's result.
 */
bool libpostal_setup_parser(void) {
    return libpostal_setup_parser_datadir(NULL);
}
|
||||
|
||||
void libpostal_teardown(void) {
|
||||
|
||||
@@ -92,12 +92,15 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t
|
||||
// Setup/teardown methods
|
||||
|
||||
bool libpostal_setup(void);
|
||||
bool libpostal_setup_datadir(char *datadir);
|
||||
void libpostal_teardown(void);
|
||||
|
||||
bool libpostal_setup_parser(void);
|
||||
bool libpostal_setup_parser_datadir(char *datadir);
|
||||
void libpostal_teardown_parser(void);
|
||||
|
||||
bool libpostal_setup_language_classifier(void);
|
||||
bool libpostal_setup_language_classifier_datadir(char *datadir);
|
||||
void libpostal_teardown_language_classifier(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -12,12 +12,20 @@
|
||||
#error LIBPOSTAL_DATA_DIR not defined!
|
||||
#endif
|
||||
|
||||
#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_parser"
|
||||
#define LIBPOSTAL_DICTIONARIES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "dictionaries"
|
||||
#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geonames"
|
||||
#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geodb"
|
||||
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "language_classifier"
|
||||
#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "transliteration"
|
||||
#define LIBPOSTAL_ADDRESS_PARSER_SUBDIR "address_parser"
|
||||
#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_PARSER_SUBDIR
|
||||
#define LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR "address_expansions"
|
||||
#define LIBPOSTAL_ADDRESS_EXPANSIONS_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR
|
||||
#define LIBPOSTAL_GEONAMES_SUBDIR "geonames"
|
||||
#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEONAMES_SUBDIR
|
||||
#define LIBPOSTAL_GEODB_SUBDIR "geodb"
|
||||
#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEODB_SUBDIR
|
||||
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR "language_classifier"
|
||||
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR
|
||||
#define LIBPOSTAL_NUMEX_SUBDIR "numex"
|
||||
#define LIBPOSTAL_NUMEX_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_NUMEX_SUBDIR
|
||||
#define LIBPOSTAL_TRANSLITERATION_SUBDIR "transliteration"
|
||||
#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_TRANSLITERATION_SUBDIR
|
||||
|
||||
#define GEODB_BLOOM_FILTER_SIZE 100000000
|
||||
#define GEODB_BLOOM_FILTER_ERROR 0.001
|
||||
|
||||
@@ -20,7 +20,8 @@
|
||||
#include "trie.h"
|
||||
#include "trie_search.h"
|
||||
|
||||
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR "numex.dat"
|
||||
#define NUMEX_DATA_FILE "numex.dat"
|
||||
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR NUMEX_DATA_FILE
|
||||
|
||||
#define LATIN_LANGUAGE_CODE "la"
|
||||
|
||||
|
||||
@@ -15,7 +15,8 @@
|
||||
|
||||
#define LATIN_ASCII "latin-ascii"
|
||||
|
||||
#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR "transliteration.dat"
|
||||
#define TRANSLITERATION_DATA_FILE "transliteration.dat"
|
||||
#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR TRANSLITERATION_DATA_FILE
|
||||
|
||||
#define MAX_TRANS_NAME_LEN 100
|
||||
|
||||
|
||||
Reference in New Issue
Block a user