Merge pull request #154 from openvenues/setup_datadir_functions
Setup datadir functions
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
# -*- Autoconf -*-
|
# -*- Autoconf -*-
|
||||||
# Process this file with autoconf to produce a configure script.
|
# Process this file with autoconf to produce a configure script.
|
||||||
|
|
||||||
AC_INIT([libpostal], [0.3])
|
AC_INIT([libpostal], [0.3.3])
|
||||||
|
|
||||||
AM_INIT_AUTOMAKE([foreign subdir-objects])
|
AM_INIT_AUTOMAKE([foreign subdir-objects])
|
||||||
AC_CONFIG_SRCDIR([src])
|
AC_CONFIG_SRCDIR([src])
|
||||||
|
|||||||
@@ -21,7 +21,8 @@
|
|||||||
|
|
||||||
#define ALL_LANGUAGES "all"
|
#define ALL_LANGUAGES "all"
|
||||||
|
|
||||||
#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_expansions" PATH_SEPARATOR "address_dictionary.dat"
|
#define ADDRESS_DICTIONARY_DATA_FILE "address_dictionary.dat"
|
||||||
|
#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR PATH_SEPARATOR ADDRESS_DICTIONARY_DATA_FILE
|
||||||
|
|
||||||
#define NULL_CANONICAL_INDEX -1
|
#define NULL_CANONICAL_INDEX -1
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ void feature_array_add(cstring_array *features, size_t count, ...) {
|
|||||||
cstring_array_start_token(features);
|
cstring_array_start_token(features);
|
||||||
|
|
||||||
bool strip_separator = true;
|
bool strip_separator = true;
|
||||||
char_array_append_vjoined(features->str, FEATURE_SEPARATOR_CHAR, strip_separator, count, args);
|
char_array_add_vjoined(features->str, FEATURE_SEPARATOR_CHAR, strip_separator, count, args);
|
||||||
va_end(args);
|
va_end(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -36,6 +36,21 @@ bool is_relative_path(struct dirent *ent) {
|
|||||||
return strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0;
|
return strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char *path_vjoin(int n, va_list args) {
|
||||||
|
char_array *path = char_array_new();
|
||||||
|
if (path == NULL) return NULL;
|
||||||
|
char_array_add_vjoined(path, PATH_SEPARATOR, true, n, args);
|
||||||
|
return char_array_to_string(path);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Variadic front end for path_vjoin(): gathers the n trailing arguments
 * into a va_list and returns whatever path_vjoin() produces for them.
 */
char *path_join(int n, ...) {
    va_list components;
    char *joined;

    va_start(components, n);
    joined = path_vjoin(n, components);
    va_end(components);

    return joined;
}
|
||||||
|
|
||||||
inline uint64_t file_deserialize_uint64(unsigned char *buf) {
|
inline uint64_t file_deserialize_uint64(unsigned char *buf) {
|
||||||
return ((uint64_t)buf[0] << 56) |
|
return ((uint64_t)buf[0] << 56) |
|
||||||
((uint64_t)buf[1] << 48) |
|
((uint64_t)buf[1] << 48) |
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
#include "libpostal_config.h"
|
#include "libpostal_config.h"
|
||||||
|
#include "string_utils.h"
|
||||||
|
|
||||||
#ifdef HAVE_DIRENT_H
|
#ifdef HAVE_DIRENT_H
|
||||||
#include <dirent.h>
|
#include <dirent.h>
|
||||||
@@ -55,6 +56,9 @@ char *file_getline(FILE * f);
|
|||||||
|
|
||||||
bool is_relative_path(struct dirent *ent);
|
bool is_relative_path(struct dirent *ent);
|
||||||
|
|
||||||
|
char *path_join(int n, ...);
|
||||||
|
char *path_vjoin(int n, va_list args);
|
||||||
|
|
||||||
uint64_t file_deserialize_uint64(unsigned char *buf);
|
uint64_t file_deserialize_uint64(unsigned char *buf);
|
||||||
bool file_read_uint64(FILE *file, uint64_t *value);
|
bool file_read_uint64(FILE *file, uint64_t *value);
|
||||||
bool file_write_uint64(FILE *file, uint64_t value);
|
bool file_write_uint64(FILE *file, uint64_t value);
|
||||||
|
|||||||
107
src/libpostal.c
107
src/libpostal.c
@@ -1054,45 +1054,106 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t
|
|||||||
return parsed;
|
return parsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool libpostal_setup_datadir(char *datadir) {
|
||||||
|
char *transliteration_path = NULL;
|
||||||
|
char *numex_path = NULL;
|
||||||
|
char *address_dictionary_path = NULL;
|
||||||
|
|
||||||
|
if (datadir != NULL) {
|
||||||
|
transliteration_path = path_join(3, datadir, LIBPOSTAL_TRANSLITERATION_SUBDIR, TRANSLITERATION_DATA_FILE);
|
||||||
|
numex_path = path_join(3, datadir, LIBPOSTAL_NUMEX_SUBDIR, NUMEX_DATA_FILE);
|
||||||
|
address_dictionary_path = path_join(3, datadir, LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR, ADDRESS_DICTIONARY_DATA_FILE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!transliteration_module_setup(transliteration_path)) {
|
||||||
|
log_error("Error loading transliteration module, dir=%s\n", transliteration_path);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!numex_module_setup(numex_path)) {
|
||||||
|
log_error("Error loading numex module, dir=%s\n", numex_path);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!address_dictionary_module_setup(address_dictionary_path)) {
|
||||||
|
log_error("Error loading dictionary module, dir=%s\n", address_dictionary_path);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (transliteration_path != NULL) {
|
||||||
|
free(transliteration_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (numex_path != NULL) {
|
||||||
|
free(numex_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (address_dictionary_path != NULL) {
|
||||||
|
free(address_dictionary_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool libpostal_setup(void) {
|
bool libpostal_setup(void) {
|
||||||
if (!transliteration_module_setup(NULL)) {
|
return libpostal_setup_datadir(NULL);
|
||||||
log_error("Error loading transliteration module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
|
}
|
||||||
|
|
||||||
|
bool libpostal_setup_language_classifier_datadir(char *datadir) {
|
||||||
|
char *language_classifier_dir = NULL;
|
||||||
|
|
||||||
|
if (datadir != NULL) {
|
||||||
|
language_classifier_dir = path_join(2, datadir, LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!language_classifier_module_setup(language_classifier_dir)) {
|
||||||
|
log_error("Error loading language classifier, dir=%s\n", language_classifier_dir);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!numex_module_setup(NULL)) {
|
if (language_classifier_dir != NULL) {
|
||||||
log_error("Error loading numex module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
|
free(language_classifier_dir);
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!address_dictionary_module_setup(NULL)) {
|
|
||||||
log_error("Error loading dictionary module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool libpostal_setup_language_classifier(void) {
|
bool libpostal_setup_language_classifier(void) {
|
||||||
if (!language_classifier_module_setup(NULL)) {
|
return libpostal_setup_language_classifier_datadir(NULL);
|
||||||
log_error("Error loading language classifier, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
|
}
|
||||||
|
|
||||||
|
bool libpostal_setup_parser_datadir(char *datadir) {
|
||||||
|
char *parser_dir = NULL;
|
||||||
|
char *geodb_dir = NULL;
|
||||||
|
|
||||||
|
if (datadir != NULL) {
|
||||||
|
parser_dir = path_join(2, datadir, LIBPOSTAL_ADDRESS_PARSER_SUBDIR);
|
||||||
|
geodb_dir = path_join(2, datadir, LIBPOSTAL_GEODB_SUBDIR);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!geodb_module_setup(geodb_dir)) {
|
||||||
|
log_error("Error loading geodb module, dir=%s\n", geodb_dir);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!address_parser_module_setup(parser_dir)) {
|
||||||
|
log_error("Error loading address parser module, dir=%s\n", parser_dir);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parser_dir != NULL) {
|
||||||
|
free(parser_dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (geodb_dir != NULL) {
|
||||||
|
free(geodb_dir);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool libpostal_setup_parser(void) {
|
bool libpostal_setup_parser(void) {
|
||||||
if (!geodb_module_setup(NULL)) {
|
return libpostal_setup_parser_datadir(NULL);
|
||||||
log_error("Error loading geodb module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!address_parser_module_setup(NULL)) {
|
|
||||||
log_error("Error loading address parser module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void libpostal_teardown(void) {
|
void libpostal_teardown(void) {
|
||||||
|
|||||||
@@ -92,12 +92,15 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t
|
|||||||
// Setup/teardown methods
|
// Setup/teardown methods
|
||||||
|
|
||||||
bool libpostal_setup(void);
|
bool libpostal_setup(void);
|
||||||
|
bool libpostal_setup_datadir(char *datadir);
|
||||||
void libpostal_teardown(void);
|
void libpostal_teardown(void);
|
||||||
|
|
||||||
bool libpostal_setup_parser(void);
|
bool libpostal_setup_parser(void);
|
||||||
|
bool libpostal_setup_parser_datadir(char *datadir);
|
||||||
void libpostal_teardown_parser(void);
|
void libpostal_teardown_parser(void);
|
||||||
|
|
||||||
bool libpostal_setup_language_classifier(void);
|
bool libpostal_setup_language_classifier(void);
|
||||||
|
bool libpostal_setup_language_classifier_datadir(char *datadir);
|
||||||
void libpostal_teardown_language_classifier(void);
|
void libpostal_teardown_language_classifier(void);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
@@ -12,12 +12,20 @@
|
|||||||
#error LIBPOSTAL_DATA_DIR not defined!
|
#error LIBPOSTAL_DATA_DIR not defined!
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_parser"
|
#define LIBPOSTAL_ADDRESS_PARSER_SUBDIR "address_parser"
|
||||||
#define LIBPOSTAL_DICTIONARIES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "dictionaries"
|
#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_PARSER_SUBDIR
|
||||||
#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geonames"
|
#define LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR "address_expansions"
|
||||||
#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geodb"
|
#define LIBPOSTAL_ADDRESS_EXPANSIONS_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR
|
||||||
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "language_classifier"
|
#define LIBPOSTAL_GEONAMES_SUBDIR "geonames"
|
||||||
#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "transliteration"
|
#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEONAMES_SUBDIR
|
||||||
|
#define LIBPOSTAL_GEODB_SUBDIR "geodb"
|
||||||
|
#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEODB_SUBDIR
|
||||||
|
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR "language_classifier"
|
||||||
|
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR
|
||||||
|
#define LIBPOSTAL_NUMEX_SUBDIR "numex"
|
||||||
|
#define LIBPOSTAL_NUMEX_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_NUMEX_SUBDIR
|
||||||
|
#define LIBPOSTAL_TRANSLITERATION_SUBDIR "transliteration"
|
||||||
|
#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_TRANSLITERATION_SUBDIR
|
||||||
|
|
||||||
#define GEODB_BLOOM_FILTER_SIZE 100000000
|
#define GEODB_BLOOM_FILTER_SIZE 100000000
|
||||||
#define GEODB_BLOOM_FILTER_ERROR 0.001
|
#define GEODB_BLOOM_FILTER_ERROR 0.001
|
||||||
|
|||||||
@@ -20,7 +20,8 @@
|
|||||||
#include "trie.h"
|
#include "trie.h"
|
||||||
#include "trie_search.h"
|
#include "trie_search.h"
|
||||||
|
|
||||||
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR "numex.dat"
|
#define NUMEX_DATA_FILE "numex.dat"
|
||||||
|
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR NUMEX_DATA_FILE
|
||||||
|
|
||||||
#define LATIN_LANGUAGE_CODE "la"
|
#define LATIN_LANGUAGE_CODE "la"
|
||||||
|
|
||||||
|
|||||||
@@ -595,7 +595,7 @@ inline void char_array_add_len(char_array *array, char *str, size_t len) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void char_array_append_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args) {
|
void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args) {
|
||||||
if (count <= 0) {
|
if (count <= 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -625,7 +625,7 @@ void char_array_append_vjoined(char_array *array, char *separator, bool strip_se
|
|||||||
inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) {
|
inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) {
|
||||||
va_list args;
|
va_list args;
|
||||||
va_start(args, count);
|
va_start(args, count);
|
||||||
char_array_append_vjoined(array, separator, strip_separator, count, args);
|
char_array_add_vjoined(array, separator, strip_separator, count, args);
|
||||||
va_end(args);
|
va_end(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -633,7 +633,7 @@ inline void char_array_cat_joined(char_array *array, char *separator, bool strip
|
|||||||
char_array_strip_nul_byte(array);
|
char_array_strip_nul_byte(array);
|
||||||
va_list args;
|
va_list args;
|
||||||
va_start(args, count);
|
va_start(args, count);
|
||||||
char_array_append_vjoined(array, separator, strip_separator, count, args);
|
char_array_add_vjoined(array, separator, strip_separator, count, args);
|
||||||
va_end(args);
|
va_end(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -136,7 +136,7 @@ void char_array_cat_vprintf(char_array *array, char *format, va_list args);
|
|||||||
void char_array_cat_printf(char_array *array, char *format, ...);
|
void char_array_cat_printf(char_array *array, char *format, ...);
|
||||||
|
|
||||||
// Mainly for paths or delimited strings
|
// Mainly for paths or delimited strings
|
||||||
void char_array_append_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
|
void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
|
||||||
void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
||||||
void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,8 @@
|
|||||||
|
|
||||||
#define LATIN_ASCII "latin-ascii"
|
#define LATIN_ASCII "latin-ascii"
|
||||||
|
|
||||||
#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR "transliteration.dat"
|
#define TRANSLITERATION_DATA_FILE "transliteration.dat"
|
||||||
|
#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR TRANSLITERATION_DATA_FILE
|
||||||
|
|
||||||
#define MAX_TRANS_NAME_LEN 100
|
#define MAX_TRANS_NAME_LEN 100
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user