[merge] merging master

This commit is contained in:
Al
2017-01-13 19:58:49 -05:00
27 changed files with 181 additions and 55 deletions

View File

@@ -148,6 +148,20 @@ int main(int argc, char **argv) {
} }
``` ```
Parser labels
-------------
The address parser can use any string labels that are defined in the training data, but these are the default labels, based on the fields defined in [OpenCage's address-formatting library](https://github.com/OpenCageData/address-formatting):
- **house**: venue name e.g. "Brooklyn Academy of Music", and building names e.g. "Empire State Building"
- **house_number**: usually refers to the external (street-facing) building number. In some countries this may be a compound, hyphenated number which also includes an apartment number, or a block number (a la Japan), but libpostal will just call it the house_number for simplicity.
- **road**: street name(s)
- **suburb**: usually an unofficial neighborhood name like "Harlem", "South Bronx", or "Crown Heights"
- **city_district**: these are usually boroughs or districts within a city that serve some official purpose e.g. "Brooklyn" or "Hackney" or "Bratislava IV"
- **city**: any human settlement including cities, towns, villages, hamlets, localities, etc.
- **state_district**: usually a second-level administrative division or county.
- **state**: a first-level administrative division. Scotland, Northern Ireland, Wales, and England in the UK are mapped to "state" as well (convention used in OSM, GeoPlanet, etc.)
- **country**: sovereign nations and their dependent territories, anything with an [ISO-3166 code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2).
Examples of normalization Examples of normalization
------------------------- -------------------------

View File

@@ -1,7 +1,7 @@
# -*- Autoconf -*- # -*- Autoconf -*-
# Process this file with autoconf to produce a configure script. # Process this file with autoconf to produce a configure script.
AC_INIT([libpostal], [0.3]) AC_INIT([libpostal], [0.3.3])
AC_CONFIG_MACRO_DIRS([m4]) AC_CONFIG_MACRO_DIRS([m4])
@@ -47,10 +47,8 @@ AC_TYPE_UINT8_T
AC_CHECK_TYPES([ptrdiff_t]) AC_CHECK_TYPES([ptrdiff_t])
# Checks for library functions. # Checks for library functions.
AC_FUNC_MALLOC
AC_FUNC_MMAP AC_FUNC_MMAP
AC_FUNC_REALLOC AC_CHECK_FUNCS([malloc realloc getcwd gettimeofday memmove memset munmap regcomp setlocale sqrt strdup strndup])
AC_CHECK_FUNCS([getcwd gettimeofday memmove memset munmap regcomp setlocale sqrt strdup strndup])
AC_CONFIG_FILES([Makefile AC_CONFIG_FILES([Makefile
libpostal.pc libpostal.pc
@@ -88,4 +86,18 @@ AC_ARG_ENABLE([data-download],
AM_CONDITIONAL([DOWNLOAD_DATA], [test "x$DOWNLOAD_DATA" = "xtrue"]) AM_CONDITIONAL([DOWNLOAD_DATA], [test "x$DOWNLOAD_DATA" = "xtrue"])
# Optional extra compiler flags for scanner.c, supplied with
# --with-cflags-scanner-extra=VALUE. The flags stay empty when the option is
# omitted, when it is disabled (value "no"), or when it is given without a
# value: Autoconf then sets withval to "yes", which is not a compiler flag.
AC_ARG_WITH(cflags-scanner-extra, [AS_HELP_STRING([--with-cflags-scanner-extra@<:@=VALUE@:>@], [Extra compilation options for scanner.c])],
[
    if test "x$withval" = "xno" || test "x$withval" = "xyes"; then
        CFLAGS_SCANNER_EXTRA=""
    else
        CFLAGS_SCANNER_EXTRA="$withval"
    fi
],
[ CFLAGS_SCANNER_EXTRA="" ]
)
AC_MSG_NOTICE([extra cflags for scanner.c: $CFLAGS_SCANNER_EXTRA])
AC_SUBST(CFLAGS_SCANNER_EXTRA)
AC_OUTPUT AC_OUTPUT

View File

@@ -21,9 +21,11 @@ libpostal_la_CFLAGS = $(CFLAGS_O2)
dist_bin_SCRIPTS = libpostal_data dist_bin_SCRIPTS = libpostal_data
# Scanner can take a very long time to compile with higher optimization levels, so always use -O0, scanner is fast enough # Scanner can take a very long time to compile with higher optimization levels, so always use -O0, scanner is fast enough
# When cross-compiling for ARM with gcc-4.7, compilation can fail with "out of range" errors that are fixed by adding
# the -marm option. CFLAGS_SCANNER_EXTRA exists for this purpose and can be set at configure time (see ./configure --help).
noinst_LTLIBRARIES = libscanner.la noinst_LTLIBRARIES = libscanner.la
libscanner_la_SOURCES = scanner.c libscanner_la_SOURCES = scanner.c
libscanner_la_CFLAGS = $(CFLAGS_O0) libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA)
noinst_PROGRAMS = libpostal bench build_address_dictionary build_geodb build_numex_table build_trans_table address_parser_train address_parser_test address_parser language_classifier_train language_classifier language_classifier_test noinst_PROGRAMS = libpostal bench build_address_dictionary build_geodb build_numex_table build_trans_table address_parser_train address_parser_test address_parser language_classifier_train language_classifier language_classifier_test
libpostal_SOURCES = main.c json_encode.c libpostal_SOURCES = main.c json_encode.c

View File

@@ -288,7 +288,7 @@ phrase_t search_address_dictionaries_suffix(char *str, size_t len, char *lang) {
bool address_dictionary_init(void) { bool address_dictionary_init(void) {
if (address_dict != NULL) return false; if (address_dict != NULL) return false;
address_dict = malloc(sizeof(address_dictionary_t)); address_dict = calloc(1, sizeof(address_dictionary_t));
if (address_dict == NULL) return false; if (address_dict == NULL) return false;
address_dict->canonical = cstring_array_new(); address_dict->canonical = cstring_array_new();

View File

@@ -21,7 +21,8 @@
#define ALL_LANGUAGES "all" #define ALL_LANGUAGES "all"
#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_expansions" PATH_SEPARATOR "address_dictionary.dat" #define ADDRESS_DICTIONARY_DATA_FILE "address_dictionary.dat"
#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR PATH_SEPARATOR ADDRESS_DICTIONARY_DATA_FILE
#define NULL_CANONICAL_INDEX -1 #define NULL_CANONICAL_INDEX -1

View File

@@ -25,14 +25,13 @@ typedef enum {
ADDRESS_PARSER_SUFFIX_PHRASE ADDRESS_PARSER_SUFFIX_PHRASE
} address_parser_phrase_type_t; } address_parser_phrase_type_t;
static parser_options_t PARSER_DEFAULT_OPTIONS = { static parser_options_t PARSER_DEFAULT_OPTIONS = {
.rare_word_threshold = DEFAULT_RARE_WORD_THRESHOLD, .rare_word_threshold = DEFAULT_RARE_WORD_THRESHOLD,
.print_features = false .print_features = false
}; };
address_parser_t *address_parser_new_options(parser_options_t options) { address_parser_t *address_parser_new_options(parser_options_t options) {
address_parser_t *parser = malloc(sizeof(address_parser_t)); address_parser_t *parser = calloc(1, sizeof(address_parser_t));
parser->options = options; parser->options = options;
return parser; return parser;
} }

View File

@@ -93,7 +93,7 @@ averaged_perceptron_t *averaged_perceptron_read(FILE *f) {
return NULL; return NULL;
} }
averaged_perceptron_t *perceptron = malloc(sizeof(averaged_perceptron_t)); averaged_perceptron_t *perceptron = calloc(1, sizeof(averaged_perceptron_t));
if (!file_read_uint32(f, &perceptron->num_features) || if (!file_read_uint32(f, &perceptron->num_features) ||
!file_read_uint32(f, &perceptron->num_classes) || !file_read_uint32(f, &perceptron->num_classes) ||
@@ -216,4 +216,4 @@ void averaged_perceptron_destroy(averaged_perceptron_t *self) {
} }
free(self); free(self);
} }

View File

@@ -386,7 +386,7 @@ bool averaged_perceptron_trainer_train_example(averaged_perceptron_trainer_t *se
} }
averaged_perceptron_trainer_t *averaged_perceptron_trainer_new(void) { averaged_perceptron_trainer_t *averaged_perceptron_trainer_new(void) {
averaged_perceptron_trainer_t *self = malloc(sizeof(averaged_perceptron_trainer_t)); averaged_perceptron_trainer_t *self = calloc(1, sizeof(averaged_perceptron_trainer_t));
if (self == NULL) return NULL; if (self == NULL) return NULL;

View File

@@ -56,7 +56,7 @@ int bloom_filter_add(bloom_filter_t *self, const char *key, size_t len) {
} }
bloom_filter_t *bloom_filter_new(uint64_t capacity, double error) { bloom_filter_t *bloom_filter_new(uint64_t capacity, double error) {
bloom_filter_t *bloom = malloc(sizeof(bloom_filter_t)); bloom_filter_t *bloom = calloc(1, sizeof(bloom_filter_t));
if (bloom == NULL) { if (bloom == NULL) {
return NULL; return NULL;
@@ -220,4 +220,4 @@ void bloom_filter_destroy(bloom_filter_t *self) {
} }
free(self); free(self);
} }

View File

@@ -36,6 +36,21 @@ bool is_relative_path(struct dirent *ent) {
return strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0; return strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0;
} }
/*
 * Builds a single string from n components supplied through a va_list,
 * joined with PATH_SEPARATOR via char_array_add_vjoined.
 *
 * Returns NULL if the working char_array cannot be created; otherwise
 * returns whatever char_array_to_string yields for the joined buffer.
 * Callers in this project release the result with free().
 */
char *path_vjoin(int n, va_list args) {
    char_array *joined = char_array_new();

    if (joined != NULL) {
        char_array_add_vjoined(joined, PATH_SEPARATOR, true, n, args);
        return char_array_to_string(joined);
    }

    return NULL;
}
/*
 * Variadic front end for path_vjoin: collects the n arguments that follow
 * `n` into a va_list and forwards them.
 *
 * Returns the string produced by path_vjoin (NULL if it returned NULL).
 */
char *path_join(int n, ...) {
    char *joined;
    va_list components;

    va_start(components, n);
    joined = path_vjoin(n, components);
    va_end(components);

    return joined;
}
inline uint64_t file_deserialize_uint64(unsigned char *buf) { inline uint64_t file_deserialize_uint64(unsigned char *buf) {
return ((uint64_t)buf[0] << 56) | return ((uint64_t)buf[0] << 56) |
((uint64_t)buf[1] << 48) | ((uint64_t)buf[1] << 48) |

View File

@@ -9,6 +9,7 @@
#include <sys/types.h> #include <sys/types.h>
#include "libpostal_config.h" #include "libpostal_config.h"
#include "string_utils.h"
#ifdef HAVE_DIRENT_H #ifdef HAVE_DIRENT_H
#include <dirent.h> #include <dirent.h>
@@ -55,6 +56,9 @@ char *file_getline(FILE * f);
bool is_relative_path(struct dirent *ent); bool is_relative_path(struct dirent *ent);
char *path_join(int n, ...);
char *path_vjoin(int n, va_list args);
uint64_t file_deserialize_uint64(unsigned char *buf); uint64_t file_deserialize_uint64(unsigned char *buf);
bool file_read_uint64(FILE *file, uint64_t *value); bool file_read_uint64(FILE *file, uint64_t *value);
bool file_write_uint64(FILE *file, uint64_t value); bool file_write_uint64(FILE *file, uint64_t value);

View File

@@ -47,7 +47,7 @@ void geodb_destroy(geodb_t *self) {
geodb_t *geodb_init(char *dir) { geodb_t *geodb_init(char *dir) {
if (dir == NULL) return NULL; if (dir == NULL) return NULL;
geodb_t *gdb = malloc(sizeof(geodb_t)); geodb_t *gdb = calloc(1, sizeof(geodb_t));
if (gdb == NULL) return NULL; if (gdb == NULL) return NULL;

View File

@@ -338,7 +338,7 @@ void geodb_builder_destroy(geodb_builder_t *self) {
} }
geodb_builder_t *geodb_builder_new(char *log_filename) { geodb_builder_t *geodb_builder_new(char *log_filename) {
geodb_builder_t *builder = malloc(sizeof(geodb_builder_t)); geodb_builder_t *builder = calloc(1, sizeof(geodb_builder_t));
if (builder == NULL) return NULL; if (builder == NULL) return NULL;

View File

@@ -1,7 +1,7 @@
#include "graph.h" #include "graph.h"
graph_t *graph_new_dims(graph_type_t type, uint32_t m, uint32_t n, size_t nnz, bool fixed_rows) { graph_t *graph_new_dims(graph_type_t type, uint32_t m, uint32_t n, size_t nnz, bool fixed_rows) {
graph_t *graph = malloc(sizeof(graph_t)); graph_t *graph = calloc(1, sizeof(graph_t));
graph->m = m; graph->m = m;
graph->fixed_rows = fixed_rows; graph->fixed_rows = fixed_rows;
graph->n = n; graph->n = n;

View File

@@ -35,7 +35,7 @@ void language_classifier_destroy(language_classifier_t *self) {
} }
language_classifier_t *language_classifier_new(void) { language_classifier_t *language_classifier_new(void) {
language_classifier_t *language_classifier = malloc(sizeof(language_classifier_t)); language_classifier_t *language_classifier = calloc(1, sizeof(language_classifier_t));
return language_classifier; return language_classifier;
} }

View File

@@ -1036,40 +1036,106 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t
return parsed; return parsed;
} }
bool libpostal_setup_datadir(char *datadir) {
char *transliteration_path = NULL;
char *numex_path = NULL;
char *address_dictionary_path = NULL;
if (datadir != NULL) {
transliteration_path = path_join(3, datadir, LIBPOSTAL_TRANSLITERATION_SUBDIR, TRANSLITERATION_DATA_FILE);
numex_path = path_join(3, datadir, LIBPOSTAL_NUMEX_SUBDIR, NUMEX_DATA_FILE);
address_dictionary_path = path_join(3, datadir, LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR, ADDRESS_DICTIONARY_DATA_FILE);
}
if (!transliteration_module_setup(transliteration_path)) {
log_error("Error loading transliteration module, dir=%s\n", transliteration_path);
return false;
}
if (!numex_module_setup(numex_path)) {
log_error("Error loading numex module, dir=%s\n", numex_path);
return false;
}
if (!address_dictionary_module_setup(address_dictionary_path)) {
log_error("Error loading dictionary module, dir=%s\n", address_dictionary_path);
return false;
}
if (transliteration_path != NULL) {
free(transliteration_path);
}
if (numex_path != NULL) {
free(numex_path);
}
if (address_dictionary_path != NULL) {
free(address_dictionary_path);
}
return true;
}
bool libpostal_setup(void) { bool libpostal_setup(void) {
if (!transliteration_module_setup(NULL)) { return libpostal_setup_datadir(NULL);
log_error("Error loading transliteration module\n"); }
bool libpostal_setup_language_classifier_datadir(char *datadir) {
char *language_classifier_dir = NULL;
if (datadir != NULL) {
language_classifier_dir = path_join(2, datadir, LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR);
}
if (!language_classifier_module_setup(language_classifier_dir)) {
log_error("Error loading language classifier, dir=%s\n", language_classifier_dir);
return false; return false;
} }
if (!numex_module_setup(NULL)) { if (language_classifier_dir != NULL) {
log_error("Error loading numex module\n"); free(language_classifier_dir);
return false;
}
if (!address_dictionary_module_setup(NULL)) {
log_error("Error loading dictionary module\n");
return false;
} }
return true; return true;
} }
bool libpostal_setup_language_classifier(void) { bool libpostal_setup_language_classifier(void) {
if (!language_classifier_module_setup(NULL)) { return libpostal_setup_language_classifier_datadir(NULL);
log_error("Error loading language classifier\n"); }
bool libpostal_setup_parser_datadir(char *datadir) {
char *parser_dir = NULL;
char *geodb_dir = NULL;
if (datadir != NULL) {
parser_dir = path_join(2, datadir, LIBPOSTAL_ADDRESS_PARSER_SUBDIR);
geodb_dir = path_join(2, datadir, LIBPOSTAL_GEODB_SUBDIR);
}
if (!geodb_module_setup(geodb_dir)) {
log_error("Error loading geodb module, dir=%s\n", geodb_dir);
return false; return false;
} }
if (!address_parser_module_setup(parser_dir)) {
log_error("Error loading address parser module, dir=%s\n", parser_dir);
return false;
}
if (parser_dir != NULL) {
free(parser_dir);
}
if (geodb_dir != NULL) {
free(geodb_dir);
}
return true; return true;
} }
bool libpostal_setup_parser(void) { bool libpostal_setup_parser(void) {
if (!address_parser_module_setup(NULL)) { return libpostal_setup_parser_datadir(NULL);
log_error("Error loading address parser module\n");
return false;
}
return true;
} }
void libpostal_teardown(void) { void libpostal_teardown(void) {

View File

@@ -92,12 +92,15 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t
// Setup/teardown methods // Setup/teardown methods
bool libpostal_setup(void); bool libpostal_setup(void);
bool libpostal_setup_datadir(char *datadir);
void libpostal_teardown(void); void libpostal_teardown(void);
bool libpostal_setup_parser(void); bool libpostal_setup_parser(void);
bool libpostal_setup_parser_datadir(char *datadir);
void libpostal_teardown_parser(void); void libpostal_teardown_parser(void);
bool libpostal_setup_language_classifier(void); bool libpostal_setup_language_classifier(void);
bool libpostal_setup_language_classifier_datadir(char *datadir);
void libpostal_teardown_language_classifier(void); void libpostal_teardown_language_classifier(void);
#ifdef __cplusplus #ifdef __cplusplus

View File

@@ -12,12 +12,20 @@
#error LIBPOSTAL_DATA_DIR not defined! #error LIBPOSTAL_DATA_DIR not defined!
#endif #endif
#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_parser" #define LIBPOSTAL_ADDRESS_PARSER_SUBDIR "address_parser"
#define LIBPOSTAL_DICTIONARIES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "dictionaries" #define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_PARSER_SUBDIR
#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geonames" #define LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR "address_expansions"
#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geodb" #define LIBPOSTAL_ADDRESS_EXPANSIONS_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "language_classifier" #define LIBPOSTAL_GEONAMES_SUBDIR "geonames"
#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "transliteration" #define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEONAMES_SUBDIR
#define LIBPOSTAL_GEODB_SUBDIR "geodb"
#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEODB_SUBDIR
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR "language_classifier"
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR
#define LIBPOSTAL_NUMEX_SUBDIR "numex"
#define LIBPOSTAL_NUMEX_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_NUMEX_SUBDIR
#define LIBPOSTAL_TRANSLITERATION_SUBDIR "transliteration"
#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_TRANSLITERATION_SUBDIR
#define GEODB_BLOOM_FILTER_SIZE 100000000 #define GEODB_BLOOM_FILTER_SIZE 100000000
#define GEODB_BLOOM_FILTER_ERROR 0.001 #define GEODB_BLOOM_FILTER_ERROR 0.001

View File

@@ -51,7 +51,7 @@ numex_table_t *numex_table_init(void) {
numex_table_t *numex_table = get_numex_table(); numex_table_t *numex_table = get_numex_table();
if (numex_table == NULL) { if (numex_table == NULL) {
numex_table = malloc(sizeof(numex_table_t)); numex_table = calloc(1, sizeof(numex_table_t));
if (numex_table == NULL) return NULL; if (numex_table == NULL) return NULL;

View File

@@ -20,7 +20,8 @@
#include "trie.h" #include "trie.h"
#include "trie_search.h" #include "trie_search.h"
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR "numex.dat" #define NUMEX_DATA_FILE "numex.dat"
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR NUMEX_DATA_FILE
#define LATIN_LANGUAGE_CODE "la" #define LATIN_LANGUAGE_CODE "la"

View File

@@ -2,7 +2,7 @@
#include "klib/ksort.h" #include "klib/ksort.h"
sparse_matrix_t *sparse_matrix_new_shape(size_t m, size_t n) { sparse_matrix_t *sparse_matrix_new_shape(size_t m, size_t n) {
sparse_matrix_t *matrix = malloc(sizeof(sparse_matrix_t)); sparse_matrix_t *matrix = calloc(1, sizeof(sparse_matrix_t));
if (matrix == NULL) return NULL; if (matrix == NULL) return NULL;
matrix->m = m; matrix->m = m;
matrix->n = n; matrix->n = n;

View File

@@ -782,16 +782,17 @@ cstring_array *cstring_array_new_size(size_t size) {
cstring_array *cstring_array_from_char_array(char_array *str) { cstring_array *cstring_array_from_char_array(char_array *str) {
cstring_array *array = malloc(sizeof(cstring_array)); cstring_array *array = malloc(sizeof(cstring_array));
if (array == NULL) return NULL; if (array == NULL || str == NULL) return NULL;
array->str = str; array->str = str;
array->indices = uint32_array_new_size(1); array->indices = uint32_array_new_size(1);
uint32_array_push(array->indices, 0); uint32_array_push(array->indices, 0);
char *ptr = str->a; char *ptr = str->a;
uint32_t i = 0; if (str->n > 0) {
for (i = 0; i < str->n - 1; i++, ptr++) { for (uint32_t i = 0; i < str->n - 1; i++, ptr++) {
if (*ptr == '\0') { if (*ptr == '\0') {
uint32_array_push(array->indices, i + 1); uint32_array_push(array->indices, i + 1);
}
} }
} }
return array; return array;

View File

@@ -141,7 +141,6 @@ void char_array_cat_vprintf(char_array *array, char *format, va_list args);
void char_array_cat_printf(char_array *array, char *format, ...); void char_array_cat_printf(char_array *array, char *format, ...);
// Mainly for paths or delimited strings // Mainly for paths or delimited strings
void char_array_append_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args); void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...); void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...); void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...);

View File

@@ -1087,7 +1087,7 @@ transliteration_table_t *transliteration_table_init(void) {
transliteration_table_t *trans_table = get_transliteration_table(); transliteration_table_t *trans_table = get_transliteration_table();
if (trans_table == NULL) { if (trans_table == NULL) {
trans_table = malloc(sizeof(transliteration_table_t)); trans_table = calloc(1, sizeof(transliteration_table_t));
trans_table->trie = trie_new(); trans_table->trie = trie_new();
if (trans_table->trie == NULL) { if (trans_table->trie == NULL) {

View File

@@ -17,7 +17,8 @@
#define LATIN_ASCII_SIMPLE "latin-ascii-simple" #define LATIN_ASCII_SIMPLE "latin-ascii-simple"
#define HTML_ESCAPE "html-escape" #define HTML_ESCAPE "html-escape"
#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR "transliteration.dat" #define TRANSLITERATION_DATA_FILE "transliteration.dat"
#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR TRANSLITERATION_DATA_FILE
#define MAX_TRANS_NAME_LEN 100 #define MAX_TRANS_NAME_LEN 100

View File

@@ -32,7 +32,7 @@ Constructors
*/ */
static trie_t *trie_new_empty(uint8_t *alphabet, uint32_t alphabet_size) { static trie_t *trie_new_empty(uint8_t *alphabet, uint32_t alphabet_size) {
trie_t *self = malloc(sizeof(trie_t)); trie_t *self = calloc(1, sizeof(trie_t));
if (!self) if (!self)
goto exit_no_malloc; goto exit_no_malloc;

View File

@@ -33,7 +33,7 @@ static inline void _aligned_free(void *p)
name *array = malloc(sizeof(name)); \ name *array = malloc(sizeof(name)); \
if (array == NULL) return NULL; \ if (array == NULL) return NULL; \
array->n = array->m = 0; \ array->n = array->m = 0; \
array->a = malloc(size * sizeof(type)); \ array->a = malloc((size > 0 ? size : 1) * sizeof(type)); \
if (array->a == NULL) return NULL; \ if (array->a == NULL) return NULL; \
array->m = size; \ array->m = size; \
return array; \ return array; \