diff --git a/README.md b/README.md index 4ad4009c..8f9792d4 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,20 @@ int main(int argc, char **argv) { } ``` +Parser labels +------------- + +The address parser can use any string labels that are defined in the training data, but these are the default labels, based on the fields defined in [OpenCage's address-formatting library](https://github.com/OpenCageData/address-formatting): + +- **house**: venue name e.g. "Brooklyn Academy of Music", and building names e.g. "Empire State Building" +- **house_number**: usually refers to the external (street-facing) building number. In some countries this may be a compound, hyphenated number which also includes an apartment number, or a block number (a la Japan), but libpostal will just call it the house_number for simplicity. +- **road**: street name(s) +- **suburb**: usually an unofficial neighborhood name like "Harlem", "South Bronx", or "Crown Heights" +- **city_district**: these are usually boroughs or districts within a city that serve some official purpose e.g. "Brooklyn" or "Hackney" or "Bratislava IV" +- **city**: any human settlement including cities, towns, villages, hamlets, localities, etc. +- **state_district**: usually a second-level administrative division or county. +- **state**: a first-level administrative division. Scotland, Northern Ireland, Wales, and England in the UK are mapped to "state" as well (convention used in OSM, GeoPlanet, etc.) +- **country**: sovereign nations and their dependent territories, anything with an [ISO-3166 code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2). Examples of normalization ------------------------- diff --git a/configure.ac b/configure.ac index 67809398..b790c546 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # -*- Autoconf -*- # Process this file with autoconf to produce a configure script. 
-AC_INIT([libpostal], [0.3]) +AC_INIT([libpostal], [0.3.3]) AC_CONFIG_MACRO_DIRS([m4]) @@ -47,10 +47,8 @@ AC_TYPE_UINT8_T AC_CHECK_TYPES([ptrdiff_t]) # Checks for library functions. -AC_FUNC_MALLOC AC_FUNC_MMAP -AC_FUNC_REALLOC -AC_CHECK_FUNCS([getcwd gettimeofday memmove memset munmap regcomp setlocale sqrt strdup strndup]) +AC_CHECK_FUNCS([malloc realloc getcwd gettimeofday memmove memset munmap regcomp setlocale sqrt strdup strndup]) AC_CONFIG_FILES([Makefile libpostal.pc @@ -88,4 +86,18 @@ AC_ARG_ENABLE([data-download], AM_CONDITIONAL([DOWNLOAD_DATA], [test "x$DOWNLOAD_DATA" = "xtrue"]) +AC_ARG_WITH(cflags-scanner-extra, [AS_HELP_STRING([--with-cflags-scanner-extra@<:@=VALUE@:>@], [Extra compilation options for scanner.c])], +[ + if test "x$withval" = "xno"; then + CFLAGS_SCANNER_EXTRA="" + else + CFLAGS_SCANNER_EXTRA="$withval" + fi +], +[ CFLAGS_SCANNER_EXTRA="" ] +) + +AC_MSG_NOTICE([extra cflags for scanner.c: $CFLAGS_SCANNER_EXTRA]) +AC_SUBST(CFLAGS_SCANNER_EXTRA) + AC_OUTPUT diff --git a/src/Makefile.am b/src/Makefile.am index bd730d5b..7223921a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -21,9 +21,11 @@ libpostal_la_CFLAGS = $(CFLAGS_O2) dist_bin_SCRIPTS = libpostal_data # Scanner can take a very long time to compile with higher optimization levels, so always use -O0, scanner is fast enough +# On cross-compilation for ARM using gcc-4.7, there are "out of range" errors during compilation that can be fixed by adding +# -marm option. For that, CFLAGS_SCANNER_EXTRA is provided that can be filled during configuration stage (see ./configure --help). 
noinst_LTLIBRARIES = libscanner.la libscanner_la_SOURCES = scanner.c -libscanner_la_CFLAGS = $(CFLAGS_O0) +libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA) noinst_PROGRAMS = libpostal bench build_address_dictionary build_geodb build_numex_table build_trans_table address_parser_train address_parser_test address_parser language_classifier_train language_classifier language_classifier_test libpostal_SOURCES = main.c json_encode.c diff --git a/src/address_dictionary.c b/src/address_dictionary.c index 0a6a2e71..a6df42a6 100644 --- a/src/address_dictionary.c +++ b/src/address_dictionary.c @@ -288,7 +288,7 @@ phrase_t search_address_dictionaries_suffix(char *str, size_t len, char *lang) { bool address_dictionary_init(void) { if (address_dict != NULL) return false; - address_dict = malloc(sizeof(address_dictionary_t)); + address_dict = calloc(1, sizeof(address_dictionary_t)); if (address_dict == NULL) return false; address_dict->canonical = cstring_array_new(); diff --git a/src/address_dictionary.h b/src/address_dictionary.h index 0cdbb0a3..cc5e8748 100644 --- a/src/address_dictionary.h +++ b/src/address_dictionary.h @@ -21,7 +21,8 @@ #define ALL_LANGUAGES "all" -#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_expansions" PATH_SEPARATOR "address_dictionary.dat" +#define ADDRESS_DICTIONARY_DATA_FILE "address_dictionary.dat" +#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR PATH_SEPARATOR ADDRESS_DICTIONARY_DATA_FILE #define NULL_CANONICAL_INDEX -1 diff --git a/src/address_parser.c b/src/address_parser.c index e97d5aa2..0b9b4d7c 100644 --- a/src/address_parser.c +++ b/src/address_parser.c @@ -25,14 +25,13 @@ typedef enum { ADDRESS_PARSER_SUFFIX_PHRASE } address_parser_phrase_type_t; - static parser_options_t PARSER_DEFAULT_OPTIONS = { .rare_word_threshold = DEFAULT_RARE_WORD_THRESHOLD, .print_features = false }; address_parser_t 
*address_parser_new_options(parser_options_t options) { - address_parser_t *parser = malloc(sizeof(address_parser_t)); - parser->options = options; + address_parser_t *parser = calloc(1, sizeof(address_parser_t)); + if (parser != NULL) parser->options = options; /* calloc can fail: never write through a NULL parser, the caller receives NULL instead */ return parser; } diff --git a/src/averaged_perceptron.c b/src/averaged_perceptron.c index a66470e4..59a58f2e 100644 --- a/src/averaged_perceptron.c +++ b/src/averaged_perceptron.c @@ -93,7 +93,7 @@ averaged_perceptron_t *averaged_perceptron_read(FILE *f) { return NULL; } - averaged_perceptron_t *perceptron = malloc(sizeof(averaged_perceptron_t)); + averaged_perceptron_t *perceptron = calloc(1, sizeof(averaged_perceptron_t)); if (!file_read_uint32(f, &perceptron->num_features) || !file_read_uint32(f, &perceptron->num_classes) || @@ -216,4 +216,4 @@ void averaged_perceptron_destroy(averaged_perceptron_t *self) { } free(self); -} \ No newline at end of file +} diff --git a/src/averaged_perceptron_trainer.c b/src/averaged_perceptron_trainer.c index 235b62b7..f2ad4352 100644 --- a/src/averaged_perceptron_trainer.c +++ b/src/averaged_perceptron_trainer.c @@ -386,7 +386,7 @@ bool averaged_perceptron_trainer_train_example(averaged_perceptron_trainer_t *se } averaged_perceptron_trainer_t *averaged_perceptron_trainer_new(void) { - averaged_perceptron_trainer_t *self = malloc(sizeof(averaged_perceptron_trainer_t)); + averaged_perceptron_trainer_t *self = calloc(1, sizeof(averaged_perceptron_trainer_t)); if (self == NULL) return NULL; diff --git a/src/bloom.c b/src/bloom.c index 109f9db9..3e0671b9 100644 --- a/src/bloom.c +++ b/src/bloom.c @@ -56,7 +56,7 @@ int bloom_filter_add(bloom_filter_t *self, const char *key, size_t len) { } bloom_filter_t *bloom_filter_new(uint64_t capacity, double error) { - bloom_filter_t *bloom = malloc(sizeof(bloom_filter_t)); + bloom_filter_t *bloom = calloc(1, sizeof(bloom_filter_t)); if (bloom == NULL) { return NULL; @@ -220,4 +220,4 @@ void bloom_filter_destroy(bloom_filter_t *self) { } free(self); -} \ No newline at end of file +} diff 
--git a/src/file_utils.c b/src/file_utils.c index dfd3c814..af0aa3b8 100644 --- a/src/file_utils.c +++ b/src/file_utils.c @@ -36,6 +36,21 @@ bool is_relative_path(struct dirent *ent) { return strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0; } +/* Builds one path string from the n components in args, joined with PATH_SEPARATOR (strip_separator enabled). Returns NULL when the scratch char_array cannot be created. NOTE(review): presumably each component is a NUL-terminated char * and char_array_to_string() hands back a heap string while disposing of the char_array; confirm in string_utils.c. */ char *path_vjoin(int n, va_list args) { + char_array *path = char_array_new(); + if (path == NULL) return NULL; + char_array_add_vjoined(path, PATH_SEPARATOR, true, n, args); + return char_array_to_string(path); +} + +/* Variadic wrapper around path_vjoin(): n is the count of components that follow. The callers in libpostal.c release the result with free(). */ char *path_join(int n, ...) { + va_list args; + va_start(args, n); + char *path = path_vjoin(n, args); + va_end(args); + return path; +} + inline uint64_t file_deserialize_uint64(unsigned char *buf) { return ((uint64_t)buf[0] << 56) | ((uint64_t)buf[1] << 48) | diff --git a/src/file_utils.h b/src/file_utils.h index cb2004ac..2292751a 100644 --- a/src/file_utils.h +++ b/src/file_utils.h @@ -9,6 +9,7 @@ #include #include "libpostal_config.h" +#include "string_utils.h" #ifdef HAVE_DIRENT_H #include @@ -55,6 +56,9 @@ char *file_getline(FILE * f); bool is_relative_path(struct dirent *ent); +char *path_join(int n, ...); +char *path_vjoin(int n, va_list args); + uint64_t file_deserialize_uint64(unsigned char *buf); bool file_read_uint64(FILE *file, uint64_t *value); bool file_write_uint64(FILE *file, uint64_t value); diff --git a/src/geodb.c b/src/geodb.c index 8b0dfecb..26c91da5 100644 --- a/src/geodb.c +++ b/src/geodb.c @@ -47,7 +47,7 @@ void geodb_destroy(geodb_t *self) { geodb_t *geodb_init(char *dir) { if (dir == NULL) return NULL; - geodb_t *gdb = malloc(sizeof(geodb_t)); + geodb_t *gdb = calloc(1, sizeof(geodb_t)); if (gdb == NULL) return NULL; diff --git a/src/geodb_builder.c b/src/geodb_builder.c index 7275a176..0ab991f0 100644 --- a/src/geodb_builder.c +++ b/src/geodb_builder.c @@ -338,7 +338,7 @@ void geodb_builder_destroy(geodb_builder_t *self) { } geodb_builder_t *geodb_builder_new(char *log_filename) { - geodb_builder_t *builder 
= calloc(1, sizeof(geodb_builder_t)); if (builder == NULL) return NULL; diff --git a/src/graph.c b/src/graph.c index 2b85dfc6..7403e17a 100644 --- a/src/graph.c +++ b/src/graph.c @@ -1,7 +1,7 @@ #include "graph.h" graph_t *graph_new_dims(graph_type_t type, uint32_t m, uint32_t n, size_t nnz, bool fixed_rows) { - graph_t *graph = malloc(sizeof(graph_t)); + graph_t *graph = calloc(1, sizeof(graph_t)); graph->m = m; graph->fixed_rows = fixed_rows; graph->n = n; diff --git a/src/language_classifier.c b/src/language_classifier.c index 35da504f..d5cfdc0c 100644 --- a/src/language_classifier.c +++ b/src/language_classifier.c @@ -35,7 +35,7 @@ void language_classifier_destroy(language_classifier_t *self) { } language_classifier_t *language_classifier_new(void) { - language_classifier_t *language_classifier = malloc(sizeof(language_classifier_t)); + language_classifier_t *language_classifier = calloc(1, sizeof(language_classifier_t)); return language_classifier; } diff --git a/src/libpostal.c b/src/libpostal.c index 29b039da..7298cb14 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -1036,40 +1036,110 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t return parsed; } +/* Loads the transliteration, numex and address dictionary modules from data files under datadir; with a NULL datadir each module setup call receives NULL instead of a path. Returns true only when all three calls succeed. */ +bool libpostal_setup_datadir(char *datadir) { + bool ok = false; + char *transliteration_path = NULL; + char *numex_path = NULL; + char *address_dictionary_path = NULL; + + if (datadir != NULL) { + transliteration_path = path_join(3, datadir, LIBPOSTAL_TRANSLITERATION_SUBDIR, TRANSLITERATION_DATA_FILE); + numex_path = path_join(3, datadir, LIBPOSTAL_NUMEX_SUBDIR, NUMEX_DATA_FILE); + address_dictionary_path = path_join(3, datadir, LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR, ADDRESS_DICTIONARY_DATA_FILE); + } + + if (!transliteration_module_setup(transliteration_path)) { + log_error("Error loading transliteration module, dir=%s\n", transliteration_path != NULL ? transliteration_path : "(default)"); + goto cleanup; + } + + if (!numex_module_setup(numex_path)) { + log_error("Error loading numex module, dir=%s\n", numex_path != NULL ? numex_path : "(default)"); + goto cleanup; + } 
+ + if (!address_dictionary_module_setup(address_dictionary_path)) { + log_error("Error loading dictionary module, dir=%s\n", address_dictionary_path != NULL ? address_dictionary_path : "(default)"); + goto cleanup; + } + + ok = true; + +cleanup: + /* free(NULL) is a no-op, so this single exit releases the joined paths on success and on failure alike */ + free(transliteration_path); + free(numex_path); + free(address_dictionary_path); + return ok; +} + bool libpostal_setup(void) { - if (!transliteration_module_setup(NULL)) { - log_error("Error loading transliteration module\n"); + return libpostal_setup_datadir(NULL); +} + +/* Loads the language classifier from the LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR directory under datadir, or passes NULL to the module setup call when datadir is NULL. Returns false if that call fails. */ +bool libpostal_setup_language_classifier_datadir(char *datadir) { + char *language_classifier_dir = NULL; + + if (datadir != NULL) { + language_classifier_dir = path_join(2, datadir, LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR); + } + + if (!language_classifier_module_setup(language_classifier_dir)) { + log_error("Error loading language classifier, dir=%s\n", language_classifier_dir != NULL ? language_classifier_dir : "(default)"); + free(language_classifier_dir); /* the early return below must not leak the joined path */ return false; } - if (!numex_module_setup(NULL)) { - log_error("Error loading numex module\n"); - return false; - } - - if (!address_dictionary_module_setup(NULL)) { - log_error("Error loading dictionary module\n"); - return false; + if (language_classifier_dir != NULL) { + free(language_classifier_dir); } return true; } bool libpostal_setup_language_classifier(void) { - if (!language_classifier_module_setup(NULL)) { - log_error("Error loading language classifier\n"); + return libpostal_setup_language_classifier_datadir(NULL); +} + +/* Loads the geodb module and then the address parser module from their subdirectories of datadir (NULL is passed through to both setup calls when datadir is NULL). Returns false as soon as either call fails. */ +bool libpostal_setup_parser_datadir(char *datadir) { + char *parser_dir = NULL; + char *geodb_dir = NULL; + + if (datadir != NULL) { + parser_dir = path_join(2, datadir, LIBPOSTAL_ADDRESS_PARSER_SUBDIR); + geodb_dir = path_join(2, datadir, LIBPOSTAL_GEODB_SUBDIR); + } + + if (!geodb_module_setup(geodb_dir)) { + log_error("Error loading geodb module, dir=%s\n", geodb_dir != NULL ? geodb_dir : "(default)"); + free(parser_dir); + free(geodb_dir); return false; } + + if (!address_parser_module_setup(parser_dir)) { + log_error("Error loading address parser module, 
dir=%s\n", parser_dir != NULL ? parser_dir : "(default)"); + free(parser_dir); + free(geodb_dir); + return false; + } + + if (parser_dir != NULL) { + free(parser_dir); + } + + if (geodb_dir != NULL) { + free(geodb_dir); + } + return true; } bool libpostal_setup_parser(void) { - if (!address_parser_module_setup(NULL)) { - log_error("Error loading address parser module\n"); - return false; - } - - return true; + return libpostal_setup_parser_datadir(NULL); } void libpostal_teardown(void) { diff --git a/src/libpostal.h b/src/libpostal.h index 35ac66c4..ae6af6e8 100644 --- a/src/libpostal.h +++ b/src/libpostal.h @@ -92,12 +92,15 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t // Setup/teardown methods bool libpostal_setup(void); +bool libpostal_setup_datadir(char *datadir); void libpostal_teardown(void); bool libpostal_setup_parser(void); +bool libpostal_setup_parser_datadir(char *datadir); void libpostal_teardown_parser(void); bool libpostal_setup_language_classifier(void); +bool libpostal_setup_language_classifier_datadir(char *datadir); void libpostal_teardown_language_classifier(void); #ifdef __cplusplus diff --git a/src/libpostal_config.h b/src/libpostal_config.h index 6b07a9b8..4d935665 100644 --- a/src/libpostal_config.h +++ b/src/libpostal_config.h @@ -12,12 +12,20 @@ #error LIBPOSTAL_DATA_DIR not defined! 
#endif -#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_parser" -#define LIBPOSTAL_DICTIONARIES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "dictionaries" -#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geonames" -#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geodb" -#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "language_classifier" -#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "transliteration" +#define LIBPOSTAL_ADDRESS_PARSER_SUBDIR "address_parser" +#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_PARSER_SUBDIR +#define LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR "address_expansions" +#define LIBPOSTAL_ADDRESS_EXPANSIONS_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR +#define LIBPOSTAL_GEONAMES_SUBDIR "geonames" +#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEONAMES_SUBDIR +#define LIBPOSTAL_GEODB_SUBDIR "geodb" +#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEODB_SUBDIR +#define LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR "language_classifier" +#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR +#define LIBPOSTAL_NUMEX_SUBDIR "numex" +#define LIBPOSTAL_NUMEX_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_NUMEX_SUBDIR +#define LIBPOSTAL_TRANSLITERATION_SUBDIR "transliteration" +#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_TRANSLITERATION_SUBDIR #define GEODB_BLOOM_FILTER_SIZE 100000000 #define GEODB_BLOOM_FILTER_ERROR 0.001 diff --git a/src/numex.c b/src/numex.c index 7c815080..f4bdf4d7 100644 --- a/src/numex.c +++ b/src/numex.c @@ -51,7 +51,7 @@ numex_table_t *numex_table_init(void) { numex_table_t *numex_table = get_numex_table(); if (numex_table == NULL) { - numex_table = malloc(sizeof(numex_table_t)); + numex_table = calloc(1, 
sizeof(numex_table_t)); if (numex_table == NULL) return NULL; diff --git a/src/numex.h b/src/numex.h index 421435ec..9d8d9f4a 100644 --- a/src/numex.h +++ b/src/numex.h @@ -20,7 +20,8 @@ #include "trie.h" #include "trie_search.h" -#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR "numex.dat" +#define NUMEX_DATA_FILE "numex.dat" +#define DEFAULT_NUMEX_PATH LIBPOSTAL_NUMEX_DIR PATH_SEPARATOR NUMEX_DATA_FILE #define LATIN_LANGUAGE_CODE "la" diff --git a/src/sparse_matrix.c b/src/sparse_matrix.c index 6a5911f8..c765dc36 100644 --- a/src/sparse_matrix.c +++ b/src/sparse_matrix.c @@ -2,7 +2,7 @@ #include "klib/ksort.h" sparse_matrix_t *sparse_matrix_new_shape(size_t m, size_t n) { - sparse_matrix_t *matrix = malloc(sizeof(sparse_matrix_t)); + sparse_matrix_t *matrix = calloc(1, sizeof(sparse_matrix_t)); if (matrix == NULL) return NULL; matrix->m = m; matrix->n = n; diff --git a/src/string_utils.c b/src/string_utils.c index 69958cad..5c5c6281 100644 --- a/src/string_utils.c +++ b/src/string_utils.c @@ -782,16 +782,19 @@ cstring_array *cstring_array_new_size(size_t size) { +/* Wraps str (stored by pointer, not copied) in a cstring_array and records the start offset of every NUL-separated string inside it. Returns NULL when str is NULL or the wrapper cannot be allocated. */ cstring_array *cstring_array_from_char_array(char_array *str) { - cstring_array *array = malloc(sizeof(cstring_array)); - if (array == NULL) return NULL; + if (str == NULL) return NULL; /* reject the input before allocating so this path cannot leak the wrapper */ + cstring_array *array = malloc(sizeof(cstring_array)); + if (array == NULL) return NULL; array->str = str; array->indices = uint32_array_new_size(1); uint32_array_push(array->indices, 0); char *ptr = str->a; - uint32_t i = 0; - for (i = 0; i < str->n - 1; i++, ptr++) { - if (*ptr == '\0') { - uint32_array_push(array->indices, i + 1); + if (str->n > 0) { + for (uint32_t i = 0; i < str->n - 1; i++, ptr++) { + if (*ptr == '\0') { + uint32_array_push(array->indices, i + 1); + } } } return array; diff --git a/src/string_utils.h b/src/string_utils.h index 537d6a20..aec4a9d4 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -141,7 +141,6 @@ void char_array_cat_vprintf(char_array *array, char *format, va_list args); void 
char_array_cat_printf(char_array *array, char *format, ...); // Mainly for paths or delimited strings -void char_array_append_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args); void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args); void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...); void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...); diff --git a/src/transliterate.c b/src/transliterate.c index 8e9edb42..368356f3 100644 --- a/src/transliterate.c +++ b/src/transliterate.c @@ -1087,7 +1087,7 @@ transliteration_table_t *transliteration_table_init(void) { transliteration_table_t *trans_table = get_transliteration_table(); if (trans_table == NULL) { - trans_table = malloc(sizeof(transliteration_table_t)); + trans_table = calloc(1, sizeof(transliteration_table_t)); trans_table->trie = trie_new(); if (trans_table->trie == NULL) { diff --git a/src/transliterate.h b/src/transliterate.h index 8742a2a6..ab559393 100644 --- a/src/transliterate.h +++ b/src/transliterate.h @@ -17,7 +17,8 @@ #define LATIN_ASCII_SIMPLE "latin-ascii-simple" #define HTML_ESCAPE "html-escape" -#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR "transliteration.dat" +#define TRANSLITERATION_DATA_FILE "transliteration.dat" +#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR TRANSLITERATION_DATA_FILE #define MAX_TRANS_NAME_LEN 100 diff --git a/src/trie.c b/src/trie.c index 0c0a0bc2..35b4254c 100644 --- a/src/trie.c +++ b/src/trie.c @@ -32,7 +32,7 @@ Constructors */ static trie_t *trie_new_empty(uint8_t *alphabet, uint32_t alphabet_size) { - trie_t *self = malloc(sizeof(trie_t)); + trie_t *self = calloc(1, sizeof(trie_t)); if (!self) goto exit_no_malloc; diff --git a/src/vector.h b/src/vector.h index 01958962..ad3a75fa 100644 --- a/src/vector.h +++ 
b/src/vector.h @@ -33,7 +33,7 @@ static inline void _aligned_free(void *p) name *array = malloc(sizeof(name)); \ if (array == NULL) return NULL; \ array->n = array->m = 0; \ - array->a = malloc(size * sizeof(type)); \ - if (array->a == NULL) return NULL; \ + array->a = malloc((size > 0 ? size : 1) * sizeof(type)); \ + if (array->a == NULL) { free(array); return NULL; } /* do not leak the header struct when the buffer allocation fails */ \ array->m = size; \ return array; \