From 26aeb0ebec546899168d98f9fe0b25de2bc7a8c5 Mon Sep 17 00:00:00 2001 From: Rinigus Date: Thu, 5 Jan 2017 07:34:24 +0200 Subject: [PATCH 1/8] drop AC_FUNC_MALLOC and _REALLOC and check for them as regular functions; add extra cflags for scanner --- configure.ac | 18 +++++++++++++++--- src/Makefile.am | 4 +++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index f7cbc42d..321e8f0a 100644 --- a/configure.ac +++ b/configure.ac @@ -45,10 +45,8 @@ AC_TYPE_UINT8_T AC_CHECK_TYPES([ptrdiff_t]) # Checks for library functions. -AC_FUNC_MALLOC AC_FUNC_MMAP -AC_FUNC_REALLOC -AC_CHECK_FUNCS([getcwd gettimeofday memmove memset munmap regcomp setlocale sqrt strdup strndup]) +AC_CHECK_FUNCS([malloc realloc getcwd gettimeofday memmove memset munmap regcomp setlocale sqrt strdup strndup]) AC_CONFIG_FILES([Makefile libpostal.pc @@ -70,4 +68,18 @@ AC_ARG_ENABLE([data-download], AM_CONDITIONAL([DOWNLOAD_DATA], [test "x$DOWNLOAD_DATA" = "xtrue"]) +AC_ARG_WITH(cflags-scanner-extra, [AS_HELP_STRING([--with-cflags-scanner-extra@<:@=VALUE@:>@], [Extra compilation options for scanner.c])], +[ + if test "x$withval" = "xno"; then + CFLAGS_SCANNER_EXTRA="" + else + CFLAGS_SCANNER_EXTRA="$withval" + fi +], +[ CFLAGS_SCANNER_EXTRA="" ] +) + +AC_MSG_NOTICE([extra cflags for scanner.c: $CFLAGS_SCANNER_EXTRA]) +AC_SUBST(CFLAGS_SCANNER_EXTRA) + AC_OUTPUT diff --git a/src/Makefile.am b/src/Makefile.am index 82c9cecc..887f7c78 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -22,9 +22,11 @@ libpostal_la_CFLAGS = $(CFLAGS_O2) dist_bin_SCRIPTS = libpostal_data # Scanner can take a very long time to compile with higher optimization levels, so always use -O0, scanner is fast enough +# On cross-compilation for ARM using gcc-4.7, there are "out of range" errors during compilation that can be fixed by adding +# -marm option. For that, CFLAGS_SCANNER_EXTRA is provided that can be filled during configuration stage (see ./configure --help). 
noinst_LTLIBRARIES = libscanner.la libscanner_la_SOURCES = scanner.c -libscanner_la_CFLAGS = $(CFLAGS_O0) +libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA) noinst_PROGRAMS = libpostal bench build_address_dictionary build_geodb build_numex_table build_trans_table address_parser_train address_parser_test address_parser language_classifier_train language_classifier language_classifier_test libpostal_SOURCES = main.c json_encode.c From a2b84a01771564386b45dc7f542a1babfbfaaa85 Mon Sep 17 00:00:00 2001 From: Al Barrentine Date: Sat, 7 Jan 2017 14:17:31 -0500 Subject: [PATCH 2/8] [docs][ci skip] Adding parser label definitions to the README --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 4ad4009c..8f9792d4 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,20 @@ int main(int argc, char **argv) { } ``` +Parser labels +------------- + +The address parser can use any string labels that are defined in the training data, but these are the default labels, based on the fields defined in [OpenCage's address-formatting library](https://github.com/OpenCageData/address-formatting): + +- **house**: venue name e.g. "Brooklyn Academy of Music", and building names e.g. "Empire State Building" +- **house_number**: usually refers to the external (street-facing) building number. In some countries this may be a compound, hyphenated number which also includes an apartment number, or a block number (a la Japan), but libpostal will just call it the house_number for simplicity. +- **road**: street name(s) +- **suburb**: usually an unofficial neighborhood name like "Harlem", "South Bronx", or "Crown Heights" +- **city_district**: these are usually boroughs or districts within a city that serve some official purpose e.g. "Brooklyn" or "Hackney" or "Bratislava IV" +- **city**: any human settlement including cities, towns, villages, hamlets, localities, etc. 
+- **state_district**: usually a second-level administrative division or county. +- **state**: a first-level administrative division. Scotland, Northern Ireland, Wales, and England in the UK are mapped to "state" as well (convention used in OSM, GeoPlanet, etc.) +- **country**: sovereign nations and their dependent territories, anything with an [ISO-3166 code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2). Examples of normalization ------------------------- From 953a26e54e2869c70f26db5e53e0bbe8ac9d4b1c Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 9 Jan 2017 14:42:36 -0500 Subject: [PATCH 3/8] [utils] char_array_add_vjoined to stay consistent (add_* methods NUL termiante) --- src/features.c | 2 +- src/string_utils.c | 6 +++--- src/string_utils.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/features.c b/src/features.c index 198ee278..ada3586b 100644 --- a/src/features.c +++ b/src/features.c @@ -12,7 +12,7 @@ void feature_array_add(cstring_array *features, size_t count, ...) { cstring_array_start_token(features); bool strip_separator = true; - char_array_append_vjoined(features->str, FEATURE_SEPARATOR_CHAR, strip_separator, count, args); + char_array_add_vjoined(features->str, FEATURE_SEPARATOR_CHAR, strip_separator, count, args); va_end(args); } diff --git a/src/string_utils.c b/src/string_utils.c index 52f94893..7bfecb09 100644 --- a/src/string_utils.c +++ b/src/string_utils.c @@ -595,7 +595,7 @@ inline void char_array_add_len(char_array *array, char *str, size_t len) { } -void char_array_append_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args) { +void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args) { if (count <= 0) { return; } @@ -625,7 +625,7 @@ void char_array_append_vjoined(char_array *array, char *separator, bool strip_se inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) 
{ va_list args; va_start(args, count); - char_array_append_vjoined(array, separator, strip_separator, count, args); + char_array_add_vjoined(array, separator, strip_separator, count, args); va_end(args); } @@ -633,7 +633,7 @@ inline void char_array_cat_joined(char_array *array, char *separator, bool strip char_array_strip_nul_byte(array); va_list args; va_start(args, count); - char_array_append_vjoined(array, separator, strip_separator, count, args); + char_array_add_vjoined(array, separator, strip_separator, count, args); va_end(args); } diff --git a/src/string_utils.h b/src/string_utils.h index fac4f99e..c77035ee 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -136,7 +136,7 @@ void char_array_cat_vprintf(char_array *array, char *format, va_list args); void char_array_cat_printf(char_array *array, char *format, ...); // Mainly for paths or delimited strings -void char_array_append_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args); +void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args); void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...); void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...); From a3506131fec8466b82711d8ec262301e8b39c84f Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 9 Jan 2017 16:11:26 -0500 Subject: [PATCH 4/8] [build] adding libpostal_setup_datadir, libpostal_setup_parser_datadir, libpostal_setup_language_classifier_datadir functions for configuring the datadir at runtime --- src/address_dictionary.h | 3 +- src/file_utils.c | 15 ++++++ src/file_utils.h | 4 ++ src/libpostal.c | 107 ++++++++++++++++++++++++++++++--------- src/libpostal.h | 3 ++ src/libpostal_config.h | 20 +++++--- src/numex.h | 3 +- src/transliterate.h | 3 +- 8 files changed, 126 insertions(+), 32 deletions(-) diff --git a/src/address_dictionary.h b/src/address_dictionary.h index 
c174b002..ccd99b05 100644 --- a/src/address_dictionary.h +++ b/src/address_dictionary.h @@ -21,7 +21,8 @@ #define ALL_LANGUAGES "all" -#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_expansions" PATH_SEPARATOR "address_dictionary.dat" +#define ADDRESS_DICTIONARY_DATA_FILE "address_dictionary.dat" +#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR PATH_SEPARATOR ADDRESS_DICTIONARY_DATA_FILE #define NULL_CANONICAL_INDEX -1 diff --git a/src/file_utils.c b/src/file_utils.c index 1747bbd8..4a320d83 100644 --- a/src/file_utils.c +++ b/src/file_utils.c @@ -36,6 +36,21 @@ bool is_relative_path(struct dirent *ent) { return strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0; } +char *path_vjoin(int n, va_list args) { + char_array *path = char_array_new(); + if (path == NULL) return NULL; + char_array_add_vjoined(path, PATH_SEPARATOR, true, n, args); + return char_array_to_string(path); +} + +char *path_join(int n, ...) 
{ + va_list args; + va_start(args, n); + char *path = path_vjoin(n, args); + va_end(args); + return path; +} + inline uint64_t file_deserialize_uint64(unsigned char *buf) { return ((uint64_t)buf[0] << 56) | ((uint64_t)buf[1] << 48) | diff --git a/src/file_utils.h b/src/file_utils.h index 13fa1e39..b6648500 100644 --- a/src/file_utils.h +++ b/src/file_utils.h @@ -9,6 +9,7 @@ #include #include "libpostal_config.h" +#include "string_utils.h" #ifdef HAVE_DIRENT_H #include @@ -55,6 +56,9 @@ char *file_getline(FILE * f); bool is_relative_path(struct dirent *ent); +char *path_join(int n, ...); +char *path_vjoin(int n, va_list args); + uint64_t file_deserialize_uint64(unsigned char *buf); bool file_read_uint64(FILE *file, uint64_t *value); bool file_write_uint64(FILE *file, uint64_t value); diff --git a/src/libpostal.c b/src/libpostal.c index 978f9b70..06401146 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -1054,45 +1054,106 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t return parsed; } +bool libpostal_setup_datadir(char *datadir) { + char *transliteration_path = NULL; + char *numex_path = NULL; + char *address_dictionary_path = NULL; + + if (datadir != NULL) { + transliteration_path = path_join(3, datadir, LIBPOSTAL_TRANSLITERATION_SUBDIR, TRANSLITERATION_DATA_FILE); + numex_path = path_join(3, datadir, LIBPOSTAL_NUMEX_SUBDIR, NUMEX_DATA_FILE); + address_dictionary_path = path_join(3, datadir, LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR, ADDRESS_DICTIONARY_DATA_FILE); + } + + if (!transliteration_module_setup(transliteration_path)) { + log_error("Error loading transliteration module, dir=%s\n", transliteration_path); + return false; + } + + if (!numex_module_setup(numex_path)) { + log_error("Error loading numex module, dir=%s\n", numex_path); + return false; + } + + if (!address_dictionary_module_setup(address_dictionary_path)) { + log_error("Error loading dictionary module, dir=%s\n", address_dictionary_path); + return false; + } + + 
if (transliteration_path != NULL) { + free(transliteration_path); + } + + if (numex_path != NULL) { + free(numex_path); + } + + if (address_dictionary_path != NULL) { + free(address_dictionary_path); + } + + return true; +} + bool libpostal_setup(void) { - if (!transliteration_module_setup(NULL)) { - log_error("Error loading transliteration module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR); + return libpostal_setup_datadir(NULL); +} + +bool libpostal_setup_language_classifier_datadir(char *datadir) { + char *language_classifier_dir = NULL; + + if (datadir != NULL) { + language_classifier_dir = path_join(2, datadir, LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR); + } + + if (!language_classifier_module_setup(language_classifier_dir)) { + log_error("Error loading language classifier, dir=%s\n", language_classifier_dir); return false; } - if (!numex_module_setup(NULL)) { - log_error("Error loading numex module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR); - return false; - } - - if (!address_dictionary_module_setup(NULL)) { - log_error("Error loading dictionary module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR); - return false; + if (language_classifier_dir != NULL) { + free(language_classifier_dir); } return true; } bool libpostal_setup_language_classifier(void) { - if (!language_classifier_module_setup(NULL)) { - log_error("Error loading language classifier, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR); + return libpostal_setup_language_classifier_datadir(NULL); +} + +bool libpostal_setup_parser_datadir(char *datadir) { + char *parser_dir = NULL; + char *geodb_dir = NULL; + + if (datadir != NULL) { + parser_dir = path_join(2, datadir, LIBPOSTAL_ADDRESS_PARSER_SUBDIR); + geodb_dir = path_join(2, datadir, LIBPOSTAL_GEODB_SUBDIR); + } + + if (!geodb_module_setup(geodb_dir)) { + log_error("Error loading geodb module, dir=%s\n", geodb_dir); return false; } + + if (!address_parser_module_setup(parser_dir)) { + log_error("Error loading address parser module, dir=%s\n", 
parser_dir); + return false; + } + + if (parser_dir != NULL) { + free(parser_dir); + } + + if (geodb_dir != NULL) { + free(geodb_dir); + } + return true; } bool libpostal_setup_parser(void) { - if (!geodb_module_setup(NULL)) { - log_error("Error loading geodb module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR); - return false; - } - - if (!address_parser_module_setup(NULL)) { - log_error("Error loading address parser module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR); - return false; - } - - return true; + return libpostal_setup_parser_datadir(NULL); } void libpostal_teardown(void) { diff --git a/src/libpostal.h b/src/libpostal.h index 30646e59..178d6225 100644 --- a/src/libpostal.h +++ b/src/libpostal.h @@ -92,12 +92,15 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t // Setup/teardown methods bool libpostal_setup(void); +bool libpostal_setup_datadir(char *datadir); void libpostal_teardown(void); bool libpostal_setup_parser(void); +bool libpostal_setup_parser_datadir(char *datadir); void libpostal_teardown_parser(void); bool libpostal_setup_language_classifier(void); +bool libpostal_setup_language_classifier_datadir(char *datadir); void libpostal_teardown_language_classifier(void); #ifdef __cplusplus diff --git a/src/libpostal_config.h b/src/libpostal_config.h index 6b07a9b8..4d935665 100644 --- a/src/libpostal_config.h +++ b/src/libpostal_config.h @@ -12,12 +12,20 @@ #error LIBPOSTAL_DATA_DIR not defined! 
#endif -#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_parser" -#define LIBPOSTAL_DICTIONARIES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "dictionaries" -#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geonames" -#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geodb" -#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "language_classifier" -#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "transliteration" +#define LIBPOSTAL_ADDRESS_PARSER_SUBDIR "address_parser" +#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_PARSER_SUBDIR +#define LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR "address_expansions" +#define LIBPOSTAL_ADDRESS_EXPANSIONS_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR +#define LIBPOSTAL_GEONAMES_SUBDIR "geonames" +#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEONAMES_SUBDIR +#define LIBPOSTAL_GEODB_SUBDIR "geodb" +#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEODB_SUBDIR +#define LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR "language_classifier" +#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR +#define LIBPOSTAL_NUMEX_SUBDIR "numex" +#define LIBPOSTAL_NUMEX_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_NUMEX_SUBDIR +#define LIBPOSTAL_TRANSLITERATION_SUBDIR "transliteration" +#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_TRANSLITERATION_SUBDIR #define GEODB_BLOOM_FILTER_SIZE 100000000 #define GEODB_BLOOM_FILTER_ERROR 0.001 diff --git a/src/numex.h b/src/numex.h index 421435ec..9d8d9f4a 100644 --- a/src/numex.h +++ b/src/numex.h @@ -20,7 +20,8 @@ #include "trie.h" #include "trie_search.h" -#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR "numex.dat" +#define NUMEX_DATA_FILE "numex.dat" +#define 
DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR NUMEX_DATA_FILE #define LATIN_LANGUAGE_CODE "la" diff --git a/src/transliterate.h b/src/transliterate.h index 885f9989..79230a56 100644 --- a/src/transliterate.h +++ b/src/transliterate.h @@ -15,7 +15,8 @@ #define LATIN_ASCII "latin-ascii" -#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR "transliteration.dat" +#define TRANSLITERATION_DATA_FILE "transliteration.dat" +#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR TRANSLITERATION_DATA_FILE #define MAX_TRANS_NAME_LEN 100 From bbc91722cbf0b0069d09cb70bed4aec97a42224e Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 9 Jan 2017 16:14:07 -0500 Subject: [PATCH 5/8] [version] bump version to 0.3.3 --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 321e8f0a..885aa434 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # -*- Autoconf -*- # Process this file with autoconf to produce a configure script. 
-AC_INIT([libpostal], [0.3]) +AC_INIT([libpostal], [0.3.3]) AM_INIT_AUTOMAKE([foreign subdir-objects]) AC_CONFIG_SRCDIR([src]) From e1f258171fb4a6d415d40241be00d7eda43f6110 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 13 Jan 2017 16:52:41 -0500 Subject: [PATCH 6/8] [fix] handle cstring_array_from_char_array where char_array is NULL or 0-length --- src/string_utils.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/string_utils.c b/src/string_utils.c index 7bfecb09..64984564 100644 --- a/src/string_utils.c +++ b/src/string_utils.c @@ -711,16 +711,17 @@ cstring_array *cstring_array_new_size(size_t size) { cstring_array *cstring_array_from_char_array(char_array *str) { cstring_array *array = malloc(sizeof(cstring_array)); - if (array == NULL) return NULL; + if (array == NULL || str == NULL) return NULL; array->str = str; array->indices = uint32_array_new_size(1); uint32_array_push(array->indices, 0); char *ptr = str->a; - uint32_t i = 0; - for (i = 0; i < str->n - 1; i++, ptr++) { - if (*ptr == '\0') { - uint32_array_push(array->indices, i + 1); + if (str->n > 0) { + for (uint32_t i = 0; i < str->n - 1; i++, ptr++) { + if (*ptr == '\0') { + uint32_array_push(array->indices, i + 1); + } } } return array; From 1398df1260ed9d6a76c2c097e8d41223e58a2f5e Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 13 Jan 2017 17:49:31 -0500 Subject: [PATCH 7/8] [fix] accept 0 for array_new_size --- src/vector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vector.h b/src/vector.h index 25b7896d..8a626ea6 100644 --- a/src/vector.h +++ b/src/vector.h @@ -34,7 +34,7 @@ static inline void _aligned_free(void *p) name *array = malloc(sizeof(name)); \ if (array == NULL) return NULL; \ array->n = array->m = 0; \ - array->a = malloc(size * sizeof(type)); \ + array->a = malloc((size > 0 ? 
size : 1) * sizeof(type)); \ if (array->a == NULL) return NULL; \ array->m = size; \ return array; \ From df89387b5c6472335df9ce387de15f5f1b65af20 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 13 Jan 2017 18:06:41 -0500 Subject: [PATCH 8/8] [fix] calloc instead of malloc when performing initialization on structs that may fail halfway and need to clean up while partially initialized (calloc will set all the bytes to zero so the member pointers are NULL instead of garbage memory) --- src/address_dictionary.c | 2 +- src/address_parser.c | 2 +- src/averaged_perceptron.c | 4 ++-- src/averaged_perceptron_trainer.c | 2 +- src/bloom.c | 4 ++-- src/geodb.c | 2 +- src/geodb_builder.c | 2 +- src/graph.c | 2 +- src/language_classifier.c | 2 +- src/numex.c | 2 +- src/sparse_matrix.c | 2 +- src/transliterate.c | 2 +- src/trie.c | 2 +- 13 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/address_dictionary.c b/src/address_dictionary.c index b0d5e345..a41fb135 100644 --- a/src/address_dictionary.c +++ b/src/address_dictionary.c @@ -284,7 +284,7 @@ phrase_t search_address_dictionaries_suffix(char *str, size_t len, char *lang) { bool address_dictionary_init(void) { if (address_dict != NULL) return false; - address_dict = malloc(sizeof(address_dictionary_t)); + address_dict = calloc(1, sizeof(address_dictionary_t)); if (address_dict == NULL) return false; address_dict->canonical = cstring_array_new(); diff --git a/src/address_parser.c b/src/address_parser.c index 507bee2a..187bada5 100644 --- a/src/address_parser.c +++ b/src/address_parser.c @@ -17,7 +17,7 @@ static address_parser_t *parser = NULL; address_parser_t *address_parser_new(void) { - address_parser_t *parser = malloc(sizeof(address_parser_t)); + address_parser_t *parser = calloc(1, sizeof(address_parser_t)); return parser; } diff --git a/src/averaged_perceptron.c b/src/averaged_perceptron.c index a66470e4..59a58f2e 100644 --- a/src/averaged_perceptron.c +++ b/src/averaged_perceptron.c @@ -93,7 +93,7 @@ 
averaged_perceptron_t *averaged_perceptron_read(FILE *f) { return NULL; } - averaged_perceptron_t *perceptron = malloc(sizeof(averaged_perceptron_t)); + averaged_perceptron_t *perceptron = calloc(1, sizeof(averaged_perceptron_t)); if (!file_read_uint32(f, &perceptron->num_features) || !file_read_uint32(f, &perceptron->num_classes) || @@ -216,4 +216,4 @@ void averaged_perceptron_destroy(averaged_perceptron_t *self) { } free(self); -} \ No newline at end of file +} diff --git a/src/averaged_perceptron_trainer.c b/src/averaged_perceptron_trainer.c index 83693ef8..e10ae499 100644 --- a/src/averaged_perceptron_trainer.c +++ b/src/averaged_perceptron_trainer.c @@ -389,7 +389,7 @@ bool averaged_perceptron_trainer_train_example(averaged_perceptron_trainer_t *se } averaged_perceptron_trainer_t *averaged_perceptron_trainer_new(void) { - averaged_perceptron_trainer_t *self = malloc(sizeof(averaged_perceptron_trainer_t)); + averaged_perceptron_trainer_t *self = calloc(1, sizeof(averaged_perceptron_trainer_t)); if (self == NULL) return NULL; diff --git a/src/bloom.c b/src/bloom.c index 109f9db9..3e0671b9 100644 --- a/src/bloom.c +++ b/src/bloom.c @@ -56,7 +56,7 @@ int bloom_filter_add(bloom_filter_t *self, const char *key, size_t len) { } bloom_filter_t *bloom_filter_new(uint64_t capacity, double error) { - bloom_filter_t *bloom = malloc(sizeof(bloom_filter_t)); + bloom_filter_t *bloom = calloc(1, sizeof(bloom_filter_t)); if (bloom == NULL) { return NULL; @@ -220,4 +220,4 @@ void bloom_filter_destroy(bloom_filter_t *self) { } free(self); -} \ No newline at end of file +} diff --git a/src/geodb.c b/src/geodb.c index 8b0dfecb..26c91da5 100644 --- a/src/geodb.c +++ b/src/geodb.c @@ -47,7 +47,7 @@ void geodb_destroy(geodb_t *self) { geodb_t *geodb_init(char *dir) { if (dir == NULL) return NULL; - geodb_t *gdb = malloc(sizeof(geodb_t)); + geodb_t *gdb = calloc(1, sizeof(geodb_t)); if (gdb == NULL) return NULL; diff --git a/src/geodb_builder.c b/src/geodb_builder.c index 
327ce812..489a02c4 100644 --- a/src/geodb_builder.c +++ b/src/geodb_builder.c @@ -338,7 +338,7 @@ void geodb_builder_destroy(geodb_builder_t *self) { } geodb_builder_t *geodb_builder_new(char *log_filename) { - geodb_builder_t *builder = malloc(sizeof(geodb_builder_t)); + geodb_builder_t *builder = calloc(1, sizeof(geodb_builder_t)); if (builder == NULL) return NULL; diff --git a/src/graph.c b/src/graph.c index 2b85dfc6..7403e17a 100644 --- a/src/graph.c +++ b/src/graph.c @@ -1,7 +1,7 @@ #include "graph.h" graph_t *graph_new_dims(graph_type_t type, uint32_t m, uint32_t n, size_t nnz, bool fixed_rows) { - graph_t *graph = malloc(sizeof(graph_t)); + graph_t *graph = calloc(1, sizeof(graph_t)); graph->m = m; graph->fixed_rows = fixed_rows; graph->n = n; diff --git a/src/language_classifier.c b/src/language_classifier.c index bd8fb0cd..9efa4ddc 100644 --- a/src/language_classifier.c +++ b/src/language_classifier.c @@ -35,7 +35,7 @@ void language_classifier_destroy(language_classifier_t *self) { } language_classifier_t *language_classifier_new(void) { - language_classifier_t *language_classifier = malloc(sizeof(language_classifier_t)); + language_classifier_t *language_classifier = calloc(1, sizeof(language_classifier_t)); return language_classifier; } diff --git a/src/numex.c b/src/numex.c index 7c815080..f4bdf4d7 100644 --- a/src/numex.c +++ b/src/numex.c @@ -51,7 +51,7 @@ numex_table_t *numex_table_init(void) { numex_table_t *numex_table = get_numex_table(); if (numex_table == NULL) { - numex_table = malloc(sizeof(numex_table_t)); + numex_table = calloc(1, sizeof(numex_table_t)); if (numex_table == NULL) return NULL; diff --git a/src/sparse_matrix.c b/src/sparse_matrix.c index dc9fa7fc..6f225a94 100644 --- a/src/sparse_matrix.c +++ b/src/sparse_matrix.c @@ -2,7 +2,7 @@ #include "klib/ksort.h" sparse_matrix_t *sparse_matrix_new_shape(size_t m, size_t n) { - sparse_matrix_t *matrix = malloc(sizeof(sparse_matrix_t)); + sparse_matrix_t *matrix = calloc(1, 
sizeof(sparse_matrix_t)); if (matrix == NULL) return NULL; matrix->m = m; matrix->n = n; diff --git a/src/transliterate.c b/src/transliterate.c index 7a9dd4f5..644dd110 100644 --- a/src/transliterate.c +++ b/src/transliterate.c @@ -1087,7 +1087,7 @@ transliteration_table_t *transliteration_table_init(void) { transliteration_table_t *trans_table = get_transliteration_table(); if (trans_table == NULL) { - trans_table = malloc(sizeof(transliteration_table_t)); + trans_table = calloc(1, sizeof(transliteration_table_t)); trans_table->trie = trie_new(); if (trans_table->trie == NULL) { diff --git a/src/trie.c b/src/trie.c index 0c0a0bc2..35b4254c 100644 --- a/src/trie.c +++ b/src/trie.c @@ -32,7 +32,7 @@ Constructors */ static trie_t *trie_new_empty(uint8_t *alphabet, uint32_t alphabet_size) { - trie_t *self = malloc(sizeof(trie_t)); + trie_t *self = calloc(1, sizeof(trie_t)); if (!self) goto exit_no_malloc;