[merge] merging master
This commit is contained in:
14
README.md
14
README.md
@@ -148,6 +148,20 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
```
|
||||
|
||||
Parser labels
|
||||
-------------
|
||||
|
||||
The address parser can use any string labels that are defined in the training data, but these are the default labels, based on the fields defined in [OpenCage's address-formatting library](https://github.com/OpenCageData/address-formatting):
|
||||
|
||||
- **house**: venue name e.g. "Brooklyn Academy of Music", and building names e.g. "Empire State Building"
|
||||
- **house_number**: usually refers to the external (street-facing) building number. In some countries this may be a compound, hyphenated number which also includes an apartment number, or a block number (a la Japan), but libpostal will just call it the house_number for simplicity.
|
||||
- **road**: street name(s)
|
||||
- **suburb**: usually an unofficial neighborhood name like "Harlem", "South Bronx", or "Crown Heights"
|
||||
- **city_district**: these are usually boroughs or districts within a city that serve some official purpose e.g. "Brooklyn" or "Hackney" or "Bratislava IV"
|
||||
- **city**: any human settlement including cities, towns, villages, hamlets, localities, etc.
|
||||
- **state_district**: usually a second-level administrative division or county.
|
||||
- **state**: a first-level administrative division. Scotland, Northern Ireland, Wales, and England in the UK are mapped to "state" as well (convention used in OSM, GeoPlanet, etc.)
|
||||
- **country**: sovereign nations and their dependent territories, anything with an [ISO-3166 code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2).
|
||||
|
||||
Examples of normalization
|
||||
-------------------------
|
||||
|
||||
20
configure.ac
20
configure.ac
@@ -1,7 +1,7 @@
|
||||
# -*- Autoconf -*-
|
||||
# Process this file with autoconf to produce a configure script.
|
||||
|
||||
AC_INIT([libpostal], [0.3])
|
||||
AC_INIT([libpostal], [0.3.3])
|
||||
|
||||
AC_CONFIG_MACRO_DIRS([m4])
|
||||
|
||||
@@ -47,10 +47,8 @@ AC_TYPE_UINT8_T
|
||||
AC_CHECK_TYPES([ptrdiff_t])
|
||||
|
||||
# Checks for library functions.
|
||||
AC_FUNC_MALLOC
|
||||
AC_FUNC_MMAP
|
||||
AC_FUNC_REALLOC
|
||||
AC_CHECK_FUNCS([getcwd gettimeofday memmove memset munmap regcomp setlocale sqrt strdup strndup])
|
||||
AC_CHECK_FUNCS([malloc realloc getcwd gettimeofday memmove memset munmap regcomp setlocale sqrt strdup strndup])
|
||||
|
||||
AC_CONFIG_FILES([Makefile
|
||||
libpostal.pc
|
||||
@@ -88,4 +86,18 @@ AC_ARG_ENABLE([data-download],
|
||||
|
||||
AM_CONDITIONAL([DOWNLOAD_DATA], [test "x$DOWNLOAD_DATA" = "xtrue"])
|
||||
|
||||
AC_ARG_WITH(cflags-scanner-extra, [AS_HELP_STRING([--with-cflags-scanner-extra@<:@=VALUE@:>@], [Extra compilation options for scanner.c])],
|
||||
[
|
||||
if test "x$withval" = "xno"; then
|
||||
CFLAGS_SCANNER_EXTRA=""
|
||||
else
|
||||
CFLAGS_SCANNER_EXTRA="$withval"
|
||||
fi
|
||||
],
|
||||
[ CFLAGS_SCANNER_EXTRA="" ]
|
||||
)
|
||||
|
||||
AC_MSG_NOTICE([extra cflags for scanner.c: $CFLAGS_SCANNER_EXTRA])
|
||||
AC_SUBST(CFLAGS_SCANNER_EXTRA)
|
||||
|
||||
AC_OUTPUT
|
||||
|
||||
@@ -21,9 +21,11 @@ libpostal_la_CFLAGS = $(CFLAGS_O2)
|
||||
dist_bin_SCRIPTS = libpostal_data
|
||||
|
||||
# Scanner can take a very long time to compile with higher optimization levels, so always use -O0, scanner is fast enough
|
||||
# On cross-compilation for ARM using gcc-4.7, there are "out of range" errors during compilation that can be fixed by adding
|
||||
# -marm option. For that, CFLAGS_SCANNER_EXTRA is provided that can be filled during configuration stage (see ./configure --help).
|
||||
noinst_LTLIBRARIES = libscanner.la
|
||||
libscanner_la_SOURCES = scanner.c
|
||||
libscanner_la_CFLAGS = $(CFLAGS_O0)
|
||||
libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA)
|
||||
|
||||
noinst_PROGRAMS = libpostal bench build_address_dictionary build_geodb build_numex_table build_trans_table address_parser_train address_parser_test address_parser language_classifier_train language_classifier language_classifier_test
|
||||
libpostal_SOURCES = main.c json_encode.c
|
||||
|
||||
@@ -288,7 +288,7 @@ phrase_t search_address_dictionaries_suffix(char *str, size_t len, char *lang) {
|
||||
bool address_dictionary_init(void) {
|
||||
if (address_dict != NULL) return false;
|
||||
|
||||
address_dict = malloc(sizeof(address_dictionary_t));
|
||||
address_dict = calloc(1, sizeof(address_dictionary_t));
|
||||
if (address_dict == NULL) return false;
|
||||
|
||||
address_dict->canonical = cstring_array_new();
|
||||
|
||||
@@ -21,7 +21,8 @@
|
||||
|
||||
#define ALL_LANGUAGES "all"
|
||||
|
||||
#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_expansions" PATH_SEPARATOR "address_dictionary.dat"
|
||||
#define ADDRESS_DICTIONARY_DATA_FILE "address_dictionary.dat"
|
||||
#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR PATH_SEPARATOR ADDRESS_DICTIONARY_DATA_FILE
|
||||
|
||||
#define NULL_CANONICAL_INDEX -1
|
||||
|
||||
|
||||
@@ -25,14 +25,13 @@ typedef enum {
|
||||
ADDRESS_PARSER_SUFFIX_PHRASE
|
||||
} address_parser_phrase_type_t;
|
||||
|
||||
|
||||
static parser_options_t PARSER_DEFAULT_OPTIONS = {
|
||||
.rare_word_threshold = DEFAULT_RARE_WORD_THRESHOLD,
|
||||
.print_features = false
|
||||
};
|
||||
|
||||
address_parser_t *address_parser_new_options(parser_options_t options) {
|
||||
address_parser_t *parser = malloc(sizeof(address_parser_t));
|
||||
address_parser_t *parser = calloc(1, sizeof(address_parser_t));
|
||||
parser->options = options;
|
||||
return parser;
|
||||
}
|
||||
|
||||
@@ -93,7 +93,7 @@ averaged_perceptron_t *averaged_perceptron_read(FILE *f) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
averaged_perceptron_t *perceptron = malloc(sizeof(averaged_perceptron_t));
|
||||
averaged_perceptron_t *perceptron = calloc(1, sizeof(averaged_perceptron_t));
|
||||
|
||||
if (!file_read_uint32(f, &perceptron->num_features) ||
|
||||
!file_read_uint32(f, &perceptron->num_classes) ||
|
||||
@@ -216,4 +216,4 @@ void averaged_perceptron_destroy(averaged_perceptron_t *self) {
|
||||
}
|
||||
|
||||
free(self);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -386,7 +386,7 @@ bool averaged_perceptron_trainer_train_example(averaged_perceptron_trainer_t *se
|
||||
}
|
||||
|
||||
averaged_perceptron_trainer_t *averaged_perceptron_trainer_new(void) {
|
||||
averaged_perceptron_trainer_t *self = malloc(sizeof(averaged_perceptron_trainer_t));
|
||||
averaged_perceptron_trainer_t *self = calloc(1, sizeof(averaged_perceptron_trainer_t));
|
||||
|
||||
if (self == NULL) return NULL;
|
||||
|
||||
|
||||
@@ -56,7 +56,7 @@ int bloom_filter_add(bloom_filter_t *self, const char *key, size_t len) {
|
||||
}
|
||||
|
||||
bloom_filter_t *bloom_filter_new(uint64_t capacity, double error) {
|
||||
bloom_filter_t *bloom = malloc(sizeof(bloom_filter_t));
|
||||
bloom_filter_t *bloom = calloc(1, sizeof(bloom_filter_t));
|
||||
|
||||
if (bloom == NULL) {
|
||||
return NULL;
|
||||
@@ -220,4 +220,4 @@ void bloom_filter_destroy(bloom_filter_t *self) {
|
||||
}
|
||||
|
||||
free(self);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,6 +36,21 @@ bool is_relative_path(struct dirent *ent) {
|
||||
return strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0;
|
||||
}
|
||||
|
||||
char *path_vjoin(int n, va_list args) {
|
||||
char_array *path = char_array_new();
|
||||
if (path == NULL) return NULL;
|
||||
char_array_add_vjoined(path, PATH_SEPARATOR, true, n, args);
|
||||
return char_array_to_string(path);
|
||||
}
|
||||
|
||||
/*
 * Variadic front end for path_vjoin(): collects the n trailing arguments
 * into a va_list, forwards them, and returns path_vjoin()'s result
 * unchanged (including NULL).
 */
char *path_join(int n, ...) {
    va_list components;
    char *joined;

    va_start(components, n);
    joined = path_vjoin(n, components);
    va_end(components);

    return joined;
}
|
||||
|
||||
inline uint64_t file_deserialize_uint64(unsigned char *buf) {
|
||||
return ((uint64_t)buf[0] << 56) |
|
||||
((uint64_t)buf[1] << 48) |
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "libpostal_config.h"
|
||||
#include "string_utils.h"
|
||||
|
||||
#ifdef HAVE_DIRENT_H
|
||||
#include <dirent.h>
|
||||
@@ -55,6 +56,9 @@ char *file_getline(FILE * f);
|
||||
|
||||
bool is_relative_path(struct dirent *ent);
|
||||
|
||||
char *path_join(int n, ...);
|
||||
char *path_vjoin(int n, va_list args);
|
||||
|
||||
uint64_t file_deserialize_uint64(unsigned char *buf);
|
||||
bool file_read_uint64(FILE *file, uint64_t *value);
|
||||
bool file_write_uint64(FILE *file, uint64_t value);
|
||||
|
||||
@@ -47,7 +47,7 @@ void geodb_destroy(geodb_t *self) {
|
||||
geodb_t *geodb_init(char *dir) {
|
||||
if (dir == NULL) return NULL;
|
||||
|
||||
geodb_t *gdb = malloc(sizeof(geodb_t));
|
||||
geodb_t *gdb = calloc(1, sizeof(geodb_t));
|
||||
|
||||
if (gdb == NULL) return NULL;
|
||||
|
||||
|
||||
@@ -338,7 +338,7 @@ void geodb_builder_destroy(geodb_builder_t *self) {
|
||||
}
|
||||
|
||||
geodb_builder_t *geodb_builder_new(char *log_filename) {
|
||||
geodb_builder_t *builder = malloc(sizeof(geodb_builder_t));
|
||||
geodb_builder_t *builder = calloc(1, sizeof(geodb_builder_t));
|
||||
|
||||
if (builder == NULL) return NULL;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#include "graph.h"
|
||||
|
||||
graph_t *graph_new_dims(graph_type_t type, uint32_t m, uint32_t n, size_t nnz, bool fixed_rows) {
|
||||
graph_t *graph = malloc(sizeof(graph_t));
|
||||
graph_t *graph = calloc(1, sizeof(graph_t));
|
||||
graph->m = m;
|
||||
graph->fixed_rows = fixed_rows;
|
||||
graph->n = n;
|
||||
|
||||
@@ -35,7 +35,7 @@ void language_classifier_destroy(language_classifier_t *self) {
|
||||
}
|
||||
|
||||
language_classifier_t *language_classifier_new(void) {
|
||||
language_classifier_t *language_classifier = malloc(sizeof(language_classifier_t));
|
||||
language_classifier_t *language_classifier = calloc(1, sizeof(language_classifier_t));
|
||||
return language_classifier;
|
||||
}
|
||||
|
||||
|
||||
102
src/libpostal.c
102
src/libpostal.c
@@ -1036,40 +1036,106 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t
|
||||
return parsed;
|
||||
}
|
||||
|
||||
bool libpostal_setup_datadir(char *datadir) {
|
||||
char *transliteration_path = NULL;
|
||||
char *numex_path = NULL;
|
||||
char *address_dictionary_path = NULL;
|
||||
|
||||
if (datadir != NULL) {
|
||||
transliteration_path = path_join(3, datadir, LIBPOSTAL_TRANSLITERATION_SUBDIR, TRANSLITERATION_DATA_FILE);
|
||||
numex_path = path_join(3, datadir, LIBPOSTAL_NUMEX_SUBDIR, NUMEX_DATA_FILE);
|
||||
address_dictionary_path = path_join(3, datadir, LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR, ADDRESS_DICTIONARY_DATA_FILE);
|
||||
}
|
||||
|
||||
if (!transliteration_module_setup(transliteration_path)) {
|
||||
log_error("Error loading transliteration module, dir=%s\n", transliteration_path);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!numex_module_setup(numex_path)) {
|
||||
log_error("Error loading numex module, dir=%s\n", numex_path);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!address_dictionary_module_setup(address_dictionary_path)) {
|
||||
log_error("Error loading dictionary module, dir=%s\n", address_dictionary_path);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (transliteration_path != NULL) {
|
||||
free(transliteration_path);
|
||||
}
|
||||
|
||||
if (numex_path != NULL) {
|
||||
free(numex_path);
|
||||
}
|
||||
|
||||
if (address_dictionary_path != NULL) {
|
||||
free(address_dictionary_path);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool libpostal_setup(void) {
|
||||
if (!transliteration_module_setup(NULL)) {
|
||||
log_error("Error loading transliteration module\n");
|
||||
return libpostal_setup_datadir(NULL);
|
||||
}
|
||||
|
||||
bool libpostal_setup_language_classifier_datadir(char *datadir) {
|
||||
char *language_classifier_dir = NULL;
|
||||
|
||||
if (datadir != NULL) {
|
||||
language_classifier_dir = path_join(2, datadir, LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR);
|
||||
}
|
||||
|
||||
if (!language_classifier_module_setup(language_classifier_dir)) {
|
||||
log_error("Error loading language classifier, dir=%s\n", language_classifier_dir);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!numex_module_setup(NULL)) {
|
||||
log_error("Error loading numex module\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!address_dictionary_module_setup(NULL)) {
|
||||
log_error("Error loading dictionary module\n");
|
||||
return false;
|
||||
if (language_classifier_dir != NULL) {
|
||||
free(language_classifier_dir);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool libpostal_setup_language_classifier(void) {
|
||||
if (!language_classifier_module_setup(NULL)) {
|
||||
log_error("Error loading language classifier\n");
|
||||
return libpostal_setup_language_classifier_datadir(NULL);
|
||||
}
|
||||
|
||||
bool libpostal_setup_parser_datadir(char *datadir) {
|
||||
char *parser_dir = NULL;
|
||||
char *geodb_dir = NULL;
|
||||
|
||||
if (datadir != NULL) {
|
||||
parser_dir = path_join(2, datadir, LIBPOSTAL_ADDRESS_PARSER_SUBDIR);
|
||||
geodb_dir = path_join(2, datadir, LIBPOSTAL_GEODB_SUBDIR);
|
||||
}
|
||||
|
||||
if (!geodb_module_setup(geodb_dir)) {
|
||||
log_error("Error loading geodb module, dir=%s\n", geodb_dir);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!address_parser_module_setup(parser_dir)) {
|
||||
log_error("Error loading address parser module, dir=%s\n", parser_dir);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (parser_dir != NULL) {
|
||||
free(parser_dir);
|
||||
}
|
||||
|
||||
if (geodb_dir != NULL) {
|
||||
free(geodb_dir);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool libpostal_setup_parser(void) {
|
||||
if (!address_parser_module_setup(NULL)) {
|
||||
log_error("Error loading address parser module\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return libpostal_setup_parser_datadir(NULL);
|
||||
}
|
||||
|
||||
void libpostal_teardown(void) {
|
||||
|
||||
@@ -92,12 +92,15 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t
|
||||
// Setup/teardown methods
|
||||
|
||||
bool libpostal_setup(void);
|
||||
bool libpostal_setup_datadir(char *datadir);
|
||||
void libpostal_teardown(void);
|
||||
|
||||
bool libpostal_setup_parser(void);
|
||||
bool libpostal_setup_parser_datadir(char *datadir);
|
||||
void libpostal_teardown_parser(void);
|
||||
|
||||
bool libpostal_setup_language_classifier(void);
|
||||
bool libpostal_setup_language_classifier_datadir(char *datadir);
|
||||
void libpostal_teardown_language_classifier(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -12,12 +12,20 @@
|
||||
#error LIBPOSTAL_DATA_DIR not defined!
|
||||
#endif
|
||||
|
||||
#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_parser"
|
||||
#define LIBPOSTAL_DICTIONARIES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "dictionaries"
|
||||
#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geonames"
|
||||
#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geodb"
|
||||
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "language_classifier"
|
||||
#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "transliteration"
|
||||
#define LIBPOSTAL_ADDRESS_PARSER_SUBDIR "address_parser"
|
||||
#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_PARSER_SUBDIR
|
||||
#define LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR "address_expansions"
|
||||
#define LIBPOSTAL_ADDRESS_EXPANSIONS_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR
|
||||
#define LIBPOSTAL_GEONAMES_SUBDIR "geonames"
|
||||
#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEONAMES_SUBDIR
|
||||
#define LIBPOSTAL_GEODB_SUBDIR "geodb"
|
||||
#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEODB_SUBDIR
|
||||
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR "language_classifier"
|
||||
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR
|
||||
#define LIBPOSTAL_NUMEX_SUBDIR "numex"
|
||||
#define LIBPOSTAL_NUMEX_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_NUMEX_SUBDIR
|
||||
#define LIBPOSTAL_TRANSLITERATION_SUBDIR "transliteration"
|
||||
#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_TRANSLITERATION_SUBDIR
|
||||
|
||||
#define GEODB_BLOOM_FILTER_SIZE 100000000
|
||||
#define GEODB_BLOOM_FILTER_ERROR 0.001
|
||||
|
||||
@@ -51,7 +51,7 @@ numex_table_t *numex_table_init(void) {
|
||||
numex_table_t *numex_table = get_numex_table();
|
||||
|
||||
if (numex_table == NULL) {
|
||||
numex_table = malloc(sizeof(numex_table_t));
|
||||
numex_table = calloc(1, sizeof(numex_table_t));
|
||||
|
||||
if (numex_table == NULL) return NULL;
|
||||
|
||||
|
||||
@@ -20,7 +20,8 @@
|
||||
#include "trie.h"
|
||||
#include "trie_search.h"
|
||||
|
||||
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR "numex.dat"
|
||||
#define NUMEX_DATA_FILE "numex.dat"
|
||||
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR NUMEX_DATA_FILE
|
||||
|
||||
#define LATIN_LANGUAGE_CODE "la"
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#include "klib/ksort.h"
|
||||
|
||||
sparse_matrix_t *sparse_matrix_new_shape(size_t m, size_t n) {
|
||||
sparse_matrix_t *matrix = malloc(sizeof(sparse_matrix_t));
|
||||
sparse_matrix_t *matrix = calloc(1, sizeof(sparse_matrix_t));
|
||||
if (matrix == NULL) return NULL;
|
||||
matrix->m = m;
|
||||
matrix->n = n;
|
||||
|
||||
@@ -782,16 +782,17 @@ cstring_array *cstring_array_new_size(size_t size) {
|
||||
|
||||
cstring_array *cstring_array_from_char_array(char_array *str) {
|
||||
cstring_array *array = malloc(sizeof(cstring_array));
|
||||
if (array == NULL) return NULL;
|
||||
if (array == NULL || str == NULL) return NULL;
|
||||
|
||||
array->str = str;
|
||||
array->indices = uint32_array_new_size(1);
|
||||
uint32_array_push(array->indices, 0);
|
||||
char *ptr = str->a;
|
||||
uint32_t i = 0;
|
||||
for (i = 0; i < str->n - 1; i++, ptr++) {
|
||||
if (*ptr == '\0') {
|
||||
uint32_array_push(array->indices, i + 1);
|
||||
if (str->n > 0) {
|
||||
for (uint32_t i = 0; i < str->n - 1; i++, ptr++) {
|
||||
if (*ptr == '\0') {
|
||||
uint32_array_push(array->indices, i + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
return array;
|
||||
|
||||
@@ -141,7 +141,6 @@ void char_array_cat_vprintf(char_array *array, char *format, va_list args);
|
||||
void char_array_cat_printf(char_array *array, char *format, ...);
|
||||
|
||||
// Mainly for paths or delimited strings
|
||||
void char_array_append_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
|
||||
void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
|
||||
void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
||||
void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
||||
|
||||
@@ -1087,7 +1087,7 @@ transliteration_table_t *transliteration_table_init(void) {
|
||||
transliteration_table_t *trans_table = get_transliteration_table();
|
||||
|
||||
if (trans_table == NULL) {
|
||||
trans_table = malloc(sizeof(transliteration_table_t));
|
||||
trans_table = calloc(1, sizeof(transliteration_table_t));
|
||||
|
||||
trans_table->trie = trie_new();
|
||||
if (trans_table->trie == NULL) {
|
||||
|
||||
@@ -17,7 +17,8 @@
|
||||
#define LATIN_ASCII_SIMPLE "latin-ascii-simple"
|
||||
#define HTML_ESCAPE "html-escape"
|
||||
|
||||
#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR "transliteration.dat"
|
||||
#define TRANSLITERATION_DATA_FILE "transliteration.dat"
|
||||
#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR TRANSLITERATION_DATA_FILE
|
||||
|
||||
#define MAX_TRANS_NAME_LEN 100
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ Constructors
|
||||
*/
|
||||
|
||||
static trie_t *trie_new_empty(uint8_t *alphabet, uint32_t alphabet_size) {
|
||||
trie_t *self = malloc(sizeof(trie_t));
|
||||
trie_t *self = calloc(1, sizeof(trie_t));
|
||||
if (!self)
|
||||
goto exit_no_malloc;
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ static inline void _aligned_free(void *p)
|
||||
name *array = malloc(sizeof(name)); \
|
||||
if (array == NULL) return NULL; \
|
||||
array->n = array->m = 0; \
|
||||
array->a = malloc(size * sizeof(type)); \
|
||||
array->a = malloc((size > 0 ? size : 1) * sizeof(type)); \
|
||||
if (array->a == NULL) return NULL; \
|
||||
array->m = size; \
|
||||
return array; \
|
||||
|
||||
Reference in New Issue
Block a user