From ecd07b18c118fc1e52ea30d9a91d7dc6f049258c Mon Sep 17 00:00:00 2001 From: Iestyn Pryce Date: Fri, 19 May 2017 22:31:56 +0100 Subject: [PATCH 1/5] Fix log_* formats which expect size_t but receive uint32_t. --- src/address_dictionary.c | 2 +- src/ftrl.c | 8 ++++---- src/language_classifier_test.c | 4 ++-- src/language_classifier_train.c | 4 ++-- src/libpostal.c | 4 ++-- src/log/log.h | 1 + src/logistic_regression.c | 4 ++-- src/stochastic_gradient_descent.c | 6 +++--- 8 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/address_dictionary.c b/src/address_dictionary.c index a6df42a6..ed1116f6 100644 --- a/src/address_dictionary.c +++ b/src/address_dictionary.c @@ -15,7 +15,7 @@ address_dictionary_t *get_address_dictionary(void) { address_expansion_value_t *address_dictionary_get_expansions(uint32_t i) { if (address_dict == NULL || address_dict->values == NULL || i > address_dict->values->n) { - log_error("i=%zu, address_dict->values->n=%zu\n", i, address_dict->values->n); + log_error("i=%" PRIu32 ", address_dict->values->n=%zu\n", i, address_dict->values->n); log_error(ADDRESS_DICTIONARY_SETUP_ERROR); return NULL; } diff --git a/src/ftrl.c b/src/ftrl.c index 8f44d368..8405730f 100644 --- a/src/ftrl.c +++ b/src/ftrl.c @@ -243,7 +243,7 @@ sparse_matrix_t *ftrl_weights_finalize_sparse(ftrl_trainer_t *self) { double lambda2 = self->lambda2; sparse_matrix_t *weights = sparse_matrix_new(); - log_info("weights->m = %zu\n", weights->m); + log_info("weights->m = %" PRIu32 "\n", weights->m); size_t i_start = 0; @@ -259,7 +259,7 @@ sparse_matrix_t *ftrl_weights_finalize_sparse(ftrl_trainer_t *self) { sparse_matrix_finalize_row(weights); i_start = 1; } - log_info("after intercept weights->m = %zu\n", weights->m); + log_info("after intercept weights->m = %" PRIu32 "\n", weights->m); for (size_t i = i_start; i < m; i++) { double *row = double_matrix_get_row(self->z, (size_t)i); @@ -275,7 +275,7 @@ sparse_matrix_t *ftrl_weights_finalize_sparse(ftrl_trainer_t 
*self) { sparse_matrix_finalize_row(weights); if (i % 1000 == 0 && i > 0) { - log_info("adding rows, weights->m = %zu\n", weights->m); + log_info("adding rows, weights->m = %" PRIu32 "\n", weights->m); } } @@ -295,4 +295,4 @@ void ftrl_trainer_destroy(ftrl_trainer_t *self) { } free(self); -} \ No newline at end of file +} diff --git a/src/language_classifier_test.c b/src/language_classifier_test.c index 729e5d12..55f9a548 100644 --- a/src/language_classifier_test.c +++ b/src/language_classifier_test.c @@ -53,7 +53,7 @@ double test_accuracy(char *filename) { } - log_info("total=%zu\n", total); + log_info("total=%" PRIu32 "\n", total); trie_destroy(label_ids); @@ -85,4 +85,4 @@ int main(int argc, char **argv) { double accuracy = test_accuracy(filename); log_info("Done. Accuracy: %f\n", accuracy); -} \ No newline at end of file +} diff --git a/src/language_classifier_train.c b/src/language_classifier_train.c index 03cd4c1d..b5306f4a 100644 --- a/src/language_classifier_train.c +++ b/src/language_classifier_train.c @@ -599,13 +599,13 @@ static language_classifier_t *trainer_finalize(logistic_regression_trainer_t *tr sparse_matrix_t *sparse_weights = logistic_regression_trainer_final_weights_sparse(trainer); classifier->weights_type = MATRIX_SPARSE; classifier->weights.sparse = sparse_weights; - log_info("Weights sparse: %zu rows (m=%u), %zu cols, %zu elements\n", sparse_weights->indptr->n, sparse_weights->m, sparse_weights->n, sparse_weights->data->n); + log_info("Weights sparse: %zu rows (m=%u), %" PRIu32 " cols, %zu elements\n", sparse_weights->indptr->n, sparse_weights->m, sparse_weights->n, sparse_weights->data->n); } } else if (trainer->optimizer_type == LOGISTIC_REGRESSION_OPTIMIZER_FTRL) { sparse_matrix_t *sparse_weights = logistic_regression_trainer_final_weights_sparse(trainer); classifier->weights_type = MATRIX_SPARSE; classifier->weights.sparse = sparse_weights; - log_info("Weights sparse: %zu rows (m=%u), %zu cols, %zu elements\n", 
sparse_weights->indptr->n, sparse_weights->m, sparse_weights->n, sparse_weights->data->n); + log_info("Weights sparse: %zu rows (m=%u), %" PRIu32 " cols, %zu elements\n", sparse_weights->indptr->n, sparse_weights->m, sparse_weights->n, sparse_weights->data->n); } diff --git a/src/libpostal.c b/src/libpostal.c index b05acea5..b68aacb7 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -665,7 +665,7 @@ static bool add_affix_expansions(string_tree_t *tree, char *str, char *lang, tok } } else if (have_suffix) { - log_debug("suffix.start=%zu\n", suffix.start); + log_debug("suffix.start=%" PRId32 "\n", suffix.start); root_len = suffix.start; root_token = (token_t){token.offset, root_len, token.type}; log_debug("root_len=%zu\n", root_len); @@ -887,7 +887,7 @@ static void expand_alternative(cstring_array *strings, khash_t(str_set) *unique_ log_debug("Adding alternatives for single normalization\n"); alternatives = add_string_alternatives(tokenized_str, options); - log_debug("num strings = %zu\n", string_tree_num_strings(alternatives)); + log_debug("num strings = %" PRIu32 "\n", string_tree_num_strings(alternatives)); if (alternatives == NULL) { log_debug("alternatives = NULL\n"); diff --git a/src/log/log.h b/src/log/log.h index d971ed40..7e5e454c 100644 --- a/src/log/log.h +++ b/src/log/log.h @@ -3,6 +3,7 @@ #include <stdio.h> #include <errno.h> +#include <inttypes.h> #include <string.h> #define LOG_LEVEL_DEBUG 10 diff --git a/src/logistic_regression.c b/src/logistic_regression.c index 08d340fc..329c0c0b 100644 --- a/src/logistic_regression.c +++ b/src/logistic_regression.c @@ -13,7 +13,7 @@ bool logistic_regression_model_expectation_sparse(sparse_matrix_t *theta, sparse } if (sparse_matrix_dot_sparse(x, theta, p_y) != 0) { - log_error("x->m = %zu, x->n = %zu, theta->m = %zu, theta->n = %zu, p_y->m = %zu, p_y->n = %zu\n", x->m, x->n, theta->m, theta->n, p_y->m, p_y->n); + log_error("x->m = %" PRIu32 ", x->n = %" PRIu32 ", theta->m = %" PRIu32 ", theta->n = %" PRIu32 ", p_y->m = %zu, p_y->n = %zu\n", x->m, x->n, 
theta->m, theta->n, p_y->m, p_y->n); return false; } @@ -31,7 +31,7 @@ bool logistic_regression_model_expectation(double_matrix_t *theta, sparse_matrix } if (sparse_matrix_dot_dense(x, theta, p_y) != 0) { - log_error("x->m = %zu, x->n = %zu, theta->m = %zu, theta->n = %zu, p_y->m = %zu, p_y->n = %zu\n", x->m, x->n, theta->m, theta->n, p_y->m, p_y->n); + log_error("x->m = %" PRIu32 ", x->n = %" PRIu32 ", theta->m = %" PRIu32 ", theta->n = %" PRIu32 ", p_y->m = %zu, p_y->n = %zu\n", x->m, x->n, theta->m, theta->n, p_y->m, p_y->n); return false; } diff --git a/src/stochastic_gradient_descent.c b/src/stochastic_gradient_descent.c index e94ffc55..19bca2a7 100644 --- a/src/stochastic_gradient_descent.c +++ b/src/stochastic_gradient_descent.c @@ -203,7 +203,7 @@ bool stochastic_gradient_descent_update_sparse(sgd_trainer_t *self, double_matri lambda_update = lambda / (double)batch_size * gamma_t; if (t > self->penalties->n) { - log_info("t = %zu, penalties->n = %zu\n", t, self->penalties->n); + log_info("t = %" PRIu32 ", penalties->n = %zu\n", t, self->penalties->n); return false; } penalty = self->penalties->a[t]; @@ -219,7 +219,7 @@ bool stochastic_gradient_descent_update_sparse(sgd_trainer_t *self, double_matri if (self->iterations > 0) { if (last_updated >= self->penalties->n) { - log_info("col = %u, t = %zu, last_updated = %zu, penalties->n = %zu\n", col, t, last_updated, self->penalties->n); + log_info("col = %u, t = %" PRIu32 ", last_updated = %" PRIu32 ", penalties->n = %zu\n", col, t, last_updated, self->penalties->n); return false; } @@ -376,7 +376,7 @@ bool stochastic_gradient_descent_set_regularized_weights(sgd_trainer_t *self, do uint32_t last_updated = updates[i]; if (last_updated >= self->penalties->n) { - log_error("last_updated (%zu) >= self->penalties-> (%zu)\n", last_updated, self->penalties->n); + log_error("last_updated (%" PRIu32 ") >= self->penalties-> (%zu)\n", last_updated, self->penalties->n); return false; } double last_update_penalty = 
penalties[last_updated]; From 6aa3cb61fda8aba41c5ac43fd2fc00c3601a8b0d Mon Sep 17 00:00:00 2001 From: Iestyn Pryce Date: Sun, 21 May 2017 10:29:34 +0100 Subject: [PATCH 2/5] Fix log_* formats which expect long long int but receive int64_t. --- src/trie_search.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/trie_search.c b/src/trie_search.c index a45955dc..8518db89 100644 --- a/src/trie_search.c +++ b/src/trie_search.c @@ -800,17 +800,17 @@ bool token_phrase_memberships(phrase_array *phrases, int64_array *phrase_members for (; i < phrase.start; i++) { int64_array_push(phrase_memberships, NULL_PHRASE_MEMBERSHIP); - log_debug("token i=%lld, null phrase membership\n", i); + log_debug("token i=%" PRId64 ", null phrase membership\n", i); } for (i = phrase.start; i < phrase.start + phrase.len; i++) { - log_debug("token i=%lld, phrase membership=%lld\n", i, j); + log_debug("token i=%" PRId64 ", phrase membership=%" PRId64 "\n", i, j); int64_array_push(phrase_memberships, j); } } for (; i < len; i++) { - log_debug("token i=%lld, null phrase membership\n", i); + log_debug("token i=%" PRId64 ", null phrase membership\n", i); int64_array_push(phrase_memberships, NULL_PHRASE_MEMBERSHIP); } From 73d27caeb977ede9878322fb1ea0a5786278777a Mon Sep 17 00:00:00 2001 From: Iestyn Pryce Date: Sun, 21 May 2017 10:57:20 +0100 Subject: [PATCH 3/5] Fix log_* formats which expect unsigned long long but receive uint64_t. 
--- src/address_parser_train.c | 4 ++-- src/averaged_perceptron_trainer.c | 2 +- src/crf_trainer_averaged_perceptron.c | 2 +- src/numex.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/address_parser_train.c b/src/address_parser_train.c index 2252f2cd..fd2ef3fd 100644 --- a/src/address_parser_train.c +++ b/src/address_parser_train.c @@ -1090,7 +1090,7 @@ bool address_parser_train_epoch(address_parser_t *self, void *trainer, char *fil uint64_t prev_errors = errors; errors = address_parser_train_num_errors(self, trainer); - log_info("Iter %d: Did %zu examples with %llu errors\n", iteration, examples, errors - prev_errors); + log_info("Iter %d: Did %zu examples with %" PRIu64 " errors\n", iteration, examples, errors - prev_errors); } } @@ -1211,7 +1211,7 @@ int main(int argc, char **argv) { exit(EXIT_FAILURE); } min_updates = arg_min_updates; - log_info("min_updates = %llu\n", min_updates); + log_info("min_updates = %" PRIu64 "\n", min_updates); } else if (kwarg == ADDRESS_PARSER_TRAIN_ARG_MODEL_TYPE) { if (string_equals(arg, "crf")) { model_type = ADDRESS_PARSER_TYPE_CRF; diff --git a/src/averaged_perceptron_trainer.c b/src/averaged_perceptron_trainer.c index c150af67..eede8f76 100644 --- a/src/averaged_perceptron_trainer.c +++ b/src/averaged_perceptron_trainer.c @@ -148,7 +148,7 @@ averaged_perceptron_t *averaged_perceptron_trainer_finalize(averaged_perceptron_ log_info("Finalizing trainer, num_features=%u\n", self->num_features); - log_info("Pruning weights with < min_updates = %llu\n", self->min_updates); + log_info("Pruning weights with < min_updates = %" PRIu64 "\n", self->min_updates); for (feature_id = 0; feature_id < self->num_features; feature_id++) { k = kh_get(feature_class_weights, self->weights, feature_id); diff --git a/src/crf_trainer_averaged_perceptron.c b/src/crf_trainer_averaged_perceptron.c index e411e416..fdd7f0c0 100644 --- a/src/crf_trainer_averaged_perceptron.c +++ b/src/crf_trainer_averaged_perceptron.c @@ -755,7 
+755,7 @@ crf_t *crf_averaged_perceptron_trainer_finalize(crf_averaged_perceptron_trainer_ uint32_t next_feature_id = 0; uint64_t *update_counts = self->update_counts->a; - log_info("Pruning weights with < min_updates = %llu\n", self->min_updates); + log_info("Pruning weights with < min_updates = %" PRIu64 "\n", self->min_updates); for (feature_id = 0; feature_id < num_features; feature_id++) { k = kh_get(feature_class_weights, self->weights, feature_id); diff --git a/src/numex.c b/src/numex.c index f2a0b156..802991b7 100644 --- a/src/numex.c +++ b/src/numex.c @@ -421,7 +421,7 @@ bool numex_table_read(FILE *f) { goto exit_numex_table_load_error; } - log_debug("read num_languages = %llu\n", num_languages); + log_debug("read num_languages = %" PRIu64 "\n", num_languages); int i = 0; @@ -443,7 +443,7 @@ bool numex_table_read(FILE *f) { goto exit_numex_table_load_error; } - log_debug("read num_rules = %llu\n", num_rules); + log_debug("read num_rules = %" PRIu64 "\n", num_rules); numex_rule_t rule; From d8239a9cc4bb61a9c12b643c275fba1c69a65ea8 Mon Sep 17 00:00:00 2001 From: Iestyn Pryce Date: Sun, 21 May 2017 11:14:21 +0100 Subject: [PATCH 4/5] Revert format regression introduced in ecd07b18c118fc1e52ea30d9a91d7dc6f049258c --- src/logistic_regression.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/logistic_regression.c b/src/logistic_regression.c index 329c0c0b..26e41003 100644 --- a/src/logistic_regression.c +++ b/src/logistic_regression.c @@ -31,7 +31,7 @@ bool logistic_regression_model_expectation(double_matrix_t *theta, sparse_matrix } if (sparse_matrix_dot_dense(x, theta, p_y) != 0) { - log_error("x->m = %" PRIu32 ", x->n = %" PRIu32 ", theta->m = %" PRIu32 ", theta->n = %" PRIu32 ", p_y->m = %zu, p_y->n = %zu\n", x->m, x->n, theta->m, theta->n, p_y->m, p_y->n); + log_error("x->m = %" PRIu32 ", x->n = %" PRIu32 ", theta->m = %zu, theta->n = %zu, p_y->m = %zu, p_y->n = %zu\n", x->m, x->n, theta->m, theta->n, p_y->m, p_y->n); return false; } 
From 87cf7b5bca7fd4bec90a7dcea3c67a0904c6ba21 Mon Sep 17 00:00:00 2001 From: Iestyn Pryce Date: Sun, 21 May 2017 11:58:37 +0100 Subject: [PATCH 5/5] Add portable way of formatting khint_t type (from klib) --- src/address_parser_train.c | 4 ++-- src/klib/khash.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/address_parser_train.c b/src/address_parser_train.c index fd2ef3fd..3a1d22de 100644 --- a/src/address_parser_train.c +++ b/src/address_parser_train.c @@ -662,7 +662,7 @@ address_parser_t *address_parser_init(char *filename) { } - log_info("Done with vocab, total size=%zu\n", kh_size(vocab)); + log_info("Done with vocab, total size=%" PRIkh32 "\n", kh_size(vocab)); for (k = kh_begin(vocab); k != kh_end(vocab); ++k) { token = (char *)kh_key(vocab, k); @@ -676,7 +676,7 @@ address_parser_t *address_parser_init(char *filename) { } } - log_info("After pruning vocab size=%zu\n", kh_size(vocab)); + log_info("After pruning vocab size=%" PRIkh32 "\n", kh_size(vocab)); log_info("Creating phrases trie\n"); diff --git a/src/klib/khash.h b/src/klib/khash.h index e4e25fa5..554eb85d 100644 --- a/src/klib/khash.h +++ b/src/klib/khash.h @@ -101,14 +101,18 @@ int main() { #if UINT_MAX == 0xffffffffu typedef unsigned int khint32_t; +#define PRIkh32 "u" #elif ULONG_MAX == 0xffffffffu typedef unsigned long khint32_t; +#define PRIkh32 "lu" #endif #if ULONG_MAX == ULLONG_MAX typedef unsigned long khint64_t; +#define PRIkh64 "lu" #else typedef unsigned long long khint64_t; +#define PRIkh64 "llu" #endif #ifndef kh_inline