diff --git a/src/logistic_regression_trainer.c b/src/logistic_regression_trainer.c index df67afc1..dc0843bf 100644 --- a/src/logistic_regression_trainer.c +++ b/src/logistic_regression_trainer.c @@ -1,4 +1,5 @@ #include "logistic_regression_trainer.h" +#include "sparse_matrix_utils.h" void logistic_regression_trainer_destroy(logistic_regression_trainer_t *self) { if (self == NULL) return; @@ -15,6 +16,22 @@ void logistic_regression_trainer_destroy(logistic_regression_trainer_t *self) { matrix_destroy(self->weights); } + if (self->last_updated != NULL) { + uint32_array_destroy(self->last_updated); + } + + if (self->unique_columns != NULL) { + kh_destroy(int_set, self->unique_columns); + } + + if (self->batch_columns != NULL) { + uint32_array_destroy(self->batch_columns); + } + + if (self->gradient != NULL) { + matrix_destroy(self->gradient); + } + free(self); } @@ -33,6 +50,13 @@ logistic_regression_trainer_t *logistic_regression_trainer_init(trie_t *feature_ trainer->weights = matrix_new_zeros(trainer->num_features, trainer->num_labels); + trainer->gradient = matrix_new_zeros(trainer->num_features, trainer->num_labels); + + trainer->unique_columns = kh_init(int_set); + trainer->batch_columns = uint32_array_new_size(trainer->num_features); + + trainer->last_updated = uint32_array_new_zeros(trainer->num_features); + trainer->lambda = DEFAULT_LAMBDA; trainer->iters = 0; trainer->epochs = 0; @@ -75,12 +99,12 @@ double logistic_regression_trainer_batch_cost(logistic_regression_trainer_t *sel return cost; } - bool logistic_regression_trainer_train_batch(logistic_regression_trainer_t *self, feature_count_array *features, cstring_array *labels) { size_t m = self->weights->m; size_t n = self->weights->n; - matrix_t *gradient = matrix_new_zeros(m, n); + // Optimize + matrix_t *gradient = self->gradient; sparse_matrix_t *x = feature_matrix(self->feature_ids, features); uint32_array *y = label_vector(self->label_ids, labels); @@ -89,21 +113,40 @@ bool logistic_regression_trainer_train_batch(logistic_regression_trainer_t *self bool ret = false; - if (!logistic_regression_gradient(self->weights, gradient, x, y, p_y, self->lambda)) { + if (!sparse_matrix_add_unique_columns(x, self->unique_columns, self->batch_columns)) { + log_error("Unique columns failed\n"); + goto exit_matrices_created; + } + + if (self->lambda > 0.0 && !stochastic_gradient_descent_sparse_regularize_weights(self->weights, self->batch_columns, self->last_updated, self->iters, self->lambda)) { + log_error("Error regularizing weights\n"); + goto exit_matrices_created; + } + + if (!logistic_regression_gradient_sparse(self->weights, gradient, x, y, p_y, self->batch_columns, self->lambda)) { log_error("Gradient failed\n"); goto exit_matrices_created; } size_t data_len = m * n; - ret = stochastic_gradient_descent(self->weights, gradient, self->gamma); + ret = stochastic_gradient_descent_sparse(self->weights, gradient, self->batch_columns, self->gamma); self->iters++; exit_matrices_created: - matrix_destroy(gradient); matrix_destroy(p_y); uint32_array_destroy(y); sparse_matrix_destroy(x); return ret; } + +bool logistic_regression_trainer_finalize(logistic_regression_trainer_t *self) { + if (self == NULL) return false; + + if (self->lambda > 0.0) { + return stochastic_gradient_descent_sparse_finalize_weights(self->weights, self->last_updated, self->iters, self->lambda); + } + + return true; +} diff --git a/src/logistic_regression_trainer.h b/src/logistic_regression_trainer.h index f00dc12e..a552ac41 100644 --- a/src/logistic_regression_trainer.h +++ b/src/logistic_regression_trainer.h @@ -4,6 +4,7 @@ #include #include +#include #include "averaged_perceptron_tagger.h" #include "collections.h" @@ -34,6 +35,9 @@ typedef struct logistic_regression_trainer { khash_t(str_uint32) *label_ids; // Hashtable mapping labels to array indices size_t num_labels; // Number of labels matrix_t *weights; // Matrix of logistic regression weights + matrix_t *gradient; // Gradient matrix to be reused + khash_t(int_set) *unique_columns; // Unique columns set + uint32_array *batch_columns; // Unique columns as array uint32_array *last_updated; // Array of length N indicating the last time each feature was updated double lambda; // Regularization weight uint32_t iters; // Number of iterations, used to decay learning rate @@ -47,6 +51,7 @@ logistic_regression_trainer_t *logistic_regression_trainer_init(trie_t *feature_ bool logistic_regression_trainer_train_batch(logistic_regression_trainer_t *self, feature_count_array *features, cstring_array *labels); double logistic_regression_trainer_batch_cost(logistic_regression_trainer_t *self, feature_count_array *features, cstring_array *labels); +bool logistic_regression_trainer_finalize(logistic_regression_trainer_t *self); void logistic_regression_trainer_destroy(logistic_regression_trainer_t *self);