[classification] Pre-allocating memory in logistic regression trainer, storing last updated timestamps for sparse stochastic gradient descent and using the new gradient API
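The central idea in this change is lazy ("just-in-time") L2 regularization for sparse stochastic gradient descent: each batch only touches the weight rows for features that actually occur in that batch, and last_updated records the iteration at which each feature was last regularized, so the decay owed for the skipped iterations can be applied in one step the next time the feature appears (and once more for every feature in finalize). The following is a minimal standalone sketch of that bookkeeping, not libpostal's actual implementation; it assumes a constant step size gamma for simplicity, whereas the real trainer decays its learning rate over iterations, so the compounded factor there would not be a single pow.

    #include <stdint.h>
    #include <stddef.h>
    #include <math.h>

    /* Sketch only: weights is a dense num_features x num_labels array stored
       row-major, matching the trainer's weight matrix layout. */
    static void regularize_feature(double *weights, size_t num_labels,
                                   uint32_t *last_updated, uint32_t feature,
                                   uint32_t iter, double lambda, double gamma) {
        uint32_t missed = iter - last_updated[feature];  /* iterations skipped */
        if (missed == 0) return;

        /* One SGD step of the L2 penalty shrinks a weight by (1 - gamma * lambda);
           compounding the skipped steps applies them all at once. */
        double decay = pow(1.0 - gamma * lambda, (double)missed);
        double *row = weights + (size_t)feature * num_labels;
        for (size_t j = 0; j < num_labels; j++) {
            row[j] *= decay;
        }
        last_updated[feature] = iter;  /* feature is now up to date */
    }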
logistic_regression_trainer.c
@@ -1,4 +1,5 @@
 #include "logistic_regression_trainer.h"
+#include "sparse_matrix_utils.h"
 
 void logistic_regression_trainer_destroy(logistic_regression_trainer_t *self) {
     if (self == NULL) return;
@@ -15,6 +16,22 @@ void logistic_regression_trainer_destroy(logistic_regression_trainer_t *self) {
         matrix_destroy(self->weights);
     }
 
+    if (self->last_updated != NULL) {
+        uint32_array_destroy(self->last_updated);
+    }
+
+    if (self->unique_columns != NULL) {
+        kh_destroy(int_set, self->unique_columns);
+    }
+
+    if (self->batch_columns != NULL) {
+        uint32_array_destroy(self->batch_columns);
+    }
+
+    if (self->gradient != NULL) {
+        matrix_destroy(self->gradient);
+    }
+
     free(self);
 }
 
@@ -33,6 +50,13 @@ logistic_regression_trainer_t *logistic_regression_trainer_init(trie_t *feature_
 
     trainer->weights = matrix_new_zeros(trainer->num_features, trainer->num_labels);
+    trainer->gradient = matrix_new_zeros(trainer->num_features, trainer->num_labels);
+
+    trainer->unique_columns = kh_init(int_set);
+    trainer->batch_columns = uint32_array_new_size(trainer->num_features);
+
+    trainer->last_updated = uint32_array_new_zeros(trainer->num_features);
+
     trainer->lambda = DEFAULT_LAMBDA;
     trainer->iters = 0;
     trainer->epochs = 0;
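unique_columns (a khash int set) and batch_columns (a flat array) are allocated once here and then reused on every batch to record which feature columns actually occur in the batch's sparse design matrix; they are consumed in train_batch below. As a rough illustration of what a helper like sparse_matrix_add_unique_columns presumably does (a guess at its behavior, using a plain byte map in place of the khash set):

    #include <stdint.h>
    #include <stddef.h>

    /* Illustrative only: gather the distinct column indices appearing in a
       CSR matrix's index array (indices[0..nnz)) into out[], deduplicating
       with a byte map instead of the khash int_set the real code uses. */
    static size_t collect_unique_columns(const uint32_t *indices, size_t nnz,
                                         uint8_t *seen, size_t num_columns,
                                         uint32_t *out) {
        size_t count = 0;
        for (size_t i = 0; i < nnz; i++) {
            uint32_t col = indices[i];
            if (col < num_columns && !seen[col]) {
                seen[col] = 1;
                out[count++] = col;
            }
        }
        /* Clear only the marks we set, so the map can be reused next batch. */
        for (size_t i = 0; i < count; i++) {
            seen[out[i]] = 0;
        }
        return count;
    }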
@@ -75,12 +99,12 @@ double logistic_regression_trainer_batch_cost(logistic_regression_trainer_t *sel
     return cost;
 }
 
 
 bool logistic_regression_trainer_train_batch(logistic_regression_trainer_t *self, feature_count_array *features, cstring_array *labels) {
     size_t m = self->weights->m;
     size_t n = self->weights->n;
 
-    matrix_t *gradient = matrix_new_zeros(m, n);
+    // Optimize
+    matrix_t *gradient = self->gradient;
 
     sparse_matrix_t *x = feature_matrix(self->feature_ids, features);
     uint32_array *y = label_vector(self->label_ids, labels);
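Reusing self->gradient removes a matrix_new_zeros/matrix_destroy pair per batch for an m x n buffer. With the sparse update below, a cheap way to keep a reused buffer consistent is to clear only the rows touched in the previous batch rather than the whole matrix; whether the real gradient code does this or zeroes internally isn't shown in this diff, so the following is only a sketch of the idea:

    #include <stdint.h>
    #include <stddef.h>

    /* Sketch: zero only the gradient rows for the features active in the
       previous batch (cols[0..num_cols)), leaving untouched rows alone. */
    static void clear_gradient_rows(double *gradient, size_t num_labels,
                                    const uint32_t *cols, size_t num_cols) {
        for (size_t i = 0; i < num_cols; i++) {
            double *row = gradient + (size_t)cols[i] * num_labels;
            for (size_t j = 0; j < num_labels; j++) {
                row[j] = 0.0;
            }
        }
    }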
@@ -89,21 +113,40 @@ bool logistic_regression_trainer_train_batch(logistic_regression_trainer_t *self
 
     bool ret = false;
 
-    if (!logistic_regression_gradient(self->weights, gradient, x, y, p_y, self->lambda)) {
+    if (!sparse_matrix_add_unique_columns(x, self->unique_columns, self->batch_columns)) {
+        log_error("Unique columns failed\n");
+        goto exit_matrices_created;
+    }
+
+    if (self->lambda > 0.0 && !stochastic_gradient_descent_sparse_regularize_weights(self->weights, self->batch_columns, self->last_updated, self->iters, self->lambda)) {
+        log_error("Error regularizing weights\n");
+        goto exit_matrices_created;
+    }
+
+    if (!logistic_regression_gradient_sparse(self->weights, gradient, x, y, p_y, self->batch_columns, self->lambda)) {
         log_error("Gradient failed\n");
         goto exit_matrices_created;
     }
 
     size_t data_len = m * n;
 
-    ret = stochastic_gradient_descent(self->weights, gradient, self->gamma);
+    ret = stochastic_gradient_descent_sparse(self->weights, gradient, self->batch_columns, self->gamma);
 
     self->iters++;
 
 exit_matrices_created:
-    matrix_destroy(gradient);
     matrix_destroy(p_y);
     uint32_array_destroy(y);
     sparse_matrix_destroy(x);
     return ret;
 }
 
+bool logistic_regression_trainer_finalize(logistic_regression_trainer_t *self) {
+    if (self == NULL) return false;
+
+    if (self->lambda > 0.0) {
+        return stochastic_gradient_descent_sparse_finalize_weights(self->weights, self->last_updated, self->iters, self->lambda);
+    }
+
+    return true;
+}
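Because regularization is applied lazily, features that happen not to occur in the last few batches still owe some decay when training ends; logistic_regression_trainer_finalize settles that debt, so a caller should invoke it exactly once after the final batch. A hedged sketch of the calling pattern (trainer construction and batch production are elided, since the init prototype is truncated in the hunk headers above; the batches_features/batches_labels arrays are assumptions for illustration):

    /* Hypothetical driver loop; how the trainer and batches are built is
       outside this diff. */
    for (size_t i = 0; i < num_batches; i++) {
        feature_count_array *batch_features = batches_features[i];  /* assumed */
        cstring_array *batch_labels = batches_labels[i];            /* assumed */

        if (!logistic_regression_trainer_train_batch(trainer, batch_features, batch_labels)) {
            log_error("train_batch failed\n");
            break;
        }
    }

    /* Apply any regularization still owed to features absent from the
       final batches. */
    if (!logistic_regression_trainer_finalize(trainer)) {
        log_error("finalize failed\n");
    }

    logistic_regression_trainer_destroy(trainer);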
logistic_regression_trainer.h
@@ -4,6 +4,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <math.h>
 
 #include "averaged_perceptron_tagger.h"
 #include "collections.h"
@@ -34,6 +35,9 @@ typedef struct logistic_regression_trainer {
     khash_t(str_uint32) *label_ids;    // Hashtable mapping labels to array indices
     size_t num_labels;                 // Number of labels
     matrix_t *weights;                 // Matrix of logistic regression weights
+    matrix_t *gradient;                // Gradient matrix to be reused
+    khash_t(int_set) *unique_columns;  // Unique columns set
+    uint32_array *batch_columns;       // Unique columns as array
     uint32_array *last_updated;        // Array of length N indicating the last time each feature was updated
     double lambda;                     // Regularization weight
     uint32_t iters;                    // Number of iterations, used to decay learning rate
@@ -47,6 +51,7 @@ logistic_regression_trainer_t *logistic_regression_trainer_init(trie_t *feature_
 
 bool logistic_regression_trainer_train_batch(logistic_regression_trainer_t *self, feature_count_array *features, cstring_array *labels);
 double logistic_regression_trainer_batch_cost(logistic_regression_trainer_t *self, feature_count_array *features, cstring_array *labels);
+bool logistic_regression_trainer_finalize(logistic_regression_trainer_t *self);
 
 void logistic_regression_trainer_destroy(logistic_regression_trainer_t *self);