From 8f054eeeb1c72d76c3b3a9a966d08416b06e8610 Mon Sep 17 00:00:00 2001
From: Al
Date: Fri, 8 Jan 2016 01:06:02 -0500
Subject: [PATCH] [classification] Training structures for logistic regression
 and stochastic (minibatch) gradient descent update

---
 src/logistic_regression_trainer.c | 125 ++++++++++++++++++++++++++++++++++++
 src/logistic_regression_trainer.h |  50 +++++++++++++++
 2 files changed, 175 insertions(+)
 create mode 100644 src/logistic_regression_trainer.c
 create mode 100644 src/logistic_regression_trainer.h

diff --git a/src/logistic_regression_trainer.c b/src/logistic_regression_trainer.c
new file mode 100644
index 00000000..df67afc1
--- /dev/null
+++ b/src/logistic_regression_trainer.c
@@ -0,0 +1,125 @@
+#include "logistic_regression_trainer.h"
+
+void logistic_regression_trainer_destroy(logistic_regression_trainer_t *self) {
+    if (self == NULL) return;
+
+    if (self->feature_ids != NULL) {
+        trie_destroy(self->feature_ids);
+    }
+
+    if (self->label_ids != NULL) {
+        kh_destroy(str_uint32, self->label_ids);
+    }
+
+    if (self->weights != NULL) {
+        matrix_destroy(self->weights);
+    }
+
+    if (self->last_updated != NULL) {
+        uint32_array_destroy(self->last_updated);
+    }
+
+    free(self);
+}
+
+logistic_regression_trainer_t *logistic_regression_trainer_init(trie_t *feature_ids, khash_t(str_uint32) *label_ids) {
+    if (feature_ids == NULL || label_ids == NULL) return NULL;
+
+    logistic_regression_trainer_t *trainer = malloc(sizeof(logistic_regression_trainer_t));
+    if (trainer == NULL) return NULL;
+
+    // The trainer takes ownership of feature_ids and label_ids
+    trainer->feature_ids = feature_ids;
+    // Add one feature for the bias unit
+    trainer->num_features = trie_num_keys(feature_ids) + 1;
+
+    trainer->label_ids = label_ids;
+    trainer->num_labels = kh_size(label_ids);
+
+    // Not allocated yet, but must be NULL so destroy is safe on error paths
+    trainer->last_updated = NULL;
+
+    trainer->weights = matrix_new_zeros(trainer->num_features, trainer->num_labels);
+    if (trainer->weights == NULL) goto exit_trainer_created;
+
+    trainer->lambda = DEFAULT_LAMBDA;
+    trainer->iters = 0;
+    trainer->epochs = 0;
+    trainer->gamma_0 = DEFAULT_GAMMA_0;
+    trainer->gamma = DEFAULT_GAMMA;
+
+    return trainer;
+
+exit_trainer_created:
+    logistic_regression_trainer_destroy(trainer);
+    return NULL;
+}
+
+static matrix_t *model_expectation(sparse_matrix_t *x, matrix_t *theta) {
+    matrix_t *p_y = matrix_new_zeros(x->m, theta->n);
+    if (p_y == NULL) return NULL;
+
+    if (logistic_regression_model_expectation(theta, x, p_y)) {
+        return p_y;
+    } else {
+        matrix_destroy(p_y);
+        return NULL;
+    }
+}
+
+double logistic_regression_trainer_batch_cost(logistic_regression_trainer_t *self, feature_count_array *features, cstring_array *labels) {
+    size_t n = self->weights->n;
+
+    // A negative return value signals failure (the true cost is always >= 0)
+    double cost = -1.0;
+
+    sparse_matrix_t *x = feature_matrix(self->feature_ids, features);
+    uint32_array *y = label_vector(self->label_ids, labels);
+    matrix_t *p_y = NULL;
+
+    if (x == NULL || y == NULL) goto exit_cost_matrices_created;
+
+    p_y = matrix_new_zeros(x->m, n);
+    if (p_y == NULL) goto exit_cost_matrices_created;
+
+    cost = logistic_regression_cost_function(self->weights, x, y, p_y, self->lambda);
+
+exit_cost_matrices_created:
+    if (p_y != NULL) matrix_destroy(p_y);
+    if (y != NULL) uint32_array_destroy(y);
+    if (x != NULL) sparse_matrix_destroy(x);
+    return cost;
+}
+
+bool logistic_regression_trainer_train_batch(logistic_regression_trainer_t *self, feature_count_array *features, cstring_array *labels) {
+    size_t m = self->weights->m;
+    size_t n = self->weights->n;
+
+    bool ret = false;
+
+    matrix_t *gradient = matrix_new_zeros(m, n);
+    sparse_matrix_t *x = feature_matrix(self->feature_ids, features);
+    uint32_array *y = label_vector(self->label_ids, labels);
+    matrix_t *p_y = NULL;
+
+    if (gradient == NULL || x == NULL || y == NULL) goto exit_matrices_created;
+
+    p_y = matrix_new_zeros(x->m, n);
+    if (p_y == NULL) goto exit_matrices_created;
+
+    if (!logistic_regression_gradient(self->weights, gradient, x, y, p_y, self->lambda)) {
+        log_error("Gradient failed\n");
+        goto exit_matrices_created;
+    }
+
+    ret = stochastic_gradient_descent(self->weights, gradient, self->gamma);
+
+    self->iters++;
+
+exit_matrices_created:
+    if (gradient != NULL) matrix_destroy(gradient);
+    if (p_y != NULL) matrix_destroy(p_y);
+    if (y != NULL) uint32_array_destroy(y);
+    if (x != NULL) sparse_matrix_destroy(x);
+    return ret;
+}
diff --git a/src/logistic_regression_trainer.h b/src/logistic_regression_trainer.h
new file mode 100644
index 00000000..f00dc12e
--- /dev/null
+++ b/src/logistic_regression_trainer.h
@@ -0,0 +1,50 @@
+#ifndef LOGISTIC_REGRESSION_TRAINER_H
+#define LOGISTIC_REGRESSION_TRAINER_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "averaged_perceptron_tagger.h"
+#include "collections.h"
+#include "features.h"
+#include "logistic_regression.h"
+#include "minibatch.h"
+#include "sparse_matrix.h"
+#include "string_utils.h"
+#include "stochastic_gradient_descent.h"
+#include "tokens.h"
+#include "trie.h"
+
+#define DEFAULT_GAMMA_SCHEDULE {0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0}
+#define DEFAULT_LAMBDA_SCHEDULE {0.0, 1e-5, 1e-4, 0.001, 0.01, 0.1, \
+                                 0.2, 0.5, 1.0, 2.0, 5.0, 10.0}
+
+#define DEFAULT_GAMMA_0 1.0
+#define DEFAULT_LAMBDA 0.0
+#define DEFAULT_GAMMA 0.1
+
+/**
+ * Helper struct for training a logistic regression model
+ */
+typedef struct logistic_regression_trainer {
+    trie_t *feature_ids;            // Trie mapping feature strings to array indices
+    size_t num_features;            // Number of features, including the bias unit
+    khash_t(str_uint32) *label_ids; // Hashtable mapping label strings to array indices
+    size_t num_labels;              // Number of labels
+    matrix_t *weights;              // Logistic regression weights (num_features x num_labels)
+    uint32_array *last_updated;     // Iteration at which each feature was last updated (length num_features)
+    double lambda;                  // Regularization weight
+    uint32_t iters;                 // Number of iterations, used to decay the learning rate
+    uint32_t epochs;                // Number of epochs
+    double gamma_0;                 // Initial learning rate
+    double gamma;                   // Simple scalar learning rate
+} logistic_regression_trainer_t;
+
+logistic_regression_trainer_t *logistic_regression_trainer_init(trie_t *feature_ids, khash_t(str_uint32) *label_ids);
+
+bool logistic_regression_trainer_train_batch(logistic_regression_trainer_t *self, feature_count_array *features, cstring_array *labels);
+double logistic_regression_trainer_batch_cost(logistic_regression_trainer_t *self, feature_count_array *features, cstring_array *labels);
+
+void logistic_regression_trainer_destroy(logistic_regression_trainer_t *self);
+
+#endif
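
A note on intended usage: init takes ownership of the feature trie and
label hashtable, and destroy frees them along with the weights, so the
caller should not free those structures separately. A minimal sketch of
the training loop this API implies follows; building feature_ids and
label_ids is outside this patch, and max_epochs, batches, num_batches,
and the held_out_* variables are hypothetical stand-ins for whatever
minibatch source the caller uses:

    logistic_regression_trainer_t *trainer = logistic_regression_trainer_init(feature_ids, label_ids);
    if (trainer == NULL) exit(EXIT_FAILURE);

    for (uint32_t epoch = 0; epoch < max_epochs; epoch++) {
        for (size_t i = 0; i < num_batches; i++) {
            // One minibatch gradient step on the current weights
            if (!logistic_regression_trainer_train_batch(trainer, batches[i].features, batches[i].labels)) {
                log_error("Training failed on batch %zu\n", i);
                break;
            }
        }
        trainer->epochs++;

        // Track convergence on a held-out batch; a negative cost signals failure
        double cost = logistic_regression_trainer_batch_cost(trainer, held_out_features, held_out_labels);
        printf("epoch %u, held-out cost %f\n", trainer->epochs, cost);
    }

    logistic_regression_trainer_destroy(trainer);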
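
The struct reserves gamma_0 and iters for decaying the learning rate,
but train_batch currently passes the constant self->gamma to
stochastic_gradient_descent on every step. If decay is wired in later,
one conventional choice is a Bottou-style 1/t schedule; the helper below
is illustrative only and not part of this patch (with DEFAULT_LAMBDA at
0.0 it reduces to the constant gamma_0):

    static inline double trainer_decayed_gamma(logistic_regression_trainer_t *self) {
        // gamma_t = gamma_0 / (1 + gamma_0 * lambda * t), where t counts SGD steps taken
        return self->gamma_0 / (1.0 + self->gamma_0 * self->lambda * (double)self->iters);
    }

    // ...and in logistic_regression_trainer_train_batch:
    // ret = stochastic_gradient_descent(self->weights, gradient, trainer_decayed_gamma(self));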