diff --git a/src/logistic_regression.c b/src/logistic_regression.c
new file mode 100644
index 00000000..fa1d131c
--- /dev/null
+++ b/src/logistic_regression.c
@@ -0,0 +1,119 @@
+#include <math.h>
+
+#include "logistic_regression.h"
+#include "logistic.h"
+#include "file_utils.h"
+
+#define NEAR_ZERO_WEIGHT 1e-6
+
+bool logistic_regression_model_expectation(matrix_t *theta, sparse_matrix_t *x, matrix_t *p_y) {
+    if (theta == NULL || x == NULL || p_y == NULL) return false;
+
+    if (sparse_matrix_dot_dense(x, theta, p_y) != 0) {
+        return false;
+    }
+
+    softmax_matrix(p_y);
+
+    return true;
+}
+
+
+double logistic_regression_cost_function(matrix_t *theta, sparse_matrix_t *x, uint32_array *y, matrix_t *p_y, double lambda) {
+    size_t m = x->m;
+    if (m != y->n) return -1.0;
+
+    if (!matrix_resize(p_y, x->m, theta->n)) {
+        return -1.0;
+    }
+
+    if (!logistic_regression_model_expectation(theta, x, p_y)) {
+        return -1.0;
+    }
+
+    double cost = 0.0;
+
+    // - \frac{1}{m} \left[ \sum_{i=1}^{m} \sum_{j=1}^{k} 1\left\{y^{(i)} = j\right\} \log \frac{e^{\theta_j^T x^{(i)}}}{\sum_{l=1}^k e^{ \theta_l^T x^{(i)} }}\right]
+
+    for (size_t i = 0; i < p_y->m; i++) {
+        uint32_t y_i = y->a[i];
+        double value = matrix_get(p_y, i, y_i);
+        if (value > NEAR_ZERO_WEIGHT) {
+            cost += log(value);
+        }
+    }
+
+    cost *= -(1.0 / m);
+
+    if (lambda > 0.0) {
+        double reg_cost = 0.0;
+        // skip row 0 (the intercept) when regularizing
+        for (size_t i = 1; i < theta->m; i++) {
+            for (size_t j = 0; j < theta->n; j++) {
+                double theta_ij = matrix_get(theta, i, j);
+                reg_cost += theta_ij * theta_ij;
+            }
+        }
+
+        cost += reg_cost * (lambda / 2.0);
+    }
+
+    return cost;
+}
+
+bool logistic_regression_gradient(matrix_t *theta, matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, matrix_t *p_y, double lambda) {
+    size_t m = x->m;
+    size_t n = x->n;
+    if (m != y->n) return false;
+
+    if (!matrix_resize(gradient, theta->m, theta->n) ||
+        !matrix_resize(p_y, x->m, theta->n)) {
+        return false;
+    }
+
+    matrix_zero(gradient);
+
+    if (!logistic_regression_model_expectation(theta, x, p_y)) {
+        return false;
+    }
+
+    size_t num_classes = p_y->n;
+
+    double residual;
+
+    uint32_t row;
+    uint32_t col;
+    double data;
+
+    uint32_t i, j;
+
+    bool regularize = lambda > 0.0;
+
+    // gradient = -(1. / m) * x.T.dot(y - p_y) + lambda * theta
+
+    sparse_matrix_foreach(x, row, col, data, {
+        uint32_t y_i = y->a[row];
+        for (j = 0; j < num_classes; j++) {
+            residual = (y_i == j ? 1.0 : 0.0) - matrix_get(p_y, row, j);
+            double value = data * residual;
+            matrix_add_scalar(gradient, col, j, value);
+        }
+    })
+
+    matrix_mul(gradient, -1.0 / m);
+
+    if (regularize) {
+        // theta has n rows (one per feature) and num_classes columns;
+        // skip the intercept row, matching the cost function
+        for (i = 1; i < n; i++) {
+            for (j = 0; j < num_classes; j++) {
+                double theta_ij = matrix_get(theta, i, j);
+                double reg_value = theta_ij * lambda;
+                matrix_add_scalar(gradient, i, j, reg_value);
+            }
+        }
+    }
+
+    return true;
+}
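For review context, here is a minimal sketch of how the gradient above might be driven from a batch gradient-descent loop. It is illustrative only: the wrapper name, the learning rate alpha, and matrix_set are assumptions that do not appear in this diff; only logistic_regression_gradient (and the matrix_t fields it already relies on) are taken from the code above.

// Hypothetical caller, sketched against the functions in this diff.
// matrix_set(m, i, j, v) is assumed to be the element setter in matrix.h;
// substitute the real accessor if the API differs.
static bool gradient_descent_step(matrix_t *theta, matrix_t *gradient,
                                  sparse_matrix_t *x, uint32_array *y,
                                  matrix_t *p_y, double lambda, double alpha) {
    if (!logistic_regression_gradient(theta, gradient, x, y, p_y, lambda)) {
        return false;
    }

    // theta := theta - alpha * gradient
    for (size_t i = 0; i < theta->m; i++) {
        for (size_t j = 0; j < theta->n; j++) {
            double updated = matrix_get(theta, i, j) - alpha * matrix_get(gradient, i, j);
            matrix_set(theta, i, j, updated);
        }
    }

    return true;
}

Calling logistic_regression_cost_function between steps gives a simple convergence check: with a sufficiently small alpha, the regularized cost should decrease from one step to the next.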
diff --git a/src/logistic_regression.h b/src/logistic_regression.h
new file mode 100644
index 00000000..18e13297
--- /dev/null
+++ b/src/logistic_regression.h
@@ -0,0 +1,33 @@
+/*
+logistic_regression.h
+---------------------
+
+Cost function and gradient for a multinomial logistic regression model.
+
+Multinomial logistic regression is a generalization of regular logistic
+regression in which we predict a probability distribution over classes using:
+
+exp(x_i) / sum(exp(x))
+
+This is sometimes referred to as the softmax function, and thus the model
+may be called softmax regression.
+
+*/
+
+#ifndef LOGISTIC_REGRESSION_MODEL_H
+#define LOGISTIC_REGRESSION_MODEL_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "collections.h"
+#include "matrix.h"
+#include "sparse_matrix.h"
+
+bool logistic_regression_model_expectation(matrix_t *theta, sparse_matrix_t *x, matrix_t *p_y);
+double logistic_regression_cost_function(matrix_t *theta, sparse_matrix_t *x, uint32_array *y, matrix_t *p_y, double lambda);
+bool logistic_regression_gradient(matrix_t *theta, matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, matrix_t *p_y, double lambda);
+
+#endif
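Written out in full, the model and objective that the header describes are simply a restatement of the comments in logistic_regression.c (theta_j denotes the j-th column of theta, and the intercept row of theta is excluded from regularization):

p(y^{(i)} = j \mid x^{(i)}; \theta) = \frac{e^{\theta_j^T x^{(i)}}}{\sum_{l=1}^{k} e^{\theta_l^T x^{(i)}}}

J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \sum_{j=1}^{k} 1\left\{y^{(i)} = j\right\} \log p(y^{(i)} = j \mid x^{(i)}; \theta) + \frac{\lambda}{2} \sum_{i > 0} \sum_{j} \theta_{ij}^2

\nabla_{\theta_j} J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} x^{(i)} \left( 1\left\{y^{(i)} = j\right\} - p(y^{(i)} = j \mid x^{(i)}; \theta) \right) + \lambda \theta_j

These correspond, respectively, to logistic_regression_model_expectation, logistic_regression_cost_function, and logistic_regression_gradient.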