From a4431dbb27f383eecee344c7fab7b5c5a3bb1884 Mon Sep 17 00:00:00 2001
From: Al
Date: Sun, 2 Apr 2017 14:32:14 -0400
Subject: [PATCH] [classification] removing regularization update from
 gradient computation in logistic regression, as that's now handled by the
 optimizer

---
 src/logistic_regression.c | 152 ++++++++++++--------------------
 src/logistic_regression.h |   7 +-
 2 files changed, 52 insertions(+), 107 deletions(-)

diff --git a/src/logistic_regression.c b/src/logistic_regression.c
index c5c1e028..08d340fc 100644
--- a/src/logistic_regression.c
+++ b/src/logistic_regression.c
@@ -4,12 +4,16 @@
 #include "logistic.h"
 #include "file_utils.h"
 
-#define NEAR_ZERO_WEIGHT 1e-6
+bool logistic_regression_model_expectation_sparse(sparse_matrix_t *theta, sparse_matrix_t *x, double_matrix_t *p_y) {
+    if (theta == NULL || x == NULL || p_y == NULL) {
+        if (theta == NULL) log_error("theta = NULL\n");
+        if (x == NULL) log_error("x = NULL\n");
+        if (p_y == NULL) log_error("p_y = NULL\n");
+        return false;
+    }
 
-bool logistic_regression_model_expectation(double_matrix_t *theta, sparse_matrix_t *x, double_matrix_t *p_y) {
-    if (theta == NULL || x == NULL || p_y == NULL) return false;
-
-    if (sparse_matrix_dot_dense(x, theta, p_y) != 0) {
+    if (sparse_matrix_dot_sparse(x, theta, p_y) != 0) {
+        log_error("x->m = %zu, x->n = %zu, theta->m = %zu, theta->n = %zu, p_y->m = %zu, p_y->n = %zu\n", x->m, x->n, theta->m, theta->n, p_y->m, p_y->n);
         return false;
     }
 
@@ -18,16 +22,41 @@ bool logistic_regression_model_expectation(double_matrix
     return true;
 }
 
-double logistic_regression_cost_function(double_matrix_t *theta, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y, double lambda) {
+bool logistic_regression_model_expectation(double_matrix_t *theta, sparse_matrix_t *x, double_matrix_t *p_y) {
+    if (theta == NULL || x == NULL || p_y == NULL) {
+        if (theta == NULL) log_error("theta = NULL\n");
+        if (x == NULL) log_error("x = NULL\n");
+        if (p_y == NULL) log_error("p_y = NULL\n");
+        return false;
+    }
+
+    if (sparse_matrix_dot_dense(x, theta, p_y) != 0) {
+        log_error("x->m = %zu, x->n = %zu, theta->m = %zu, theta->n = %zu, p_y->m = %zu, p_y->n = %zu\n", x->m, x->n, theta->m, theta->n, p_y->m, p_y->n);
+        return false;
+    }
+
+    softmax_matrix(p_y);
+
+    return true;
+}
+
+double logistic_regression_cost_function(double_matrix_t *theta, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y) {
     size_t m = x->m;
     size_t n = x->n;
 
-    if (m != y->n) return -1.0;
-
-    if (!double_matrix_resize(p_y, x->m, theta->n)) {
+    if (m != y->n) {
+        log_error("m = %zu, y->n = %zu\n", m, y->n);
         return -1.0;
     }
 
+    if (!double_matrix_resize_aligned(p_y, x->m, theta->n, 16)) {
+        log_error("resize_aligned failed\n");
+        return -1.0;
+    }
+
+    double_matrix_zero(p_y);
+
     if (!logistic_regression_model_expectation(theta, x, p_y)) {
+        log_error("model expectation failed\n");
         return -1.0;
     }
 
@@ -37,39 +66,23 @@ double logistic_regression_cost_function(double_matrix_t
     for (size_t i = 0; i < p_y->m; i++) {
         uint32_t y_i = y->a[i];
         double value = double_matrix_get(p_y, i, y_i);
-        if (value > NEAR_ZERO_WEIGHT) {
-            cost += log(value);
-        }
+        cost += log(value);
     }
 
     cost *= -(1.0 / m);
 
-    if (lambda > 0.0) {
-        double reg_cost = 0.0;
-        for (size_t i = 1; i < theta->m; i++) {
-            for (size_t j = 0; j < theta->n; j++) {
-                double theta_ij = double_matrix_get(theta, i, j);
-                reg_cost += theta_ij * theta_ij;
-            }
-
-        }
-
-        cost += reg_cost * (lambda / 2.0);
-    }
-
     return cost;
-
 }
 
-static bool logistic_regression_gradient_params(double_matrix_t *theta, double_matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y,
-                                                uint32_array *x_cols, double lambda) {
+bool logistic_regression_gradient(double_matrix_t *theta, double_matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y) {
     size_t m = x->m;
     size_t n = x->n;
 
-    if (m != y->n) return false;
+    if (m != y->n || theta->m != gradient->m || theta->n != gradient->n) return false;
 
-    if (!double_matrix_resize(p_y, x->m, theta->n) || !double_matrix_resize(p_y, x->m, theta->n)) {
+    if (!double_matrix_resize_aligned(p_y, x->m, theta->n, 16)) {
         return false;
     }
 
+    double_matrix_zero(p_y);
     if (!logistic_regression_model_expectation(theta, x, p_y)) {
         return false;
@@ -85,93 +98,26 @@ static bool logistic_regression_gradient_params(double_matrix_t *theta, double_m
     uint32_t col;
     double data;
 
-
-    bool regularize = lambda > 0.0;
+    double_matrix_zero(gradient);
 
     double *theta_values = theta->values;
     double *predicted_values = p_y->values;
     double *gradient_values = gradient->values;
 
-    // Zero the relevant rows of the gradient
-    if (x_cols != NULL) {
-        double *gradient_i;
-
-        size_t batch_rows = x_cols->n;
-        uint32_t *cols = x_cols->a;
-        for (i = 0; i < batch_rows; i++) {
-            col = cols[i];
-            gradient_i = double_matrix_get_row(gradient, col);
-            double_array_zero(gradient_i, num_classes);
-        }
-    } else {
-        double_matrix_zero(gradient);
-    }
-
-    // gradient = -(1. / m) * x.T.dot(y - p_y) + lambda * theta
+    // gradient = -(1. / m) * x.T.dot(y - p_y)
     sparse_matrix_foreach(x, row, col, data, {
         uint32_t y_i = y->a[row];
 
         for (j = 0; j < num_classes; j++) {
-
-            double class_prob = predicted_values[row * num_classes + j];
+            double class_prob = double_matrix_get(p_y, row, j);
             double residual = (y_i == j ? 1.0 : 0.0) - class_prob;
-            gradient_values[col * num_classes + j] += data * residual;
+            double gradient_update = data * residual;
+            double_matrix_add_scalar(gradient, col, j, gradient_update);
         }
     })
 
     double scale = -1.0 / m;
-
-    // Scale the vector by -1 / m using only the unique columns in X
-    // Useful for stochastic and minibatch gradients
-    if (x_cols != NULL) {
-        size_t batch_rows = x_cols->n;
-        uint32_t *cols = x_cols->a;
-        for (i = 0; i < batch_rows; i++) {
-            col = cols[i];
-            for (j = 0; j < num_classes; j++) {
-                gradient_values[col * num_classes + j] *= scale;
-            }
-        }
-    } else {
-        double_matrix_mul(gradient, scale);
-    }
-
-
-    // Update the only the relevant columns in x
-    if (regularize) {
-        size_t num_rows = num_features;
-        uint32_t *cols = NULL;
-
-        if (x_cols != NULL) {
-            cols = x_cols->a;
-            num_rows = x_cols->n;
-        }
-
-        for (i = 0; i < num_rows; i++) {
-            col = x_cols != NULL ? cols[i] : i;
-
-            for (j = 0; j < num_classes; j++) {
-                size_t idx = col * num_classes + j;
-                double theta_ij = theta_values[idx];
-                double reg_update = theta_ij * lambda;
-                double current_value = gradient_values[idx];
-                double updated_value = current_value + reg_update;
-                if ((updated_value > 0) == (current_value > 0)) {
-                    gradient_values[idx] = updated_value;
-                }
-            }
-        }
-    }
+    double_matrix_mul(gradient, scale);
 
     return true;
 }
-
-inline bool logistic_regression_gradient_sparse(double_matrix_t *theta, double_matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y,
-                                                uint32_array *x_cols, double lambda) {
-    return logistic_regression_gradient_params(theta, gradient, x, y, p_y, x_cols, lambda);
-}
-
-
-inline bool logistic_regression_gradient(double_matrix_t *theta, double_matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y, double lambda) {
-    return logistic_regression_gradient_params(theta, gradient, x, y, p_y, NULL, lambda);
-}
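
With the lambda terms removed above, logistic_regression_gradient computes only the data term, gradient = -(1. / m) * x.T.dot(y - p_y), and the cost function returns the unregularized negative log-likelihood. The L2 penalty therefore has to be applied wherever the parameter update happens. As a minimal sketch of that division of labor, an SGD-style optimizer step could fold the penalty in as follows; sgd_step_l2 is a hypothetical helper, not part of this patch, and it assumes row-major (num_features x num_classes) value arrays as in double_matrix_t, with an unregularized intercept in row 0 to match the removed reg_cost loop that started at i = 1.

#include <stddef.h>

/* Hypothetical optimizer-side update: plain SGD with an L2 penalty
 * applied here rather than inside the gradient computation.
 * theta and gradient are row-major (num_features x num_classes)
 * value arrays, as in double_matrix_t. */
static void sgd_step_l2(double *theta, const double *gradient,
                        size_t num_features, size_t num_classes,
                        double alpha, double lambda) {
    for (size_t i = 0; i < num_features; i++) {
        for (size_t j = 0; j < num_classes; j++) {
            size_t idx = i * num_classes + j;
            /* The gradient of (lambda / 2.0) * ||theta||^2 is lambda * theta.
             * Row 0 (the intercept) stays unregularized, matching the
             * removed cost-function loop that started at i = 1. */
            double decay = i > 0 ? lambda * theta[idx] : 0.0;
            theta[idx] -= alpha * (gradient[idx] + decay);
        }
    }
}
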
diff --git a/src/logistic_regression.h b/src/logistic_regression.h
index 7d798e45..8b37c7b2 100644
--- a/src/logistic_regression.h
+++ b/src/logistic_regression.h
@@ -27,9 +27,8 @@ may be called softmax regression.
 #include "sparse_matrix.h"
 
 bool logistic_regression_model_expectation(double_matrix_t *theta, sparse_matrix_t *x, double_matrix_t *p_y);
-double logistic_regression_cost_function(double_matrix_t *theta, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y, double lambda);
-bool logistic_regression_gradient(double_matrix_t *theta, double_matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y, double lambda);
-bool logistic_regression_gradient_sparse(double_matrix_t *theta, double_matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y,
-                                         uint32_array *x_cols, double lambda);
+bool logistic_regression_model_expectation_sparse(sparse_matrix_t *theta, sparse_matrix_t *x, double_matrix_t *p_y);
+double logistic_regression_cost_function(double_matrix_t *theta, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y);
+bool logistic_regression_gradient(double_matrix_t *theta, double_matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y);
 
 #endif
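
One consequence of dropping the NEAR_ZERO_WEIGHT guard is that the cost function now calls log(value) unguarded. A softmax output is strictly positive in exact arithmetic, but in double precision an entry can still underflow to 0.0 when the logits in a row differ by more than roughly 745, which is the case the old guard absorbed. That is only safe if softmax_matrix uses the standard max-subtracted formulation; the patch does not show that implementation, so the sketch below is a reference for the expected behavior, with softmax_row as a hypothetical stand-in for one row of softmax_matrix.

#include <math.h>
#include <stddef.h>

/* Max-subtracted softmax over one row of logits. Subtracting the row
 * maximum keeps exp() from overflowing. Outputs can still underflow
 * to 0.0 when logits within a row differ by more than about 745
 * (the log of the smallest positive double), in which case log(output)
 * in the cost function would return -inf. */
static void softmax_row(double *row, size_t n) {
    double max_val = row[0];
    for (size_t j = 1; j < n; j++) {
        if (row[j] > max_val) max_val = row[j];
    }

    double sum = 0.0;
    for (size_t j = 0; j < n; j++) {
        row[j] = exp(row[j] - max_val);
        sum += row[j];
    }

    for (size_t j = 0; j < n; j++) {
        row[j] /= sum;
    }
}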