diff --git a/src/logistic_regression_trainer.c b/src/logistic_regression_trainer.c
index dc0843bf..775636d7 100644
--- a/src/logistic_regression_trainer.c
+++ b/src/logistic_regression_trainer.c
@@ -118,7 +118,7 @@ bool logistic_regression_trainer_train_batch(logistic_regression_trainer_t *self
         goto exit_matrices_created;
     }
 
-    if (self->lambda > 0.0 && !stochastic_gradient_descent_sparse_regularize_weights(self->weights, self->batch_columns, self->last_updated, self->iters, self->lambda)) {
+    if (self->lambda > 0.0 && !stochastic_gradient_descent_regularize_weights(self->weights, self->batch_columns, self->last_updated, self->iters, self->lambda, self->gamma_0)) {
         log_error("Error regularizing weights\n");
         goto exit_matrices_created;
     }
@@ -145,7 +145,7 @@ bool logistic_regression_trainer_finalize(logistic_regression_trainer_t *self) {
     if (self == NULL) return false;
 
     if (self->lambda > 0.0) {
-        return stochastic_gradient_descent_sparse_finalize_weights(self->weights, self->last_updated, self->iters, self->lambda);
+        return stochastic_gradient_descent_finalize_weights(self->weights, self->last_updated, self->iters, self->lambda, self->gamma_0);
     }
 
     return true;
diff --git a/src/stochastic_gradient_descent.c b/src/stochastic_gradient_descent.c
index b9ffc921..feb2207e 100644
--- a/src/stochastic_gradient_descent.c
+++ b/src/stochastic_gradient_descent.c
@@ -79,13 +79,18 @@ twice (three times counting the finalization step), while still getting roughly
 results as though we had done the per-iteration weight updates.
 */
 
-static inline void regularize_row(double *theta_i, size_t n, double lambda, uint32_t last_updated, uint32_t t) {
+
+inline double stochastic_gradient_descent_gamma_t(double gamma_0, double lambda, uint32_t t) {
+    return gamma_0 / (1.0 + lambda * gamma_0 * (double)t);
+}
+
+static inline void regularize_row(double *theta_i, size_t n, double lambda, uint32_t last_updated, uint32_t t, double gamma) {
     uint32_t timesteps = t - last_updated;
-    double update = exp(-lambda * timesteps);
+    double update = exp(-gamma * lambda * timesteps);
     double_array_mul(theta_i, update, n);
 }
 
-bool stochastic_gradient_descent_sparse_regularize_weights(matrix_t *theta, uint32_array *update_indices, uint32_array *last_updated, uint32_t t, double lambda) {
+bool stochastic_gradient_descent_regularize_weights(matrix_t *theta, uint32_array *update_indices, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0) {
     if (lambda > 0.0) {
 
         uint32_t *updates = last_updated->a;
@@ -98,7 +103,7 @@ bool stochastic_gradient_descent_sparse_regularize_weights(matrix_t *theta, uint
             uint32_t row = rows[i];
             double *theta_i = matrix_get_row(theta, row);
             uint32_t last_updated = updates[row];
-            regularize_row(theta_i, n, lambda, last_updated, t);
+            regularize_row(theta_i, n, lambda, last_updated, t, gamma_0);
             updates[row] = t;
         }
 
@@ -107,7 +112,7 @@
     return true;
 }
 
-inline bool stochastic_gradient_descent_sparse_finalize_weights(matrix_t *theta, uint32_array *last_updated, uint32_t t, double lambda) {
+inline bool stochastic_gradient_descent_finalize_weights(matrix_t *theta, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0) {
     if (lambda > 0.0) {
         uint32_t *updates = last_updated->a;
         size_t m = theta->m;
@@ -116,14 +121,10 @@ inline bool stochastic_gradient_descent_sparse_finalize_weights(matrix_t *theta,
         for (size_t i = 0; i < m; i++) {
             double *theta_i = matrix_get_row(theta, i);
             uint32_t last_updated = updates[i];
-            regularize_row(theta_i, n, lambda, last_updated, t);
+            regularize_row(theta_i, n, lambda, last_updated, t, gamma_0);
+            updates[i] = t;
         }
     }
 
     return true;
 }
-
-
-inline double stochastic_gradient_descent_gamma_t(double gamma_0, double lambda, uint32_t t) {
-    return gamma_0 / (1.0 + lambda * gamma_0 * (double)t);
-}
\ No newline at end of file
diff --git a/src/stochastic_gradient_descent.h b/src/stochastic_gradient_descent.h
index 07adf3b1..e0e35287 100644
--- a/src/stochastic_gradient_descent.h
+++ b/src/stochastic_gradient_descent.h
@@ -18,8 +18,9 @@ gamma_t = gamma_0(1 + gamma_0 * lambda * t)^-1
 
 bool stochastic_gradient_descent(matrix_t *theta, matrix_t *gradient, double gamma);
 bool stochastic_gradient_descent_sparse(matrix_t *theta, matrix_t *gradient, uint32_array *update_indices, double gamma);
-bool stochastic_gradient_descent_sparse_regularize_weights(matrix_t *theta, uint32_array *update_indices, uint32_array *last_updated, uint32_t t, double lambda);
-bool stochastic_gradient_descent_sparse_finalize_weights(matrix_t *theta, uint32_array *last_updated, uint32_t t, double lambda);
+bool stochastic_gradient_descent_regularize_weights(matrix_t *theta, uint32_array *update_indices, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0);
+bool stochastic_gradient_descent_finalize_weights(matrix_t *theta, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0);
 double stochastic_gradient_descent_gamma_t(double gamma_0, double lambda, uint32_t t);
+
 #endif
\ No newline at end of file
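For context, the relocated stochastic_gradient_descent_gamma_t() implements the inverse-scaling decay quoted in the header comment, gamma_t = gamma_0(1 + gamma_0 * lambda * t)^-1. A minimal standalone sketch of the schedule, separate from the patch (the hyperparameter values below are illustrative, not taken from the library):

/* sketch only, not part of the patch: the decaying learning rate
 * gamma_t = gamma_0 / (1 + gamma_0 * lambda * t) */
#include <stdint.h>
#include <stdio.h>

static double gamma_t(double gamma_0, double lambda, uint32_t t) {
    return gamma_0 / (1.0 + lambda * gamma_0 * (double)t);
}

int main(void) {
    /* example hyperparameters, chosen arbitrarily for the printout */
    double gamma_0 = 0.1, lambda = 0.01;
    for (uint32_t t = 0; t <= 1000; t += 250) {
        printf("t = %4u  gamma_t = %f\n", (unsigned)t, gamma_t(gamma_0, lambda, t));
    }
    return 0;
}

The rate starts at gamma_0 and decays toward zero as t grows, with lambda controlling how quickly.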
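The regularize_row() change is the core of the patch: instead of shrinking every row by exp(-gamma * lambda) at each timestep, rows are shrunk lazily, with one catch-up multiply of exp(-gamma * lambda * timesteps) when a row is next touched (and at finalization, which now also marks rows as caught up via updates[i] = t). A self-contained sketch of why the single multiply matches the per-step multiplies, under the patch's simplification of holding the rate at gamma_0 across the skipped window:

/* sketch only, not part of the patch: eager per-step shrinkage vs. the
 * lazy catch-up multiply now done in regularize_row(); values are made up */
#include <math.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    double gamma = 0.1, lambda = 0.01;
    double w = 1.0;              /* one weight of a sparse row */
    uint32_t last_updated = 3;   /* timestep of the row's last update */
    uint32_t t = 10;             /* current timestep */

    /* eager: shrink at every timestep, even when the row is untouched */
    double w_eager = w;
    for (uint32_t i = last_updated; i < t; i++) {
        w_eager *= exp(-gamma * lambda);
    }

    /* lazy: one catch-up multiply covering all skipped timesteps */
    uint32_t timesteps = t - last_updated;
    double w_lazy = w * exp(-gamma * lambda * (double)timesteps);

    /* the two agree up to floating-point rounding */
    printf("eager = %.15f\nlazy  = %.15f\n", w_eager, w_lazy);
    return 0;
}

As the comment in stochastic_gradient_descent.c notes, holding the rate fixed over the skipped window gives only roughly the same result as true per-iteration updates, since the real gamma_t decays within the window; the payoff is touching each sparse row only on update and at finalization.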