diff --git a/src/logistic_regression_trainer.c b/src/logistic_regression_trainer.c
index dc0843bf..775636d7 100644
--- a/src/logistic_regression_trainer.c
+++ b/src/logistic_regression_trainer.c
@@ -118,7 +118,7 @@ bool logistic_regression_trainer_train_batch(logistic_regression_trainer_t *self
         goto exit_matrices_created;
     }
 
-    if (self->lambda > 0.0 && !stochastic_gradient_descent_sparse_regularize_weights(self->weights, self->batch_columns, self->last_updated, self->iters, self->lambda)) {
+    if (self->lambda > 0.0 && !stochastic_gradient_descent_regularize_weights(self->weights, self->batch_columns, self->last_updated, self->iters, self->lambda, self->gamma_0)) {
         log_error("Error regularizing weights\n");
         goto exit_matrices_created;
     }
@@ -145,7 +145,7 @@ bool logistic_regression_trainer_finalize(logistic_regression_trainer_t *self) {
     if (self == NULL) return false;
 
     if (self->lambda > 0.0) {
-        return stochastic_gradient_descent_sparse_finalize_weights(self->weights, self->last_updated, self->iters, self->lambda);
+        return stochastic_gradient_descent_finalize_weights(self->weights, self->last_updated, self->iters, self->lambda, self->gamma_0);
     }
 
     return true;
diff --git a/src/stochastic_gradient_descent.c b/src/stochastic_gradient_descent.c
index b9ffc921..feb2207e 100644
--- a/src/stochastic_gradient_descent.c
+++ b/src/stochastic_gradient_descent.c
@@ -79,13 +79,18 @@ twice (three times counting the finalization step), while still getting roughly
 results as though we had done the per-iteration weight updates.
 */
 
-static inline void regularize_row(double *theta_i, size_t n, double lambda, uint32_t last_updated, uint32_t t) {
+
+inline double stochastic_gradient_descent_gamma_t(double gamma_0, double lambda, uint32_t t) {
+    return gamma_0 / (1.0 + lambda * gamma_0 * (double)t);
+}
+
+static inline void regularize_row(double *theta_i, size_t n, double lambda, uint32_t last_updated, uint32_t t, double gamma) {
     uint32_t timesteps = t - last_updated;
-    double update = exp(-lambda * timesteps);
+    double update = exp(-gamma * lambda * timesteps);
     double_array_mul(theta_i, update, n);
 }
 
-bool stochastic_gradient_descent_sparse_regularize_weights(matrix_t *theta, uint32_array *update_indices, uint32_array *last_updated, uint32_t t, double lambda) {
+bool stochastic_gradient_descent_regularize_weights(matrix_t *theta, uint32_array *update_indices, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0) {
     if (lambda > 0.0) {
 
         uint32_t *updates = last_updated->a;
@@ -98,7 +103,7 @@ bool stochastic_gradient_descent_sparse_regularize_weights(matrix_t *theta, uint
             uint32_t row = rows[i];
             double *theta_i = matrix_get_row(theta, row);
             uint32_t last_updated = updates[row];
-            regularize_row(theta_i, n, lambda, last_updated, t);
+            regularize_row(theta_i, n, lambda, last_updated, t, gamma_0);
             updates[row] = t;
         }
 
@@ -107,7 +112,7 @@
     return true;
 }
 
-inline bool stochastic_gradient_descent_sparse_finalize_weights(matrix_t *theta, uint32_array *last_updated, uint32_t t, double lambda) {
+inline bool stochastic_gradient_descent_finalize_weights(matrix_t *theta, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0) {
     if (lambda > 0.0) {
         uint32_t *updates = last_updated->a;
         size_t m = theta->m;
@@ -116,14 +121,10 @@ inline bool stochastic_gradient_descent_sparse_finalize_weights(matrix_t *theta,
         for (size_t i = 0; i < m; i++) {
             double *theta_i = matrix_get_row(theta, i);
             uint32_t last_updated = updates[i];
-            regularize_row(theta_i, n, lambda, last_updated, t);
+            regularize_row(theta_i, n, lambda, last_updated, t, gamma_0);
+            updates[i] = t;
         }
     }
 
     return true;
 }
-
-
-inline double stochastic_gradient_descent_gamma_t(double gamma_0, double lambda, uint32_t t) {
-    return gamma_0 / (1.0 + lambda * gamma_0 * (double)t);
-}
\ No newline at end of file
diff --git a/src/stochastic_gradient_descent.h b/src/stochastic_gradient_descent.h
index 07adf3b1..e0e35287 100644
--- a/src/stochastic_gradient_descent.h
+++ b/src/stochastic_gradient_descent.h
@@ -18,8 +18,9 @@ gamma_t = gamma_0(1 + gamma_0 * lambda * t)^-1
 
 bool stochastic_gradient_descent(matrix_t *theta, matrix_t *gradient, double gamma);
 bool stochastic_gradient_descent_sparse(matrix_t *theta, matrix_t *gradient, uint32_array *update_indices, double gamma);
-bool stochastic_gradient_descent_sparse_regularize_weights(matrix_t *theta, uint32_array *update_indices, uint32_array *last_updated, uint32_t t, double lambda);
-bool stochastic_gradient_descent_sparse_finalize_weights(matrix_t *theta, uint32_array *last_updated, uint32_t t, double lambda);
+bool stochastic_gradient_descent_regularize_weights(matrix_t *theta, uint32_array *update_indices, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0);
+bool stochastic_gradient_descent_finalize_weights(matrix_t *theta, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0);
 double stochastic_gradient_descent_gamma_t(double gamma_0, double lambda, uint32_t t);
+
 #endif
\ No newline at end of file
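For context, the relocated stochastic_gradient_descent_gamma_t() implements the inverse-scaling decay quoted in the header comment, gamma_t = gamma_0(1 + gamma_0 * lambda * t)^-1. A minimal standalone sketch of the schedule, separate from the patch (the hyperparameter values below are illustrative, not taken from the library):

/* sketch only, not part of the patch: the decaying learning rate
 * gamma_t = gamma_0 / (1 + gamma_0 * lambda * t) */
#include <stdint.h>
#include <stdio.h>

static double gamma_t(double gamma_0, double lambda, uint32_t t) {
    return gamma_0 / (1.0 + lambda * gamma_0 * (double)t);
}

int main(void) {
    /* example hyperparameters, chosen arbitrarily for the printout */
    double gamma_0 = 0.1, lambda = 0.01;
    for (uint32_t t = 0; t <= 1000; t += 250) {
        printf("t = %4u  gamma_t = %f\n", (unsigned)t, gamma_t(gamma_0, lambda, t));
    }
    return 0;
}

The rate starts at gamma_0 and decays toward zero as t grows, with lambda controlling how quickly.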
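The regularize_row() change is the core of the patch: instead of shrinking every row by exp(-gamma * lambda) at each timestep, rows are shrunk lazily, with one catch-up multiply of exp(-gamma * lambda * timesteps) when a row is next touched (and at finalization, which now also marks rows as caught up via updates[i] = t). A self-contained sketch of why the single multiply matches the per-step multiplies, under the patch's simplification of holding the rate at gamma_0 across the skipped window:

/* sketch only, not part of the patch: eager per-step shrinkage vs. the
 * lazy catch-up multiply now done in regularize_row(); values are made up */
#include <math.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    double gamma = 0.1, lambda = 0.01;
    double w = 1.0;              /* one weight of a sparse row */
    uint32_t last_updated = 3;   /* timestep of the row's last update */
    uint32_t t = 10;             /* current timestep */

    /* eager: shrink at every timestep, even when the row is untouched */
    double w_eager = w;
    for (uint32_t i = last_updated; i < t; i++) {
        w_eager *= exp(-gamma * lambda);
    }

    /* lazy: one catch-up multiply covering all skipped timesteps */
    uint32_t timesteps = t - last_updated;
    double w_lazy = w * exp(-gamma * lambda * (double)timesteps);

    /* the two agree up to floating-point rounding */
    printf("eager = %.15f\nlazy  = %.15f\n", w_eager, w_lazy);
    return 0;
}

As the comment in stochastic_gradient_descent.c notes, holding the rate fixed over the skipped window gives only roughly the same result as true per-iteration updates, since the real gamma_t decays within the window; the payoff is touching each sparse row only on update and at finalization.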