[optimization] Using an adapted learning rate in stochastic gradient descent (if lambda > 0)

2016-01-17 20:59:47 -05:00
parent 7b300639f1
commit 7d727fc8f0
1 changed file with 5 additions and 2 deletions
--- a/src/stochastic_gradient_descent.c
+++ b/src/stochastic_gradient_descent.c
@@ -103,7 +103,9 @@ bool stochastic_gradient_descent_regularize_weights(matrix_t *theta, uint32_arra
            uint32_t row = rows[i];
            double *theta_i = matrix_get_row(theta, row);
            uint32_t last_updated = updates[row];
-            regularize_row(theta_i, n, lambda, last_updated, t, gamma_0);
+
+            double gamma_t  = stochastic_gradient_descent_gamma_t(gamma_0, lambda, t - last_updated);
+            regularize_row(theta_i, n, lambda, last_updated, t, gamma_t);
            updates[row] = t;
        }
@@ -121,8 +123,9 @@ inline bool stochastic_gradient_descent_finalize_weights(matrix_t *theta, uint32
        for (size_t i = 0; i < m; i++) {
            double *theta_i = matrix_get_row(theta, i);
            uint32_t last_updated = updates[i];
-            regularize_row(theta_i, n, lambda, last_updated, t, gamma_0);
+            double gamma_t  = stochastic_gradient_descent_gamma_t(gamma_0, lambda, t - last_updated);
+            regularize_row(theta_i, n, lambda, last_updated, t, gamma_t);
            updates[i] = t;
        }
    }