diff --git a/src/stochastic_gradient_descent.c b/src/stochastic_gradient_descent.c
new file mode 100644
index 00000000..e56057b0
--- /dev/null
+++ b/src/stochastic_gradient_descent.c
@@ -0,0 +1,25 @@
+#include "stochastic_gradient_descent.h"
+
+bool stochastic_gradient_descent(matrix_t *theta, matrix_t *gradient, double gamma) {
+    if (gradient->m != theta->m || gradient->n != theta->n) {
+        return false;
+    }
+
+    size_t m = gradient->m;
+    size_t n = gradient->n;
+
+    for (size_t i = 0; i < m; i++) {
+        for (size_t j = 0; j < n; j++) {
+            double grad_ij = matrix_get(gradient, i, j);
+            matrix_sub_scalar(theta, i, j, gamma * grad_ij);
+        }
+    }
+
+    return true;
+}
+
+inline bool stochastic_gradient_descent_scheduled(matrix_t *theta, matrix_t *gradient, float lambda, uint32_t t, double gamma_0) {
+    double gamma = gamma_0 / (1.0 + lambda * gamma_0 * (double)t);
+
+    return stochastic_gradient_descent(theta, gradient, gamma);
+}
diff --git a/src/stochastic_gradient_descent.h b/src/stochastic_gradient_descent.h
new file mode 100644
index 00000000..8f699993
--- /dev/null
+++ b/src/stochastic_gradient_descent.h
@@ -0,0 +1,22 @@
+/*
+Stochastic gradient descent implementation
+
+Based on Leon Bottou's Stochastic Gradient Descent Tricks:
+http://leon.bottou.org/publications/pdf/tricks-2012.pdf
+
+Learning rate calculated as:
+gamma_t = gamma_0 / (1 + gamma_0 * lambda * t)
+
+*/
+#ifndef STOCHASTIC_GRADIENT_DESCENT_H
+#define STOCHASTIC_GRADIENT_DESCENT_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "matrix.h"
+
+bool stochastic_gradient_descent(matrix_t *theta, matrix_t *gradient, double gamma);
+bool stochastic_gradient_descent_scheduled(matrix_t *theta, matrix_t *gradient, float lambda, uint32_t t, double gamma_0);
+
+#endif
\ No newline at end of file
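
For reviewers, here is a minimal driver sketch showing how the scheduled update might be exercised. The `matrix_t` layout used below (a row-major `{ size_t m, n; double *data; }`) is an assumption, since `matrix.h` is not part of this diff; only the `m`/`n` fields, the accessor names, and the `stochastic_gradient_descent_scheduled` signature are confirmed by the code above.

```c
/*
 * sgd_demo.c -- usage sketch only, not part of this diff.
 *
 * ASSUMPTION: matrix_t is a row-major struct { size_t m, n; double *data; }.
 * The diff only confirms the m/n fields and the function signatures; if
 * matrix.h differs, adjust the initializers below accordingly.
 *
 * Build (hypothetical): cc sgd_demo.c src/stochastic_gradient_descent.c
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#include "stochastic_gradient_descent.h"

int main(void) {
    /* Minimize f(theta) = 0.5 * ||theta - target||^2, whose gradient is
       theta - target, using the decayed learning-rate schedule. */
    double target[2]    = { 3.0, -1.0 };
    double theta_buf[2] = { 0.0,  0.0 };
    double grad_buf[2]  = { 0.0,  0.0 };

    matrix_t theta    = { .m = 1, .n = 2, .data = theta_buf };  /* assumed layout */
    matrix_t gradient = { .m = 1, .n = 2, .data = grad_buf };   /* assumed layout */

    const double gamma_0 = 0.5;   /* initial learning rate gamma_0 */
    const float  lambda  = 0.1f;  /* decay constant in Bottou's schedule */

    for (uint32_t t = 0; t < 100; t++) {
        /* Recompute the gradient at the current theta. */
        for (size_t j = 0; j < 2; j++) {
            grad_buf[j] = theta_buf[j] - target[j];
        }

        /* Apply theta <- theta - gamma_t * gradient with
           gamma_t = gamma_0 / (1 + gamma_0 * lambda * t). */
        if (!stochastic_gradient_descent_scheduled(&theta, &gradient,
                                                   lambda, t, gamma_0)) {
            fprintf(stderr, "dimension mismatch\n");
            return 1;
        }
    }

    /* theta should be close to target = (3, -1) after 100 steps. */
    printf("theta = (%.4f, %.4f)\n", theta_buf[0], theta_buf[1]);
    return 0;
}
```

With `gamma_0 = 0.5` and `lambda = 0.1`, the schedule gives `gamma_t = 0.5 / (1 + 0.05 * t)`, so the step size decays slowly enough for the iterates to reach `target`. The values here are placeholders; per the cited Bottou paper, `lambda` would normally be the L2 regularization strength of the objective being minimized.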