From 8b7052971174f2aed16607b7972c972363099239 Mon Sep 17 00:00:00 2001
From: Al
Date: Fri, 8 Jan 2016 00:54:17 -0500
Subject: [PATCH] [optimization] Stochastic gradient descent with gain schedule
 a la Leon Bottou

---
 src/stochastic_gradient_descent.c | 30 ++++++++++++++++++++++++++++++
 src/stochastic_gradient_descent.h | 22 ++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 src/stochastic_gradient_descent.c
 create mode 100644 src/stochastic_gradient_descent.h

diff --git a/src/stochastic_gradient_descent.c b/src/stochastic_gradient_descent.c
new file mode 100644
index 00000000..e56057b0
--- /dev/null
+++ b/src/stochastic_gradient_descent.c
@@ -0,0 +1,30 @@
+#include "stochastic_gradient_descent.h"
+
+/* One plain SGD update, in place: theta <- theta - gamma * gradient
+ * (element-wise). Returns false when the matrix shapes do not match. */
+bool stochastic_gradient_descent(matrix_t *theta, matrix_t *gradient, double gamma) {
+    if (gradient->m != theta->m || gradient->n != theta->n) {
+        return false;
+    }
+
+    size_t m = gradient->m;
+    size_t n = gradient->n;
+
+    for (size_t i = 0; i < m; i++) {
+        for (size_t j = 0; j < n; j++) {
+            double grad_ij = matrix_get(gradient, i, j);
+            matrix_sub_scalar(theta, i, j, gamma * grad_ij);
+        }
+    }
+
+    return true;
+}
+
+/* SGD update with Bottou's gain schedule:
+ *   gamma_t = gamma_0 / (1 + lambda * gamma_0 * t)
+ * where t is the iteration counter and gamma_0 the initial rate. */
+bool stochastic_gradient_descent_scheduled(matrix_t *theta, matrix_t *gradient, float lambda, uint32_t t, double gamma_0) {
+    double gamma = gamma_0 / (1.0 + lambda * gamma_0 * (double)t);
+
+    return stochastic_gradient_descent(theta, gradient, gamma);
+}
diff --git a/src/stochastic_gradient_descent.h b/src/stochastic_gradient_descent.h
new file mode 100644
index 00000000..8f699993
--- /dev/null
+++ b/src/stochastic_gradient_descent.h
@@ -0,0 +1,22 @@
+/*
+Stochastic gradient descent implementation
+
+Based on Leon Bottou's Stochastic Gradient Descent Tricks:
+http://leon.bottou.org/publications/pdf/tricks-2012.pdf
+
+Learning rate calculated as:
+gamma_t = gamma_0(1 + gamma_0 * lambda * t)^-1
+
+*/
+#ifndef STOCHASTIC_GRADIENT_DESCENT_H
+#define STOCHASTIC_GRADIENT_DESCENT_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "matrix.h"
+
+bool stochastic_gradient_descent(matrix_t *theta, matrix_t *gradient, double gamma);
+bool stochastic_gradient_descent_scheduled(matrix_t *theta, matrix_t *gradient, float lambda, uint32_t t, double gamma_0);
+
+#endif