diff --git a/src/stochastic_gradient_descent.c b/src/stochastic_gradient_descent.c
new file mode 100644
index 00000000..e56057b0
--- /dev/null
+++ b/src/stochastic_gradient_descent.c
@@ -0,0 +1,25 @@
+#include "stochastic_gradient_descent.h"
+
+bool stochastic_gradient_descent(matrix_t *theta, matrix_t *gradient, double gamma) {
+    if (gradient->m != theta->m || gradient->n != theta->n) {
+        return false;
+    }
+
+    size_t m = gradient->m;
+    size_t n = gradient->n;
+
+    for (size_t i = 0; i < m; i++) {
+        for (size_t j = 0; j < n; j++) {
+            double grad_ij = matrix_get(gradient, i, j);
+            matrix_sub_scalar(theta, i, j, gamma * grad_ij);
+        }
+    }
+
+    return true;
+}
+
+inline bool stochastic_gradient_descent_scheduled(matrix_t *theta, matrix_t *gradient, float lambda, uint32_t t, double gamma_0) {
+    double gamma = gamma_0 / (1.0 + lambda * gamma_0 * (double)t);
+
+    return stochastic_gradient_descent(theta, gradient, gamma);
+}
diff --git a/src/stochastic_gradient_descent.h b/src/stochastic_gradient_descent.h
new file mode 100644
index 00000000..8f699993
--- /dev/null
+++ b/src/stochastic_gradient_descent.h
@@ -0,0 +1,22 @@
+/*
+Stochastic gradient descent implementation
+
+Based on Leon Bottou's Stochastic Gradient Descent Tricks:
+http://leon.bottou.org/publications/pdf/tricks-2012.pdf
+
+Learning rate calculated as:
+gamma_t = gamma_0 / (1 + gamma_0 * lambda * t)
+
+*/
+#ifndef STOCHASTIC_GRADIENT_DESCENT_H
+#define STOCHASTIC_GRADIENT_DESCENT_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "matrix.h"
+
+bool stochastic_gradient_descent(matrix_t *theta, matrix_t *gradient, double gamma);
+bool stochastic_gradient_descent_scheduled(matrix_t *theta, matrix_t *gradient, float lambda, uint32_t t, double gamma_0);
+
+#endif
\ No newline at end of file
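
For reviewers, here is a minimal driver sketch showing how the scheduled update might be exercised. The `matrix_t` layout used below (a row-major `{ size_t m, n; double *data; }`) is an assumption, since `matrix.h` is not part of this diff; only the `m`/`n` fields, the accessor names, and the `stochastic_gradient_descent_scheduled` signature are confirmed by the code above.

```c
/*
 * sgd_demo.c -- usage sketch only, not part of this diff.
 *
 * ASSUMPTION: matrix_t is a row-major struct { size_t m, n; double *data; }.
 * The diff only confirms the m/n fields and the function signatures; if
 * matrix.h differs, adjust the initializers below accordingly.
 *
 * Build (hypothetical): cc sgd_demo.c src/stochastic_gradient_descent.c
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#include "stochastic_gradient_descent.h"

int main(void) {
    /* Minimize f(theta) = 0.5 * ||theta - target||^2, whose gradient is
       theta - target, using the decayed learning-rate schedule. */
    double target[2]    = { 3.0, -1.0 };
    double theta_buf[2] = { 0.0,  0.0 };
    double grad_buf[2]  = { 0.0,  0.0 };

    matrix_t theta    = { .m = 1, .n = 2, .data = theta_buf };  /* assumed layout */
    matrix_t gradient = { .m = 1, .n = 2, .data = grad_buf };   /* assumed layout */

    const double gamma_0 = 0.5;   /* initial learning rate gamma_0 */
    const float  lambda  = 0.1f;  /* decay constant in Bottou's schedule */

    for (uint32_t t = 0; t < 100; t++) {
        /* Recompute the gradient at the current theta. */
        for (size_t j = 0; j < 2; j++) {
            grad_buf[j] = theta_buf[j] - target[j];
        }

        /* Apply theta <- theta - gamma_t * gradient with
           gamma_t = gamma_0 / (1 + gamma_0 * lambda * t). */
        if (!stochastic_gradient_descent_scheduled(&theta, &gradient,
                                                   lambda, t, gamma_0)) {
            fprintf(stderr, "dimension mismatch\n");
            return 1;
        }
    }

    /* theta should be close to target = (3, -1) after 100 steps. */
    printf("theta = (%.4f, %.4f)\n", theta_buf[0], theta_buf[1]);
    return 0;
}
```

With `gamma_0 = 0.5` and `lambda = 0.1`, the schedule gives `gamma_t = 0.5 / (1 + 0.05 * t)`, so the step size decays slowly enough for the iterates to reach `target`. The values here are placeholders; per the cited Bottou paper, `lambda` would normally be the L2 regularization strength of the objective being minimized.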