[math] Generic dense matrix implementation using BLAS calls for matrix-matrix multiplication if available

2016-08-06 00:40:01 -04:00
parent d4a792f33c
commit 46cd725c13
19 changed files with 469 additions and 425 deletions
--- a/src/file_utils.c
+++ b/src/file_utils.c
@@ -132,6 +132,48 @@ bool file_write_double(FILE *file, double value) {
    return file_write_uint64(file, ud.u);
 }

+typedef union {
+    uint32_t u;
+    float f;
+} uint32_float_t;
+
+bool file_read_float(FILE *file, float *value) {
+    uint32_float_t uf;
+
+    if (!file_read_uint32(file, &uf.u)) {
+        return false;
+    }
+    *value = uf.f;
+    return true;
+}
+
+bool file_read_float_array(FILE *file, float *value, size_t n) {
+    unsigned char *buf = malloc(n * sizeof(uint32_t));
+
+    if (buf == NULL) return false;
+
+    bool ret = false;
+
+    if (fread(buf, sizeof(uint32_t), n, file) == n) {
+        uint32_float_t uf;
+
+        for (size_t i = 0, byte_offset = 0; i < n; i++, byte_offset += sizeof(uint32_t)) {
+            unsigned char *ptr = buf + byte_offset;
+            uf.u = file_deserialize_uint32(ptr);
+            value[i] = uf.f;
+        }
+        ret = true;
+    }
+    free(buf);
+    return ret;
+}
+
+bool file_write_float(FILE *file, float value) {
+    uint32_float_t uf;
+    uf.f = value;
+    return file_write_uint32(file, uf.u);
+
+}

 inline uint32_t file_deserialize_uint32(unsigned char *buf) {
    return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
--- a/src/file_utils.h
+++ b/src/file_utils.h
@@ -61,6 +61,11 @@ bool file_write_uint64(FILE *file, uint64_t value);

 bool file_read_uint64_array(FILE *file, uint64_t *value, size_t n);

+bool file_read_float(FILE *file, float *value);
+bool file_write_float(FILE *file, float value);
+
+bool file_read_float_array(FILE *file, float *value, size_t n);
+
 bool file_read_double(FILE *file, double *value);
 bool file_write_double(FILE *file, double value);

--- a/src/language_classifier.c
+++ b/src/language_classifier.c
@@ -28,7 +28,7 @@ void language_classifier_destroy(language_classifier_t *self) {
    }

    if (self->weights != NULL) {
-        matrix_destroy(self->weights);
+        double_matrix_destroy(self->weights);
    }

    free(self);
@@ -83,11 +83,11 @@ language_classifier_response_t *classify_languages(char *address) {
    sparse_matrix_t *x = feature_vector(classifier->features, feature_counts);

    size_t n = classifier->num_labels;
-    matrix_t *p_y = matrix_new_zeros(1, n);
+    double_matrix_t *p_y = double_matrix_new_zeros(1, n);

    language_classifier_response_t *response = NULL;
    if (logistic_regression_model_expectation(classifier->weights, x, p_y)) {
-        double *predictions = matrix_get_row(p_y, 0);
+        double *predictions = double_matrix_get_row(p_y, 0);
        size_t *indices = double_array_argsort(predictions, n);
        size_t num_languages = 0;
        size_t i;
@@ -126,7 +126,7 @@ language_classifier_response_t *classify_languages(char *address) {
    }

    sparse_matrix_destroy(x);
-    matrix_destroy(p_y);
+    double_matrix_destroy(p_y);
    token_array_destroy(tokens);
    char_array_destroy(feature_array);
    const char *key;
@@ -190,7 +190,7 @@ language_classifier_t *language_classifier_read(FILE *f) {
    }
    classifier->num_labels = cstring_array_num_strings(classifier->labels);

-    matrix_t *weights = matrix_read(f);
+    double_matrix_t *weights = double_matrix_read(f);

    if (weights == NULL) {
        goto exit_classifier_created;
@@ -228,7 +228,7 @@ bool language_classifier_write(language_classifier_t *self, FILE *f) {
        !file_write_uint64(f, self->num_features) ||
        !file_write_uint64(f, self->labels->str->n) ||
        !file_write_chars(f, (const char *)self->labels->str->a, self->labels->str->n) ||
-        !matrix_write(self->weights, f)) {
+        !double_matrix_write(self->weights, f)) {
        return false;
    }

@@ -277,5 +277,6 @@ void language_classifier_module_teardown(void) {
    if (language_classifier != NULL) {
        language_classifier_destroy(language_classifier);
    }
+    language_classifier = NULL;
 }

--- a/src/language_classifier.h
+++ b/src/language_classifier.h
@@ -21,7 +21,7 @@ typedef struct language_classifier {
    size_t num_features;
    trie_t *features;
    cstring_array *labels;
-    matrix_t *weights;
+    double_matrix_t *weights;
 } language_classifier_t;


--- a/src/language_classifier_train.c
+++ b/src/language_classifier_train.c
@@ -13,7 +13,7 @@
 #include "sparse_matrix.h"
 #include "sparse_matrix_utils.h"

-#define LANGUAGE_CLASSIFIER_FEATURE_COUNT_THRESHOLD 1.0
+#define LANGUAGE_CLASSIFIER_FEATURE_COUNT_THRESHOLD 5.0
 #define LANGUAGE_CLASSIFIER_LABEL_COUNT_THRESHOLD 100

 #define LOG_BATCH_INTERVAL 10
@@ -134,13 +134,13 @@ double compute_cv_accuracy(logistic_regression_trainer_t *trainer, char *filenam
    uint32_t correct = 0;
    uint32_t total = 0;

-    matrix_t *p_y = matrix_new_zeros(LANGUAGE_CLASSIFIER_DEFAULT_BATCH_SIZE, trainer->num_labels);
+    double_matrix_t *p_y = double_matrix_new_zeros(LANGUAGE_CLASSIFIER_DEFAULT_BATCH_SIZE, trainer->num_labels);

    while ((minibatch = language_classifier_data_set_get_minibatch(data_set, trainer->label_ids)) != NULL) {
        sparse_matrix_t *x = feature_matrix(trainer->feature_ids, minibatch->features);
        uint32_array *y = label_vector(trainer->label_ids, minibatch->labels);

-        matrix_resize(p_y, x->m, trainer->num_labels);
+        double_matrix_resize(p_y, x->m, trainer->num_labels);

        if (!logistic_regression_model_expectation(trainer->weights, x, p_y)) {
            log_error("Predict cv batch failed\n");
@@ -149,7 +149,7 @@ double compute_cv_accuracy(logistic_regression_trainer_t *trainer, char *filenam

        double *row;
        for (size_t i = 0; i < p_y->m; i++) {
-            row = matrix_get_row(p_y, i);
+            row = double_matrix_get_row(p_y, i);

            int64_t predicted = double_array_argmax(row, p_y->n);
            if (predicted < 0) {
@@ -171,7 +171,7 @@ double compute_cv_accuracy(logistic_regression_trainer_t *trainer, char *filenam
    }

    language_classifier_data_set_destroy(data_set);
-    matrix_destroy(p_y);
+    double_matrix_destroy(p_y);

    double accuracy = (double)correct / total;
    return accuracy;
--- a/src/logistic.c
+++ b/src/logistic.c
@@ -23,7 +23,7 @@ inline void softmax_vector(double *x, size_t n) {
 }


-void softmax_matrix(matrix_t *matrix) {
+void softmax_matrix(double_matrix_t *matrix) {
    size_t num_rows = matrix->m;
    size_t num_cols = matrix->n;

--- a/src/logistic.h
+++ b/src/logistic.h
@@ -9,6 +9,6 @@
 double sigmoid(double x);
 void sigmoid_vector(double *x, size_t n);
 void softmax_vector(double *x, size_t n);
-void softmax_matrix(matrix_t *matrix);
+void softmax_matrix(double_matrix_t *matrix);

 #endif
--- a/src/logistic_regression.c
+++ b/src/logistic_regression.c
@@ -6,7 +6,7 @@

 #define NEAR_ZERO_WEIGHT 1e-6

-bool logistic_regression_model_expectation(matrix_t *theta, sparse_matrix_t *x, matrix_t *p_y) {
+bool logistic_regression_model_expectation(double_matrix_t *theta, sparse_matrix_t *x, double_matrix_t *p_y) {
    if (theta == NULL || x == NULL || p_y == NULL) return false;

    if (sparse_matrix_dot_dense(x, theta, p_y) != 0) {
@@ -18,12 +18,12 @@ bool logistic_regression_model_expectation(matrix_t *theta, sparse_matrix_t *x,
    return true;
 }

-double logistic_regression_cost_function(matrix_t *theta, sparse_matrix_t *x, uint32_array *y, matrix_t *p_y, double lambda) {
+double logistic_regression_cost_function(double_matrix_t *theta, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y, double lambda) {
    size_t m = x->m;
    size_t n = x->n;
    if (m != y->n) return -1.0;

-    if (!matrix_resize(p_y, x->m, theta->n)) {
+    if (!double_matrix_resize(p_y, x->m, theta->n)) {
        return -1.0;
    }

@@ -36,7 +36,7 @@ double logistic_regression_cost_function(matrix_t *theta, sparse_matrix_t *x, ui

    for (size_t i = 0; i < p_y->m; i++) {
        uint32_t y_i = y->a[i];
-        double value = matrix_get(p_y, i, y_i);
+        double value = double_matrix_get(p_y, i, y_i);
        if (value > NEAR_ZERO_WEIGHT) {
            cost += log(value);
        }
@@ -48,7 +48,7 @@ double logistic_regression_cost_function(matrix_t *theta, sparse_matrix_t *x, ui
        double reg_cost = 0.0;
        for (size_t i = 1; i < theta->m; i++) {
            for (size_t j = 0; j < theta->n; j++) {
-                double theta_ij = matrix_get(theta, i, j);
+                double theta_ij = double_matrix_get(theta, i, j);
                reg_cost += theta_ij * theta_ij;
            }

@@ -61,13 +61,13 @@ double logistic_regression_cost_function(matrix_t *theta, sparse_matrix_t *x, ui

 }

-static bool logistic_regression_gradient_params(matrix_t *theta, matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, matrix_t *p_y, 
+static bool logistic_regression_gradient_params(double_matrix_t *theta, double_matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y, 
                                                uint32_array *x_cols, double lambda) {
    size_t m = x->m;
    size_t n = x->n;
    if (m != y->n) return false;

-    if (!matrix_resize(p_y, x->m, theta->n) || !matrix_resize(p_y, x->m, theta->n)) {
+    if (!double_matrix_resize(p_y, x->m, theta->n) || !double_matrix_resize(p_y, x->m, theta->n)) {
        return false;
    }

@@ -100,11 +100,11 @@ static bool logistic_regression_gradient_params(matrix_t *theta, matrix_t *gradi
        uint32_t *cols = x_cols->a;
        for (i = 0; i < batch_rows; i++) {
            col = cols[i];
-            gradient_i = matrix_get_row(gradient, col);
+            gradient_i = double_matrix_get_row(gradient, col);
            double_array_zero(gradient_i, num_classes);
        }
    } else {
-        matrix_zero(gradient);
+        double_matrix_zero(gradient);
    }

    // gradient = -(1. / m) * x.T.dot(y - p_y) + lambda * theta
@@ -133,7 +133,7 @@ static bool logistic_regression_gradient_params(matrix_t *theta, matrix_t *gradi
            }
        }
    } else {
-        matrix_mul(gradient, scale);
+        double_matrix_mul(gradient, scale);
    }


@@ -166,12 +166,12 @@ static bool logistic_regression_gradient_params(matrix_t *theta, matrix_t *gradi
    return true;
 }

-inline bool logistic_regression_gradient_sparse(matrix_t *theta, matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, matrix_t *p_y, 
+inline bool logistic_regression_gradient_sparse(double_matrix_t *theta, double_matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y, 
                                                uint32_array *x_cols, double lambda) {
    return logistic_regression_gradient_params(theta, gradient, x, y, p_y, x_cols, lambda);
 }


-inline bool logistic_regression_gradient(matrix_t *theta, matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, matrix_t *p_y, double lambda) {
+inline bool logistic_regression_gradient(double_matrix_t *theta, double_matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y, double lambda) {
    return logistic_regression_gradient_params(theta, gradient, x, y, p_y, NULL, lambda);
 }
--- a/src/logistic_regression.h
+++ b/src/logistic_regression.h
@@ -26,10 +26,10 @@ may be called softmax regression.
 #include "matrix.h"
 #include "sparse_matrix.h"

-bool logistic_regression_model_expectation(matrix_t *theta, sparse_matrix_t *x, matrix_t *p_y);
-double logistic_regression_cost_function(matrix_t *theta, sparse_matrix_t *x, uint32_array *y, matrix_t *p_y, double lambda);
-bool logistic_regression_gradient(matrix_t *theta, matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, matrix_t *p_y, double lambda);
-bool logistic_regression_gradient_sparse(matrix_t *theta, matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, matrix_t *p_y, 
+bool logistic_regression_model_expectation(double_matrix_t *theta, sparse_matrix_t *x, double_matrix_t *p_y);
+double logistic_regression_cost_function(double_matrix_t *theta, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y, double lambda);
+bool logistic_regression_gradient(double_matrix_t *theta, double_matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y, double lambda);
+bool logistic_regression_gradient_sparse(double_matrix_t *theta, double_matrix_t *gradient, sparse_matrix_t *x, uint32_array *y, double_matrix_t *p_y, 
                                         uint32_array *x_cols, double lambda);

 #endif
--- a/src/logistic_regression_trainer.c
+++ b/src/logistic_regression_trainer.c
@@ -13,7 +13,7 @@ void logistic_regression_trainer_destroy(logistic_regression_trainer_t *self) {
    }

    if (self->weights != NULL) {
-        matrix_destroy(self->weights);
+        double_matrix_destroy(self->weights);
    }

    if (self->last_updated != NULL) {
@@ -29,7 +29,7 @@ void logistic_regression_trainer_destroy(logistic_regression_trainer_t *self) {
    }

    if (self->gradient != NULL) {
-        matrix_destroy(self->gradient);
+        double_matrix_destroy(self->gradient);
    }

    free(self);
@@ -48,9 +48,9 @@ logistic_regression_trainer_t *logistic_regression_trainer_init(trie_t *feature_
    trainer->label_ids = label_ids;
    trainer->num_labels = kh_size(label_ids);

-    trainer->weights = matrix_new_zeros(trainer->num_features, trainer->num_labels);
+    trainer->weights = double_matrix_new_zeros(trainer->num_features, trainer->num_labels);

-    trainer->gradient = matrix_new_zeros(trainer->num_features, trainer->num_labels);
+    trainer->gradient = double_matrix_new_zeros(trainer->num_features, trainer->num_labels);

    trainer->unique_columns = kh_init(int_set);
    trainer->batch_columns = uint32_array_new_size(trainer->num_features);
@@ -70,14 +70,14 @@ exit_trainer_created:
 }


-static matrix_t *model_expectation(sparse_matrix_t *x, matrix_t *theta) {
-    matrix_t *p_y = matrix_new_zeros(x->m, theta->n);
+static double_matrix_t *model_expectation(sparse_matrix_t *x, double_matrix_t *theta) {
+    double_matrix_t *p_y = double_matrix_new_zeros(x->m, theta->n);
    if (p_y == NULL) return NULL;

    if(logistic_regression_model_expectation(theta, x, p_y)) {
        return p_y;
    } else {
-        matrix_destroy(p_y);
+        double_matrix_destroy(p_y);
        return NULL;
    }
 }
@@ -88,11 +88,11 @@ double logistic_regression_trainer_batch_cost(logistic_regression_trainer_t *sel

    sparse_matrix_t *x = feature_matrix(self->feature_ids, features);
    uint32_array *y = label_vector(self->label_ids, labels);
-    matrix_t *p_y = matrix_new_zeros(x->m, n);
+    double_matrix_t *p_y = double_matrix_new_zeros(x->m, n);

    double cost = logistic_regression_cost_function(self->weights, x, y, p_y, self->lambda);

-    matrix_destroy(p_y);
+    double_matrix_destroy(p_y);
    uint32_array_destroy(y);
    sparse_matrix_destroy(x);
    return cost;    
@@ -103,12 +103,12 @@ bool logistic_regression_trainer_train_batch(logistic_regression_trainer_t *self
    size_t n = self->weights->n;

    // Optimize
-    matrix_t *gradient = self->gradient;
+    double_matrix_t *gradient = self->gradient;

    sparse_matrix_t *x = feature_matrix(self->feature_ids, features);
    uint32_array *y = label_vector(self->label_ids, labels);

-    matrix_t *p_y = matrix_new_zeros(x->m, n);
+    double_matrix_t *p_y = double_matrix_new_zeros(x->m, n);

    bool ret = false;

@@ -135,7 +135,7 @@ bool logistic_regression_trainer_train_batch(logistic_regression_trainer_t *self
    self->iters++;

 exit_matrices_created:
-    matrix_destroy(p_y);
+    double_matrix_destroy(p_y);
    uint32_array_destroy(y);
    sparse_matrix_destroy(x);
    return ret;
--- a/src/logistic_regression_trainer.h
+++ b/src/logistic_regression_trainer.h
@@ -26,8 +26,8 @@ typedef struct logistic_regression_trainer {
    size_t num_features;                // Number of features
    khash_t(str_uint32) *label_ids;     // Hashtable mapping labels to array indices
    size_t num_labels;                  // Number of labels
-    matrix_t *weights;                  // Matrix of logistic regression weights
-    matrix_t *gradient;                 // Gradient matrix to be reused
+    double_matrix_t *weights;                  // Matrix of logistic regression weights
+    double_matrix_t *gradient;                 // Gradient matrix to be reused
    khash_t(int_set) *unique_columns;   // Unique columns set
    uint32_array *batch_columns;        // Unique columns as array
    uint32_array *last_updated;         // Array of length N indicating the last time each feature was updated
--- a/src/matrix.c
+++ b/src/matrix.c
@@ -1,320 +0,0 @@
-#include "matrix.h"
-#include "file_utils.h"
-
-matrix_t *matrix_new(size_t m, size_t n) {
-    matrix_t *matrix = malloc(sizeof(matrix_t));
-
-    if (matrix == NULL) {
-        return NULL;
-    }
-
-    matrix->m = m;
-    matrix->n = n;
-
-    matrix->values = malloc(sizeof(double) * m * n);
-    if (matrix->values == NULL) {
-        free(matrix);
-        return NULL;
-    }
-
-    return matrix;
-
-}
-
-bool matrix_resize(matrix_t *self, size_t m, size_t n) {
-    if (self == NULL) return false;
-
-    // If the new size is smaller, don't reallocate, just ignore the extra values
-    if (m * n > (self->m * self->n)) {
-        self->values = realloc(self->values, sizeof(double) * m * n);
-        if (self->values == NULL) {
-            return false;
-        }        
-    }
-
-    self->m = m;
-    self->n = n;
-
-    // Set all values to 0
-    matrix_zero(self);
-
-    return true;
-}
-
-matrix_t *matrix_new_copy(matrix_t *self) {
-    matrix_t *cpy = matrix_new(self->m, self->n);
-    size_t num_values = self->m * self->n;
-    memcpy(cpy->values, self->values, num_values);
-
-    return cpy;
-}
-
-bool matrix_copy(matrix_t *self, matrix_t *other) {
-    if (self->m != other->m || self->n != other->n) {
-        return false;
-    }
-    size_t num_values = self->n * self->n;
-
-    memcpy(other->values, self->values, num_values * sizeof(double));
-    return true;
-}
-
-inline void matrix_init_values(matrix_t *self, double *values) {
-    size_t num_values = self->m * self->n;
-    memcpy(self->values, values, num_values * sizeof(double));
-}
-
-inline void matrix_zero(matrix_t *self) {
-    memset(self->values, 0, self->m * self->n * sizeof(double));
-}
-
-inline void matrix_set(matrix_t *self, double value) {
-    double_array_set(self->values, self->m * self->n, value);
-}
-
-inline void matrix_set_row(matrix_t *self, size_t index, double *row) {
-    size_t offset = index * self->n;
-    double *values = self->values;
-    size_t n = self->n;
-    memcpy(values + offset, row, n * sizeof(double));
-}
-
-inline void matrix_set_scalar(matrix_t *self, size_t row_index, size_t col_index, double value) {
-    size_t offset = row_index * self->n + col_index;
-    self->values[offset] = value;
-}
-
-inline void matrix_add_scalar(matrix_t *self, size_t row_index, size_t col_index, double value) {
-    size_t offset = row_index * self->n + col_index;
-    self->values[offset] += value;
-}
-
-inline void matrix_sub_scalar(matrix_t *self, size_t row_index, size_t col_index, double value) {
-    size_t offset = row_index * self->n + col_index;
-    self->values[offset] -= value;
-}
-
-inline void matrix_mul_scalar(matrix_t *self, size_t row_index, size_t col_index, double value) {
-    size_t offset = row_index * self->n + col_index;
-    self->values[offset] *= value;
-}
-
-inline void matrix_div_scalar(matrix_t *self, size_t row_index, size_t col_index, double value) {
-    size_t offset = row_index * self->n + col_index;
-    self->values[offset] /= value;
-}
-
-inline double matrix_get(matrix_t *self, size_t row_index, size_t col_index) {
-    size_t index = row_index * self->n + col_index;
-    return self->values[index];
-}
-
-inline double *matrix_get_row(matrix_t *self, size_t row_index) {
-    size_t index = row_index * self->n;
-    return self->values + index;
-}
-
-
-inline matrix_t *matrix_new_value(size_t m, size_t n, double value) {
-    matrix_t *matrix = matrix_new(m, n);
-    matrix_set(matrix, value);
-    return matrix;
-}
-
-inline matrix_t *matrix_new_zeros(size_t m, size_t n) {
-    return matrix_new_value(m, n, 0.0);
-}
-
-inline matrix_t *matrix_new_ones(size_t m, size_t n) {
-    return matrix_new_value(m, n, 1.0);
-}
-
-matrix_t *matrix_new_values(size_t m, size_t n, double *values) {
-    matrix_t *matrix = matrix_new(m, n);
-    memcpy(matrix->values, values, m * n * sizeof(double));
-    return matrix;
-}
-
-
-inline void matrix_div(matrix_t *self, double value) {
-    double_array_div(self->values, value, self->m * self->n);
-}
-
-inline bool matrix_div_matrix(matrix_t *self, matrix_t *other) {
-    if (self->m != other->m || self->n != other->n) return false;
-    double_array_div_array(self->values, other->values, self->m * self->n);
-    return true;
-}
-
-inline bool matrix_div_matrix_times_scalar(matrix_t *self, matrix_t *other, double v) {
-    if (self->m != other->m || self->n != other->n) return false;
-    double_array_div_array_times_scalar(self->values, other->values, v, self->m * self->n);
-    return true;
-}
-
-
-inline void matrix_mul(matrix_t *self, double value) {
-    double_array_mul(self->values, value, self->m * self->n);
-}
-
-inline bool matrix_mul_matrix(matrix_t *self, matrix_t *other) {
-    if (self->m != other->m || self->n != other->n) return false;
-    double_array_mul_array(self->values, other->values, self->m * self->n);
-    return true;
-}
-
-inline bool matrix_mul_matrix_times_scalar(matrix_t *self, matrix_t *other, double v) {
-    if (self->m != other->m || self->n != other->n) return false;
-    double_array_mul_array_times_scalar(self->values, other->values, v, self->m * self->n);
-    return true;
-}
-
-inline void matrix_add(matrix_t *self, double value) {
-    double_array_add(self->values, self->m * self->n, value);
-}
-
-
-inline bool matrix_add_matrix(matrix_t *self, matrix_t *other) {
-    if (self->m != other->m || self->n != other->n) return false;
-    double_array_add_array(self->values, other->values, self->m * self->n);
-    return true;
-}
-
-inline bool matrix_add_matrix_times_scalar(matrix_t *self, matrix_t *other, double v) {
-    if (self->m != other->m || self->n != other->n) return false;
-    double_array_add_array_times_scalar(self->values, other->values, v, self->m * self->n);
-    return true;
-}
-
-inline void matrix_sub(matrix_t *self, double value) {
-    double_array_sub(self->values, value, self->m * self->n);
-}
-
-inline bool matrix_sub_matrix(matrix_t *self, matrix_t *other) {
-    if (self->m != other->m || self->n != other->n) return false;
-    double_array_sub_array(self->values, other->values, self->m * self->n);
-    return true;
-}
-
-inline bool matrix_sub_matrix_times_scalar(matrix_t *self, matrix_t *other, double v) {
-    if (self->m != other->m || self->n != other->n) return false;
-    double_array_sub_array_times_scalar(self->values, other->values, v, self->m * self->n);
-    return true;
-}
-
-
-inline void matrix_log(matrix_t *self) {
-    double_array_log(self->values, self->m * self->n);
-}
-
-inline void matrix_exp(matrix_t *self) {
-    double_array_exp(self->values, self->m * self->n);
-}
-
-void matrix_dot_vector(matrix_t *self, double *vec, double *result) {
-    double *values = self->values;
-    size_t n = self->n;
-    for (int i = 0; i < self->m; i++) {
-        for (int j = 0; j < n; j++) {
-            result[i] += values[n * i + j] * vec[j];
-        }
-    }
-}
-
-inline bool matrix_dot_matrix(matrix_t *m1, matrix_t *m2, matrix_t *result) {
-    if (m1->n != m2->m || m1->m != result->m || m2->n != result->n) {
-        return false;
-    }
-
-    size_t m1_rows = m1->m;
-    size_t m1_cols = m1->n;
-    size_t m2_rows = m2->m;
-    size_t m2_cols = m2->n;
-
-    double *m1_values = m1->values;
-    double *m2_values = m2->values;
-    double *result_values = result->values;
-
-    for (int i = 0; i < m1_rows; i++) {
-        for (int j = 0; j < m2_cols; j++) {
-            size_t result_index = m2_cols * i + j;
-            result_values[result_index] = 0.0;
-            for (int k = 0; k < m2_rows; k++) {
-                result_values[result_index] += m1_values[m1_cols * i + k] * m2_values[m2_cols * k + j];
-            }
-        }
-    }
-
-    return true;
-}
-
-matrix_t *matrix_read(FILE *f) {
-    matrix_t *mat = malloc(sizeof(matrix_t));
-    if (mat == NULL) return NULL;
-
-    mat->values = NULL;
-
-    uint64_t m = 0;
-    uint64_t n = 0;
-
-    if (!file_read_uint64(f, &m) ||
-        !file_read_uint64(f, &n)) {
-        goto exit_matrix_allocated;
-    }
-
-    mat->m = (size_t)m;
-    mat->n = (size_t)n;
-
-    size_t len_data = mat->m * mat->n;
-
-    double *data = malloc(len_data * sizeof(double));
-    if (data == NULL) {
-        printf("data alloc\n");
-        goto exit_matrix_allocated;
-    }
-
-    if (!file_read_double_array(f, data, len_data)) {
-        free(data);
-        goto exit_matrix_allocated;
-    }
-
-    mat->values = data;
-
-    return mat;
-
-exit_matrix_allocated:
-    matrix_destroy(mat);
-    return NULL;
-}
-
-bool matrix_write(matrix_t *self, FILE *f) {
-    if (self == NULL || self->values == NULL) {
-        return false;
-    }
-
-    if (!file_write_uint64(f, (uint64_t)self->m) ||
-        !file_write_uint64(f, (uint64_t)self->n)) {
-        return false;
-    }
-
-    uint64_t len_data = (uint64_t)self->m * (uint64_t)self->n;
-
-    for (uint64_t i = 0; i < len_data; i++) {
-        if (!file_write_double(f, self->values[i])) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-
-void matrix_destroy(matrix_t *self) {
-    if (self == NULL) return;
-
-    if (self->values != NULL) {
-        free(self->values);
-    }
-
-    free(self);
-}
--- a/src/matrix.h
+++ b/src/matrix.h
@@ -6,60 +6,374 @@
 #include <stdint.h>
 #include <stdbool.h>

+#include <config.h>
+
 #include "collections.h"
+#include "file_utils.h"
+#include "vector.h"
+#include "vector_math.h"

-typedef struct matrix {
-    size_t m;
-    size_t n;
-    double *values;
-} matrix_t;
+#ifdef HAVE_BLAS
+#include <cblas.h>
+#else
+#warning "No CLBAS"
+#endif

-matrix_t *matrix_new(size_t m, size_t n);
-matrix_t *matrix_new_value(size_t m, size_t n, double value);
-matrix_t *matrix_new_values(size_t m, size_t n, double *values);
-matrix_t *matrix_new_zeros(size_t m, size_t n);
-matrix_t *matrix_new_ones(size_t m, size_t n);
+#define MATRIX_INIT(name, type, type_name, array_type)                                                      \
+    typedef struct {                                                                                        \
+        size_t m, n;                                                                                        \
+        type *values;                                                                                       \
+    } name##_t;                                                                                             \
+                                                                                                            \
+    static name##_t *name##_new(size_t m, size_t n) {                                                       \
+        name##_t *matrix = malloc(sizeof(name##_t));                                                        \
+                                                                                                            \
+        if (matrix == NULL) {                                                                               \
+            return NULL;                                                                                    \
+        }                                                                                                   \
+                                                                                                            \
+        matrix->m = m;                                                                                      \
+        matrix->n = n;                                                                                      \
+                                                                                                            \
+        matrix->values = malloc(sizeof(type) * m * n);                                                      \
+        if (matrix->values == NULL) {                                                                       \
+            free(matrix);                                                                                   \
+            return NULL;                                                                                    \
+        }                                                                                                   \
+                                                                                                            \
+        return matrix;                                                                                      \
+                                                                                                            \
+    }                                                                                                       \
+                                                                                                            \
+    static void name##_destroy(name##_t *self) {                                                            \
+        if (self == NULL) return;                                                                           \
+                                                                                                            \
+        if (self->values != NULL) {                                                                         \
+            free(self->values);                                                                             \
+        }                                                                                                   \
+                                                                                                            \
+        free(self);                                                                                         \
+    }                                                                                                       \
+                                                                                                            \
+    static inline void name##_zero(name##_t *self) {                                                        \
+        memset(self->values, 0, self->m * self->n * sizeof(type));                                          \
+    }                                                                                                       \
+                                                                                                            \
+                                                                                                            \
+    static inline bool name##_resize(name##_t *self, size_t m, size_t n) {                                  \
+        if (self == NULL) return false;                                                                     \
+                                                                                                            \
+        if (m * n > (self->m * self->n)) {                                                                  \
+            self->values = realloc(self->values, sizeof(type) * m * n);                                     \
+            if (self->values == NULL) {                                                                     \
+                return false;                                                                               \
+            }                                                                                               \
+        }                                                                                                   \
+                                                                                                            \
+        self->m = m;                                                                                        \
+        self->n = n;                                                                                        \
+                                                                                                            \
+        name##_zero(self);                                                                                  \
+                                                                                                            \
+        return true;                                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    static inline name##_t *name##_new_copy(name##_t *self) {                                               \
+        name##_t *cpy = name##_new(self->m, self->n);                                                       \
+        size_t num_values = self->m * self->n;                                                              \
+        memcpy(cpy->values, self->values, num_values);                                                      \
+                                                                                                            \
+        return cpy;                                                                                         \
+    }                                                                                                       \
+                                                                                                            \
+    static inline bool name##_copy(name##_t *self, name##_t *other) {                                       \
+        if (self->m != other->m || self->n != other->n) {                                                   \
+            return false;                                                                                   \
+        }                                                                                                   \
+        size_t num_values = self->n * self->n;                                                              \
+                                                                                                            \
+        memcpy(other->values, self->values, num_values * sizeof(type));                                     \
+        return true;                                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    static inline void name##_init_values(name##_t *self, type *values) {                                   \
+        size_t num_values = self->m * self->n;                                                              \
+        memcpy(self->values, values, num_values * sizeof(type));                                            \
+    }                                                                                                       \
+                                                                                                            \
+    static inline void name##_set(name##_t *self, type value) {                                             \
+        array_type##_set(self->values, value, self->m * self->n);                                           \
+    }                                                                                                       \
+                                                                                                            \
+    static inline void name##_set_row(name##_t *self, size_t index, type *row) {                            \
+        size_t offset = index * self->n;                                                                    \
+        type *values = self->values;                                                                        \
+        size_t n = self->n;                                                                                 \
+        memcpy(values + offset, row, n * sizeof(type));                                                     \
+    }                                                                                                       \
+                                                                                                            \
+    static inline void name##_set_scalar(name##_t *self, size_t row_index, size_t col_index, type value) {  \
+        size_t offset = row_index * self->n + col_index;                                                    \
+        self->values[offset] = value;                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    static inline void name##_add_scalar(name##_t *self, size_t row_index, size_t col_index, type value) {  \
+        size_t offset = row_index * self->n + col_index;                                                    \
+        self->values[offset] += value;                                                                      \
+    }                                                                                                       \
+                                                                                                            \
+    static inline void name##_sub_scalar(name##_t *self, size_t row_index, size_t col_index, type value) {  \
+        size_t offset = row_index * self->n + col_index;                                                    \
+        self->values[offset] -= value;                                                                      \
+    }                                                                                                       \
+                                                                                                            \
+    static inline void name##_mul_scalar(name##_t *self, size_t row_index, size_t col_index, type value) {  \
+        size_t offset = row_index * self->n + col_index;                                                    \
+        self->values[offset] *= value;                                                                      \
+    }                                                                                                       \
+                                                                                                            \
+    static inline void name##_div_scalar(name##_t *self, size_t row_index, size_t col_index, type value) {  \
+        size_t offset = row_index * self->n + col_index;                                                    \
+        self->values[offset] /= value;                                                                      \
+    }                                                                                                       \
+                                                                                                            \
+    static inline type name##_get(name##_t *self, size_t row_index, size_t col_index) {                     \
+        size_t index = row_index * self->n + col_index;                                                     \
+        return self->values[index];                                                                         \
+    }                                                                                                       \
+                                                                                                            \
+    static inline type *name##_get_row(name##_t *self, size_t row_index) {                                  \
+        size_t index = row_index * self->n;                                                                 \
+        return self->values + index;                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    static inline name##_t *name##_new_value(size_t m, size_t n, type value) {                              \
+        name##_t *matrix = name##_new(m, n);                                                                \
+        name##_set(matrix, value);                                                                          \
+        return matrix;                                                                                      \
+    }                                                                                                       \
+                                                                                                            \
+    static inline name##_t *name##_new_zeros(size_t m, size_t n) {                                          \
+        name##_t *matrix = name##_new(m, n);                                                                \
+        name##_zero(matrix);                                                                                \
+        return matrix;                                                                                      \
+    }                                                                                                       \
+                                                                                                            \
+    static inline name##_t *name##_new_ones(size_t m, size_t n) {                                           \
+        return name##_new_value(m, n, (type)1);                                                             \
+    }                                                                                                       \
+                                                                                                            \
+    static inline name##_t *name##_new_values(size_t m, size_t n, type *values) {                           \
+        name##_t *matrix = name##_new(m, n);                                                                \
+        memcpy(matrix->values, values, m * n * sizeof(type));                                               \
+        return matrix;                                                                                      \
+    }                                                                                                       \
+                                                                                                            \
+    static inline void name##_div(name##_t *self, type value) {                                             \
+        array_type##_div(self->values, value, self->m * self->n);                                           \
+    }                                                                                                       \
+                                                                                                            \
+    static inline bool name##_div_matrix(name##_t *self, name##_t *other) {                                 \
+        if (self->m != other->m || self->n != other->n) return false;                                       \
+        array_type##_div_array(self->values, other->values, self->m * self->n);                             \
+        return true;                                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    static inline bool name##_div_matrix_times_scalar(name##_t *self, name##_t *other, type v) {            \
+        if (self->m != other->m || self->n != other->n) return false;                                       \
+        array_type##_div_array_times_scalar(self->values, other->values, v, self->m * self->n);             \
+        return true;                                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    static inline void name##_mul(name##_t *self, type value) {                                             \
+        array_type##_mul(self->values, value, self->m * self->n);                                           \
+    }                                                                                                       \
+                                                                                                            \
+    static inline bool name##_mul_matrix(name##_t *self, name##_t *other) {                                 \
+        if (self->m != other->m || self->n != other->n) return false;                                       \
+        array_type##_mul_array(self->values, other->values, self->m * self->n);                             \
+        return true;                                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    static inline bool name##_mul_matrix_times_scalar(name##_t *self, name##_t *other, type v) {            \
+        if (self->m != other->m || self->n != other->n) return false;                                       \
+        array_type##_mul_array_times_scalar(self->values, other->values, v, self->m * self->n);             \
+        return true;                                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    static inline void name##_add(name##_t *self, type value) {                                             \
+        array_type##_add(self->values, self->m * self->n, value);                                           \
+    }                                                                                                       \
+                                                                                                            \
+                                                                                                            \
+    static inline bool name##_add_matrix(name##_t *self, name##_t *other) {                                 \
+        if (self->m != other->m || self->n != other->n) return false;                                       \
+        array_type##_add_array(self->values, other->values, self->m * self->n);                             \
+        return true;                                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    static inline bool name##_add_matrix_times_scalar(name##_t *self, name##_t *other, type v) {            \
+        if (self->m != other->m || self->n != other->n) return false;                                       \
+        array_type##_add_array_times_scalar(self->values, other->values, v, self->m * self->n);             \
+        return true;                                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    static inline void name##_sub(name##_t *self, type value) {                                             \
+        array_type##_sub(self->values, value, self->m * self->n);                                           \
+    }                                                                                                       \
+                                                                                                            \
+    static inline bool name##_sub_matrix(name##_t *self, name##_t *other) {                                 \
+        if (self->m != other->m || self->n != other->n) return false;                                       \
+        array_type##_sub_array(self->values, other->values, self->m * self->n);                             \
+        return true;                                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    static inline bool name##_sub_matrix_times_scalar(name##_t *self, name##_t *other, type v) {            \
+        if (self->m != other->m || self->n != other->n) return false;                                       \
+        array_type##_sub_array_times_scalar(self->values, other->values, v, self->m * self->n);             \
+        return true;                                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    static name##_t *name##_read(FILE *f) {                                                                 \
+        name##_t *mat = malloc(sizeof(name##_t));                                                           \
+        if (mat == NULL) return NULL;                                                                       \
+                                                                                                            \
+        mat->values = NULL;                                                                                 \
+                                                                                                            \
+        uint64_t m = 0;                                                                                     \
+        uint64_t n = 0;                                                                                     \
+                                                                                                            \
+        if (!file_read_uint64(f, &m) ||                                                                     \
+            !file_read_uint64(f, &n)) {                                                                     \
+            goto exit_##name##_allocated;                                                                   \
+        }                                                                                                   \
+                                                                                                            \
+        mat->m = (size_t)m;                                                                                 \
+        mat->n = (size_t)n;                                                                                 \
+                                                                                                            \
+        size_t len_data = mat->m * mat->n;                                                                  \
+                                                                                                            \
+        type *data = malloc(len_data * sizeof(type));                                                       \
+        if (data == NULL) {                                                                                 \
+            log_error("error in data malloc\n");                                                            \
+            goto exit_##name##_allocated;                                                                   \
+        }                                                                                                   \
+                                                                                                            \
+        if (!file_read_##array_type(f, data, len_data)) {                                                   \
+            free(data);                                                                                     \
+            goto exit_##name##_allocated;                                                                   \
+        }                                                                                                   \
+                                                                                                            \
+        mat->values = data;                                                                                 \
+                                                                                                            \
+        return mat;                                                                                         \
+                                                                                                            \
+    exit_##name##_allocated:                                                                                \
+        name##_destroy(mat);                                                                                \
+        return NULL;                                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    static bool name##_write(name##_t *self, FILE *f) {                                                     \
+        if (self == NULL || self->values == NULL) {                                                         \
+            return false;                                                                                   \
+        }                                                                                                   \
+                                                                                                            \
+        if (!file_write_uint64(f, (uint64_t)self->m) ||                                                     \
+            !file_write_uint64(f, (uint64_t)self->n)) {                                                     \
+            return false;                                                                                   \
+        }                                                                                                   \
+                                                                                                            \
+        uint64_t len_data = (uint64_t)self->m * (uint64_t)self->n;                                          \
+                                                                                                            \
+        for (uint64_t i = 0; i < len_data; i++) {                                                           \
+            if (!file_write_##type_name(f, self->values[i])) {                                              \
+                return false;                                                                               \
+            }                                                                                               \
+        }                                                                                                   \
+                                                                                                            \
+        return true;                                                                                        \
+    }

-bool matrix_resize(matrix_t *self, size_t m, size_t n);

-matrix_t *matrix_new_copy(matrix_t *self);
-bool matrix_copy(matrix_t *self, matrix_t *other);
+#define MATRIX_INIT_FLOAT_BASE(name, type, type_name, array_type)                       \
+    MATRIX_INIT(name, type, type_name, array_type)                                      \
+                                                                                        \
+    static inline void name##_log(name##_t *self) {                                     \
+        array_type##_log(self->values, self->m * self->n);                              \
+    }                                                                                   \
+                                                                                        \
+    static inline void name##_exp(name##_t *self) {                                     \
+        array_type##_exp(self->values, self->m * self->n);                              \
+    }                                                                                   \
+                                                                                        \
+    static inline void name##_dot_vector(name##_t *self, type *vec, type *result) {     \
+        type *values = self->values;                                                    \
+        size_t m = self->m;                                                             \
+        size_t n = self->n;                                                             \
+        for (size_t i = 0; i < m; i++) {                                                \
+            for (size_t j = 0; j < n; j++) {                                            \
+                result[i] += values[n * i + j] * vec[j];                                \
+            }                                                                           \
+        }                                                                               \
+    }

-void matrix_init_values(matrix_t *self, double *values);
-void matrix_set(matrix_t *self, double value);
-void matrix_zero(matrix_t *self);
-void matrix_set_row(matrix_t *self, size_t index, double *row);
-void matrix_set_scalar(matrix_t *self, size_t row_index, size_t col_index, double value);
-void matrix_add_scalar(matrix_t *self, size_t row_index, size_t col_index, double value);
-void matrix_sub_scalar(matrix_t *self, size_t row_index, size_t col_index, double value);
-void matrix_mul_scalar(matrix_t *self, size_t row_index, size_t col_index, double value);
-void matrix_div_scalar(matrix_t *self, size_t row_index, size_t col_index, double value);

-double matrix_get(matrix_t *self, size_t row_index, size_t col_index);
-double *matrix_get_row(matrix_t *self, size_t row_index);
+#ifdef HAVE_BLAS
+#define MATRIX_INIT_FLOAT(name, type, type_name, array_type, blas_prefix)                                   \
+    MATRIX_INIT_FLOAT_BASE(name, type, type_name, array_type)                                               \
+                                                                                                            \
+    static inline bool name##_dot_matrix(name##_t *m1, name##_t *m2, name##_t *result) {                    \
+        if (m1->n != m2->m || m1->m != result->m || m2->n != result->n) {                                   \
+            return false;                                                                                   \
+        }                                                                                                   \
+                                                                                                            \
+        log_debug("doing CBLAS\n");                                                                         \
+        cblas_##blas_prefix##gemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,                                \
+                    m1->m, m2->n, m1->n, 1.0,                                                               \
+                    m1->values, m1->n,                                                                      \
+                    m2->values, m2->n, 0.0,                                                                 \
+                    result->values, result->n                                                               \
+                    );                                                                                      \
+                                                                                                            \
+        return true;                                                                                        \
+    }

-void matrix_add(matrix_t *self, double value);
-bool matrix_add_matrix(matrix_t *self, matrix_t *other);
-bool matrix_add_matrix_times_scalar(matrix_t *self, matrix_t *other, double v);
-void matrix_sub(matrix_t *self, double value);
-bool matrix_sub_matrix(matrix_t *self, matrix_t *other);
-bool matrix_sub_matrix_times_scalar(matrix_t *self, matrix_t *other, double v);
-void matrix_mul(matrix_t *self, double value);
-bool matrix_mul_matrix(matrix_t *self, matrix_t *other);
-bool matrix_mul_matrix_times_scalar(matrix_t *self, matrix_t *other, double v);
-void matrix_div(matrix_t *self, double value);
-bool matrix_div_matrix(matrix_t *self, matrix_t *other);
-bool matrix_div_matrix_times_scalar(matrix_t *self, matrix_t *other, double v);
+#else
+#define MATRIX_INIT_FLOAT(name, type, type_name, array_type, blas_prefix)                                       \
+    MATRIX_INIT_FLOAT_BASE(name, type, type_name, array_type)                                                   \
+                                                                                                                \
+    static inline bool name##_dot_matrix(name##_t *m1, name##_t *m2, name##_t *result) {                        \
+        if (m1->n != m2->m || m1->m != result->m || m2->n != result->n) {                                       \
+            return false;                                                                                       \
+        }                                                                                                       \
+                                                                                                                \
+        size_t m1_rows = m1->m;                                                                                 \
+        size_t m1_cols = m1->n;                                                                                 \
+        size_t m2_rows = m2->m;                                                                                 \
+        size_t m2_cols = m2->n;                                                                                 \
+                                                                                                                \
+        type *m1_values = m1->values;                                                                           \
+        type *m2_values = m2->values;                                                                           \
+        type *result_values = result->values;                                                                   \
+                                                                                                                \
+        for (size_t i = 0; i < m1_rows; i++) {                                                                  \
+            for (size_t j = 0; j < m2_cols; j++) {                                                              \
+                size_t result_index = m2_cols * i + j;                                                          \
+                result_values[result_index] = 0.0;                                                              \
+                for (size_t k = 0; k < m2_rows; k++) {                                                          \
+                    result_values[result_index] += m1_values[m1_cols * i + k] * m2_values[m2_cols * k + j];     \
+                }                                                                                               \
+            }                                                                                                   \
+        }                                                                                                       \
+                                                                                                                \
+        return true;                                                                                            \
+    }
+#endif

-void matrix_log(matrix_t *self);
-void matrix_exp(matrix_t *self);
+MATRIX_INIT(uint32_matrix, uint32_t, uint32, uint32_array)

-void matrix_dot_vector(matrix_t *self, double *vec, double *result);
-bool matrix_dot_matrix(matrix_t *m1, matrix_t *m2, matrix_t *result);
+MATRIX_INIT_FLOAT(float_matrix, float, float, float_array,s)
+MATRIX_INIT_FLOAT(double_matrix, double, double, double_array,d)

-matrix_t *matrix_read(FILE *f);
-bool matrix_write(matrix_t *self, FILE *f);
-
-void matrix_destroy(matrix_t *self);

 #endif
--- a/src/sparse_matrix.c
+++ b/src/sparse_matrix.c
@@ -231,7 +231,7 @@ int sparse_matrix_sum_rows(sparse_matrix_t *self, uint32_t *rows, size_t m, doub



-int sparse_matrix_dot_dense(sparse_matrix_t *self, matrix_t *matrix, matrix_t *result) {
+int sparse_matrix_dot_dense(sparse_matrix_t *self, double_matrix_t *matrix, double_matrix_t *result) {
    if (self->n != matrix->m || self->m != result->m || matrix->n != result->n) {
        return -1;
    }
@@ -267,7 +267,7 @@ int sparse_matrix_dot_dense(sparse_matrix_t *self, matrix_t *matrix, matrix_t *r



-int sparse_matrix_dot_sparse(sparse_matrix_t *self, sparse_matrix_t *other, matrix_t *result) {
+int sparse_matrix_dot_sparse(sparse_matrix_t *self, sparse_matrix_t *other, double_matrix_t *result) {
    if (self->n != other->m || self->m != result->m || other->n != result->n) {
        return -1;
    }
--- a/src/sparse_matrix.h
+++ b/src/sparse_matrix.h
@@ -78,8 +78,8 @@ int sparse_matrix_rows_sum_cols(sparse_matrix_t *self, uint32_t *rows, size_t m,
 int sparse_matrix_sum_all_rows(sparse_matrix_t *self, double *result, size_t n);
 int sparse_matrix_sum_rows(sparse_matrix_t *self, uint32_t *rows, size_t m, double *result, size_t n);

-int sparse_matrix_dot_dense(sparse_matrix_t *self, matrix_t *matrix, matrix_t *result);
-int sparse_matrix_dot_sparse(sparse_matrix_t *self, sparse_matrix_t *other, matrix_t *result);
+int sparse_matrix_dot_dense(sparse_matrix_t *self, double_matrix_t *matrix, double_matrix_t *result);
+int sparse_matrix_dot_sparse(sparse_matrix_t *self, sparse_matrix_t *other, double_matrix_t *result);

 bool sparse_matrix_write(sparse_matrix_t *self, FILE *f);
 sparse_matrix_t *sparse_matrix_read(FILE *f);
--- a/src/sparse_matrix_utils.c
+++ b/src/sparse_matrix_utils.c
@@ -38,7 +38,9 @@ bool sparse_matrix_add_unique_columns(sparse_matrix_t *matrix, khash_t(int_set)
    }

    uint32_array_clear(array);
-    uint32_array_resize(array, kh_size(unique_columns));
+    if (!uint32_array_resize(array, kh_size(unique_columns))) {
+        return false;
+    }

    khint_t k;

--- a/src/sparse_matrix_utils.h
+++ b/src/sparse_matrix_utils.h
@@ -6,7 +6,7 @@
 #include "sparse_matrix.h"
 #include "matrix.h"

-sparse_matrix_t *sparse_matrix_new_from_matrix(matrix_t *matrix);
+sparse_matrix_t *sparse_matrix_new_from_matrix(double_matrix_t *matrix);
 uint32_array *sparse_matrix_unique_columns(sparse_matrix_t *matrix);
 bool sparse_matrix_add_unique_columns(sparse_matrix_t *matrix, khash_t(int_set) *unique_columns, uint32_array *array);

--- a/src/stochastic_gradient_descent.c
+++ b/src/stochastic_gradient_descent.c
@@ -1,6 +1,6 @@
 #include "stochastic_gradient_descent.h"

-bool stochastic_gradient_descent(matrix_t *theta, matrix_t *gradient, double gamma) {
+bool stochastic_gradient_descent(double_matrix_t *theta, double_matrix_t *gradient, double gamma) {
    if (gradient->m != theta->m || gradient->n != theta->n) {
        return false;
    }
@@ -8,10 +8,10 @@ bool stochastic_gradient_descent(matrix_t *theta, matrix_t *gradient, double gam
    size_t m = gradient->m;
    size_t n = gradient->n;

-    return matrix_sub_matrix_times_scalar(theta, gradient, gamma);
+    return double_matrix_sub_matrix_times_scalar(theta, gradient, gamma);
 }

-bool stochastic_gradient_descent_sparse(matrix_t *theta, matrix_t *gradient, uint32_array *update_indices, double gamma) {
+bool stochastic_gradient_descent_sparse(double_matrix_t *theta, double_matrix_t *gradient, uint32_array *update_indices, double gamma) {
    if (gradient->m != theta->m || gradient->n != theta->n) {
        return false;
    }
@@ -90,7 +90,7 @@ static inline void regularize_row(double *theta_i, size_t n, double lambda, uint
    double_array_mul(theta_i, update, n);
 }

-bool stochastic_gradient_descent_regularize_weights(matrix_t *theta, uint32_array *update_indices, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0) {
+bool stochastic_gradient_descent_regularize_weights(double_matrix_t *theta, uint32_array *update_indices, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0) {
    if (lambda > 0.0) {        
        uint32_t *updates = last_updated->a;

@@ -101,7 +101,7 @@ bool stochastic_gradient_descent_regularize_weights(matrix_t *theta, uint32_arra

        for (size_t i = 0; i < batch_rows; i++) {
            uint32_t row = rows[i];
-            double *theta_i = matrix_get_row(theta, row);
+            double *theta_i = double_matrix_get_row(theta, row);
            uint32_t last_updated = updates[row];

            double gamma_t  = stochastic_gradient_descent_gamma_t(gamma_0, lambda, t - last_updated);
@@ -114,14 +114,14 @@ bool stochastic_gradient_descent_regularize_weights(matrix_t *theta, uint32_arra
    return true;
 }

-inline bool stochastic_gradient_descent_finalize_weights(matrix_t *theta, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0) {
+inline bool stochastic_gradient_descent_finalize_weights(double_matrix_t *theta, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0) {
    if (lambda > 0.0) {
        uint32_t *updates = last_updated->a;
        size_t m = theta->m;
        size_t n = theta->n;

        for (size_t i = 0; i < m; i++) {
-            double *theta_i = matrix_get_row(theta, i);
+            double *theta_i = double_matrix_get_row(theta, i);
            uint32_t last_updated = updates[i];

            double gamma_t  = stochastic_gradient_descent_gamma_t(gamma_0, lambda, t - last_updated);
--- a/src/stochastic_gradient_descent.h
+++ b/src/stochastic_gradient_descent.h
@@ -16,10 +16,10 @@ gamma_t = gamma_0(1 + gamma_0 * lambda * t)^-1

 #include "matrix.h"

-bool stochastic_gradient_descent(matrix_t *theta, matrix_t *gradient, double gamma);
-bool stochastic_gradient_descent_sparse(matrix_t *theta, matrix_t *gradient, uint32_array *update_indices, double gamma);
-bool stochastic_gradient_descent_regularize_weights(matrix_t *theta, uint32_array *update_indices, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0);
-bool stochastic_gradient_descent_finalize_weights(matrix_t *theta, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0);
+bool stochastic_gradient_descent(double_matrix_t *theta, double_matrix_t *gradient, double gamma);
+bool stochastic_gradient_descent_sparse(double_matrix_t *theta, double_matrix_t *gradient, uint32_array *update_indices, double gamma);
+bool stochastic_gradient_descent_regularize_weights(double_matrix_t *theta, uint32_array *update_indices, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0);
+bool stochastic_gradient_descent_finalize_weights(double_matrix_t *theta, uint32_array *last_updated, uint32_t t, double lambda, double gamma_0);
 double stochastic_gradient_descent_gamma_t(double gamma_0, double lambda, uint32_t t);