From a090a22bca429b7fd75e9a418da40532fd6b54cc Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 31 Aug 2015 15:52:28 -0400 Subject: [PATCH] [math] Adding compressed sparse row (CSR) format sparse matrix, designed for dynamic construction, just the methods needed for logistic regression for now i.e. no sparse dot products --- src/sparse_matrix.c | 177 ++++++++++++++++++++++++++++++++++++++++++++ src/sparse_matrix.h | 71 ++++++++++++++++++ 2 files changed, 248 insertions(+) create mode 100644 src/sparse_matrix.c create mode 100644 src/sparse_matrix.h diff --git a/src/sparse_matrix.c b/src/sparse_matrix.c new file mode 100644 index 00000000..9b64fa0d --- /dev/null +++ b/src/sparse_matrix.c @@ -0,0 +1,177 @@ +#include "sparse_matrix.h" +#include "klib/ksort.h" + +sparse_matrix_t *sparse_matrix_new(void) { + sparse_matrix_t *matrix = malloc(sizeof(sparse_matrix_t)); + if (matrix == NULL) return NULL; + matrix->m = 0; + matrix->n = 0; + matrix->indptr = uint32_array_new_size(1); + if (matrix->indptr == NULL) { + goto exit_matrix_created; + } + uint32_array_push(matrix->indptr, 0); + + matrix->indices = uint32_array_new(); + if (matrix->indices == NULL) { + goto exit_matrix_created; + } + + matrix->data = double_array_new(); + if (matrix->data == NULL) { + goto exit_matrix_created; + } + + return matrix; + +exit_matrix_created: + sparse_matrix_destroy(matrix); + return NULL; +} + +void sparse_matrix_destroy(sparse_matrix_t *self) { + if (self == NULL) return; + + if (self->indptr != NULL) { + uint32_array_destroy(self->indptr); + } + + if (self->indices != NULL) { + uint32_array_destroy(self->indices); + } + + if (self->data != NULL) { + double_array_destroy(self->data); + } + + free(self); +} + +inline void sparse_matrix_clear(sparse_matrix_t *self) { + uint32_array_clear(self->indptr); + uint32_array_push(self->indptr, 0); + + uint32_array_clear(self->indices); + double_array_clear(self->data); +} + +inline void sparse_matrix_finalize_row(sparse_matrix_t *self) { + uint32_array_push(self->indptr, (uint32_t)self->indices->n); + self->m++; +} + + +inline void sparse_matrix_append(sparse_matrix_t *self, uint32_t col, double val) { + uint32_array_push(self->indices, col); + double_array_push(self->data, val); + if (col >= self->n) self->n = col + 1; +} + +inline void sparse_matrix_append_row(sparse_matrix_t *self, uint32_t *col, double *val, size_t n) { + for (int i = 0; i < n; i++) { + sparse_matrix_append(self, col[i], val[i]); + } + sparse_matrix_finalize_row(self); +} + +typedef struct column_value { + uint32_t col; + double val; +} column_value_t; + +VECTOR_INIT(column_value_array, column_value_t) + +#define ks_lt_column_value(a, b) ((a).col < (b).col) + +KSORT_INIT(column_value_array, column_value_t, ks_lt_column_value) + +void sparse_matrix_sort_indices(sparse_matrix_t *self) { + uint32_t row, row_start, row_len, i; + + column_value_array *col_vals = column_value_array_new(); + + sparse_matrix_foreach_row(self, row, row_start, row_len, { + for (i = row_start; i < row_start + row_len; i++) { + column_value_array_push(col_vals, (column_value_t){self->indices->a[i], self->data->a[i]}); + } + ks_introsort(column_value_array, col_vals->n, col_vals->a); + + for (i = 0; i < col_vals->n; i++) { + column_value_t col_val = col_vals->a[i]; + self->indices->a[row_start + i] = col_val.col; + self->data->a[row_start + i] = col_val.val; + } + }) + +} + + +inline int sparse_matrix_dot_vector(sparse_matrix_t *self, double *vec, size_t n, double *result) { + uint32_t row, row_start, row_len; + double val; + double *data = self->data->a; + + sparse_matrix_foreach_row(self, row, row_start, row_len, { + double sum = result[row]; + for (uint32_t col = row_start; col < row_start + row_len; col++) { + sum += data[col] * vec[col]; + } + result[row] = sum; + }) + return 0; +} + +int sparse_matrix_rows_dot_vector(sparse_matrix_t *self, uint32_t *rows, size_t m, double *vec, size_t n, double *result) { + uint32_t *indptr = self->indptr->a; + uint32_t *indices = self->indices->a; + double *data = self->data->a; + + for (int i = 0; i < m; i++) { + uint32_t row = rows[i]; + + double sum = result[i]; + if (row >= self->m) return -1; + + for (int j = indptr[row]; j < indptr[row+1]; j++) { + sum += data[j] * vec[indices[j]]; + } + + result[i] = sum; + + } + return 0; +} + +int sparse_matrix_dot_dense(sparse_matrix_t *self, matrix_t *matrix, matrix_t *result) { + if (self->n != matrix->m || self->m != result->m || matrix->n != result->n) { + return -1; + } + + uint32_t *indptr = self->indptr->a; + uint32_t *indices = self->indices->a; + double *data = self->data->a; + + size_t m1_rows = self->m; + size_t m1_cols = self->n; + + size_t m2_rows = matrix->m; + size_t m2_cols = matrix->n; + + double *dense_values = matrix->values; + double *result_values = result->values; + + uint32_t row, row_start, row_len; + + sparse_matrix_foreach_row(self, row, row_start, row_len, { + for (uint32_t j = 0; j < m2_cols; j++) { + size_t result_index = row * m2_cols + j; + double sum = result_values[result_index]; + for (uint32_t col = row_start; col < row_start + row_len; col++) { + sum += data[col] * dense_values[m2_cols * indices[col] + j]; + } + result_values[result_index] = sum; + } + }) + + return 0; +} \ No newline at end of file diff --git a/src/sparse_matrix.h b/src/sparse_matrix.h new file mode 100644 index 00000000..14676ebb --- /dev/null +++ b/src/sparse_matrix.h @@ -0,0 +1,71 @@ +#ifndef SPARSE_MATRIX_H +#define SPARSE_MATRIX_H + +#include +#include + +#include "collections.h" +#include "matrix.h" +#include "vector.h" + +// Simple, partial sparse matrix implementation + + +// Double-valued dynamic compressed sparse row (CSR) sparse matrix + +typedef struct { + uint32_t m; + uint32_t n; + uint32_array *indptr; + uint32_array *indices; + double_array *data; +} sparse_matrix_t; + + +sparse_matrix_t *sparse_matrix_new(void); +void sparse_matrix_destroy(sparse_matrix_t *self); + +void sparse_matrix_clear(sparse_matrix_t *self); + +void sparse_matrix_append(sparse_matrix_t *self, uint32_t col, double val); +void sparse_matrix_append_row(sparse_matrix_t *self, uint32_t *col, double *val, size_t n); + +void sparse_matrix_finalize_row(sparse_matrix_t *self); + +void sparse_matrix_sort_indices(sparse_matrix_t *self); + +int sparse_matrix_dot_vector(sparse_matrix_t *self, double *vec, size_t n, double *result); +int sparse_matrix_rows_dot_vector(sparse_matrix_t *self, uint32_t *rows, size_t m, double *vec, size_t n, double *result); + +int sparse_matrix_dot_dense(sparse_matrix_t *self, matrix_t *matrix, matrix_t *result); + +#define sparse_matrix_foreach_row(sp, row_var, index_var, length_var, code) { \ + uint32_t _row_start = 0, _row_end = 0; \ + uint32_t *_indptr = sp->indptr->a; \ + size_t _m = sp->m; \ + \ + for (uint32_t _i = 0; _i < _m; _i++) { \ + (row_var) = _i; \ + _row_start = _indptr[_i]; \ + _row_end = _indptr[_i + 1]; \ + (index_var) = _row_start; \ + (length_var) = _row_end - _row_start; \ + code; \ + } \ +} + +#define sparse_matrix_foreach(sp, row_var, col_var, data_var, code) { \ + uint32_t *_indices = sp->indices->a; \ + double *_data = sp->data->a; \ + uint32_t _index, _length; \ + sparse_matrix_foreach_row(sp, row_var, _index, _length, { \ + for (uint32_t _j = _index; _j < _index + _length; _j++) { \ + (col_var) = _indices[_j]; \ + (data_var) = _data[_j]; \ + code; \ + } \ + }) \ +} + + +#endif