From cddffdb65f9e840b5a59afdbde1a66a458a4696b Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 7 Sep 2015 00:34:00 -0700 Subject: [PATCH] [math] Adding column and row sums to sparse matrices --- src/sparse_matrix.c | 83 +++++++++++++++++++++++++++++++++++++++++++++ src/sparse_matrix.h | 48 +++++++++++++++++++++++--- 2 files changed, 126 insertions(+), 5 deletions(-) diff --git a/src/sparse_matrix.c b/src/sparse_matrix.c index 1f364b90..292b625a 100644 --- a/src/sparse_matrix.c +++ b/src/sparse_matrix.c @@ -106,6 +106,8 @@ void sparse_matrix_sort_indices(sparse_matrix_t *self) { inline int sparse_matrix_dot_vector(sparse_matrix_t *self, double *vec, size_t n, double *result) { + if (n != self->m) return -1; + uint32_t row, row_start, row_len; double val; double *data = self->data->a; @@ -121,6 +123,8 @@ inline int sparse_matrix_dot_vector(sparse_matrix_t *self, double *vec, size_t n } int sparse_matrix_rows_dot_vector(sparse_matrix_t *self, uint32_t *rows, size_t m, double *vec, size_t n, double *result) { + if (m != n) return -1; + uint32_t *indptr = self->indptr->a; uint32_t *indices = self->indices->a; double *data = self->data->a; @@ -141,6 +145,85 @@ int sparse_matrix_rows_dot_vector(sparse_matrix_t *self, uint32_t *rows, size_t return 0; } +int sparse_matrix_sum_cols(sparse_matrix_t *self, double *result, size_t n) { + if (n != self->m) return -1; + + uint32_t row, row_start, row_len; + double val; + double *data = self->data->a; + + sparse_matrix_foreach_row(self, row, row_start, row_len, { + double sum = result[row]; + for (uint32_t col = row_start; col < row_start + row_len; col++) { + sum += data[col]; + } + result[row] = sum; + }) + return 0; + +} + +// No need to allocate actual vector for values, sum rather than a dot product +int sparse_matrix_rows_sum_cols(sparse_matrix_t *self, uint32_t *rows, size_t m, double *result, size_t n) { + if (m != n) return -1; + + uint32_t *indptr = self->indptr->a; + uint32_t *indices = self->indices->a; + double *data = self->data->a; + + for (int i = 0; i < m; i++) { + uint32_t row = rows[i]; + + double sum = result[i]; + if (row >= self->m) return -1; + + for (int j = indptr[row]; j < indptr[row+1]; j++) { + sum += data[j]; + } + + result[i] = sum; + } + return 0; +} + + +int sparse_matrix_sum_all_rows(sparse_matrix_t *self, double *result, size_t n) { + if (n != self->n) return -1; + + uint32_t row, row_start, row_len; + double val; + double *data = self->data->a; + + sparse_matrix_foreach_row(self, row, row_start, row_len, { + for (uint32_t col = row_start; col < row_start + row_len; col++) { + result[col] += data[col]; + } + }) + return 0; + +} + +int sparse_matrix_sum_rows(sparse_matrix_t *self, uint32_t *rows, size_t m, double *result, size_t n); + if (n != self->n) return -1; + + uint32_t *indptr = self->indptr->a; + uint32_t *indices = self->indices->a; + double *data = self->data->a; + + for (int i = 0; i < m; i++) { + uint32_t row = rows[i]; + + if (row >= self->m) return -1; + + for (int j = indptr[row]; j < indptr[row+1]; j++) { + result[j] += data[j]; + } + } + return 0; +} + + + int sparse_matrix_dot_dense(sparse_matrix_t *self, matrix_t *matrix, matrix_t *result) { if (self->n != matrix->m || self->m != result->m || matrix->n != result->n) { return -1; diff --git a/src/sparse_matrix.h b/src/sparse_matrix.h index ce5fc28e..36ae07a7 100644 --- a/src/sparse_matrix.h +++ b/src/sparse_matrix.h @@ -1,3 +1,39 @@ +/* +sparse_matrix.h +--------------- + +Double-valued dynamic compressed sparse row (CSR) sparse matrix. + +A sparse matrix is a matrix where an overwhelming number of the +entries are 0. Thus we get significant space/time advantages +from only storing the non-zero values. + +These types of matrices arise often when representing graphs +and term-document matrices in text collections. + +The compressed sparse row format stores the following 3x4 +dense matrix: + +{ 1.0, 0.0, 0.0, 0.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 2.0 } + +with the following 3 arrays: + +indptr = { 0, 1, 2, 3 } +indices = { 0, 1, 3 } +data = { 1.0, 1.0, 2.0 } + +For a given row i, the indices indptr[i] through indptr[i+1] +denotes the number of nonzero columns in row i. The column +indices can be found at that contiguous location in indices +and the data values can be found at the same location in the +data array. + +Sparse matrix row iteration, row indexing, scalar arithmetic +and dot products with vectors and dense matrices are efficient. +*/ + #ifndef SPARSE_MATRIX_H #define SPARSE_MATRIX_H @@ -10,11 +46,6 @@ #include "matrix.h" #include "vector.h" -// Simple, partial sparse matrix implementation - - -// Double-valued dynamic compressed sparse row (CSR) sparse matrix - typedef struct { uint32_t m; uint32_t n; @@ -39,6 +70,13 @@ void sparse_matrix_sort_indices(sparse_matrix_t *self); int sparse_matrix_dot_vector(sparse_matrix_t *self, double *vec, size_t n, double *result); int sparse_matrix_rows_dot_vector(sparse_matrix_t *self, uint32_t *rows, size_t m, double *vec, size_t n, double *result); +// No need to allocate vector. Equivalent to doing a dot product with a vector of all ones +int sparse_matrix_sum_cols(sparse_matrix_t *self, double *result, size_t n); +int sparse_matrix_rows_sum_cols(sparse_matrix_t *self, uint32_t *rows, size_t m, double *result, size_t n); + +int sparse_matrix_sum_all_rows(sparse_matrix_t *self, double *result, size_t n); +int sparse_matrix_sum_rows(sparse_matrix_t *self, uint32_t *rows, size_t m, double *result, size_t n); + int sparse_matrix_dot_dense(sparse_matrix_t *self, matrix_t *matrix, matrix_t *result); bool sparse_matrix_write(sparse_matrix_t *self, FILE *f);