From cddffdb65f9e840b5a59afdbde1a66a458a4696b Mon Sep 17 00:00:00 2001
From: Al <albarrentine@gmail.com>
Date: Mon, 7 Sep 2015 00:34:00 -0700
Subject: [PATCH] [math] Adding column and row sums to sparse matrices

---
 src/sparse_matrix.c | 83 +++++++++++++++++++++++++++++++++++++++++++++
 src/sparse_matrix.h | 48 +++++++++++++++++++++++---
 2 files changed, 126 insertions(+), 5 deletions(-)

diff --git a/src/sparse_matrix.c b/src/sparse_matrix.c
index 1f364b90..292b625a 100644
--- a/src/sparse_matrix.c
+++ b/src/sparse_matrix.c
@@ -106,6 +106,8 @@ void sparse_matrix_sort_indices(sparse_matrix_t *self) {
 
 
 inline int sparse_matrix_dot_vector(sparse_matrix_t *self, double *vec, size_t n, double *result) {
+    if (n != self->m) return -1;
+
     uint32_t row, row_start, row_len;
     double val;
     double *data = self->data->a;
@@ -121,6 +123,8 @@ inline int sparse_matrix_dot_vector(sparse_matrix_t *self, double *vec, size_t n
 }
 
 int sparse_matrix_rows_dot_vector(sparse_matrix_t *self, uint32_t *rows, size_t m, double *vec, size_t n, double *result) {
+    if (m != n) return -1;
+
     uint32_t *indptr = self->indptr->a;
     uint32_t *indices = self->indices->a;
     double *data = self->data->a;
@@ -141,6 +145,85 @@ int sparse_matrix_rows_dot_vector(sparse_matrix_t *self, uint32_t *rows, size_t
     return 0;
 }
 
+int sparse_matrix_sum_cols(sparse_matrix_t *self, double *result, size_t n) {
+    if (n != self->m) return -1;
+
+    uint32_t row, row_start, row_len;
+    double val;
+    double *data = self->data->a;
+
+    sparse_matrix_foreach_row(self, row, row_start, row_len, {
+        double sum = result[row];
+        for (uint32_t col = row_start; col < row_start + row_len; col++) {
+            sum += data[col];
+        }
+        result[row] = sum;
+    })
+    return 0;
+
+}
+
+// No need to allocate actual vector for values, sum rather than a dot product
+int sparse_matrix_rows_sum_cols(sparse_matrix_t *self, uint32_t *rows, size_t m, double *result, size_t n) {
+    if (m != n) return -1;
+
+    uint32_t *indptr = self->indptr->a;
+    uint32_t *indices = self->indices->a;
+    double *data = self->data->a;
+
+    for (int i = 0; i < m; i++) {
+        uint32_t row = rows[i];
+
+        double sum = result[i];
+        if (row >= self->m) return -1;
+
+        for (int j = indptr[row]; j < indptr[row+1]; j++) {
+            sum += data[j];
+        }
+
+        result[i] = sum;
+    }
+    return 0;
+}
+
+
+int sparse_matrix_sum_all_rows(sparse_matrix_t *self, double *result, size_t n) {
+    if (n != self->n) return -1;
+
+    uint32_t row, row_start, row_len;
+    double val;
+    double *data = self->data->a;
+
+    sparse_matrix_foreach_row(self, row, row_start, row_len, {
+        for (uint32_t col = row_start; col < row_start + row_len; col++) {
+            result[col] += data[col];
+        }
+    })
+    return 0;
+
+}
+
+int sparse_matrix_sum_rows(sparse_matrix_t *self, uint32_t *rows, size_t m, double *result, size_t n);
+    if (n != self->n) return -1;
+
+    uint32_t *indptr = self->indptr->a;
+    uint32_t *indices = self->indices->a;
+    double *data = self->data->a;
+
+    for (int i = 0; i < m; i++) {
+        uint32_t row = rows[i];
+
+        if (row >= self->m) return -1;
+
+        for (int j = indptr[row]; j < indptr[row+1]; j++) {
+            result[j] += data[j];
+        }
+    }
+    return 0;
+}
+
+
+
 int sparse_matrix_dot_dense(sparse_matrix_t *self, matrix_t *matrix, matrix_t *result) {
     if (self->n != matrix->m || self->m != result->m || matrix->n != result->n) {
         return -1;
diff --git a/src/sparse_matrix.h b/src/sparse_matrix.h
index ce5fc28e..36ae07a7 100644
--- a/src/sparse_matrix.h
+++ b/src/sparse_matrix.h
@@ -1,3 +1,39 @@
+/*
+sparse_matrix.h
+---------------
+
+Double-valued dynamic compressed sparse row (CSR) sparse matrix.
+
+A sparse matrix is a matrix where an overwhelming number of the
+entries are 0. Thus we get significant space/time advantages
+from only storing the non-zero values.
+
+These types of matrices arise often when representing graphs
+and term-document matrices in text collections.
+
+The compressed sparse row format stores the following 3x4
+dense matrix:
+
+{   1.0, 0.0, 0.0, 0.0,
+    0.0, 1.0, 0.0, 0.0,
+    0.0, 0.0, 0.0, 2.0  }
+
+with the following 3 arrays:
+
+indptr = { 0, 1, 2, 3 }
+indices = { 0, 1, 3 }
+data = { 1.0, 1.0, 2.0 }
+
+For a given row i, the indices indptr[i] through indptr[i+1]
+denotes the number of nonzero columns in row i. The column
+indices can be found at that contiguous location in indices
+and the data values can be found at the same location in the
+data array.
+
+Sparse matrix row iteration, row indexing, scalar arithmetic
+and dot products with vectors and dense matrices are efficient.
+*/
+
 #ifndef SPARSE_MATRIX_H
 #define SPARSE_MATRIX_H
 
@@ -10,11 +46,6 @@
 #include "matrix.h"
 #include "vector.h"
 
-// Simple, partial sparse matrix implementation
-
-
-// Double-valued dynamic compressed sparse row (CSR) sparse matrix
-
 typedef struct {
     uint32_t m;
     uint32_t n;
@@ -39,6 +70,13 @@ void sparse_matrix_sort_indices(sparse_matrix_t *self);
 int sparse_matrix_dot_vector(sparse_matrix_t *self, double *vec, size_t n, double *result);
 int sparse_matrix_rows_dot_vector(sparse_matrix_t *self, uint32_t *rows, size_t m, double *vec, size_t n, double *result);
 
+// No need to allocate vector. Equivalent to doing a dot product with a vector of all ones
+int sparse_matrix_sum_cols(sparse_matrix_t *self, double *result, size_t n);
+int sparse_matrix_rows_sum_cols(sparse_matrix_t *self, uint32_t *rows, size_t m, double *result, size_t n);
+
+int sparse_matrix_sum_all_rows(sparse_matrix_t *self, double *result, size_t n);
+int sparse_matrix_sum_rows(sparse_matrix_t *self, uint32_t *rows, size_t m, double *result, size_t n);
+
 int sparse_matrix_dot_dense(sparse_matrix_t *self, matrix_t *matrix, matrix_t *result);
 
 bool sparse_matrix_write(sparse_matrix_t *self, FILE *f);