[math] Adding compressed sparse row (CSR) format sparse matrix, designed for dynamic construction, just the methods needed for logistic regression for now i.e. no sparse dot products
This commit is contained in:
177
src/sparse_matrix.c
Normal file
177
src/sparse_matrix.c
Normal file
@@ -0,0 +1,177 @@
|
||||
#include "sparse_matrix.h"
|
||||
#include "klib/ksort.h"
|
||||
|
||||
sparse_matrix_t *sparse_matrix_new(void) {
|
||||
sparse_matrix_t *matrix = malloc(sizeof(sparse_matrix_t));
|
||||
if (matrix == NULL) return NULL;
|
||||
matrix->m = 0;
|
||||
matrix->n = 0;
|
||||
matrix->indptr = uint32_array_new_size(1);
|
||||
if (matrix->indptr == NULL) {
|
||||
goto exit_matrix_created;
|
||||
}
|
||||
uint32_array_push(matrix->indptr, 0);
|
||||
|
||||
matrix->indices = uint32_array_new();
|
||||
if (matrix->indices == NULL) {
|
||||
goto exit_matrix_created;
|
||||
}
|
||||
|
||||
matrix->data = double_array_new();
|
||||
if (matrix->data == NULL) {
|
||||
goto exit_matrix_created;
|
||||
}
|
||||
|
||||
return matrix;
|
||||
|
||||
exit_matrix_created:
|
||||
sparse_matrix_destroy(matrix);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void sparse_matrix_destroy(sparse_matrix_t *self) {
|
||||
if (self == NULL) return;
|
||||
|
||||
if (self->indptr != NULL) {
|
||||
uint32_array_destroy(self->indptr);
|
||||
}
|
||||
|
||||
if (self->indices != NULL) {
|
||||
uint32_array_destroy(self->indices);
|
||||
}
|
||||
|
||||
if (self->data != NULL) {
|
||||
double_array_destroy(self->data);
|
||||
}
|
||||
|
||||
free(self);
|
||||
}
|
||||
|
||||
inline void sparse_matrix_clear(sparse_matrix_t *self) {
|
||||
uint32_array_clear(self->indptr);
|
||||
uint32_array_push(self->indptr, 0);
|
||||
|
||||
uint32_array_clear(self->indices);
|
||||
double_array_clear(self->data);
|
||||
}
|
||||
|
||||
inline void sparse_matrix_finalize_row(sparse_matrix_t *self) {
|
||||
uint32_array_push(self->indptr, (uint32_t)self->indices->n);
|
||||
self->m++;
|
||||
}
|
||||
|
||||
|
||||
inline void sparse_matrix_append(sparse_matrix_t *self, uint32_t col, double val) {
|
||||
uint32_array_push(self->indices, col);
|
||||
double_array_push(self->data, val);
|
||||
if (col >= self->n) self->n = col + 1;
|
||||
}
|
||||
|
||||
inline void sparse_matrix_append_row(sparse_matrix_t *self, uint32_t *col, double *val, size_t n) {
|
||||
for (int i = 0; i < n; i++) {
|
||||
sparse_matrix_append(self, col[i], val[i]);
|
||||
}
|
||||
sparse_matrix_finalize_row(self);
|
||||
}
|
||||
|
||||
typedef struct column_value {
|
||||
uint32_t col;
|
||||
double val;
|
||||
} column_value_t;
|
||||
|
||||
VECTOR_INIT(column_value_array, column_value_t)
|
||||
|
||||
#define ks_lt_column_value(a, b) ((a).col < (b).col)
|
||||
|
||||
KSORT_INIT(column_value_array, column_value_t, ks_lt_column_value)
|
||||
|
||||
void sparse_matrix_sort_indices(sparse_matrix_t *self) {
|
||||
uint32_t row, row_start, row_len, i;
|
||||
|
||||
column_value_array *col_vals = column_value_array_new();
|
||||
|
||||
sparse_matrix_foreach_row(self, row, row_start, row_len, {
|
||||
for (i = row_start; i < row_start + row_len; i++) {
|
||||
column_value_array_push(col_vals, (column_value_t){self->indices->a[i], self->data->a[i]});
|
||||
}
|
||||
ks_introsort(column_value_array, col_vals->n, col_vals->a);
|
||||
|
||||
for (i = 0; i < col_vals->n; i++) {
|
||||
column_value_t col_val = col_vals->a[i];
|
||||
self->indices->a[row_start + i] = col_val.col;
|
||||
self->data->a[row_start + i] = col_val.val;
|
||||
}
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
|
||||
inline int sparse_matrix_dot_vector(sparse_matrix_t *self, double *vec, size_t n, double *result) {
|
||||
uint32_t row, row_start, row_len;
|
||||
double val;
|
||||
double *data = self->data->a;
|
||||
|
||||
sparse_matrix_foreach_row(self, row, row_start, row_len, {
|
||||
double sum = result[row];
|
||||
for (uint32_t col = row_start; col < row_start + row_len; col++) {
|
||||
sum += data[col] * vec[col];
|
||||
}
|
||||
result[row] = sum;
|
||||
})
|
||||
return 0;
|
||||
}
|
||||
|
||||
int sparse_matrix_rows_dot_vector(sparse_matrix_t *self, uint32_t *rows, size_t m, double *vec, size_t n, double *result) {
|
||||
uint32_t *indptr = self->indptr->a;
|
||||
uint32_t *indices = self->indices->a;
|
||||
double *data = self->data->a;
|
||||
|
||||
for (int i = 0; i < m; i++) {
|
||||
uint32_t row = rows[i];
|
||||
|
||||
double sum = result[i];
|
||||
if (row >= self->m) return -1;
|
||||
|
||||
for (int j = indptr[row]; j < indptr[row+1]; j++) {
|
||||
sum += data[j] * vec[indices[j]];
|
||||
}
|
||||
|
||||
result[i] = sum;
|
||||
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int sparse_matrix_dot_dense(sparse_matrix_t *self, matrix_t *matrix, matrix_t *result) {
|
||||
if (self->n != matrix->m || self->m != result->m || matrix->n != result->n) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t *indptr = self->indptr->a;
|
||||
uint32_t *indices = self->indices->a;
|
||||
double *data = self->data->a;
|
||||
|
||||
size_t m1_rows = self->m;
|
||||
size_t m1_cols = self->n;
|
||||
|
||||
size_t m2_rows = matrix->m;
|
||||
size_t m2_cols = matrix->n;
|
||||
|
||||
double *dense_values = matrix->values;
|
||||
double *result_values = result->values;
|
||||
|
||||
uint32_t row, row_start, row_len;
|
||||
|
||||
sparse_matrix_foreach_row(self, row, row_start, row_len, {
|
||||
for (uint32_t j = 0; j < m2_cols; j++) {
|
||||
size_t result_index = row * m2_cols + j;
|
||||
double sum = result_values[result_index];
|
||||
for (uint32_t col = row_start; col < row_start + row_len; col++) {
|
||||
sum += data[col] * dense_values[m2_cols * indices[col] + j];
|
||||
}
|
||||
result_values[result_index] = sum;
|
||||
}
|
||||
})
|
||||
|
||||
return 0;
|
||||
}
|
||||
71
src/sparse_matrix.h
Normal file
71
src/sparse_matrix.h
Normal file
@@ -0,0 +1,71 @@
|
||||
#ifndef SPARSE_MATRIX_H
|
||||
#define SPARSE_MATRIX_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "collections.h"
|
||||
#include "matrix.h"
|
||||
#include "vector.h"
|
||||
|
||||
// Simple, partial sparse matrix implementation
|
||||
|
||||
|
||||
// Double-valued dynamic compressed sparse row (CSR) sparse matrix
|
||||
|
||||
typedef struct {
|
||||
uint32_t m;
|
||||
uint32_t n;
|
||||
uint32_array *indptr;
|
||||
uint32_array *indices;
|
||||
double_array *data;
|
||||
} sparse_matrix_t;
|
||||
|
||||
|
||||
sparse_matrix_t *sparse_matrix_new(void);
|
||||
void sparse_matrix_destroy(sparse_matrix_t *self);
|
||||
|
||||
void sparse_matrix_clear(sparse_matrix_t *self);
|
||||
|
||||
void sparse_matrix_append(sparse_matrix_t *self, uint32_t col, double val);
|
||||
void sparse_matrix_append_row(sparse_matrix_t *self, uint32_t *col, double *val, size_t n);
|
||||
|
||||
void sparse_matrix_finalize_row(sparse_matrix_t *self);
|
||||
|
||||
void sparse_matrix_sort_indices(sparse_matrix_t *self);
|
||||
|
||||
int sparse_matrix_dot_vector(sparse_matrix_t *self, double *vec, size_t n, double *result);
|
||||
int sparse_matrix_rows_dot_vector(sparse_matrix_t *self, uint32_t *rows, size_t m, double *vec, size_t n, double *result);
|
||||
|
||||
int sparse_matrix_dot_dense(sparse_matrix_t *self, matrix_t *matrix, matrix_t *result);
|
||||
|
||||
#define sparse_matrix_foreach_row(sp, row_var, index_var, length_var, code) { \
|
||||
uint32_t _row_start = 0, _row_end = 0; \
|
||||
uint32_t *_indptr = sp->indptr->a; \
|
||||
size_t _m = sp->m; \
|
||||
\
|
||||
for (uint32_t _i = 0; _i < _m; _i++) { \
|
||||
(row_var) = _i; \
|
||||
_row_start = _indptr[_i]; \
|
||||
_row_end = _indptr[_i + 1]; \
|
||||
(index_var) = _row_start; \
|
||||
(length_var) = _row_end - _row_start; \
|
||||
code; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define sparse_matrix_foreach(sp, row_var, col_var, data_var, code) { \
|
||||
uint32_t *_indices = sp->indices->a; \
|
||||
double *_data = sp->data->a; \
|
||||
uint32_t _index, _length; \
|
||||
sparse_matrix_foreach_row(sp, row_var, _index, _length, { \
|
||||
for (uint32_t _j = _index; _j < _index + _length; _j++) { \
|
||||
(col_var) = _indices[_j]; \
|
||||
(data_var) = _data[_j]; \
|
||||
code; \
|
||||
} \
|
||||
}) \
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user