From 4984352eda29c7a2a320d8ad315e742c024e2478 Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 6 Oct 2015 18:58:18 -0400 Subject: [PATCH] [graph] Simple sparse graph implementation, essentially a sparse matrix with no values array --- src/graph.c | 183 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/graph.h | 90 ++++++++++++++++++++++++++ 2 files changed, 273 insertions(+) create mode 100644 src/graph.c create mode 100644 src/graph.h diff --git a/src/graph.c b/src/graph.c new file mode 100644 index 00000000..5e1a096a --- /dev/null +++ b/src/graph.c @@ -0,0 +1,183 @@ +#include "graph.h" + +graph_t *graph_new_dims(graph_type_t type, uint32_t m, uint32_t n, size_t nnz, bool fixed_rows) { + graph_t *graph = malloc(sizeof(graph_t)); + graph->m = m; + graph->fixed_rows = fixed_rows; + graph->n = n; + graph->type = type; + graph->indptr = uint32_array_new_size(m + 1); + if (graph->indptr == NULL) { + graph_destroy(graph); + return NULL; + } + + if (!fixed_rows) { + uint32_array_push(graph->indptr, 0); + } + + if (nnz > 0) { + graph->indices = uint32_array_new_size(nnz); + } else { + graph->indices = uint32_array_new(); + } + + if (graph->indices == NULL) { + graph_destroy(graph); + return NULL; + } + + return graph; +} + +graph_t *graph_new(graph_type_t type) { + return graph_new_dims(type, 0, 0, 0, false); +} + +void graph_destroy(graph_t *self) { + if (self == NULL) return; + + if (self->indptr != NULL) { + uint32_array_destroy(self->indptr); + } + + if (self->indices != NULL) { + uint32_array_destroy(self->indices); + } + + free(self); +} + +inline void graph_set_size(graph_t *self) { + if (self->type != GRAPH_BIPARTITE) { + uint32_t max = self->m > self->n ? self->m : self->n; + self->m = max; + self->n = max; + } +} + +inline void graph_clear(graph_t *self) { + uint32_array_clear(self->indptr); + if (!self->fixed_rows) { + uint32_array_push(self->indptr, 0); + } + + uint32_array_clear(self->indices); +} + +inline void graph_finalize_vertex(graph_t *self) { + uint32_array_push(self->indptr, (uint32_t)self->indices->n); + if (!self->fixed_rows) { + self->m++; + graph_set_size(self); + } +} + +inline void graph_append_edge(graph_t *self, uint32_t col) { + uint32_array_push(self->indices, col); + if (col >= self->n) self->n = col + 1; + graph_set_size(self); +} + +inline void graph_append_edges(graph_t *self, uint32_t *col, size_t n) { + for (int i = 0; i < n; i++) { + graph_append_edge(self, col[i]); + } + graph_finalize_vertex(self); +} + +graph_t *graph_read(FILE *f) { + graph_t *g = malloc(sizeof(graph_t)); + if (g == NULL) return NULL; + + g->indptr = NULL; + g->indices = NULL; + + if (!file_read_uint32(f, &g->m) || + !file_read_uint32(f, &g->n) || + !file_read_uint8(f, (uint8_t *)&g->fixed_rows)) { + goto exit_graph_allocated; + } + + uint64_t len_indptr; + + if (!file_read_uint64(f, &len_indptr)) { + goto exit_graph_allocated; + } + + uint32_array *indptr = uint32_array_new_size(len_indptr); + if (indptr == NULL) { + goto exit_graph_allocated; + } + + g->indptr = indptr; + + for (int i = 0; i < len_indptr; i++) { + if (!file_read_uint32(f, indptr->a + i)) { + goto exit_graph_allocated; + } + } + + uint64_t len_indices; + + if (!file_read_uint64(f, &len_indices)) { + goto exit_graph_allocated; + } + + uint32_array *indices = uint32_array_new_size(len_indices); + if (indices == NULL) { + goto exit_graph_allocated; + } + + g->indices = indices; + + for (int i = 0; i < len_indices; i++) { + if (!file_read_uint32(f, indices->a + i)) { + goto exit_graph_allocated; + } + } + + return g; + +exit_graph_allocated: + graph_destroy(g); + return NULL; +} + +bool graph_write(graph_t *self, FILE *f) { + if (self == NULL || self->indptr == NULL || self->indices == NULL) { + return false; + } + + if (!file_write_uint32(f, self->m) || + !file_write_uint32(f, self->n) || + !file_write_uint8(f, (uint8_t)self->fixed_rows)) { + return false; + } + + uint64_t len_indptr = self->indptr->n; + + if (!file_write_uint64(f, len_indptr)) { + return false; + } + + for (int i = 0; i < len_indptr; i++) { + if (!file_write_uint32(f, self->indptr->a[i])) { + return false; + } + } + + uint64_t len_indices = (uint64_t)self->indices->n; + + if (!file_write_uint64(f, len_indices)) { + return false; + } + + for (int i = 0; i < len_indices; i++) { + if (!file_write_uint32(f, self->indices->a[i])) { + return false; + } + } + + return true; +} diff --git a/src/graph.h b/src/graph.h new file mode 100644 index 00000000..0bec6a0d --- /dev/null +++ b/src/graph.h @@ -0,0 +1,90 @@ +/* +graph.h +------- + +Graph stored as a compressed sparse row (CSR) matrix with no values. + +This is a specialization of sparse matrices suitable for cases +where we only need to know that two nodes are connected and will +typically be iterating row-by-row (get all edges for vertex v). +By default it stores bipartite graphs + +Essentially this can be viewed as a sparse matrix where all +of the non-zero values are 1. + +See sparse_matrix.h for more details. + +Currently we're not implementing edge types, graph traversal, etc. +*/ + +#ifndef GRAPH_H +#define GRAPH_H + +#include +#include +#include + +#include "collections.h" +#include "file_utils.h" +#include "vector.h" + +typedef enum { + GRAPH_DIRECTED, + GRAPH_UNDIRECTED, + GRAPH_BIPARTITE +} graph_type_t; + +typedef struct { + graph_type_t type; + uint32_t m; + uint32_t n; + bool fixed_rows; + uint32_array *indptr; + uint32_array *indices; +} graph_t; + + +graph_t *graph_new_dims(graph_type_t type, uint32_t m, uint32_t n, size_t nnz, bool fixed_rows); +graph_t *graph_new(graph_type_t type); +void graph_destroy(graph_t *self); + +void graph_set_size(graph_t *self); + +void graph_clear(graph_t *self); + +void graph_append_edge(graph_t *self, uint32_t col); +void graph_append_edges(graph_t *self, uint32_t *col, size_t n); + +void graph_finalize_vertex(graph_t *self); + +bool graph_write(graph_t *self, FILE *f); +graph_t *graph_read(FILE *f); + +#define graph_foreach_row(g, row_var, index_var, length_var, code) { \ + uint32_t _row_start = 0, _row_end = 0; \ + uint32_t *_indptr = g->indptr->a; \ + size_t _m = g->indptr->n - 1; \ + \ + for (uint32_t _i = 0; _i < _m; _i++) { \ + (row_var) = _i; \ + _row_start = _indptr[_i]; \ + _row_end = _indptr[_i + 1]; \ + (index_var) = _row_start; \ + (length_var) = _row_end - _row_start; \ + code; \ + } \ +} + +#define graph_foreach(g, row_var, col_var, code) { \ + uint32_t *_indices = g->indices->a; \ + uint32_t _index, _length; \ + graph_foreach_row(g, row_var, _index, _length, { \ + if (_length == 0) continue; \ + for (uint32_t _j = _index; _j < _index + _length; _j++) { \ + (col_var) = _indices[_j]; \ + code; \ + } \ + }) \ +} + +#endif