[utils] sparse_matrix_add_unique_columns_alias adds the actual column indices to a hashtable/array and aliases them in the table to ids from 0 to N-1 (where N is the number of unique columns in this batch). This way the matrix is compatible with smaller matrices of batch weights.

This commit is contained in:
Al
2017-04-02 13:48:46 -04:00
parent a2563a4dcd
commit 96e1ca5e89
2 changed files with 60 additions and 16 deletions

View File

@@ -19,38 +19,80 @@ sparse_matrix_t *sparse_matrix_new_from_matrix(double_matrix_t *matrix) {
}
/*
 * sparse_matrix_add_unique_columns
 *
 * Walks matrix->indices and records every distinct column index in
 * unique_columns. In the int_uint32 lines, each column seen for the first
 * time is stored with a dense id taken from the table's size at that
 * moment, so ids are handed out as 0, 1, 2, ... in first-seen order.
 * Afterwards `array` is filled so that array->a[id] is the original column
 * index that was given that id.
 *
 * Returns false if a hash insertion reports an error (ret < 0) or if the
 * array resize fails; returns true otherwise.
 *
 * NOTE(review): this span looks like a rendered diff in which removed and
 * added lines appear back to back without +/- markers (the int_set and
 * int_uint32 variants of the same statement). Confirm against the real
 * source file before treating it as compilable code.
 */
bool sparse_matrix_add_unique_columns(sparse_matrix_t *matrix, khash_t(int_set) *unique_columns, uint32_array *array) {
bool sparse_matrix_add_unique_columns(sparse_matrix_t *matrix, khash_t(int_uint32) *unique_columns, uint32_array *array) {
size_t n = matrix->indices->n;
uint32_t *indices = matrix->indices->a;
/* Empty the table first, so the ids assigned below start again at 0 on every call. */
kh_clear(int_set, unique_columns);
kh_clear(int_uint32, unique_columns);
size_t i;
khiter_t k;
for (i = 0; i < n; i++) {
uint32_t col = indices[i];
/* NOTE(review): the next three lines are the int_set form of the insertion (set membership only, no id stored); presumably the pre-change side of the diff. */
int ret;
kh_put(int_set, unique_columns, (khint_t)col, &ret);
if (ret < 0) {
int ret = 0;
/* Look the column up first: only columns not yet in the table receive a new id. */
k = kh_get(int_uint32, unique_columns, col);
if (k == kh_end(unique_columns)) {
/* Read the size BEFORE inserting: the current entry count is the next unused id. */
uint32_t next_id = (uint32_t)kh_size(unique_columns);
k = kh_put(int_uint32, unique_columns, col, &ret);
if (ret < 0) {
return false;
}
kh_value(unique_columns, k) = next_id;
}
}
uint32_array_clear(array);
/* One slot per unique column. Presumably resize_fixed also sets array->n so the indexed writes below are in range; confirm in the array implementation. */
if (!uint32_array_resize_fixed(array, kh_size(unique_columns))) {
return false;
}
khint_t key;
uint32_t *batch = array->a;
uint32_t col_id;
/* Invert the table: slot col_id of the array receives the original column index. */
kh_foreach(unique_columns, key, col_id, {
batch[col_id] = (uint32_t)key;
})
return true;
}
/*
 * sparse_matrix_alias_columns
 *
 * Rewrites every entry of matrix->indices in place with the value stored
 * for it in unique_columns. If an index has no entry in the table the
 * function returns false immediately; entries before that position have
 * already been overwritten at that point. On success matrix->n is set to
 * the number of entries in the table and true is returned.
 *
 * NOTE(review): the statements between the loop and the matrix->n
 * assignment reference `array`, which is not a parameter of this function,
 * and declare `k` a second time in the same scope. They look like removed
 * diff lines from sparse_matrix_add_unique_columns rendered inline without
 * +/- markers. Confirm against the real source file.
 */
bool sparse_matrix_alias_columns(sparse_matrix_t *matrix, khash_t(int_uint32) *unique_columns) {
size_t n = matrix->indices->n;
uint32_t *indices = matrix->indices->a;
size_t i;
khiter_t k;
uint32_t col_id;
for (i = 0; i < n; i++) {
uint32_t col = indices[i];
/* ret is never read in this loop (kh_get takes no status argument). */
int ret = 0;
k = kh_get(int_uint32, unique_columns, col);
if (k != kh_end(unique_columns)) {
/* Replace the original column index with the value the table holds for it. */
col_id = kh_value(unique_columns, k);
indices[i] = col_id;
} else {
/* Column missing from the table: give up, leaving earlier entries already rewritten. */
return false;
}
}
uint32_array_clear(array);
if (!uint32_array_resize(array, kh_size(unique_columns))) {
return false;
}
khint_t k;
kh_foreach_key(unique_columns, k, {
uint32_array_push(array, (uint32_t)k);
})
/* Presumably matrix->n is the column count, shrunk here to the number of unique columns; confirm against sparse_matrix_t. */
matrix->n = kh_size(unique_columns);
return true;
}
/*
 * sparse_matrix_add_unique_columns_alias
 *
 * Convenience wrapper that runs the two steps back to back on the same
 * table: first sparse_matrix_add_unique_columns(matrix, unique_columns,
 * array), then sparse_matrix_alias_columns(matrix, unique_columns).
 *
 * Returns false without calling the second step if the first one fails;
 * otherwise returns the result of the second step.
 */
inline bool sparse_matrix_add_unique_columns_alias(sparse_matrix_t *matrix, khash_t(int_uint32) *unique_columns, uint32_array *array) {
    if (!sparse_matrix_add_unique_columns(matrix, unique_columns, array)) {
        return false;
    }

    return sparse_matrix_alias_columns(matrix, unique_columns);
}
uint32_array *sparse_matrix_unique_columns(sparse_matrix_t *matrix) {
khash_t(int_set) *unique_columns = kh_init(int_set);
uint32_array *ret = uint32_array_new();

View File

@@ -8,6 +8,8 @@
sparse_matrix_t *sparse_matrix_new_from_matrix(double_matrix_t *matrix);
uint32_array *sparse_matrix_unique_columns(sparse_matrix_t *matrix);
/* NOTE(review): int_set form of the prototype below; it looks like the removed side of a diff rendered without +/- markers. Confirm which declaration the real header keeps. */
bool sparse_matrix_add_unique_columns(sparse_matrix_t *matrix, khash_t(int_set) *unique_columns, uint32_array *array);
/* Clears unique_columns, maps each distinct column index of the matrix to an id assigned in first-seen order starting at 0, and fills array so that array->a[id] is the original column index. Returns false on insertion or resize failure. */
bool sparse_matrix_add_unique_columns(sparse_matrix_t *matrix, khash_t(int_uint32) *unique_columns, uint32_array *array);
/* Replaces each entry of matrix->indices in place with its value from unique_columns and sets matrix->n to the table size. Returns false if an index is missing from the table. */
bool sparse_matrix_alias_columns(sparse_matrix_t *matrix, khash_t(int_uint32) *unique_columns);
/* Calls sparse_matrix_add_unique_columns and then sparse_matrix_alias_columns; returns true only if both succeed. */
bool sparse_matrix_add_unique_columns_alias(sparse_matrix_t *matrix, khash_t(int_uint32) *unique_columns, uint32_array *array);
#endif