[parser] Count occurrences of each class instead of keeping a set of unique classes
This commit is contained in:
@@ -344,9 +344,9 @@ address_parser_t *address_parser_init(char *filename) {
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
khash_t(str_set) *unique_classes = kh_init(str_set);
|
khash_t(str_uint32) *class_counts = kh_init(str_uint32);
|
||||||
if (unique_classes == NULL) {
|
if (class_counts == NULL) {
|
||||||
log_error("Could not allocate unique_classes\n");
|
log_error("Could not allocate class_counts\n");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -476,16 +476,10 @@ address_parser_t *address_parser_init(char *filename) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
cstring_array_foreach(phrase_labels, i, label, {
|
cstring_array_foreach(phrase_labels, i, label, {
|
||||||
k = kh_get(str_set, unique_classes, label);
|
if (!str_uint32_hash_incr(class_counts, label)) {
|
||||||
if (k == kh_end(unique_classes)) {
|
log_error("Error in hash_incr for class_counts\n");
|
||||||
char *label_key = strdup(label);
|
|
||||||
k = kh_put(str_set, unique_classes, label_key, &ret);
|
|
||||||
if (ret < 0) {
|
|
||||||
log_error("Error in kh_put in unique_classes\n");
|
|
||||||
free(label_key);
|
|
||||||
goto exit_hashes_allocated;
|
goto exit_hashes_allocated;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
})
|
})
|
||||||
|
|
||||||
cstring_array_foreach(phrases, i, phrase, {
|
cstring_array_foreach(phrases, i, phrase, {
|
||||||
@@ -699,7 +693,7 @@ address_parser_t *address_parser_init(char *filename) {
|
|||||||
parser->model = NULL;
|
parser->model = NULL;
|
||||||
|
|
||||||
|
|
||||||
size_t num_classes = kh_size(unique_classes);
|
size_t num_classes = kh_size(class_counts);
|
||||||
log_info("num_classes = %zu\n", num_classes);
|
log_info("num_classes = %zu\n", num_classes);
|
||||||
parser->num_classes = num_classes;
|
parser->num_classes = num_classes;
|
||||||
|
|
||||||
@@ -938,10 +932,10 @@ exit_hashes_allocated:
|
|||||||
})
|
})
|
||||||
kh_destroy(str_uint32, vocab);
|
kh_destroy(str_uint32, vocab);
|
||||||
|
|
||||||
kh_foreach_key(unique_classes, token, {
|
kh_foreach_key(class_counts, token, {
|
||||||
free((char *)token);
|
free((char *)token);
|
||||||
})
|
})
|
||||||
kh_destroy(str_set, unique_classes);
|
kh_destroy(str_uint32, class_counts);
|
||||||
|
|
||||||
kh_foreach(phrase_stats, token, stats, {
|
kh_foreach(phrase_stats, token, stats, {
|
||||||
kh_destroy(int_uint32, stats.class_counts);
|
kh_destroy(int_uint32, stats.class_counts);
|
||||||
|
|||||||
Reference in New Issue
Block a user