[parser] Averaged perceptron training
This commit is contained in:
@@ -26,7 +26,7 @@ inline double_array *averaged_perceptron_predict_scores(averaged_perceptron_t *s
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int col = indptr[feature_id]; col < indptr[feature_id+1]; col++) {
|
||||
for (int col = indptr[feature_id]; col < indptr[feature_id + 1]; col++) {
|
||||
uint32_t class_id = indices[col];
|
||||
scores[class_id] += data[col];
|
||||
}
|
||||
|
||||
@@ -15,11 +15,6 @@ very little memory.
|
||||
|
||||
The weights are stored as a sparse matrix in compressed sparse row format
|
||||
(see sparse_matrix.h)
|
||||
|
||||
Paper: [Collins, 2002] Discriminative Training Methods for Hidden Markov Models:
|
||||
Theory and Experiments with Perceptron Algorithms
|
||||
|
||||
Link: http://www.cs.columbia.edu/~mcollins/papers/tagperc.pdf
|
||||
*/
|
||||
#ifndef AVERAGED_PERCEPTRON_H
|
||||
#define AVERAGED_PERCEPTRON_H
|
||||
|
||||
337
src/averaged_perceptron_trainer.c
Normal file
337
src/averaged_perceptron_trainer.c
Normal file
@@ -0,0 +1,337 @@
|
||||
#include "averaged_perceptron_trainer.h"
|
||||
|
||||
/*
Release a trainer together with every member it still owns.

Accepts NULL. Each member is NULL-checked before it is released, so a
trainer whose members were handed off and reset to NULL (as
averaged_perceptron_trainer_finalize does for features and class_strings)
can be passed in safely.
*/
void averaged_perceptron_trainer_destroy(averaged_perceptron_trainer_t *self) {
    if (self == NULL) return;

    if (self->features != NULL) {
        trie_destroy(self->features);
    }

    if (self->classes != NULL) {
        kh_destroy(str_uint32, self->classes);
    }

    if (self->class_strings != NULL) {
        cstring_array_destroy(self->class_strings);
    }

    if (self->weights != NULL) {
        uint32_t feature_id;
        khash_t(class_weights) *weights;

        // Every value of the outer table is itself a hash table; release the
        // inner tables before the outer one that points at them. Iterating is
        // only valid once we know the outer table exists.
        kh_foreach(self->weights, feature_id, weights, {
            kh_destroy(class_weights, weights);
        })

        kh_destroy(feature_class_weights, self->weights);
    }

    if (self->scores != NULL) {
        double_array_destroy(self->scores);
    }

    free(self);
}
|
||||
|
||||
|
||||
/*
Look up the numeric id of a class label, optionally registering it.

self:            trainer whose class table is consulted
class_name:      label to look up
class_id:        out-parameter, written only when the function returns true
add_if_missing:  when true, an unknown label is assigned the next id
                 (the current table size), appended to class_strings and
                 counted in num_classes

Returns true if the label was found or added, false if it is unknown and
add_if_missing is false, or if the hash table insertion failed.

NOTE(review): the table is keyed by the class_name pointer that is passed in;
presumably the caller must keep that string alive for the trainer's
lifetime - confirm against the str_uint32 definition and the callers.
*/
bool averaged_perceptron_trainer_get_class_id(averaged_perceptron_trainer_t *self, char *class_name, uint32_t *class_id, bool add_if_missing) {
    khash_t(str_uint32) *classes = self->classes;

    khiter_t k = kh_get(str_uint32, classes, class_name);
    if (k != kh_end(classes)) {
        *class_id = kh_value(classes, k);
        return true;
    }

    if (!add_if_missing) {
        return false;
    }

    // Ids are dense: the next id is the number of labels seen so far.
    uint32_t new_id = kh_size(classes);

    int ret;
    k = kh_put(str_uint32, classes, class_name, &ret);
    if (ret < 0) {
        // Insertion failed; k does not refer to a usable slot.
        return false;
    }

    kh_value(classes, k) = new_id;
    *class_id = new_id;

    cstring_array_add_string(self->class_strings, class_name);
    self->num_classes++;

    return true;
}
|
||||
|
||||
/*
Resolve a feature string to its numeric id, optionally registering it.

A known feature has its id written to *feature_id and the call returns true.
An unknown feature is only added when add_if_missing is set: it receives the
trie's current key count as its id, and num_features is incremented once the
trie insertion succeeds. Returns false for an unknown feature that is not
added, or when the trie insertion fails.
*/
bool averaged_perceptron_trainer_get_feature_id(averaged_perceptron_trainer_t *self, char *feature, uint32_t *feature_id, bool add_if_missing) {
    trie_t *trie = self->features;

    if (trie_get_data(trie, feature, feature_id)) {
        return true;
    }

    if (!add_if_missing) {
        return false;
    }

    // The id is published before the insertion is attempted.
    uint32_t next_id = trie->num_keys;
    *feature_id = next_id;

    if (!trie_add(trie, feature, next_id)) {
        return false;
    }

    self->num_features++;
    return true;
}
|
||||
|
||||
/*
Convert a trainer into a finished averaged perceptron model.

For every feature id in [0, num_features) one sparse matrix row is emitted
holding, per class, the weight averaged over all updates: the running total
is first brought up to date for the updates that happened since the weight
was last touched, then divided by the total number of updates.

On success the model takes over the trainer's feature trie and class string
array, the trainer itself is destroyed, and the caller must not use it again.

Returns NULL if self is NULL, no class was ever registered, some feature id
has no weight table, or an allocation fails. In all of those cases the
trainer is left untouched and still belongs to the caller.
*/
averaged_perceptron_t *averaged_perceptron_trainer_finalize(averaged_perceptron_trainer_t *self) {
    if (self == NULL || self->num_classes == 0) return NULL;

    sparse_matrix_t *averaged_weights = sparse_matrix_new();
    if (averaged_weights == NULL) return NULL;

    uint64_t updates = self->num_updates;

    for (uint32_t feature_id = 0; feature_id < self->num_features; feature_id++) {
        khiter_t k = kh_get(feature_class_weights, self->weights, feature_id);
        if (k == kh_end(self->weights)) {
            sparse_matrix_destroy(averaged_weights);
            return NULL;
        }

        khash_t(class_weights) *weights = kh_value(self->weights, k);

        // A missing per-feature table yields an empty row instead of a crash.
        if (weights != NULL) {
            uint32_t class_id;
            class_weight_t weight;

            kh_foreach(weights, class_id, weight, {
                // weight is a copy: catch the total up to the final update
                // count without mutating the trainer's table.
                weight.total += (updates - weight.last_updated) * weight.value;
                double value = weight.total / updates;
                sparse_matrix_append(averaged_weights, class_id, value);
            })
        }

        sparse_matrix_finalize_row(averaged_weights);
    }

    averaged_perceptron_t *perceptron = malloc(sizeof *perceptron);
    if (perceptron == NULL) {
        sparse_matrix_destroy(averaged_weights);
        return NULL;
    }

    // Allocate everything that can fail before ownership is transferred, so
    // that a failure leaves the trainer fully intact.
    perceptron->scores = double_array_new_zeros(self->num_classes);
    if (perceptron->scores == NULL) {
        free(perceptron);
        sparse_matrix_destroy(averaged_weights);
        return NULL;
    }

    perceptron->weights = averaged_weights;
    perceptron->num_features = self->num_features;
    perceptron->num_classes = self->num_classes;

    // Set our pointers to NULL so they don't get free'd on destroy
    perceptron->classes = self->class_strings;
    self->class_strings = NULL;

    perceptron->features = self->features;
    self->features = NULL;

    averaged_perceptron_trainer_destroy(self);

    return perceptron;
}
|
||||
|
||||
/*
Fetch the {class_id => class_weight_t} table of one feature.

When the feature has no table yet and add_if_missing is true, an empty table
is created, stored under feature_id and returned. Returns NULL when the
feature has no table and add_if_missing is false, or when creating or
storing a new table fails (nothing is left behind in that case).
*/
khash_t(class_weights) *averaged_perceptron_trainer_get_class_weights(averaged_perceptron_trainer_t *self, uint32_t feature_id, bool add_if_missing) {
    khiter_t k = kh_get(feature_class_weights, self->weights, feature_id);
    if (k != kh_end(self->weights)) {
        return kh_value(self->weights, k);
    }

    if (!add_if_missing) {
        return NULL;
    }

    khash_t(class_weights) *weights = kh_init(class_weights);
    if (weights == NULL) {
        // Do not register the feature with a NULL table.
        return NULL;
    }

    int ret;
    k = kh_put(feature_class_weights, self->weights, feature_id, &ret);
    if (ret < 0) {
        kh_destroy(class_weights, weights);
        return NULL;
    }

    kh_value(self->weights, k) = weights;
    return weights;
}
|
||||
|
||||
|
||||
/*
Add `value` to the weight of class_id in a per-feature table.

Before the weight changes, its running total is credited with the old value
for every update step that elapsed since it was last touched
(iter - last_updated), and last_updated is moved to iter. A class with no
entry yet starts from NULL_WEIGHT.

Returns false, leaving the table's existing entries unchanged, if the hash
table insertion fails.
*/
static inline bool averaged_perceptron_trainer_update_weight(khash_t(class_weights) *weights, uint64_t iter, uint32_t class_id, double value) {
    int ret;
    khiter_t k = kh_put(class_weights, weights, class_id, &ret);
    if (ret < 0) return false;

    // kh_put reports 0 when the key was already present; any positive code
    // means a fresh slot whose value has not been initialized yet.
    class_weight_t weight = NULL_WEIGHT;
    if (ret == 0) {
        weight = kh_value(weights, k);
    }

    weight.total += (iter - weight.last_updated) * weight.value;
    weight.last_updated = iter;
    weight.value += value;

    kh_value(weights, k) = weight;

    return true;
}
|
||||
|
||||
/*
Apply one perceptron correction for a single feature: subtract `value` from
the weight of the wrongly guessed class and add it to the weight of the true
class. The feature's weight table is created on demand. Both changes are
stamped with the trainer's current update count.

Returns false if the table cannot be obtained or either weight update fails;
the true-class update is not attempted when the guessed-class update fails.
*/
static inline bool averaged_perceptron_trainer_update_feature(averaged_perceptron_trainer_t *self, uint32_t feature_id, uint32_t guess, uint32_t truth, double value) {
    khash_t(class_weights) *class_table = averaged_perceptron_trainer_get_class_weights(self, feature_id, true);
    if (class_table == NULL) {
        return false;
    }

    uint64_t iter = self->num_updates;

    bool penalized = averaged_perceptron_trainer_update_weight(class_table, iter, guess, -1.0 * value);
    if (!penalized) {
        return false;
    }

    return averaged_perceptron_trainer_update_weight(class_table, iter, truth, value);
}
|
||||
|
||||
/*
Score every class for the given features using the trainer's current
(non-averaged) weight values and return the index of the best score.

The trainer's scratch score array is grown to num_classes if needed and reset
to zero on every call. Features that are not in the trie, or that have no
weight table, contribute nothing; this function never adds features.
*/
uint32_t averaged_perceptron_trainer_predict(averaged_perceptron_trainer_t *self, cstring_array *features) {
    double_array *totals = self->scores;
    size_t class_count = (size_t)self->num_classes;

    // Make sure the scratch buffer has one slot per known class.
    if (totals->m < class_count) {
        double_array_resize(totals, class_count);
    }
    if (totals->n < class_count) {
        totals->n = class_count;
    }

    double_array_set(totals->a, totals->n, 0.0);

    uint32_t idx = 0;
    char *name = NULL;
    uint32_t fid;
    uint32_t cid;
    class_weight_t entry;
    khash_t(class_weights) *per_class;

    cstring_array_foreach(features, idx, name, {
        // Lookup only: unseen features are skipped, not registered.
        if (!averaged_perceptron_trainer_get_feature_id(self, name, &fid, false)) {
            continue;
        }

        per_class = averaged_perceptron_trainer_get_class_weights(self, fid, false);
        if (per_class == NULL) {
            continue;
        }

        kh_foreach(per_class, cid, entry, {
            totals->a[cid] += entry.value;
        })
    })

    int64_t best = double_array_argmax(totals->a, totals->n);
    return (uint32_t)best;
}
|
||||
|
||||
/*
Correct the weights after a wrong prediction: every feature of the example
(registered on the fly if new) moves one unit of weight from the guessed
class to the true class. The update counter advances by one once all
features have been processed.

Returns false as soon as a feature cannot be registered or updated; in that
case the update counter is not advanced.
*/
bool averaged_perceptron_trainer_update(averaged_perceptron_trainer_t *self, uint32_t guess, uint32_t truth, cstring_array *features) {
    uint32_t idx = 0;
    char *name = NULL;
    uint32_t fid;

    cstring_array_foreach(features, idx, name, {
        bool applied = averaged_perceptron_trainer_get_feature_id(self, name, &fid, true)
                    && averaged_perceptron_trainer_update_feature(self, fid, guess, truth, 1.0);
        if (!applied) {
            return false;
        }
    })

    self->num_updates++;
    return true;
}
|
||||
|
||||
/*
Variant of the weight correction that takes features as a
{feature string => count} table: each feature moves `count` units of weight
from the guessed class to the true class. New features are registered on the
fly. The update counter advances by one once all entries have been processed.

Returns false as soon as a feature cannot be registered or updated; in that
case the update counter is not advanced.
*/
bool averaged_perceptron_trainer_update_counts(averaged_perceptron_trainer_t *self, uint32_t guess, uint32_t truth, khash_t(str_uint32) *feature_counts) {
    const char *name;
    uint32_t occurrences;
    uint32_t fid;

    kh_foreach(feature_counts, name, occurrences, {
        bool applied = averaged_perceptron_trainer_get_feature_id(self, (char *)name, &fid, true)
                    && averaged_perceptron_trainer_update_feature(self, fid, guess, truth, (double)occurrences);
        if (!applied) {
            return false;
        }
    })

    self->num_updates++;
    return true;
}
|
||||
|
||||
/*
Train on a single labeled example.

The label is registered as a class if it has not been seen before, then the
trainer predicts a class for the features under its current weights. Weights
are only touched when the prediction is wrong, in which case the error
counter is incremented and the correction is applied.

Returns false if the label cannot be registered or the correction fails.
*/
bool averaged_perceptron_trainer_train_example(averaged_perceptron_trainer_t *self, cstring_array *features, char *label) {
    uint32_t truth;
    if (!averaged_perceptron_trainer_get_class_id(self, label, &truth, true)) {
        return false;
    }

    uint32_t guess = averaged_perceptron_trainer_predict(self, features);

    // Error-driven learning: a correct guess needs no work at all.
    if (guess == truth) {
        return true;
    }

    self->num_errors++;
    return averaged_perceptron_trainer_update(self, guess, truth, features);
}
|
||||
|
||||
averaged_perceptron_trainer_t *averaged_perceptron_trainer_new(void) {
|
||||
averaged_perceptron_trainer_t *self = malloc(sizeof(averaged_perceptron_trainer_t));
|
||||
|
||||
if (self == NULL) return NULL;
|
||||
|
||||
self->num_features = 0;
|
||||
self->num_classes = 0;
|
||||
self->num_updates = 0;
|
||||
self->num_errors = 0;
|
||||
|
||||
self->features = trie_new();
|
||||
if (self->features == NULL) {
|
||||
goto exit_trainer_created;
|
||||
}
|
||||
|
||||
self->classes = kh_init(str_uint32);
|
||||
if (self->classes == NULL) {
|
||||
goto exit_trainer_created;
|
||||
}
|
||||
|
||||
self->class_strings = cstring_array_new();
|
||||
if (self->class_strings == NULL) {
|
||||
goto exit_trainer_created;
|
||||
}
|
||||
|
||||
self->weights = kh_init(feature_class_weights);
|
||||
|
||||
if (self->weights == NULL) {
|
||||
goto exit_trainer_created;
|
||||
}
|
||||
|
||||
self->scores = double_array_new();
|
||||
|
||||
return self;
|
||||
|
||||
exit_trainer_created:
|
||||
averaged_perceptron_trainer_destroy(self);
|
||||
return NULL;
|
||||
}
|
||||
75
src/averaged_perceptron_trainer.h
Normal file
75
src/averaged_perceptron_trainer.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
averaged_perceptron_trainer.h
|
||||
-----------------------------
|
||||
|
||||
Trainer for a generic averaged perceptron model.
|
||||
|
||||
The averaged perceptron uses a simple online error-driven
|
||||
learning algorithm. Given some features and the true label,
|
||||
it predicts the expected label under the current weights. If
|
||||
it guesses correctly, there's nothing to do and it moves
|
||||
on to the next example. If it predicted the wrong answer, it
|
||||
makes the following updates to its weights:
|
||||
|
||||
weights[feature][predicted] -= 1.0
|
||||
weights[feature][actual] += 1.0
|
||||
|
||||
This seems overly simplistic, and it is. This is the regular
|
||||
perceptron update rule. On the more difficult cases, this model
|
||||
would tend to overfit by spending a lot of time fiddling with the
|
||||
weights for the few cases it got wrong and building the whole model
|
||||
around those few cases. The averaged perceptron is one way to account
|
||||
for this and build a more robust model.
|
||||
|
||||
|
||||
Paper: [Collins, 2002] Discriminative Training Methods for Hidden Markov Models:
|
||||
Theory and Experiments with Perceptron Algorithms
|
||||
|
||||
Link: http://www.cs.columbia.edu/~mcollins/papers/tagperc.pdf
|
||||
*/
|
||||
#ifndef AVERAGED_PERCEPTRON_TRAINER_H
|
||||
#define AVERAGED_PERCEPTRON_TRAINER_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "averaged_perceptron.h"
|
||||
#include "collections.h"
|
||||
#include "string_utils.h"
|
||||
#include "trie.h"
|
||||
|
||||
/*
Training state of one (feature, class) weight.
*/
struct class_weight {
    // Current weight, adjusted by every correction that touches it
    double value;
    // Sum of the weight over the update steps it has been credited for so far
    double total;
    // Update count at the moment this weight was last brought up to date
    uint64_t last_updated;
};

typedef struct class_weight class_weight_t;
|
||||
|
||||
// Starting state of a weight that has never been updated: everything zero.
#define NULL_WEIGHT (class_weight_t){.value = 0.0, .total = 0.0, .last_updated = 0}
|
||||
|
||||
// Hash map type `class_weights`: integer class id => class_weight_t
KHASH_MAP_INIT_INT(class_weights, class_weight_t)
|
||||
|
||||
// Hash map type `feature_class_weights`: integer feature id => pointer to that feature's class_weights map
KHASH_MAP_INIT_INT(feature_class_weights, khash_t(class_weights) *)
|
||||
|
||||
typedef struct averaged_perceptron_trainer {
|
||||
uint32_t num_features;
|
||||
uint32_t num_classes;
|
||||
uint64_t num_updates;
|
||||
uint64_t num_errors;
|
||||
trie_t *features;
|
||||
khash_t(str_uint32) *classes;
|
||||
cstring_array *class_strings;
|
||||
// {feature_id => {class_id => class_weight_t}}
|
||||
khash_t(feature_class_weights) *weights;
|
||||
double_array *scores;
|
||||
} averaged_perceptron_trainer_t;
|
||||
|
||||
averaged_perceptron_trainer_t *averaged_perceptron_trainer_new(void);
|
||||
|
||||
uint32_t averaged_perceptron_trainer_predict(averaged_perceptron_trainer_t *self, cstring_array *features);
|
||||
bool averaged_perceptron_trainer_train_example(averaged_perceptron_trainer_t *trainer, cstring_array *features, char *label);
|
||||
|
||||
averaged_perceptron_t *averaged_perceptron_trainer_finalize(averaged_perceptron_trainer_t *self);
|
||||
|
||||
void averaged_perceptron_trainer_destroy(averaged_perceptron_trainer_t *self);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user