[parser] Averaged perceptron training

This commit is contained in:
Al
2015-09-10 10:25:52 -07:00
parent 0ddf50cb5f
commit 6a5b01b51b
4 changed files with 413 additions and 6 deletions

View File

@@ -26,7 +26,7 @@ inline double_array *averaged_perceptron_predict_scores(averaged_perceptron_t *s
continue;
}
for (int col = indptr[feature_id]; col < indptr[feature_id+1]; col++) {
for (int col = indptr[feature_id]; col < indptr[feature_id + 1]; col++) {
uint32_t class_id = indices[col];
scores[class_id] += data[col];
}

View File

@@ -15,11 +15,6 @@ very little memory.
The weights are stored as a sparse matrix in compressed sparse row format
(see sparse_matrix.h)
Paper: [Collins, 2002] Discriminative Training Methods for Hidden Markov Models:
Theory and Experiments with Perceptron Algorithms
Link: http://www.cs.columbia.edu/~mcollins/papers/tagperc.pdf
*/
#ifndef AVERAGED_PERCEPTRON_H
#define AVERAGED_PERCEPTRON_H

View File

@@ -0,0 +1,337 @@
#include "averaged_perceptron_trainer.h"
// Frees the trainer and everything it still owns. Safe to call on a trainer
// whose fields have been transferred away (NULLed) by finalize.
void averaged_perceptron_trainer_destroy(averaged_perceptron_trainer_t *self) {
    if (self == NULL) return;
    if (self->features != NULL) {
        trie_destroy(self->features);
    }
    if (self->classes != NULL) {
        kh_destroy(str_uint32, self->classes);
    }
    // Fix: class_strings was previously leaked on destroy
    if (self->class_strings != NULL) {
        cstring_array_destroy(self->class_strings);
    }
    // Fix: the inner hashes were previously iterated before the NULL check,
    // crashing if self->weights was never allocated
    if (self->weights != NULL) {
        uint32_t feature_id;
        khash_t(class_weights) *weights;
        // Each value is itself a hash of per-class weights; free them first
        kh_foreach(self->weights, feature_id, weights, {
            kh_destroy(class_weights, weights);
        })
        kh_destroy(feature_class_weights, self->weights);
    }
    if (self->scores != NULL) {
        double_array_destroy(self->scores);
    }
    free(self);
}
// Looks up the integer id for class_name, optionally registering a new one.
// Writes the id to *class_id and returns true on success; returns false when
// the class is unknown and add_if_missing is false, or the hash insert fails.
bool averaged_perceptron_trainer_get_class_id(averaged_perceptron_trainer_t *self, char *class_name, uint32_t *class_id, bool add_if_missing) {
    khash_t(str_uint32) *classes = self->classes;
    khiter_t k = kh_get(str_uint32, classes, class_name);
    if (k != kh_end(classes)) {
        *class_id = kh_value(classes, k);
        return true;
    }
    if (!add_if_missing) {
        return false;
    }
    // Ids are assigned densely in registration order
    uint32_t new_id = kh_size(classes);
    int ret;
    k = kh_put(str_uint32, classes, class_name, &ret);
    // Fix: kh_put can fail (ret < 0); the error was previously ignored
    if (ret < 0) {
        return false;
    }
    kh_value(classes, k) = new_id;
    *class_id = new_id;
    // NOTE(review): the hash keys the caller's pointer directly while
    // class_strings keeps its own copy — confirm the key outlives the trainer
    cstring_array_add_string(self->class_strings, class_name);
    self->num_classes++;
    return true;
}
// Looks up the trie id for a feature string, optionally registering a new one.
// Writes the id to *feature_id and returns true on success; false when the
// feature is unknown and add_if_missing is false, or the trie insert fails.
bool averaged_perceptron_trainer_get_feature_id(averaged_perceptron_trainer_t *self, char *feature, uint32_t *feature_id, bool add_if_missing) {
    trie_t *features = self->features;
    // Fast path: feature already known
    if (trie_get_data(features, feature, feature_id)) {
        return true;
    }
    if (!add_if_missing) {
        return false;
    }
    // Ids are assigned densely in insertion order
    uint32_t assigned_id = features->num_keys;
    *feature_id = assigned_id;
    if (!trie_add(features, feature, assigned_id)) {
        return false;
    }
    self->num_features++;
    return true;
}
// Converts the trainer's raw weights into a compact averaged perceptron
// model: each weight becomes total / num_updates, stored in a sparse matrix
// row per feature. Consumes the trainer on success; on failure returns NULL
// and leaves the trainer for the caller to destroy.
averaged_perceptron_t *averaged_perceptron_trainer_finalize(averaged_perceptron_trainer_t *self) {
    if (self == NULL || self->num_classes == 0) return NULL;
    sparse_matrix_t *averaged_weights = sparse_matrix_new();
    // Fix: allocation result was previously used unchecked
    if (averaged_weights == NULL) return NULL;
    uint32_t class_id;
    class_weight_t weight;
    uint64_t updates = self->num_updates;
    khash_t(class_weights) *weights;
    // Feature ids are dense (0..num_features-1), so rows are emitted in order
    for (uint32_t feature_id = 0; feature_id < self->num_features; feature_id++) {
        khiter_t k = kh_get(feature_class_weights, self->weights, feature_id);
        if (k == kh_end(self->weights)) {
            // A missing row means the trainer's state is inconsistent
            sparse_matrix_destroy(averaged_weights);
            return NULL;
        }
        weights = kh_value(self->weights, k);
        // Fix: removed inner uint32_t class_id that shadowed the outer one
        kh_foreach(weights, class_id, weight, {
            // Credit the weight for the ticks since its last update, then average
            weight.total += (updates - weight.last_updated) * weight.value;
            double value = weight.total / updates;
            sparse_matrix_append(averaged_weights, class_id, value);
        })
        sparse_matrix_finalize_row(averaged_weights);
    }
    averaged_perceptron_t *perceptron = malloc(sizeof(averaged_perceptron_t));
    // Fix: malloc result was previously used unchecked
    if (perceptron == NULL) {
        sparse_matrix_destroy(averaged_weights);
        return NULL;
    }
    perceptron->weights = averaged_weights;
    perceptron->num_features = self->num_features;
    perceptron->num_classes = self->num_classes;
    perceptron->scores = double_array_new_zeros(perceptron->num_classes);
    if (perceptron->scores == NULL) {
        sparse_matrix_destroy(averaged_weights);
        free(perceptron);
        return NULL;
    }
    // Transfer ownership of the class strings and feature trie to the model,
    // NULLing the trainer's pointers so destroy below doesn't free them
    perceptron->classes = self->class_strings;
    self->class_strings = NULL;
    perceptron->features = self->features;
    self->features = NULL;
    averaged_perceptron_trainer_destroy(self);
    return perceptron;
}
// Returns the {class_id => weight} hash for a feature, creating an empty one
// if add_if_missing is true. Returns NULL when absent (and not adding) or on
// allocation failure.
khash_t(class_weights) *averaged_perceptron_trainer_get_class_weights(averaged_perceptron_trainer_t *self, uint32_t feature_id, bool add_if_missing) {
    khiter_t k = kh_get(feature_class_weights, self->weights, feature_id);
    if (k != kh_end(self->weights)) {
        return kh_value(self->weights, k);
    }
    if (!add_if_missing) {
        return NULL;
    }
    khash_t(class_weights) *weights = kh_init(class_weights);
    // Fix: kh_init can return NULL; it was previously used unchecked
    if (weights == NULL) {
        return NULL;
    }
    int ret;
    k = kh_put(feature_class_weights, self->weights, feature_id, &ret);
    if (ret < 0) {
        kh_destroy(class_weights, weights);
        return NULL;
    }
    kh_value(self->weights, k) = weights;
    return weights;
}
// Applies a single additive update to weights[class_id] at tick `iter`,
// lazily bringing the weight's running total up to date first.
// Returns false only if the hash insert fails.
static inline bool averaged_perceptron_trainer_update_weight(khash_t(class_weights) *weights, uint64_t iter, uint32_t class_id, double value) {
    // Fix: removed unused local `size_t index;`
    class_weight_t weight;
    khiter_t k = kh_get(class_weights, weights, class_id);
    if (k == kh_end(weights)) {
        // First time this (feature, class) pair is touched
        weight = NULL_WEIGHT;
    } else {
        weight = kh_value(weights, k);
    }
    // Instead of touching every weight on every tick, credit the current
    // value for the ticks elapsed since the last update (lazy averaging)
    weight.total += (iter - weight.last_updated) * weight.value;
    weight.last_updated = iter;
    weight.value += value;
    int ret;
    k = kh_put(class_weights, weights, class_id, &ret);
    if (ret < 0) return false;
    kh_value(weights, k) = weight;
    return true;
}
// Applies one error-driven update for a single feature: subtract `value`
// from the wrongly guessed class and add it to the true class.
// Returns false on allocation failure in either step.
static inline bool averaged_perceptron_trainer_update_feature(averaged_perceptron_trainer_t *self, uint32_t feature_id, uint32_t guess, uint32_t truth, double value) {
    khash_t(class_weights) *cw = averaged_perceptron_trainer_get_class_weights(self, feature_id, true);
    if (cw == NULL) {
        return false;
    }
    uint64_t iter = self->num_updates;
    // Penalize the guess and reward the truth by the same magnitude;
    // && short-circuits so the second update is skipped if the first fails
    bool ok = averaged_perceptron_trainer_update_weight(cw, iter, guess, -1.0 * value);
    return ok && averaged_perceptron_trainer_update_weight(cw, iter, truth, value);
}
// Predicts the highest-scoring class id for a bag of string features using
// the trainer's current (non-averaged) weights. Unknown features are skipped.
uint32_t averaged_perceptron_trainer_predict(averaged_perceptron_trainer_t *self, cstring_array *features) {
double_array *scores = self->scores;
size_t num_classes = (size_t)self->num_classes;
uint32_t i = 0;
char *feature = NULL;
// Prediction must not register new features or weight rows
bool add_if_missing = false;
uint32_t feature_id;
khash_t(class_weights) *weights;
uint32_t class_id;
class_weight_t weight;
// Grow the reusable score buffer if classes were added since the last call
if (scores->m < num_classes) {
double_array_resize(scores, num_classes);
}
if (scores->n < num_classes) {
scores->n = num_classes;
}
// Zero all class scores before accumulating
double_array_set(scores->a, scores->n, 0.0);
cstring_array_foreach(features, i, feature, {
if (!averaged_perceptron_trainer_get_feature_id(self, feature, &feature_id, add_if_missing)) {
continue;
}
weights = averaged_perceptron_trainer_get_class_weights(self, feature_id, add_if_missing);
if (weights == NULL) {
continue;
}
// Each known feature votes its current weight for every class it touches
kh_foreach(weights, class_id, weight, {
scores->a[class_id] += weight.value;
})
})
// The argmax over accumulated scores is the predicted class id
int64_t max_score = double_array_argmax(scores->a, scores->n);
return (uint32_t)max_score;
}
// Applies the standard perceptron update for one misclassified example:
// for every feature, weights[feature][guess] -= 1.0 and
// weights[feature][truth] += 1.0. Returns false on allocation failure.
bool averaged_perceptron_trainer_update(averaged_perceptron_trainer_t *self, uint32_t guess, uint32_t truth, cstring_array *features) {
uint32_t i = 0;
char *feature = NULL;
uint32_t feature_id;
// Training registers previously unseen features
bool add_if_missing = true;
cstring_array_foreach(features, i, feature, {
if (!averaged_perceptron_trainer_get_feature_id(self, feature, &feature_id, add_if_missing)) {
return false;
}
if (!averaged_perceptron_trainer_update_feature(self, feature_id, guess, truth, 1.0)) {
return false;
}
})
// Advance the global tick used for lazy weight averaging
self->num_updates++;
return true;
}
// Count-weighted variant of averaged_perceptron_trainer_update: each feature's
// update magnitude is its count in feature_counts instead of a flat 1.0.
// Returns false on allocation failure.
bool averaged_perceptron_trainer_update_counts(averaged_perceptron_trainer_t *self, uint32_t guess, uint32_t truth, khash_t(str_uint32) *feature_counts) {
const char *feature;
uint32_t feature_id;
uint32_t count;
// Training registers previously unseen features
bool add_if_missing = true;
kh_foreach(feature_counts, feature, count, {
if (!averaged_perceptron_trainer_get_feature_id(self, (char *)feature, &feature_id, add_if_missing)) {
return false;
}
if (!averaged_perceptron_trainer_update_feature(self, feature_id, guess, truth, (double)count)) {
return false;
}
})
// Advance the global tick used for lazy weight averaging
self->num_updates++;
return true;
}
// Trains on one labeled example: predict under the current weights, and only
// update if the prediction was wrong. Returns false on allocation failure.
bool averaged_perceptron_trainer_train_example(averaged_perceptron_trainer_t *self, cstring_array *features, char *label) {
    uint32_t truth;
    // New labels are registered on first sight
    if (!averaged_perceptron_trainer_get_class_id(self, label, &truth, true)) {
        return false;
    }
    uint32_t guess = averaged_perceptron_trainer_predict(self, features);
    // Online error-driven learning: correct guesses cost nothing
    if (guess == truth) {
        return true;
    }
    self->num_errors++;
    return averaged_perceptron_trainer_update(self, guess, truth, features);
}
// Allocates and initializes an empty trainer; returns NULL on any allocation
// failure with no resources leaked.
averaged_perceptron_trainer_t *averaged_perceptron_trainer_new(void) {
    // Fix: the struct was previously malloc'd and, on a partial-init failure,
    // destroy was called on uninitialized pointer fields (undefined behavior).
    // calloc zeroes all counters and pointers, and the goto chain below frees
    // exactly what was successfully created.
    averaged_perceptron_trainer_t *self = calloc(1, sizeof *self);
    if (self == NULL) return NULL;
    self->features = trie_new();
    if (self->features == NULL) {
        goto error_features;
    }
    self->classes = kh_init(str_uint32);
    if (self->classes == NULL) {
        goto error_classes;
    }
    self->class_strings = cstring_array_new();
    if (self->class_strings == NULL) {
        goto error_class_strings;
    }
    self->weights = kh_init(feature_class_weights);
    if (self->weights == NULL) {
        goto error_weights;
    }
    self->scores = double_array_new();
    // Fix: double_array_new's result was previously unchecked
    if (self->scores == NULL) {
        goto error_scores;
    }
    return self;
error_scores:
    kh_destroy(feature_class_weights, self->weights);
error_weights:
    cstring_array_destroy(self->class_strings);
error_class_strings:
    kh_destroy(str_uint32, self->classes);
error_classes:
    trie_destroy(self->features);
error_features:
    free(self);
    return NULL;
}

View File

@@ -0,0 +1,75 @@
/*
averaged_perceptron_trainer.h
-----------------------------
Trainer for a generic averaged perceptron model.
The averaged perceptron uses a simple online error-driven
learning algorithm. Given some features and the true label,
it predicts the expected label under the current weights. If
it guesses correctly, there's nothing to do and it moves
on to the next example. If it predicted the wrong answer, it
makes the following updates to its weights:
weights[feature][predicted] -= 1.0
weights[feature][actual] += 1.0
This seems overly simplistic, and it is. This is the regular
perceptron update rule. On the more difficult cases, this model
would tend to overfit by spending a lot of time fiddling with the
weights for the few cases it got wrong and building the whole model
around those few cases. The averaged perceptron is one way to account
for this and build a more robust model.
Paper: [Collins, 2002] Discriminative Training Methods for Hidden Markov Models:
Theory and Experiments with Perceptron Algorithms
Link: http://www.cs.columbia.edu/~mcollins/papers/tagperc.pdf
*/
#ifndef AVERAGED_PERCEPTRON_TRAINER_H
#define AVERAGED_PERCEPTRON_TRAINER_H
#include <stdio.h>
#include <stdlib.h>
#include "averaged_perceptron.h"
#include "collections.h"
#include "string_utils.h"
#include "trie.h"
// One (feature, class) weight cell with bookkeeping for lazy averaging
typedef struct class_weight {
double value; // current weight
double total; // sum of value over all update ticks (numerator of the average)
uint64_t last_updated; // tick at which total was last brought up to date
} class_weight_t;
// Zero weight used when a (feature, class) pair is first touched
#define NULL_WEIGHT (class_weight_t){0.0, 0.0, 0}
// {class_id => class_weight_t}
KHASH_MAP_INIT_INT(class_weights, class_weight_t)
// {feature_id => {class_id => class_weight_t}}
KHASH_MAP_INIT_INT(feature_class_weights, khash_t(class_weights) *)
typedef struct averaged_perceptron_trainer {
uint32_t num_features; // distinct features registered in the trie
uint32_t num_classes; // distinct class labels seen so far
uint64_t num_updates; // global update tick, incremented once per weight-updating example
uint64_t num_errors; // misclassified training examples
trie_t *features; // feature string => feature_id
khash_t(str_uint32) *classes; // class string => class_id
cstring_array *class_strings; // class_id => class string (handed to the model on finalize)
// {feature_id => {class_id => class_weight_t}}
khash_t(feature_class_weights) *weights;
double_array *scores; // reusable per-class score buffer for predict
} averaged_perceptron_trainer_t;
// Allocates a new empty trainer; NULL on allocation failure
averaged_perceptron_trainer_t *averaged_perceptron_trainer_new(void);
// Predicts the best class id for a bag of string features under current weights
uint32_t averaged_perceptron_trainer_predict(averaged_perceptron_trainer_t *self, cstring_array *features);
// Predicts, then updates weights only when the prediction differs from label
bool averaged_perceptron_trainer_train_example(averaged_perceptron_trainer_t *trainer, cstring_array *features, char *label);
// Averages the weights into a compact model, consuming the trainer
averaged_perceptron_t *averaged_perceptron_trainer_finalize(averaged_perceptron_trainer_t *self);
// Frees the trainer and everything it still owns
void averaged_perceptron_trainer_destroy(averaged_perceptron_trainer_t *self);
#endif