From 4c03e563e045a16fd96db531828ca6220c983502 Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 8 Mar 2017 15:40:12 -0500 Subject: [PATCH] [parser] for the min updates method to work, the features that have not yet reached the min_updates threshold also need to be ignored when scoring; that way the model has to perform without those features and should make more updates if they're relevant --- src/averaged_perceptron_trainer.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/averaged_perceptron_trainer.c b/src/averaged_perceptron_trainer.c index a82c3721..c150af67 100644 --- a/src/averaged_perceptron_trainer.c +++ b/src/averaged_perceptron_trainer.c @@ -316,20 +316,27 @@ uint32_t averaged_perceptron_trainer_predict(averaged_perceptron_trainer_t *self double_array_zero(scores->a, scores->n); + uint64_t *update_counts = self->update_counts->a; + cstring_array_foreach(features, i, feature, { if (!averaged_perceptron_trainer_get_feature_id(self, feature, &feature_id, add_if_missing)) { continue; } - weights = averaged_perceptron_trainer_get_class_weights(self, feature_id, add_if_missing); + uint64_t update_count = update_counts[feature_id]; + bool keep_feature = update_count >= self->min_updates; - if (weights == NULL) { - continue; + if (keep_feature) { + weights = averaged_perceptron_trainer_get_class_weights(self, feature_id, add_if_missing); + + if (weights == NULL) { + continue; + } + + kh_foreach(weights, class_id, weight, { + scores->a[class_id] += weight.value; + }) } - - kh_foreach(weights, class_id, weight, { - scores->a[class_id] += weight.value; - }) }) int64_t max_score = double_array_argmax(scores->a, scores->n);