[parser] moving feature printing to the averaged perceptron tagger, taking advantage of trie prefix-sharing in features incorporating previous tags

This commit is contained in:
Al
2017-03-06 20:32:50 -05:00
parent 839a13577d
commit 754f22c79a
3 changed files with 17 additions and 22 deletions

View File

@@ -1691,23 +1691,9 @@ address_parser_response_t *address_parser_parse(char *address, char *language, c
char *prev_label = NULL;
if (averaged_perceptron_tagger_predict(model, parser, context, context->features, context->prev_tag_features, context->prev2_tag_features, token_labels, &address_parser_features, tokenized_str)) {
if (averaged_perceptron_tagger_predict(model, parser, context, context->features, context->prev_tag_features, context->prev2_tag_features, token_labels, &address_parser_features, tokenized_str, parser->options.print_features)) {
response = address_parser_response_new();
if (parser->options.print_features) {
uint32_t fidx;
char *feature;
printf("{ ");
size_t num_features = cstring_array_num_strings(context->features);
cstring_array_foreach(context->features, fidx, feature, {
printf("%s", feature);
if (fidx < num_features - 1) printf(", ");
})
printf(" }\n");
}
size_t num_strings = cstring_array_num_strings(tokenized_str->strings);
cstring_array *labels = cstring_array_new_size(num_strings);

View File

@@ -1,7 +1,7 @@
#include "averaged_perceptron_tagger.h"
#include "log/log.h"
bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, ap_tagger_feature_function feature_function, tokenized_string_t *tokenized) {
bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, tagger_feature_function feature_function, tokenized_string_t *tokenized, bool print_features) {
// Keep two tags of history in training
char *prev = NULL;
@@ -37,16 +37,27 @@ bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagg
if (prev) {
cstring_array_foreach(prev_tag_features, fidx, feature, {
feature_array_add(features, 2, (char *)feature, prev);
feature_array_add(features, 3, "prev", prev, (char *)feature);
})
}
if (prev2) {
cstring_array_foreach(prev2_tag_features, fidx, feature, {
feature_array_add(features, 3, (char *)feature, prev2, prev);
feature_array_add(features, 5, "prev2", prev2, "prev", prev, (char *)feature);
})
}
if (print_features) {
printf("{ ");
size_t num_features = cstring_array_num_strings(features);
cstring_array_foreach(features, fidx, feature, {
printf("%s", feature);
if (fidx < num_features - 1) printf(", ");
})
printf(" }\n");
}
uint32_t guess = averaged_perceptron_predict(model, features);
char *predicted = cstring_array_get_string(model->classes, guess);

View File

@@ -19,14 +19,12 @@ the current value.
#include "averaged_perceptron.h"
#include "features.h"
#include "tagger.h"
#include "tokens.h"
#define START "START"
#define START2 "START2"
// Arguments: tagger, context, tokenized str, index
typedef bool (*ap_tagger_feature_function)(void *, void *, tokenized_string_t *, uint32_t);
bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, ap_tagger_feature_function feature_function, tokenized_string_t *tokenized);
bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, tagger_feature_function feature_function, tokenized_string_t *tokenized, bool print_features);
#endif