[parser] moving feature printing into the averaged perceptron tagger, taking advantage of trie prefix-sharing in features incorporating previous tags
This commit is contained in:
@@ -1691,23 +1691,9 @@ address_parser_response_t *address_parser_parse(char *address, char *language, c
|
||||
|
||||
char *prev_label = NULL;
|
||||
|
||||
if (averaged_perceptron_tagger_predict(model, parser, context, context->features, context->prev_tag_features, context->prev2_tag_features, token_labels, &address_parser_features, tokenized_str)) {
|
||||
if (averaged_perceptron_tagger_predict(model, parser, context, context->features, context->prev_tag_features, context->prev2_tag_features, token_labels, &address_parser_features, tokenized_str, parser->options.print_features)) {
|
||||
response = address_parser_response_new();
|
||||
|
||||
if (parser->options.print_features) {
|
||||
uint32_t fidx;
|
||||
char *feature;
|
||||
|
||||
printf("{ ");
|
||||
size_t num_features = cstring_array_num_strings(context->features);
|
||||
cstring_array_foreach(context->features, fidx, feature, {
|
||||
printf("%s", feature);
|
||||
if (fidx < num_features - 1) printf(", ");
|
||||
})
|
||||
printf(" }\n");
|
||||
}
|
||||
|
||||
|
||||
size_t num_strings = cstring_array_num_strings(tokenized_str->strings);
|
||||
|
||||
cstring_array *labels = cstring_array_new_size(num_strings);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#include "averaged_perceptron_tagger.h"
|
||||
#include "log/log.h"
|
||||
|
||||
bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, ap_tagger_feature_function feature_function, tokenized_string_t *tokenized) {
|
||||
bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, tagger_feature_function feature_function, tokenized_string_t *tokenized, bool print_features) {
|
||||
|
||||
// Keep two tags of history in training
|
||||
char *prev = NULL;
|
||||
@@ -37,16 +37,27 @@ bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagg
|
||||
|
||||
if (prev) {
|
||||
cstring_array_foreach(prev_tag_features, fidx, feature, {
|
||||
feature_array_add(features, 2, (char *)feature, prev);
|
||||
feature_array_add(features, 3, "prev", prev, (char *)feature);
|
||||
})
|
||||
}
|
||||
|
||||
if (prev2) {
|
||||
cstring_array_foreach(prev2_tag_features, fidx, feature, {
|
||||
feature_array_add(features, 3, (char *)feature, prev2, prev);
|
||||
feature_array_add(features, 5, "prev2", prev2, "prev", prev, (char *)feature);
|
||||
})
|
||||
}
|
||||
|
||||
if (print_features) {
|
||||
printf("{ ");
|
||||
size_t num_features = cstring_array_num_strings(features);
|
||||
cstring_array_foreach(features, fidx, feature, {
|
||||
printf("%s", feature);
|
||||
if (fidx < num_features - 1) printf(", ");
|
||||
})
|
||||
printf(" }\n");
|
||||
}
|
||||
|
||||
|
||||
uint32_t guess = averaged_perceptron_predict(model, features);
|
||||
char *predicted = cstring_array_get_string(model->classes, guess);
|
||||
|
||||
|
||||
@@ -19,14 +19,12 @@ the current value.
|
||||
|
||||
#include "averaged_perceptron.h"
|
||||
#include "features.h"
|
||||
#include "tagger.h"
|
||||
#include "tokens.h"
|
||||
|
||||
#define START "START"
|
||||
#define START2 "START2"
|
||||
|
||||
// Arguments: tagger, context, tokenized str, index
|
||||
typedef bool (*ap_tagger_feature_function)(void *, void *, tokenized_string_t *, uint32_t);
|
||||
|
||||
bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, ap_tagger_feature_function feature_function, tokenized_string_t *tokenized);
|
||||
bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, tagger_feature_function feature_function, tokenized_string_t *tokenized, bool print_features);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user