[parser] moving feature printing to averaged perceptron tagger, taking advantage of trie prefix-sharing in features incorporating previous tags
This commit is contained in:
@@ -1691,23 +1691,9 @@ address_parser_response_t *address_parser_parse(char *address, char *language, c
|
|||||||
|
|
||||||
char *prev_label = NULL;
|
char *prev_label = NULL;
|
||||||
|
|
||||||
if (averaged_perceptron_tagger_predict(model, parser, context, context->features, context->prev_tag_features, context->prev2_tag_features, token_labels, &address_parser_features, tokenized_str)) {
|
if (averaged_perceptron_tagger_predict(model, parser, context, context->features, context->prev_tag_features, context->prev2_tag_features, token_labels, &address_parser_features, tokenized_str, parser->options.print_features)) {
|
||||||
response = address_parser_response_new();
|
response = address_parser_response_new();
|
||||||
|
|
||||||
if (parser->options.print_features) {
|
|
||||||
uint32_t fidx;
|
|
||||||
char *feature;
|
|
||||||
|
|
||||||
printf("{ ");
|
|
||||||
size_t num_features = cstring_array_num_strings(context->features);
|
|
||||||
cstring_array_foreach(context->features, fidx, feature, {
|
|
||||||
printf("%s", feature);
|
|
||||||
if (fidx < num_features - 1) printf(", ");
|
|
||||||
})
|
|
||||||
printf(" }\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
size_t num_strings = cstring_array_num_strings(tokenized_str->strings);
|
size_t num_strings = cstring_array_num_strings(tokenized_str->strings);
|
||||||
|
|
||||||
cstring_array *labels = cstring_array_new_size(num_strings);
|
cstring_array *labels = cstring_array_new_size(num_strings);
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
#include "averaged_perceptron_tagger.h"
|
#include "averaged_perceptron_tagger.h"
|
||||||
#include "log/log.h"
|
#include "log/log.h"
|
||||||
|
|
||||||
bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, ap_tagger_feature_function feature_function, tokenized_string_t *tokenized) {
|
bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, tagger_feature_function feature_function, tokenized_string_t *tokenized, bool print_features) {
|
||||||
|
|
||||||
// Keep two tags of history in training
|
// Keep two tags of history in training
|
||||||
char *prev = NULL;
|
char *prev = NULL;
|
||||||
@@ -37,16 +37,27 @@ bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagg
|
|||||||
|
|
||||||
if (prev) {
|
if (prev) {
|
||||||
cstring_array_foreach(prev_tag_features, fidx, feature, {
|
cstring_array_foreach(prev_tag_features, fidx, feature, {
|
||||||
feature_array_add(features, 2, (char *)feature, prev);
|
feature_array_add(features, 3, "prev", prev, (char *)feature);
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
if (prev2) {
|
if (prev2) {
|
||||||
cstring_array_foreach(prev2_tag_features, fidx, feature, {
|
cstring_array_foreach(prev2_tag_features, fidx, feature, {
|
||||||
feature_array_add(features, 3, (char *)feature, prev2, prev);
|
feature_array_add(features, 5, "prev2", prev2, "prev", prev, (char *)feature);
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (print_features) {
|
||||||
|
printf("{ ");
|
||||||
|
size_t num_features = cstring_array_num_strings(features);
|
||||||
|
cstring_array_foreach(features, fidx, feature, {
|
||||||
|
printf("%s", feature);
|
||||||
|
if (fidx < num_features - 1) printf(", ");
|
||||||
|
})
|
||||||
|
printf(" }\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
uint32_t guess = averaged_perceptron_predict(model, features);
|
uint32_t guess = averaged_perceptron_predict(model, features);
|
||||||
char *predicted = cstring_array_get_string(model->classes, guess);
|
char *predicted = cstring_array_get_string(model->classes, guess);
|
||||||
|
|
||||||
|
|||||||
@@ -19,14 +19,12 @@ the current value.
|
|||||||
|
|
||||||
#include "averaged_perceptron.h"
|
#include "averaged_perceptron.h"
|
||||||
#include "features.h"
|
#include "features.h"
|
||||||
|
#include "tagger.h"
|
||||||
#include "tokens.h"
|
#include "tokens.h"
|
||||||
|
|
||||||
#define START "START"
|
#define START "START"
|
||||||
#define START2 "START2"
|
#define START2 "START2"
|
||||||
|
|
||||||
// Arguments: tagger, context, tokenized str, index
|
bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, tagger_feature_function feature_function, tokenized_string_t *tokenized, bool print_features);
|
||||||
typedef bool (*ap_tagger_feature_function)(void *, void *, tokenized_string_t *, uint32_t);
|
|
||||||
|
|
||||||
bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, ap_tagger_feature_function feature_function, tokenized_string_t *tokenized);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
Reference in New Issue
Block a user