diff --git a/src/address_parser.c b/src/address_parser.c index 906d13a6..c486de84 100644 --- a/src/address_parser.c +++ b/src/address_parser.c @@ -1691,23 +1691,9 @@ address_parser_response_t *address_parser_parse(char *address, char *language, c char *prev_label = NULL; - if (averaged_perceptron_tagger_predict(model, parser, context, context->features, context->prev_tag_features, context->prev2_tag_features, token_labels, &address_parser_features, tokenized_str)) { + if (averaged_perceptron_tagger_predict(model, parser, context, context->features, context->prev_tag_features, context->prev2_tag_features, token_labels, &address_parser_features, tokenized_str, parser->options.print_features)) { response = address_parser_response_new(); - if (parser->options.print_features) { - uint32_t fidx; - char *feature; - - printf("{ "); - size_t num_features = cstring_array_num_strings(context->features); - cstring_array_foreach(context->features, fidx, feature, { - printf("%s", feature); - if (fidx < num_features - 1) printf(", "); - }) - printf(" }\n"); - } - - size_t num_strings = cstring_array_num_strings(tokenized_str->strings); cstring_array *labels = cstring_array_new_size(num_strings); diff --git a/src/averaged_perceptron_tagger.c b/src/averaged_perceptron_tagger.c index b4b66588..6aa6d47a 100644 --- a/src/averaged_perceptron_tagger.c +++ b/src/averaged_perceptron_tagger.c @@ -1,7 +1,7 @@ #include "averaged_perceptron_tagger.h" #include "log/log.h" -bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, ap_tagger_feature_function feature_function, tokenized_string_t *tokenized) { +bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, tagger_feature_function feature_function, tokenized_string_t *tokenized, bool print_features) { // Keep two tags of history in training char *prev = NULL; @@ -37,16 +37,27 @@ bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagg if (prev) { cstring_array_foreach(prev_tag_features, fidx, feature, { - feature_array_add(features, 2, (char *)feature, prev); + feature_array_add(features, 3, "prev", prev, (char *)feature); }) } if (prev2) { cstring_array_foreach(prev2_tag_features, fidx, feature, { - feature_array_add(features, 3, (char *)feature, prev2, prev); + feature_array_add(features, 5, "prev2", prev2, "prev", prev, (char *)feature); }) } + if (print_features) { + printf("{ "); + size_t num_features = cstring_array_num_strings(features); + cstring_array_foreach(features, fidx, feature, { + printf("%s", feature); + if (fidx < num_features - 1) printf(", "); + }) + printf(" }\n"); + } + + uint32_t guess = averaged_perceptron_predict(model, features); char *predicted = cstring_array_get_string(model->classes, guess); diff --git a/src/averaged_perceptron_tagger.h b/src/averaged_perceptron_tagger.h index eae2e40a..7a43a045 100644 --- a/src/averaged_perceptron_tagger.h +++ b/src/averaged_perceptron_tagger.h @@ -19,14 +19,12 @@ the current value. #include "averaged_perceptron.h" #include "features.h" +#include "tagger.h" #include "tokens.h" #define START "START" #define START2 "START2" -// Arguments: tagger, context, tokenized str, index -typedef bool (*ap_tagger_feature_function)(void *, void *, tokenized_string_t *, uint32_t); - -bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, ap_tagger_feature_function feature_function, tokenized_string_t *tokenized); +bool averaged_perceptron_tagger_predict(averaged_perceptron_t *model, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *prev2_tag_features, cstring_array *labels, tagger_feature_function feature_function, tokenized_string_t *tokenized, bool print_features); #endif \ No newline at end of file