[parsing] Using the entire phrase as the i-th word

This commit is contained in:
Al
2015-12-07 01:19:38 -05:00
parent 8186e2606e
commit cfd0dc69f2

View File

@@ -702,8 +702,14 @@ bool address_parser_features(void *self, void *ctx, tokenized_string_t *tokenize
log_debug("word not in vocab: %s\n", original_word);
word = (token.type != NUMERIC && token.type != IDEOGRAPHIC_NUMBER) ? UNKNOWN_WORD : UNKNOWN_NUMERIC;
}
} else if (component_phrase_string != NULL) {
word = component_phrase_string;
} else if (geo_phrase_string != NULL) {
word = geo_phrase_string;
}
if (prev != NULL && last_index == i - 1) {
// Previous tag and current word
feature_array_add(features, 3, "i-1 tag+word", prev, word);
@@ -727,7 +733,10 @@ bool address_parser_features(void *self, void *ctx, tokenized_string_t *tokenize
// Previous word
feature_array_add(features, 2, "i-1 word", prev_word);
feature_array_add(features, 3, "i-1 tag+i-1 word", prev, prev_word);
if (last_index == i - 1) {
feature_array_add(features, 3, "i-1 tag+i-1 word", prev, prev_word);
}
// Previous word and current word
feature_array_add(features, 3, "i-1 word+word", prev_word, word);