From 97906c86a8d44df5b2a810e3fdded7fc7adec76d Mon Sep 17 00:00:00 2001 From: Al Date: Sat, 19 Dec 2015 02:10:41 -0500 Subject: [PATCH] [fix] Strip punctuation in final output in cases where there are no expansions --- src/libpostal.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/libpostal.c b/src/libpostal.c index 7de6a79c..d61f3985 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -353,8 +353,28 @@ string_tree_t *add_string_alternatives(char *str, normalize_options_t options) { } else { - string_tree_add_string(tree, str); - string_tree_finalize_token(tree); + + for (int j = 0; j < tokens->n; j++) { + token_t token = tokens->a[j]; + if (is_punctuation(token.type)) { + last_was_punctuation = true; + continue; + } + + if (token.type != WHITESPACE) { + if (last_was_punctuation) { + string_tree_add_string(tree, " "); + string_tree_finalize_token(tree); + } + + string_tree_add_string_len(tree, str + token.offset, token.len); + } else { + string_tree_add_string(tree, " "); + } + + last_was_punctuation = false; + string_tree_finalize_token(tree); + } } if (phrases != NULL) {