[expand] Adding an exception for a few types of special punctuation (ampersand, plus sign, pound sign), which should be left in the original string and separated by whitespace. Closes #84. Closes #85.

This commit is contained in:
Al
2016-07-17 15:02:33 -04:00
parent d8a0e19a32
commit 83381e9d8a
2 changed files with 6 additions and 4 deletions

View File

@@ -238,13 +238,13 @@ static string_tree_t *add_string_alternatives(char *str, normalize_options_t opt
for (int j = start; j < end; j++) {
log_debug("Adding token %d\n", j);
token_t token = tokens->a[j];
if (is_punctuation(token.type)) {
if (is_punctuation(token.type) && !is_special_punctuation(token.type)) {
last_was_punctuation = true;
continue;
}
if (token.type != WHITESPACE) {
if (phrase.start > 0 && last_was_punctuation && !last_added_was_whitespace) {
if ((phrase.start > 0 && last_was_punctuation && !last_added_was_whitespace) || (is_special_punctuation(token.type))) {
string_tree_add_string(tree, " ");
string_tree_finalize_token(tree);
}
@@ -260,7 +260,7 @@ static string_tree_t *add_string_alternatives(char *str, normalize_options_t opt
continue;
}
last_was_punctuation = false;
last_was_punctuation = is_special_punctuation(token.type);
string_tree_finalize_token(tree);
}
@@ -787,7 +787,7 @@ static inline void add_normalized_strings_tokenized(string_tree_t *tree, char *s
token_t token = tokens->a[i];
bool have_phrase = false;
if (is_special_token(token.type)) {
if (is_special_token(token.type) || is_special_punctuation(token.type)) {
string_tree_add_string_len(tree, str + token.offset, token.len);
string_tree_finalize_token(tree);
continue;

View File

@@ -69,6 +69,8 @@
/* Token-type predicates. Each expands to an int-valued expression (0 or 1).
 * NOTE: the argument can be evaluated more than once, so do not pass an
 * expression with side effects. */

/* Punctuation: any type in the half-open range [PERIOD, OTHER). */
#define is_punctuation(tok_type) \
    (PERIOD <= (tok_type) && (tok_type) < OTHER)

/* Special punctuation: AMPERSAND, PLUS or POUND. */
#define is_special_punctuation(tok_type) \
    (AMPERSAND == (tok_type) || PLUS == (tok_type) || POUND == (tok_type))

/* Special tokens: EMAIL, URL, US_PHONE or INTL_PHONE. */
#define is_special_token(tok_type) \
    (EMAIL == (tok_type) || URL == (tok_type) || \
     US_PHONE == (tok_type) || INTL_PHONE == (tok_type))

/* Whitespace: exactly the WHITESPACE type. */
#define is_whitespace(tok_type) (WHITESPACE == (tok_type))