From ef098fd2e79c1f915c0094dc2b9b7f379abc85bd Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 24 Nov 2017 15:42:50 -0500 Subject: [PATCH] [numex] implementing the numex concat_only_if_number left context, which helps in the case of e.g. Columbus, OH in #271 --- src/numex.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/numex.c b/src/numex.c index 107768fa..b8a0f0e7 100644 --- a/src/numex.c +++ b/src/numex.c @@ -709,6 +709,8 @@ numex_result_array *convert_numeric_expressions(char *str, char *lang) { bool possible_complete_token = false; bool complete_token = false; + bool prev_rule_was_number = false; + log_debug("Converting numex for str=%s, lang=%s\n", str, lang); while (idx < len) { @@ -851,8 +853,27 @@ numex_result_array *convert_numeric_expressions(char *str, char *lang) { number_finished = true; advance_index = false; state = start_state; + prev_rule_was_number = true; rule = prev_rule = NUMEX_NULL_RULE; prev_result_len = 0; + } else if (rule.left_context_type == NUMEX_LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER && !prev_rule_was_number) { + log_debug("LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER, no context\n"); + prev_rule = rule; + last_was_separator = false; + rule = NUMEX_NULL_RULE; + prev_result_len = result.len; + result = NULL_NUMEX_RESULT; + stopword_phrase = NULL_PHRASE; + state.state = NUMEX_SEARCH_STATE_SKIP_TOKEN; + last_was_stopword = false; + continue; + } else if (rule.left_context_type == NUMEX_LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER && prev_rule_was_number) { + last_was_separator = false; + number_finished = true; + state = start_state; + last_was_stopword = false; + prev_rule_was_number = true; + log_debug("LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER, value = %" PRId64 "\n", result.value); } else if (rule.rule_type != NUMEX_STOPWORD) { result.value = rule.value; log_debug("Got number, result.value=%" PRId64 "\n", result.value); @@ -864,6 +885,9 @@ numex_result_array *convert_numeric_expressions(char *str, char *lang) { continue; } + + prev_rule_was_number = prev_rule_was_number || prev_rule.rule_type != NUMEX_NULL; + if (rule.rule_type != NUMEX_STOPWORD) { prev_rule = rule; prev_result_len = result.len; @@ -895,7 +919,6 @@ numex_result_array *convert_numeric_expressions(char *str, char *lang) { if (prev_rule.rule_type != NUMEX_NULL) { number_finished = true; } - } if (!set_rule) { @@ -1137,7 +1160,6 @@ char *replace_numeric_expressions(char *str, char *lang) { char_array_append(replacement, ordinal_suffix); } } - start = result.start + result.len; }