[parser] only insert a space between output tokens when the normalized input had whitespace (or other ignorable tokens) between them
This commit is contained in:
@@ -1712,6 +1712,7 @@ address_parser_response_t *address_parser_parse(char *address, char *language, c
|
|||||||
|
|
||||||
// If the whole input string is a single known phrase at the SUBURB level or higher, bypass sequence prediction altogether
|
// If the whole input string is a single known phrase at the SUBURB level or higher, bypass sequence prediction altogether
|
||||||
phrase_t only_phrase = NULL_PHRASE;
|
phrase_t only_phrase = NULL_PHRASE;
|
||||||
|
token_t token, prev_token;
|
||||||
bool is_postal = false;
|
bool is_postal = false;
|
||||||
if (context->component_phrases->n == 1) {
|
if (context->component_phrases->n == 1) {
|
||||||
only_phrase = context->component_phrases->a[0];
|
only_phrase = context->component_phrases->a[0];
|
||||||
@@ -1784,8 +1785,11 @@ address_parser_response_t *address_parser_parse(char *address, char *language, c
|
|||||||
cstring_array *labels = cstring_array_new_size(num_strings);
|
cstring_array *labels = cstring_array_new_size(num_strings);
|
||||||
cstring_array *components = cstring_array_new_size(strlen(address) + num_strings);
|
cstring_array *components = cstring_array_new_size(strlen(address) + num_strings);
|
||||||
|
|
||||||
|
token_t *tokens = tokenized_str->tokens->a;
|
||||||
|
|
||||||
for (size_t i = 0; i < num_strings; i++) {
|
for (size_t i = 0; i < num_strings; i++) {
|
||||||
char *str = tokenized_string_get_token(tokenized_str, i);
|
char *str = tokenized_string_get_token(tokenized_str, i);
|
||||||
|
|
||||||
char *label = cstring_array_get_string(token_labels, i);
|
char *label = cstring_array_get_string(token_labels, i);
|
||||||
|
|
||||||
if (prev_label == NULL || strcmp(label, prev_label) != 0) {
|
if (prev_label == NULL || strcmp(label, prev_label) != 0) {
|
||||||
@@ -1795,7 +1799,11 @@ address_parser_response_t *address_parser_parse(char *address, char *language, c
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (prev_label != NULL && strcmp(label, prev_label) == 0) {
|
if (prev_label != NULL && strcmp(label, prev_label) == 0) {
|
||||||
cstring_array_cat_string(components, " ");
|
token = tokens[i];
|
||||||
|
prev_token = tokens[i - 1];
|
||||||
|
if (token.offset > prev_token.offset + prev_token.len) {
|
||||||
|
cstring_array_cat_string(components, " ");
|
||||||
|
}
|
||||||
cstring_array_cat_string(components, str);
|
cstring_array_cat_string(components, str);
|
||||||
} else {
|
} else {
|
||||||
cstring_array_append_string(components, str);
|
cstring_array_append_string(components, str);
|
||||||
|
|||||||
Reference in New Issue
Block a user