[fix] Handle multiple separators (like the parentheses used in https://www.openstreetmap.org/node/244081449). Otherwise, bad trie entries are created, which affect more than just that toponym.

This commit is contained in:
Al
2017-03-18 06:06:56 -04:00
parent c67678087f
commit 3b9b43f1b5
2 changed files with 2 additions and 0 deletions

View File

@@ -1664,6 +1664,7 @@ address_parser_response_t *address_parser_parse(char *address, char *language, c
for (size_t i = 0; i < tokens->n; i++) {
token_t token = tokens->a[i];
if (ADDRESS_PARSER_IS_SEPARATOR(token.type)) {
uint32_array_pop(context->separators);
uint32_array_push(context->separators, ADDRESS_SEPARATOR_FIELD_INTERNAL);
continue;
} else if (ADDRESS_PARSER_IS_IGNORABLE(token.type)) {

View File

@@ -136,6 +136,7 @@ bool address_parser_data_set_tokenize_line(address_parser_data_set_t *self, char
if (token.len == expected_len) {
if (ADDRESS_PARSER_IS_SEPARATOR(token.type)) {
uint32_array_pop(separators);
uint32_array_push(separators, ADDRESS_SEPARATOR_FIELD_INTERNAL);
continue;
} else if (ADDRESS_PARSER_IS_IGNORABLE(token.type)) {