From 3b9b43f1b5251b993900d3c94aa1cbf9c14d5c07 Mon Sep 17 00:00:00 2001 From: Al Date: Sat, 18 Mar 2017 06:06:56 -0400 Subject: [PATCH] [fix] handle multiple separators (like parens used in https://www.openstreetmap.org/node/244081449). Creates bad trie entries otherwise, which affect more than just that toponym --- src/address_parser.c | 1 + src/address_parser_io.c | 1 + 2 files changed, 2 insertions(+) diff --git a/src/address_parser.c b/src/address_parser.c index bd825ff5..d499b328 100644 --- a/src/address_parser.c +++ b/src/address_parser.c @@ -1664,6 +1664,7 @@ address_parser_response_t *address_parser_parse(char *address, char *language, c for (size_t i = 0; i < tokens->n; i++) { token_t token = tokens->a[i]; if (ADDRESS_PARSER_IS_SEPARATOR(token.type)) { + uint32_array_pop(context->separators); uint32_array_push(context->separators, ADDRESS_SEPARATOR_FIELD_INTERNAL); continue; } else if (ADDRESS_PARSER_IS_IGNORABLE(token.type)) { diff --git a/src/address_parser_io.c b/src/address_parser_io.c index ca92f021..494fd5bd 100644 --- a/src/address_parser_io.c +++ b/src/address_parser_io.c @@ -136,6 +136,7 @@ bool address_parser_data_set_tokenize_line(address_parser_data_set_t *self, char if (token.len == expected_len) { if (ADDRESS_PARSER_IS_SEPARATOR(token.type)) { + uint32_array_pop(separators); uint32_array_push(separators, ADDRESS_SEPARATOR_FIELD_INTERNAL); continue; } else if (ADDRESS_PARSER_IS_IGNORABLE(token.type)) {