From e816b4f77e8c6a7f35207ca77282ffab3712c5b6 Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 6 Jul 2016 14:56:46 -0400 Subject: [PATCH] [parser] Ignore language/country options explicitly in the parser. The purpose of these options is to be able to create language-specific/country-specific models at some point; they shouldn't be used in the global model --- src/address_parser.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/address_parser.c b/src/address_parser.c index db486410..507bee2a 100644 --- a/src/address_parser.c +++ b/src/address_parser.c @@ -834,6 +834,22 @@ address_parser_response_t *address_parser_parse(char *address, char *language, c uint32_array_push(context->separators, ADDRESS_SEPARATOR_NONE); } + // This parser was trained without knowing language/country. + // If at some point we build country-specific/language-specific + // parsers, these parameters could be used to select a model. + // The language parameter does technically control which dictionaries + // are searched at the street level. It's possible with e.g. a phrase + // like "de", which can be either the German country code or a stopword + // in Spanish, that even in the case where it's being used as a country code, + // it's possible that both the street-level and admin-level phrase features + // may be working together as a kind of intercept. Depriving the model + // of the street-level phrase features by passing in a known language + // may change the decision threshold so explicitly ignore these + // options until there's a use for them (country-specific or language-specific + // parser models). + + language = NULL; + country = NULL; address_parser_context_fill(context, parser, tokenized_str, language, country); address_parser_response_t *response = NULL;