[parser] Ignore language/country options explicitly in the parser. The purpose of these options is to be able to create language-specific/country-specific models at some point; they shouldn't be used in the global model.

This commit is contained in:
Al
2016-07-06 14:56:46 -04:00
parent 58a5dbe7e0
commit e816b4f77e

View File

@@ -834,6 +834,22 @@ address_parser_response_t *address_parser_parse(char *address, char *language, c
uint32_array_push(context->separators, ADDRESS_SEPARATOR_NONE);
}
// This parser was trained without knowing language/country.
// If at some point we build country-specific/language-specific
// parsers, these parameters could be used to select a model.
// The language parameter does technically control which dictionaries
// are searched at the street level. With a phrase like "de", which can
// be either the German country code or a stopword in Spanish, it's
// possible that, even when it's being used as a country code, the
// street-level and admin-level phrase features are working together
// as a kind of intercept. Depriving the model of the street-level
// phrase features by passing in a known language may change the
// decision threshold, so explicitly ignore these options until there's
// a use for them (country-specific or language-specific parser models).
language = NULL;
country = NULL;
address_parser_context_fill(context, parser, tokenized_str, language, country);
address_parser_response_t *response = NULL;