From c6af5cc0713db5fd53390fb5e7b87b0f2b1e7112 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 28 Jul 2016 15:19:48 -0400 Subject: [PATCH] [parser] Adding country_region label to parser as a boundary component --- src/address_parser.c | 5 +++++ src/address_parser.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/src/address_parser.c b/src/address_parser.c index f3cf95d8..4c4b90f2 100644 --- a/src/address_parser.c +++ b/src/address_parser.c @@ -814,6 +814,7 @@ bool address_parser_features(void *self, void *ctx, tokenized_string_t *tokenize add_phrase_features(features, component_phrase_types, ADDRESS_COMPONENT_STATE_DISTRICT, "state_district", component_phrase_string, prev2, prev); add_phrase_features(features, component_phrase_types, ADDRESS_COMPONENT_STATE, "state", component_phrase_string, prev2, prev); add_phrase_features(features, component_phrase_types, ADDRESS_COMPONENT_POSTAL_CODE, "postal_code", component_phrase_string, prev2, prev); + add_phrase_features(features, component_phrase_types, ADDRESS_COMPONENT_COUNTRY_REGION, "country_region", component_phrase_string, prev2, prev); add_phrase_features(features, component_phrase_types, ADDRESS_COMPONENT_COUNTRY, "country", component_phrase_string, prev2, prev); } @@ -823,6 +824,8 @@ bool address_parser_features(void *self, void *ctx, tokenized_string_t *tokenize feature_array_add(features, 2, "commonly state", component_phrase_string); } else if (most_common == ADDRESS_PARSER_COUNTRY) { feature_array_add(features, 2, "commonly country", component_phrase_string); + } else if (most_common == ADDRESS_PARSER_COUNTRY_REGION) { + feature_array_add(features, 2, "commonly country_region", component_phrase_string); } else if (most_common == ADDRESS_PARSER_STATE_DISTRICT) { feature_array_add(features, 2, "commonly state_district", component_phrase_string); } else if (most_common == ADDRESS_PARSER_ISLAND) { @@ -1045,6 +1048,8 @@ address_parser_response_t *address_parser_parse(char *address, char *language, c label = 
strdup(ADDRESS_PARSER_LABEL_STATE); } else if (most_common == ADDRESS_PARSER_COUNTRY) { label = strdup(ADDRESS_PARSER_LABEL_COUNTRY); + } else if (most_common == ADDRESS_PARSER_COUNTRY_REGION) { + label = strdup(ADDRESS_PARSER_LABEL_COUNTRY_REGION); } else if (most_common == ADDRESS_PARSER_STATE_DISTRICT) { label = strdup(ADDRESS_PARSER_LABEL_STATE_DISTRICT); } else if (most_common == ADDRESS_PARSER_SUBURB) { diff --git a/src/address_parser.h b/src/address_parser.h index 5fcd4e98..d813077c 100644 --- a/src/address_parser.h +++ b/src/address_parser.h @@ -95,6 +95,7 @@ typedef enum { ADDRESS_PARSER_ISLAND, ADDRESS_PARSER_STATE, ADDRESS_PARSER_POSTAL_CODE, + ADDRESS_PARSER_COUNTRY_REGION, ADDRESS_PARSER_COUNTRY, NUM_ADDRESS_PARSER_TYPES } address_parser_components; @@ -109,6 +110,7 @@ typedef enum { #define ADDRESS_PARSER_LABEL_ISLAND "island" #define ADDRESS_PARSER_LABEL_STATE "state" #define ADDRESS_PARSER_LABEL_POSTAL_CODE "postcode" +#define ADDRESS_PARSER_LABEL_COUNTRY_REGION "country_region" #define ADDRESS_PARSER_LABEL_COUNTRY "country" typedef union address_parser_types {