[parser] Adding country_region label to parser as a boundary component
This commit is contained in:
@@ -814,6 +814,7 @@ bool address_parser_features(void *self, void *ctx, tokenized_string_t *tokenize
|
|||||||
add_phrase_features(features, component_phrase_types, ADDRESS_COMPONENT_STATE_DISTRICT, "state_district", component_phrase_string, prev2, prev);
|
add_phrase_features(features, component_phrase_types, ADDRESS_COMPONENT_STATE_DISTRICT, "state_district", component_phrase_string, prev2, prev);
|
||||||
add_phrase_features(features, component_phrase_types, ADDRESS_COMPONENT_STATE, "state", component_phrase_string, prev2, prev);
|
add_phrase_features(features, component_phrase_types, ADDRESS_COMPONENT_STATE, "state", component_phrase_string, prev2, prev);
|
||||||
add_phrase_features(features, component_phrase_types, ADDRESS_COMPONENT_POSTAL_CODE, "postal_code", component_phrase_string, prev2, prev);
|
add_phrase_features(features, component_phrase_types, ADDRESS_COMPONENT_POSTAL_CODE, "postal_code", component_phrase_string, prev2, prev);
|
||||||
|
add_phrase_features(features, component_phrase_types, ADDRESS_COMPONENT_COUNTRY_REGION, "country_region", component_phrase_string, prev2, prev);
|
||||||
add_phrase_features(features, component_phrase_types, ADDRESS_COMPONENT_COUNTRY, "country", component_phrase_string, prev2, prev);
|
add_phrase_features(features, component_phrase_types, ADDRESS_COMPONENT_COUNTRY, "country", component_phrase_string, prev2, prev);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -823,6 +824,8 @@ bool address_parser_features(void *self, void *ctx, tokenized_string_t *tokenize
|
|||||||
feature_array_add(features, 2, "commonly state", component_phrase_string);
|
feature_array_add(features, 2, "commonly state", component_phrase_string);
|
||||||
} else if (most_common == ADDRESS_PARSER_COUNTRY) {
|
} else if (most_common == ADDRESS_PARSER_COUNTRY) {
|
||||||
feature_array_add(features, 2, "commonly country", component_phrase_string);
|
feature_array_add(features, 2, "commonly country", component_phrase_string);
|
||||||
|
} else if (most_common == ADDRESS_PARSER_COUNTRY_REGION) {
|
||||||
|
feature_array_add(features, 2, "commonly country_region", component_phrase_string);
|
||||||
} else if (most_common == ADDRESS_PARSER_STATE_DISTRICT) {
|
} else if (most_common == ADDRESS_PARSER_STATE_DISTRICT) {
|
||||||
feature_array_add(features, 2, "commonly state_district", component_phrase_string);
|
feature_array_add(features, 2, "commonly state_district", component_phrase_string);
|
||||||
} else if (most_common == ADDRESS_PARSER_ISLAND) {
|
} else if (most_common == ADDRESS_PARSER_ISLAND) {
|
||||||
@@ -1045,6 +1048,8 @@ address_parser_response_t *address_parser_parse(char *address, char *language, c
|
|||||||
label = strdup(ADDRESS_PARSER_LABEL_STATE);
|
label = strdup(ADDRESS_PARSER_LABEL_STATE);
|
||||||
} else if (most_common == ADDRESS_PARSER_COUNTRY) {
|
} else if (most_common == ADDRESS_PARSER_COUNTRY) {
|
||||||
label = strdup(ADDRESS_PARSER_LABEL_COUNTRY);
|
label = strdup(ADDRESS_PARSER_LABEL_COUNTRY);
|
||||||
|
} else if (most_common == ADDRESS_PARSER_COUNTRY_REGION) {
|
||||||
|
label = strdup(ADDRESS_PARSER_LABEL_COUNTRY_REGION);
|
||||||
} else if (most_common == ADDRESS_PARSER_STATE_DISTRICT) {
|
} else if (most_common == ADDRESS_PARSER_STATE_DISTRICT) {
|
||||||
label = strdup(ADDRESS_PARSER_LABEL_STATE_DISTRICT);
|
label = strdup(ADDRESS_PARSER_LABEL_STATE_DISTRICT);
|
||||||
} else if (most_common == ADDRESS_PARSER_SUBURB) {
|
} else if (most_common == ADDRESS_PARSER_SUBURB) {
|
||||||
|
|||||||
@@ -95,6 +95,7 @@ typedef enum {
|
|||||||
ADDRESS_PARSER_ISLAND,
|
ADDRESS_PARSER_ISLAND,
|
||||||
ADDRESS_PARSER_STATE,
|
ADDRESS_PARSER_STATE,
|
||||||
ADDRESS_PARSER_POSTAL_CODE,
|
ADDRESS_PARSER_POSTAL_CODE,
|
||||||
|
ADDRESS_PARSER_COUNTRY_REGION,
|
||||||
ADDRESS_PARSER_COUNTRY,
|
ADDRESS_PARSER_COUNTRY,
|
||||||
NUM_ADDRESS_PARSER_TYPES
|
NUM_ADDRESS_PARSER_TYPES
|
||||||
} address_parser_components;
|
} address_parser_components;
|
||||||
@@ -109,6 +110,7 @@ typedef enum {
|
|||||||
#define ADDRESS_PARSER_LABEL_ISLAND "island"
|
#define ADDRESS_PARSER_LABEL_ISLAND "island"
|
||||||
#define ADDRESS_PARSER_LABEL_STATE "state"
|
#define ADDRESS_PARSER_LABEL_STATE "state"
|
||||||
#define ADDRESS_PARSER_LABEL_POSTAL_CODE "postcode"
|
#define ADDRESS_PARSER_LABEL_POSTAL_CODE "postcode"
|
||||||
|
/* Label string for the country_region component (selected when the most common type is ADDRESS_PARSER_COUNTRY_REGION). */
#define ADDRESS_PARSER_LABEL_COUNTRY_REGION "country_region"
|
||||||
#define ADDRESS_PARSER_LABEL_COUNTRY "country"
|
#define ADDRESS_PARSER_LABEL_COUNTRY "country"
|
||||||
|
|
||||||
typedef union address_parser_types {
|
typedef union address_parser_types {
|
||||||
|
|||||||
Reference in New Issue
Block a user