[parser/cli] adding .print_features option in address_parser client for debugging
This commit is contained in:
@@ -17,8 +17,6 @@
|
||||
|
||||
static address_parser_t *parser = NULL;
|
||||
|
||||
//#define PRINT_ADDRESS_PARSER_FEATURES
|
||||
|
||||
typedef enum {
|
||||
ADDRESS_PARSER_NULL_PHRASE,
|
||||
ADDRESS_PARSER_DICTIONARY_PHRASE,
|
||||
@@ -29,7 +27,8 @@ typedef enum {
|
||||
|
||||
|
||||
static parser_options_t PARSER_DEFAULT_OPTIONS = {
|
||||
.rare_word_threshold = DEFAULT_RARE_WORD_THRESHOLD
|
||||
.rare_word_threshold = DEFAULT_RARE_WORD_THRESHOLD,
|
||||
.print_features = false
|
||||
};
|
||||
|
||||
address_parser_t *address_parser_new_options(parser_options_t options) {
|
||||
@@ -873,7 +872,7 @@ bool address_parser_features(void *self, void *ctx, tokenized_string_t *tokenize
|
||||
log_warn("expansion_value is NULL. word=%s, sentence=%s\n", word, tokenized->str);
|
||||
}
|
||||
|
||||
if (address_phrase_types & (ADDRESS_STREET | ADDRESS_HOUSE_NUMBER | ADDRESS_NAME)) {
|
||||
if (address_phrase_types & (ADDRESS_STREET | ADDRESS_HOUSE_NUMBER | ADDRESS_NAME | ADDRESS_UNIT)) {
|
||||
phrase_string = cstring_array_get_phrase(context->normalized, phrase_tokens, phrase);
|
||||
|
||||
add_word_feature = false;
|
||||
@@ -1146,22 +1145,18 @@ bool address_parser_features(void *self, void *ctx, tokenized_string_t *tokenize
|
||||
//feature_array_add(features, 4, "prev tag+word+next word", prev || "START", word, next_word);
|
||||
}
|
||||
|
||||
#ifndef PRINT_ADDRESS_PARSER_FEATURES
|
||||
if (0) {
|
||||
#endif
|
||||
if (parser->options.print_features) {
|
||||
uint32_t fidx;
|
||||
char *feature;
|
||||
|
||||
uint32_t fidx;
|
||||
char *feature;
|
||||
|
||||
printf("{");
|
||||
cstring_array_foreach(features, fidx, feature, {
|
||||
printf(" %s, ", feature);
|
||||
})
|
||||
printf("}\n");
|
||||
|
||||
#ifndef PRINT_ADDRESS_PARSER_FEATURES
|
||||
printf("{ ");
|
||||
size_t num_features = cstring_array_num_strings(features);
|
||||
cstring_array_foreach(context->features, fidx, feature, {
|
||||
printf("%s", feature);
|
||||
if (fidx < num_features - 1) printf(", ");
|
||||
})
|
||||
printf(" }\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
return true;
|
||||
|
||||
|
||||
@@ -166,6 +166,7 @@ typedef struct address_parser_context {
|
||||
|
||||
// Settings carried by an address parser; PARSER_DEFAULT_OPTIONS provides the
// default values for both fields.
typedef struct parser_options {
|
||||
// Set to DEFAULT_RARE_WORD_THRESHOLD in PARSER_DEFAULT_OPTIONS.
uint64_t rare_word_threshold;
|
||||
// Debugging switch: when true, address_parser_features() prints the feature
// strings with printf. Defaults to false and can be toggled from the client
// with the ".print_features [on|off]" command.
bool print_features;
|
||||
} parser_options_t;
|
||||
|
||||
// Can add other gazetteers as well
|
||||
|
||||
@@ -64,6 +64,8 @@ int main(int argc, char **argv) {
|
||||
|
||||
char *input = NULL;
|
||||
|
||||
address_parser_t *parser = get_address_parser();
|
||||
|
||||
while((input = linenoise("> ")) != NULL) {
|
||||
|
||||
if (input[0] != '\0') {
|
||||
@@ -101,6 +103,22 @@ int main(int argc, char **argv) {
|
||||
printf("Must specify country code\n");
|
||||
}
|
||||
|
||||
cstring_array_destroy(command);
|
||||
goto next_input;
|
||||
} else if (string_starts_with(input, ".print_features")) {
|
||||
size_t num_tokens = 0;
|
||||
cstring_array *command = cstring_array_split(input, " ", 1, &num_tokens);
|
||||
if (cstring_array_num_strings(command) > 1) {
|
||||
char *flag = cstring_array_get_string(command, 1);
|
||||
if (string_compare_case_insensitive(flag, "off") == 0) {
|
||||
parser->options.print_features = false;
|
||||
} else if (string_compare_case_insensitive(flag, "on") == 0) {
|
||||
parser->options.print_features = true;
|
||||
}
|
||||
} else {
|
||||
parser->options.print_features = true;
|
||||
}
|
||||
|
||||
cstring_array_destroy(command);
|
||||
goto next_input;
|
||||
} else if (strlen(input) == 0) {
|
||||
@@ -116,16 +134,9 @@ int main(int argc, char **argv) {
|
||||
printf("{\n");
|
||||
for (int i = 0; i < parsed->num_components; i++) {
|
||||
char *component = parsed->components[i];
|
||||
utf8proc_uint8_t *normalized = NULL;
|
||||
utf8proc_map((utf8proc_uint8_t *)component, 0, &normalized, UTF8PROC_NULLTERM | UTF8PROC_COMPOSE);
|
||||
if (normalized == NULL) {
|
||||
log_error("Error parsing address\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
char *json_string = json_encode_string((char *)normalized);
|
||||
char *json_string = json_encode_string(component);
|
||||
printf(" \"%s\": %s%s\n", parsed->labels[i], json_string, i < parsed->num_components - 1 ? "," : "");
|
||||
free(normalized);
|
||||
free(json_string);
|
||||
}
|
||||
printf("}\n");
|
||||
|
||||
Reference in New Issue
Block a user