[parser/cli] adding .print_features option in address_parser client for debugging
This commit is contained in:
@@ -17,8 +17,6 @@
|
|||||||
|
|
||||||
static address_parser_t *parser = NULL;
|
static address_parser_t *parser = NULL;
|
||||||
|
|
||||||
//#define PRINT_ADDRESS_PARSER_FEATURES
|
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
ADDRESS_PARSER_NULL_PHRASE,
|
ADDRESS_PARSER_NULL_PHRASE,
|
||||||
ADDRESS_PARSER_DICTIONARY_PHRASE,
|
ADDRESS_PARSER_DICTIONARY_PHRASE,
|
||||||
@@ -29,7 +27,8 @@ typedef enum {
|
|||||||
|
|
||||||
|
|
||||||
static parser_options_t PARSER_DEFAULT_OPTIONS = {
|
static parser_options_t PARSER_DEFAULT_OPTIONS = {
|
||||||
.rare_word_threshold = DEFAULT_RARE_WORD_THRESHOLD
|
.rare_word_threshold = DEFAULT_RARE_WORD_THRESHOLD,
|
||||||
|
.print_features = false
|
||||||
};
|
};
|
||||||
|
|
||||||
address_parser_t *address_parser_new_options(parser_options_t options) {
|
address_parser_t *address_parser_new_options(parser_options_t options) {
|
||||||
@@ -873,7 +872,7 @@ bool address_parser_features(void *self, void *ctx, tokenized_string_t *tokenize
|
|||||||
log_warn("expansion_value is NULL. word=%s, sentence=%s\n", word, tokenized->str);
|
log_warn("expansion_value is NULL. word=%s, sentence=%s\n", word, tokenized->str);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (address_phrase_types & (ADDRESS_STREET | ADDRESS_HOUSE_NUMBER | ADDRESS_NAME)) {
|
if (address_phrase_types & (ADDRESS_STREET | ADDRESS_HOUSE_NUMBER | ADDRESS_NAME | ADDRESS_UNIT)) {
|
||||||
phrase_string = cstring_array_get_phrase(context->normalized, phrase_tokens, phrase);
|
phrase_string = cstring_array_get_phrase(context->normalized, phrase_tokens, phrase);
|
||||||
|
|
||||||
add_word_feature = false;
|
add_word_feature = false;
|
||||||
@@ -1146,22 +1145,18 @@ bool address_parser_features(void *self, void *ctx, tokenized_string_t *tokenize
|
|||||||
//feature_array_add(features, 4, "prev tag+word+next word", prev || "START", word, next_word);
|
//feature_array_add(features, 4, "prev tag+word+next word", prev || "START", word, next_word);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef PRINT_ADDRESS_PARSER_FEATURES
|
if (parser->options.print_features) {
|
||||||
if (0) {
|
uint32_t fidx;
|
||||||
#endif
|
char *feature;
|
||||||
|
|
||||||
uint32_t fidx;
|
printf("{ ");
|
||||||
char *feature;
|
size_t num_features = cstring_array_num_strings(features);
|
||||||
|
cstring_array_foreach(context->features, fidx, feature, {
|
||||||
printf("{");
|
printf("%s", feature);
|
||||||
cstring_array_foreach(features, fidx, feature, {
|
if (fidx < num_features - 1) printf(", ");
|
||||||
printf(" %s, ", feature);
|
})
|
||||||
})
|
printf(" }\n");
|
||||||
printf("}\n");
|
|
||||||
|
|
||||||
#ifndef PRINT_ADDRESS_PARSER_FEATURES
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
|||||||
@@ -166,6 +166,7 @@ typedef struct address_parser_context {
|
|||||||
|
|
||||||
typedef struct parser_options {
|
typedef struct parser_options {
|
||||||
uint64_t rare_word_threshold;
|
uint64_t rare_word_threshold;
|
||||||
|
bool print_features;
|
||||||
} parser_options_t;
|
} parser_options_t;
|
||||||
|
|
||||||
// Can add other gazetteers as well
|
// Can add other gazetteers as well
|
||||||
|
|||||||
@@ -64,6 +64,8 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
char *input = NULL;
|
char *input = NULL;
|
||||||
|
|
||||||
|
address_parser_t *parser = get_address_parser();
|
||||||
|
|
||||||
while((input = linenoise("> ")) != NULL) {
|
while((input = linenoise("> ")) != NULL) {
|
||||||
|
|
||||||
if (input[0] != '\0') {
|
if (input[0] != '\0') {
|
||||||
@@ -101,6 +103,22 @@ int main(int argc, char **argv) {
|
|||||||
printf("Must specify country code\n");
|
printf("Must specify country code\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cstring_array_destroy(command);
|
||||||
|
goto next_input;
|
||||||
|
} else if (string_starts_with(input, ".print_features")) {
|
||||||
|
size_t num_tokens = 0;
|
||||||
|
cstring_array *command = cstring_array_split(input, " ", 1, &num_tokens);
|
||||||
|
if (cstring_array_num_strings(command) > 1) {
|
||||||
|
char *flag = cstring_array_get_string(command, 1);
|
||||||
|
if (string_compare_case_insensitive(flag, "off") == 0) {
|
||||||
|
parser->options.print_features = false;
|
||||||
|
} else if (string_compare_case_insensitive(flag, "on") == 0) {
|
||||||
|
parser->options.print_features = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
parser->options.print_features = true;
|
||||||
|
}
|
||||||
|
|
||||||
cstring_array_destroy(command);
|
cstring_array_destroy(command);
|
||||||
goto next_input;
|
goto next_input;
|
||||||
} else if (strlen(input) == 0) {
|
} else if (strlen(input) == 0) {
|
||||||
@@ -116,16 +134,9 @@ int main(int argc, char **argv) {
|
|||||||
printf("{\n");
|
printf("{\n");
|
||||||
for (int i = 0; i < parsed->num_components; i++) {
|
for (int i = 0; i < parsed->num_components; i++) {
|
||||||
char *component = parsed->components[i];
|
char *component = parsed->components[i];
|
||||||
utf8proc_uint8_t *normalized = NULL;
|
|
||||||
utf8proc_map((utf8proc_uint8_t *)component, 0, &normalized, UTF8PROC_NULLTERM | UTF8PROC_COMPOSE);
|
|
||||||
if (normalized == NULL) {
|
|
||||||
log_error("Error parsing address\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
char *json_string = json_encode_string((char *)normalized);
|
char *json_string = json_encode_string(component);
|
||||||
printf(" \"%s\": %s%s\n", parsed->labels[i], json_string, i < parsed->num_components - 1 ? "," : "");
|
printf(" \"%s\": %s%s\n", parsed->labels[i], json_string, i < parsed->num_components - 1 ? "," : "");
|
||||||
free(normalized);
|
|
||||||
free(json_string);
|
free(json_string);
|
||||||
}
|
}
|
||||||
printf("}\n");
|
printf("}\n");
|
||||||
|
|||||||
Reference in New Issue
Block a user