#ifndef ADDRESS_PARSER_IO_H #define ADDRESS_PARSER_IO_H #include #include #include #include "address_parser.h" #include "collections.h" #include "file_utils.h" #include "scanner.h" #include "string_utils.h" #define AMBIGUOUS_LANGUAGE "xxx" #define UNKNOWN_LANGUAGE "unk" enum address_parser_training_data_fields { ADDRESS_PARSER_FIELD_LANGUAGE, ADDRESS_PARSER_FIELD_COUNTRY, ADDRESS_PARSER_FIELD_ADDRESS, ADDRESS_PARSER_FILE_NUM_TOKENS }; typedef struct address_parser_data_set { FILE *f; token_array *tokens; tokenized_string_t *tokenized_str; cstring_array *labels; uint32_array *separators; char_array *language; char_array *country; } address_parser_data_set_t; address_parser_data_set_t *address_parser_data_set_init(char *filename); bool address_parser_data_set_tokenize_line(char *input, token_array *tokens, uint32_array *separators, cstring_array *labels); bool address_parser_data_set_next(address_parser_data_set_t *data_set); void address_parser_data_set_destroy(address_parser_data_set_t *self); #endif