#include #include #include #include #include "log/log.h" #include "sparkey/sparkey.h" #include "collections.h" #include "constants.h" #include "libpostal_config.h" #include "file_utils.h" #include "gazetteers.h" #include "geonames.h" #include "geodb.h" #include "geo_disambiguation.h" #include "graph.h" #include "graph_builder.h" #include "msgpack_utils.h" #include "normalize.h" #include "string_utils.h" // These files are generated by create_geonames_tsv.py #include "geonames_fields.h" #include "postal_fields.h" #define DEFAULT_GEONAMES_TSV LIBPOSTAL_GEONAMES_DIR PATH_SEPARATOR "geonames.tsv"; /* Read line from generated geonames.tsv into a geoname struct */ static bool read_geoname_from_line(geoname_t *g, char *line) { size_t token_count; char *token; geoname_clear(g); cstring_array *tokens = cstring_array_split(line, TAB_SEPARATOR, TAB_SEPARATOR_LEN, &token_count); if (tokens == NULL) return false; if (token_count != NUM_GEONAMES_FIELDS) { log_error("Number of fields (%zu) != expected (%d)\n", token_count, NUM_GEONAMES_FIELDS); goto exit_geoname_free_tokens; } token = cstring_array_get_string(tokens, GEONAMES_ID); if (strlen(token) == 0) { log_error("geonames_id is required\n"); goto exit_geoname_free_tokens; } sscanf(token, "%d", &g->geonames_id); token = cstring_array_get_string(tokens, GEONAMES_CANONICAL); char_array_cat(g->canonical, token); token = cstring_array_get_string(tokens, GEONAMES_BOUNDARY_TYPE); sscanf(token, "%d", (int *)&g->type); token = cstring_array_get_string(tokens, GEONAMES_NAME); char_array_cat(g->name, token); token = cstring_array_get_string(tokens, GEONAMES_ISO_LANGUAGE); char_array_cat(g->iso_language, token); token = cstring_array_get_string(tokens, GEONAMES_HAS_WIKIPEDIA_ENTRY); if (strlen(token) > 0) { int has_wikipedia_entry; sscanf(token, "%d", &has_wikipedia_entry); g->has_wikipedia_entry = has_wikipedia_entry; } else { g->has_wikipedia_entry = false; } token = cstring_array_get_string(tokens, GEONAMES_IS_PREFERRED_NAME); if (strlen(token) > 0) { int is_preferred_name; sscanf(token, "%d", &is_preferred_name); g->is_preferred_name = is_preferred_name; } else { g->is_preferred_name = false; } token = cstring_array_get_string(tokens, GEONAMES_IS_SHORT_NAME); if (strlen(token) > 0) { int is_short_name; sscanf(token, "%d", &is_short_name); g->is_short_name = is_short_name; } else { g->is_short_name = false; } token = cstring_array_get_string(tokens, GEONAMES_IS_COLLOQUIAL); if (strlen(token) > 0) { int is_colloquial; sscanf(token, "%d", &is_colloquial); g->is_colloquial = is_colloquial; } else { g->is_colloquial = false; } token = cstring_array_get_string(tokens, GEONAMES_IS_HISTORICAL); if (strlen(token) > 0) { int is_historical; sscanf(token, "%d", &is_historical); g->is_historical = is_historical; } else { g->is_historical = false; } token = cstring_array_get_string(tokens, GEONAMES_POPULATION); if (strlen(token) > 0) { sscanf(token, "%d", &g->population); } else { g->population = 0; } token = cstring_array_get_string(tokens, GEONAMES_LATITUDE); if (strlen(token) > 0) { sscanf(token, "%lf", &g->latitude); } else { g->longitude = 0.0; } token = cstring_array_get_string(tokens, GEONAMES_LONGITUDE); if (strlen(token) > 0) { sscanf(token, "%lf", &g->longitude); } else { g->longitude = 0.0; } token = cstring_array_get_string(tokens, GEONAMES_FEATURE_CODE); char_array_cat(g->feature_code, token); token = cstring_array_get_string(tokens, GEONAMES_COUNTRY_CODE); char_array_cat(g->country_code, token); token = cstring_array_get_string(tokens, GEONAMES_COUNTRY_ID); if (strlen(token) > 0) { sscanf(token, "%d", &g->country_geonames_id); } else { g->country_geonames_id = 0; } token = cstring_array_get_string(tokens, GEONAMES_ADMIN1_CODE); char_array_cat(g->admin1_code, token); token = cstring_array_get_string(tokens, GEONAMES_ADMIN1_ID); if (strlen(token) > 0) { sscanf(token, "%d", &g->admin1_geonames_id); } else { g->admin1_geonames_id = 0; } token = cstring_array_get_string(tokens, GEONAMES_ADMIN2_CODE); char_array_cat(g->admin2_code, token); token = cstring_array_get_string(tokens, GEONAMES_ADMIN2_ID); if (strlen(token) > 0) { sscanf(token, "%d", &g->admin2_geonames_id); } else { g->admin2_geonames_id = 0; } token = cstring_array_get_string(tokens, GEONAMES_ADMIN3_CODE); char_array_cat(g->admin3_code, token); cstring_array_get_string(tokens, GEONAMES_ADMIN3_ID); if (strlen(token) > 0) { sscanf(token, "%d", &g->admin3_geonames_id); } else { g->admin3_geonames_id = 0; } token = cstring_array_get_string(tokens, GEONAMES_ADMIN4_CODE); char_array_cat(g->admin4_code, token); token = cstring_array_get_string(tokens, GEONAMES_ADMIN4_ID); if (strlen(token)) { sscanf(token, "%d", &g->admin4_geonames_id); } else { g->admin4_geonames_id = 0; } cstring_array_destroy(tokens); return true; exit_geoname_free_tokens: cstring_array_destroy(tokens); return false; } /* Read line from generated postal_codes.tsv into a gn_postal_code struct */ static bool read_gn_postal_code_from_line(gn_postal_code_t *postal, char *line) { size_t token_count; int i; gn_postal_code_clear(postal); char *token; cstring_array *tokens = cstring_array_split(line, TAB_SEPARATOR, TAB_SEPARATOR_LEN, &token_count); if (tokens == NULL) return false; if (token_count != NUM_POSTAL_FIELDS) { log_error("Number of fields (%zu) != expected (%d)\n", token_count, NUM_POSTAL_FIELDS); goto exit_postal_tokens_created; } token = cstring_array_get_string(tokens, GN_POSTAL_CODE); if (strlen(token) == 0) { log_error("postal_code field required\n"); goto exit_postal_tokens_created; } char_array_cat(postal->postal_code, token); token = cstring_array_get_string(tokens, GN_POSTAL_COUNTRY_CODE); char_array_cat(postal->country_code, token); token = cstring_array_get_string(tokens, GN_POSTAL_COUNTRY_GEONAMES_ID); if (strlen(token) > 0) { sscanf(token, "%d", &postal->country_geonames_id); } else { postal->country_geonames_id = 0; } token = cstring_array_get_string(tokens, GN_POSTAL_CONTAINING_GEONAME_ID); char_array_cat(postal->containing_geoname, token); char *admin1_field = cstring_array_get_string(tokens, GN_POSTAL_ADMIN1_IDS); size_t admin1_field_len = strlen(admin1_field); if (admin1_field_len > 0) { size_t admin1_token_count; cstring_array *admin1_tokens = cstring_array_split(admin1_field, COMMA_SEPARATOR, COMMA_SEPARATOR_LEN, &admin1_token_count); uint32_t admin1_id; if (admin1_token_count > 0) { for (i = 0; i < admin1_token_count; i++) { char *admin1_token = cstring_array_get_string(tokens, i); if (strlen(admin1_token) > 0) { sscanf(admin1_token, "%u", &admin1_id); uint32_array_push(postal->admin1_ids, admin1_id); } } } cstring_array_destroy(admin1_tokens); } char *admin2_field = cstring_array_get_string(tokens, GN_POSTAL_ADMIN2_IDS); size_t admin2_field_len = strlen(admin2_field); if (admin2_field_len > 0) { size_t admin2_token_count; cstring_array *admin2_tokens = cstring_array_split(admin2_field, COMMA_SEPARATOR, COMMA_SEPARATOR_LEN, &admin2_token_count); uint32_t admin2_id; if (admin2_token_count > 0) { for (i = 0; i < admin2_token_count; i++) { char *admin2_token = cstring_array_get_string(admin2_tokens, i); if (strlen(admin2_token) > 0) { sscanf(admin2_token, "%u", &admin2_id); uint32_array_push(postal->admin2_ids, admin2_id); } } } cstring_array_destroy(admin2_tokens); } char *admin3_field = cstring_array_get_string(tokens, GN_POSTAL_ADMIN3_IDS); size_t admin3_field_len = strlen(admin3_field); if (admin3_field_len > 0) { size_t admin3_token_count; cstring_array *admin3_tokens = cstring_array_split(admin3_field, COMMA_SEPARATOR, COMMA_SEPARATOR_LEN, &admin3_token_count); uint32_t admin3_id; if (admin3_token_count > 0) { for (i = 0; i < admin3_token_count; i++) { char *admin3_token = cstring_array_get_string(admin3_tokens, i); if (strlen(admin3_token) > 0) { sscanf(admin3_token, "%u", &admin3_id); uint32_array_push(postal->admin3_ids, admin3_id); } } } cstring_array_destroy(admin3_tokens); } cstring_array_destroy(tokens); return true; exit_postal_tokens_created: cstring_array_destroy(tokens); return false; } /* geodb_builder Creates the sparkey on-disk db for quick lookups by geonames_id or postal code. Builds the data structures needed for finalized geodb. */ typedef struct geodb_builder { trie_t *names; cstring_array *postal_codes; trie_t *features; graph_builder_t *feature_graph_builder; sparkey_logwriter *log_writer; } geodb_builder_t; void geodb_builder_destroy(geodb_builder_t *self) { if (self == NULL) return; if (self->names != NULL) { trie_destroy(self->names); } if (self->postal_codes != NULL) { cstring_array_destroy(self->postal_codes); } if (self->features != NULL) { trie_destroy(self->features); } if (self->feature_graph_builder != NULL) { graph_builder_destroy(self->feature_graph_builder); } if (self->log_writer != NULL) { sparkey_logwriter_close(&self->log_writer); } free(self); } geodb_builder_t *geodb_builder_new(char *log_filename) { geodb_builder_t *builder = malloc(sizeof(geodb_builder_t)); if (builder == NULL) return NULL; builder->names = trie_new(); if (builder->names == NULL) { goto exit_destroy_builder; } builder->features = trie_new(); if (builder->features == NULL) { goto exit_destroy_builder; } builder->postal_codes = cstring_array_new(); if (builder->postal_codes == NULL) { goto exit_destroy_builder; } bool fixed_rows = false; builder->feature_graph_builder = graph_builder_new(GRAPH_BIPARTITE, fixed_rows); if (builder->feature_graph_builder == NULL) { goto exit_destroy_builder; } sparkey_returncode ret_code = sparkey_logwriter_create(&builder->log_writer, log_filename, SPARKEY_COMPRESSION_NONE, 0); if (ret_code != SPARKEY_SUCCESS) { goto exit_destroy_builder; } return builder; exit_destroy_builder: geodb_builder_destroy(builder); return NULL; } /* Map of geonames boundary types to address components */ uint16_t get_address_component(uint32_t boundary_type) { if (boundary_type == GEONAMES_LOCALITY) { return ADDRESS_LOCALITY; } else if (boundary_type == GEONAMES_NEIGHBORHOOD) { return ADDRESS_NEIGHBORHOOD; } else if (boundary_type == GEONAMES_ADMIN1) { return ADDRESS_ADMIN1; } else if (boundary_type == GEONAMES_COUNTRY) { return ADDRESS_COUNTRY; } else if (boundary_type == GEONAMES_ADMIN2) { return ADDRESS_ADMIN2; } else if (boundary_type == GEONAMES_ADMIN3) { return ADDRESS_ADMIN3; } else if (boundary_type == GEONAMES_ADMIN4) { return ADDRESS_ADMIN4; } else if (boundary_type == GEONAMES_ADMIN_OTHER) { return ADDRESS_ADMIN_OTHER; } else { return 0; } } /* Add raw/unqualified name to the geodb trie */ bool geodb_builder_add_name(geodb_builder_t *self, char *key, bool is_canonical, uint16_t address_components) { if (self == NULL || self->names == NULL) return false; uint32_t node_id = trie_get(self->names, key); geodb_value_t value; value.value = 0; if (node_id == NULL_NODE_ID) { value.components |= address_components; value.is_canonical = is_canonical; value.count = 1; return trie_add(self->names, key, value.value); } else { if (!trie_get_data_at_index(self->names, node_id, &value.value)) { return false; } value.components |= address_components; value.is_canonical = is_canonical; value.count++; return trie_set_data_at_index(self->names, node_id, value.value); } } /* Get a feature string's id from the trie or add it and return the next id */ static inline uint32_t geodb_builder_get_feature_id(geodb_builder_t *self, char *key) { uint32_t feature_id; if (!trie_get_data(self->features, key, &feature_id)) { feature_id = self->features->num_keys; if (!trie_add(self->features, key, feature_id)) { log_error("Could not add key to trie, aborting\n"); exit(EXIT_FAILURE); } } return feature_id; } /* Destroy builder and create geodb files in the designated directory */ bool geodb_builder_finalize(geodb_builder_t *self, char *output_dir) { char_array *path = char_array_new_size(strlen(output_dir)); char_array_cat_joined(path, PATH_SEPARATOR, true, 2, output_dir, GEODB_NAMES_TRIE_FILENAME); char *names_path = char_array_get_string(path); trie_save(self->names, names_path); char_array_clear(path); char_array_cat_joined(path, PATH_SEPARATOR, true, 2, output_dir, GEODB_FEATURES_TRIE_FILENAME); char *features_path = char_array_get_string(path); trie_save(self->features, features_path); char_array_clear(path); char_array_cat_joined(path, PATH_SEPARATOR, true, 2, output_dir, GEODB_POSTAL_CODES_FILENAME); char *postal_codes_path = char_array_get_string(path); FILE *f = fopen(postal_codes_path, "wb"); uint64_t num_postal_strings = 0; if (!file_write_uint64(f, (uint64_t)cstring_array_num_strings(self->postal_codes))) { log_error("Could not write number of postal code strings\n"); return false; } size_t postal_codes_str_len = self->postal_codes->str->n; if (!file_write_uint64(f, (uint64_t)postal_codes_str_len)) { log_error("Could not write postal codes strings length\n"); return false; } if (!file_write_chars(f, self->postal_codes->str->a, postal_codes_str_len)) { log_error("Could not write postal codes strings\n"); return false; } fclose(f); char_array_clear(path); bool sort_edges = false; bool remove_duplicates = false; graph_t *graph = graph_builder_finalize(self->feature_graph_builder, sort_edges, remove_duplicates); self->feature_graph_builder = NULL; char_array_cat_joined(path, PATH_SEPARATOR, true, 2, output_dir, GEODB_FEATURE_GRAPH_FILENAME); char *feature_graph_path = char_array_get_string(path); if (!graph_save(graph, feature_graph_path)) { log_error("Error saving graph\n"); return false; } graph_destroy(graph); char_array_clear(path); char_array_cat_joined(path, PATH_SEPARATOR, true, 2, output_dir, GEODB_HASH_FILENAME); char *hash_filename = strdup(char_array_get_string(path)); char_array_clear(path); char_array_cat_joined(path, PATH_SEPARATOR, true, 2, output_dir, GEODB_LOG_FILENAME); char *log_filename = char_array_get_string(path); if (self->log_writer != NULL) { sparkey_logwriter_close(&self->log_writer); self->log_writer = NULL; } if ((sparkey_hash_write(hash_filename, log_filename, 0)) != SPARKEY_SUCCESS) { log_error("Could not write Sparkey hash file\n"); free(hash_filename); char_array_destroy(path); return false; } free(hash_filename); char_array_destroy(path); return true; } /* Read generated geonames.tsv and add to the geodb builder */ void import_geonames(geodb_builder_t *self, char *filename) { FILE *f = fopen(filename, "r"); if (f == NULL) { printf("Couldn't open file\n"); exit(1); } char *line; char *prev_name = NULL; geoname_t *g = geoname_new(); char_array *serialized = char_array_new(); graph_builder_t *features = self->feature_graph_builder; // Just a set of all ids in GeoNames so we only add keys once, takes up < 50MB khash_t(int_set) *all_ids = kh_init(int_set); khash_t(int_set) *distinct_ids = kh_init(int_set); khiter_t key; int ret; cstring_array *geo_features = cstring_array_new(); uint32_array *feature_lengths = uint32_array_new(); char id_string[INT32_MAX_STRING_SIZE + 1]; int normalize_utf8_options = NORMALIZE_STRING_DECOMPOSE | NORMALIZE_STRING_LOWERCASE | NORMALIZE_STRING_TRIM; //int normalize_latin_options = normalize_utf8_options | NORMALIZE_STRING_LATIN_ASCII; int i = 0; int ambiguous = 0; int disambiguations = 0; while ((line = file_getline(f)) != NULL) { read_geoname_from_line(g, line); char *name = char_array_get_string(g->name); char *canonical = char_array_get_string(g->canonical); bool is_canonical = strcmp(name, canonical) == 0; char *utf8_normalized = NULL; size_t id_len = sprintf(id_string, "%d", g->geonames_id); if (name != NULL) { utf8_normalized = normalize_string_utf8(name, normalize_utf8_options); } if (utf8_normalized != NULL && (prev_name == NULL || strcmp(utf8_normalized, prev_name) != 0)) { // New name if (!geodb_builder_add_name(self, utf8_normalized, is_canonical, get_address_component(g->type))) { log_error("Error adding geoname %s\n", utf8_normalized); exit(EXIT_FAILURE); } // Only add disambiguation features if there's > 1 id for this name if (kh_size(distinct_ids) > 1) { ambiguous++; uint32_t string_index = 0; uint32_t lengths_index = 0; uint32_t geonames_id; kh_foreach_key(distinct_ids, key, { disambiguations++; uint32_t length = feature_lengths->a[lengths_index]; for (int i = 0; i < length; i++) { char *token = cstring_array_get_string(geo_features, string_index); uint32_t feature_id = geodb_builder_get_feature_id(self, token); graph_builder_add_edge(self->feature_graph_builder, feature_id, geonames_id); string_index++; } lengths_index++; }) } uint32_array_clear(feature_lengths); cstring_array_clear(geo_features); kh_clear(int_set, distinct_ids); } else if (utf8_normalized != NULL) { key = kh_get(int_set, distinct_ids, g->geonames_id); if (key == kh_end(distinct_ids)) { if (!geodb_builder_add_name(self, utf8_normalized, is_canonical, get_address_component(g->type))) { log_error("Error adding geoname %s\n", utf8_normalized); exit(EXIT_FAILURE); } } } else { log_error("normalization failed for name %s\n", name); exit(EXIT_FAILURE); } char_array_clear(serialized); if (!geoname_serialize(g, serialized)) { log_error("geoname_serialize failed for id=%d\n", g->geonames_id); exit(EXIT_FAILURE); } key = kh_get(int_set, all_ids, g->geonames_id); if (key == kh_end(all_ids)) { if ((sparkey_logwriter_put(self->log_writer, strlen(id_string), (uint8_t *)id_string, serialized->n, (uint8_t *)char_array_get_string(serialized))) != SPARKEY_SUCCESS) { log_error("Error writing to Sparkey with id=%d\n", g->geonames_id); exit(EXIT_FAILURE); } key = kh_put(int_set, all_ids, g->geonames_id, &ret); } key = kh_get(int_set, distinct_ids, g->geonames_id); if (key == kh_end(distinct_ids)) { key = kh_put(int_set, distinct_ids, g->geonames_id, &ret); if (ret < 0) { log_error("Error adding id %d to set\n", g->geonames_id); exit(EXIT_FAILURE); } char_array_clear(g->name); char_array_cat(g->name, utf8_normalized); size_t prev_num_geo_features = cstring_array_num_strings(geo_features); if (!geodisambig_add_geoname_features(geo_features, g)) { log_error("Could not add geonames features for id=%d\n", g->geonames_id); exit(EXIT_FAILURE); } uint32_t num_geo_features = cstring_array_num_strings(geo_features); uint32_t feature_length = (uint32_t)(num_geo_features - prev_num_geo_features); uint32_array_push(feature_lengths, feature_length); } if (prev_name != NULL) { free(prev_name); prev_name = NULL; } if (utf8_normalized != NULL) { prev_name = utf8_normalized; } free(line); i++; if (i % 1000 == 0) { log_info("Did %d geonames, %d ambiguous, %d disambiguations, names=%d, features=%d\n", i, ambiguous, disambiguations, self->names->num_keys, self->features->num_keys); } } if (prev_name != NULL) { free(prev_name); } uint32_array_destroy(feature_lengths); cstring_array_destroy(geo_features); kh_destroy(int_set, distinct_ids); kh_destroy(int_set, all_ids); char_array_destroy(serialized); geoname_destroy(g); fclose(f); } /* Read generated postal_codes.tsv and add to the geodb builder */ void import_geonames_postal_codes(geodb_builder_t *self, char *filename) { FILE *f = fopen(filename, "r"); if (f == NULL) { printf("Couldn't open file\n"); exit(1); } char *line; char *prev_code = NULL; gn_postal_code_t *pc = gn_postal_code_new(); char_array *postal_code = char_array_new(); char_array *serialized = char_array_new(); cstring_array *postal_code_features = cstring_array_new(); khiter_t key; int ret; int i = 0; // Always true for postal codes bool is_canonical = true; while ((line = file_getline(f)) != NULL) { if (!read_gn_postal_code_from_line(pc, line)) { log_error("Error reading line: %s\n", line); exit(EXIT_FAILURE); } char *code = char_array_get_string(pc->postal_code); char *utf8_normalized = normalize_string_utf8(code, NORMALIZE_STRING_LOWERCASE); if (utf8_normalized == NULL) { log_error("normalization failed for postal code %s\n", code); exit(EXIT_FAILURE); } geodb_builder_add_name(self, utf8_normalized, is_canonical, ADDRESS_POSTAL_CODE); char_array_clear(serialized); if (!gn_postal_code_serialize(pc, serialized)) { log_error("gn_postal_code_serialize failed for postal code=%s\n", code); exit(EXIT_FAILURE); } char *country_code = char_array_get_string(pc->country_code); char_array_clear(postal_code); char_array_cat_joined(postal_code, NAMESPACE_SEPARATOR_CHAR, false, 2, country_code, utf8_normalized); char *key = char_array_get_string(postal_code); cstring_array_add_string(self->postal_codes, key); uint32_t postal_code_index = (uint32_t)cstring_array_num_strings(self->postal_codes); cstring_array_clear(postal_code_features); char_array_clear(pc->postal_code); char_array_cat(pc->postal_code, utf8_normalized); if (sparkey_logwriter_put(self->log_writer, strlen(key), (uint8_t *)key, serialized->n, (uint8_t *)char_array_get_string(serialized)) != SPARKEY_SUCCESS) { log_error("Error writing key %s to Sparkey\n", key); } if (!geodisambig_add_postal_code_features(postal_code_features, pc)) { log_error("Could not add geonames features for postal code=%s\n", code); exit(EXIT_FAILURE); } /* In the Geonames case, the column indices in the graph refer to GeoNames ids. Since postal codes do not have ids, only names, the indices in the postal code feature graph refer to offsets in a cstring_array containing all the names. Since postal code features are namespaced differently, we can do this without offsets, etc. */ for (int i = 0; i < cstring_array_num_strings(postal_code_features); i++) { char *token = cstring_array_get_string(postal_code_features, i); uint32_t feature_id = geodb_builder_get_feature_id(self, token); graph_builder_add_edge(self->feature_graph_builder, feature_id, postal_code_index); } if (prev_code != NULL) { free(prev_code); prev_code = NULL; } if (utf8_normalized != NULL) { prev_code = utf8_normalized; } free(line); i++; if (i % 1000 == 0) { log_info("Did %d postal codes\n", i); } } if (prev_code != NULL) { free(prev_code); } char_array_destroy(postal_code); char_array_destroy(serialized); cstring_array_destroy(postal_code_features); gn_postal_code_destroy(pc); fclose(f); } /* Usage with no parameters: ./build_geodb Usage with parameters: ./build_geodb input_dir output_dir */ int main(int argc, char **argv) { char *input_dir; char *output_dir; if (argc > 2) { input_dir = argv[1]; output_dir = argv[2]; } else { input_dir = LIBPOSTAL_GEONAMES_DIR; output_dir = LIBPOSTAL_GEODB_DIR; } char *geonames_filename = "geonames.tsv"; char_array *path = char_array_new_size(strlen(input_dir)); char_array_cat_joined(path, PATH_SEPARATOR, true, 2, input_dir, geonames_filename); char *geonames_path = strdup(char_array_get_string(path)); char_array_clear(path); char_array_cat_joined(path, PATH_SEPARATOR, true, 2, output_dir, GEODB_LOG_FILENAME); char *log_filename = char_array_get_string(path); geodb_builder_t *builder = geodb_builder_new(log_filename); import_geonames(builder, geonames_path); free(geonames_path); printf("\n\n"); char *postal_codes_filename = "postal_codes.tsv"; char_array_clear(path); char_array_cat_joined(path, PATH_SEPARATOR, true, 2, input_dir, postal_codes_filename); char *postal_codes_path = char_array_get_string(path); log_info("Doing postal_codes\n"); import_geonames_postal_codes(builder, postal_codes_path); char_array_destroy(path); if (!geodb_builder_finalize(builder, output_dir)) { exit(EXIT_FAILURE); } geodb_builder_destroy(builder); exit(EXIT_SUCCESS); }