[api] doing this now since we're bumping a major version. Using a libpostal prefix for all public header functions and definitions

This commit is contained in:
Al
2017-03-31 03:35:51 -04:00
parent f8d7bdf364
commit 6d4c7984df
16 changed files with 184 additions and 201 deletions

View File

@@ -132,15 +132,15 @@ int main(int argc, char **argv) {
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
address_parser_response_t *parsed = parse_address("781 Franklin Ave Crown Heights Brooklyn NYC NY 11216 USA", options); libpostal_address_parser_response_t *parsed = libpostal_parse_address("781 Franklin Ave Crown Heights Brooklyn NYC NY 11216 USA", options);
for (size_t i = 0; i < parsed->num_components; i++) { for (size_t i = 0; i < parsed->num_components; i++) {
printf("%s: %s\n", parsed->labels[i], parsed->components[i]); printf("%s: %s\n", parsed->labels[i], parsed->components[i]);
} }
// Free parse result // Free parse result
address_parser_response_destroy(parsed); libpostal_address_parser_response_destroy(parsed);
// Teardown (only called once at the end of your program) // Teardown (only called once at the end of your program)
libpostal_teardown(); libpostal_teardown();
@@ -220,15 +220,15 @@ int main(int argc, char **argv) {
} }
size_t num_expansions; size_t num_expansions;
normalize_options_t options = get_libpostal_default_options(); libpostal_normalize_options_t options = libpostal_get_default_options();
char **expansions = expand_address("Quatre-vingt-douze Ave des Champs-Élysées", options, &num_expansions); char **expansions = libpostal_expand_address("Quatre-vingt-douze Ave des Champs-Élysées", options, &num_expansions);
for (size_t i = 0; i < num_expansions; i++) { for (size_t i = 0; i < num_expansions; i++) {
printf("%s\n", expansions[i]); printf("%s\n", expansions[i]);
} }
// Free expansions // Free expansions
expansion_array_destroy(expansions, num_expansions); libpostal_expansion_array_destroy(expansions, num_expansions);
// Teardown (only called once at the end of your program) // Teardown (only called once at the end of your program)
libpostal_teardown(); libpostal_teardown();

View File

@@ -1,5 +1,3 @@
SUBDIRS = sparkey
# Inherited from autoconf / user-specified # Inherited from autoconf / user-specified
CFLAGS_CONF = @CFLAGS@ CFLAGS_CONF = @CFLAGS@
CFLAGS_BASE = -Wall -Wextra -Wno-unused-function -Wdeclaration-after-statement -Wformat -Werror=format-security -Winit-self -Wno-sign-compare -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g $(CFLAGS_CONF) CFLAGS_BASE = -Wall -Wextra -Wno-unused-function -Wdeclaration-after-statement -Wformat -Werror=format-security -Winit-self -Wno-sign-compare -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g $(CFLAGS_CONF)
@@ -14,9 +12,10 @@ DEFAULT_INCLUDES = -I.. -I/usr/local/include
CFLAGS = CFLAGS =
lib_LTLIBRARIES = libpostal.la lib_LTLIBRARIES = libpostal.la
libpostal_la_SOURCES = libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c numex.c utf8proc/utf8proc.c cmp/cmp.c geodb.c geo_disambiguation.c normalize.c bloom.c features.c geonames.c geohash/geohash.c unicode_scripts.c msgpack_utils.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c libpostal_la_SOURCES = libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c numex.c utf8proc/utf8proc.c cmp/cmp.c normalize.c bloom.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c
libpostal_la_LIBADD = libscanner.la sparkey/libsparkey.la $(CBLAS_LIBS) libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS)
libpostal_la_CFLAGS = $(CFLAGS_O2) libpostal_la_CFLAGS = $(CFLAGS_O2)
libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@
dist_bin_SCRIPTS = libpostal_data dist_bin_SCRIPTS = libpostal_data
@@ -27,7 +26,7 @@ noinst_LTLIBRARIES = libscanner.la
libscanner_la_SOURCES = scanner.c libscanner_la_SOURCES = scanner.c
libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA) libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA)
noinst_PROGRAMS = libpostal bench address_parser address_parser_train address_parser_test build_address_dictionary build_geodb build_numex_table build_trans_table address_parser_train address_parser_test language_classifier_train language_classifier language_classifier_test noinst_PROGRAMS = libpostal bench address_parser address_parser_train address_parser_test build_address_dictionary build_numex_table build_trans_table address_parser_train address_parser_test language_classifier_train language_classifier language_classifier_test
libpostal_SOURCES = main.c json_encode.c libpostal_SOURCES = main.c json_encode.c
libpostal_LDADD = libpostal.la libpostal_LDADD = libpostal.la
@@ -41,9 +40,6 @@ address_parser_CFLAGS = $(CFLAGS_O3)
build_address_dictionary_SOURCES = address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c build_address_dictionary_SOURCES = address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c
build_address_dictionary_CFLAGS = $(CFLAGS_O3) build_address_dictionary_CFLAGS = $(CFLAGS_O3)
build_geodb_SOURCES = geodb_builder.c geodb.c geo_disambiguation.c graph.c graph_builder.c normalize.c features.c geonames.c geohash/geohash.c unicode_scripts.c transliterate.c trie.c trie_search.c string_utils.c msgpack_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c
build_geodb_LDADD = sparkey/libsparkey.la
build_geodb_CFLAGS = $(CFLAGS_O3)
build_numex_table_SOURCES = numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c build_numex_table_SOURCES = numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c
build_numex_table_CFLAGS = $(CFLAGS_O3) build_numex_table_CFLAGS = $(CFLAGS_O3)
build_trans_table_SOURCES = transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c build_trans_table_SOURCES = transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c

View File

@@ -842,7 +842,7 @@ bool is_valid_dictionary_phrase(phrase_t phrase) {
} }
uint32_t address_phrase_types = expansion_value->components; uint32_t address_phrase_types = expansion_value->components;
if (address_phrase_types & (ADDRESS_STREET | ADDRESS_HOUSE_NUMBER | ADDRESS_NAME | ADDRESS_CATEGORY | ADDRESS_NEAR | ADDRESS_UNIT | ADDRESS_LEVEL | ADDRESS_ENTRANCE | ADDRESS_STAIRCASE | ADDRESS_POSTAL_CODE | ADDRESS_PO_BOX)) { if (address_phrase_types & (LIBPOSTAL_ADDRESS_STREET | LIBPOSTAL_ADDRESS_HOUSE_NUMBER | LIBPOSTAL_ADDRESS_NAME | LIBPOSTAL_ADDRESS_CATEGORY | LIBPOSTAL_ADDRESS_NEAR | LIBPOSTAL_ADDRESS_UNIT | LIBPOSTAL_ADDRESS_LEVEL | LIBPOSTAL_ADDRESS_ENTRANCE | LIBPOSTAL_ADDRESS_STAIRCASE | LIBPOSTAL_ADDRESS_POSTAL_CODE | LIBPOSTAL_ADDRESS_PO_BOX)) {
for (size_t i = 0; i < expansion_value->expansions->n; i++) { for (size_t i = 0; i < expansion_value->expansions->n; i++) {
address_expansion_t expansion = expansion_value->expansions->a[i]; address_expansion_t expansion = expansion_value->expansions->a[i];
if (!address_expansion_in_dictionary(expansion, DICTIONARY_TOPONYM)) { if (!address_expansion_in_dictionary(expansion, DICTIONARY_TOPONYM)) {
@@ -913,7 +913,7 @@ static address_parser_phrase_t word_or_phrase_at_index(address_parser_t *parser,
expansion_index = suffix_phrase.data; expansion_index = suffix_phrase.data;
expansion_value = address_dictionary_get_expansions(expansion_index); expansion_value = address_dictionary_get_expansions(expansion_index);
if (expansion_value->components & ADDRESS_STREET) { if (expansion_value->components & LIBPOSTAL_ADDRESS_STREET) {
response = (address_parser_phrase_t){ response = (address_parser_phrase_t){
word, word,
ADDRESS_PARSER_SUFFIX_PHRASE, ADDRESS_PARSER_SUFFIX_PHRASE,
@@ -928,8 +928,8 @@ static address_parser_phrase_t word_or_phrase_at_index(address_parser_t *parser,
expansion_index = prefix_phrase.data; expansion_index = prefix_phrase.data;
expansion_value = address_dictionary_get_expansions(expansion_index); expansion_value = address_dictionary_get_expansions(expansion_index);
// Don't include elisions like l', d', etc. which are in the ADDRESS_ANY category // Don't include elisions like l', d', etc. which are in the LIBPOSTAL_ADDRESS_ANY category
if (expansion_value->components ^ ADDRESS_ANY) { if (expansion_value->components ^ LIBPOSTAL_ADDRESS_ANY) {
response = (address_parser_phrase_t){ response = (address_parser_phrase_t){
word, word,
ADDRESS_PARSER_PREFIX_PHRASE, ADDRESS_PARSER_PREFIX_PHRASE,
@@ -1164,16 +1164,16 @@ bool address_parser_features(void *self, void *ctx, tokenized_string_t *tokenize
add_word_feature = false; add_word_feature = false;
log_debug("phrase_string=%s\n", phrase_string); log_debug("phrase_string=%s\n", phrase_string);
add_phrase_features(features, address_phrase_types, ADDRESS_STREET, "street", phrase_string); add_phrase_features(features, address_phrase_types, LIBPOSTAL_ADDRESS_STREET, "street", phrase_string);
add_phrase_features(features, address_phrase_types, ADDRESS_NAME, "name", phrase_string); add_phrase_features(features, address_phrase_types, LIBPOSTAL_ADDRESS_NAME, "name", phrase_string);
add_phrase_features(features, address_phrase_types, ADDRESS_CATEGORY, "category", phrase_string); add_phrase_features(features, address_phrase_types, LIBPOSTAL_ADDRESS_CATEGORY, "category", phrase_string);
add_phrase_features(features, address_phrase_types, ADDRESS_UNIT, "unit", phrase_string); add_phrase_features(features, address_phrase_types, LIBPOSTAL_ADDRESS_UNIT, "unit", phrase_string);
add_phrase_features(features, address_phrase_types, ADDRESS_PO_BOX, "po_box", phrase_string); add_phrase_features(features, address_phrase_types, LIBPOSTAL_ADDRESS_PO_BOX, "po_box", phrase_string);
add_phrase_features(features, address_phrase_types, ADDRESS_LEVEL, "level", phrase_string); add_phrase_features(features, address_phrase_types, LIBPOSTAL_ADDRESS_LEVEL, "level", phrase_string);
add_phrase_features(features, address_phrase_types, ADDRESS_ENTRANCE, "entrance", phrase_string); add_phrase_features(features, address_phrase_types, LIBPOSTAL_ADDRESS_ENTRANCE, "entrance", phrase_string);
add_phrase_features(features, address_phrase_types, ADDRESS_STAIRCASE, "staircase", phrase_string); add_phrase_features(features, address_phrase_types, LIBPOSTAL_ADDRESS_STAIRCASE, "staircase", phrase_string);
add_phrase_features(features, address_phrase_types, ADDRESS_HOUSE_NUMBER, "house_number", phrase_string); add_phrase_features(features, address_phrase_types, LIBPOSTAL_ADDRESS_HOUSE_NUMBER, "house_number", phrase_string);
add_phrase_features(features, address_phrase_types, ADDRESS_POSTAL_CODE, "postal_code", phrase_string); add_phrase_features(features, address_phrase_types, LIBPOSTAL_ADDRESS_POSTAL_CODE, "postal_code", phrase_string);
} }
} }
@@ -1330,8 +1330,8 @@ bool address_parser_features(void *self, void *ctx, tokenized_string_t *tokenize
expansion_index = prefix_phrase.data; expansion_index = prefix_phrase.data;
expansion_value = address_dictionary_get_expansions(expansion_index); expansion_value = address_dictionary_get_expansions(expansion_index);
// Don't include elisions like l', d', etc. which are in the ADDRESS_ANY category // Don't include elisions like l', d', etc. which are in the LIBPOSTAL_ADDRESS_ANY category
if (expansion_value->components ^ ADDRESS_ANY) { if (expansion_value->components ^ LIBPOSTAL_ADDRESS_ANY) {
known_prefix = true; known_prefix = true;
char_array_clear(phrase_tokens); char_array_clear(phrase_tokens);
prefix_len = prefix_phrase.len; prefix_len = prefix_phrase.len;
@@ -1347,7 +1347,7 @@ bool address_parser_features(void *self, void *ctx, tokenized_string_t *tokenize
expansion_index = suffix_phrase.data; expansion_index = suffix_phrase.data;
expansion_value = address_dictionary_get_expansions(expansion_index); expansion_value = address_dictionary_get_expansions(expansion_index);
if (expansion_value->components & ADDRESS_STREET) { if (expansion_value->components & LIBPOSTAL_ADDRESS_STREET) {
known_suffix = true; known_suffix = true;
char_array_clear(context->suffix_phrase); char_array_clear(context->suffix_phrase);
suffix_len = suffix_phrase.len; suffix_len = suffix_phrase.len;
@@ -1582,20 +1582,20 @@ bool address_parser_features(void *self, void *ctx, tokenized_string_t *tokenize
right_context_affix = phrase_prefix(right_context_word, strlen(right_context_word_pre_norm), right_context_phrase, context->long_context_suffix_phrase); right_context_affix = phrase_prefix(right_context_word, strlen(right_context_word_pre_norm), right_context_phrase, context->long_context_suffix_phrase);
} }
if (right_context_components & ADDRESS_STREET && !(right_context_components & ADDRESS_NAME)) { if (right_context_components & LIBPOSTAL_ADDRESS_STREET && !(right_context_components & LIBPOSTAL_ADDRESS_NAME)) {
feature_array_add(features, 2, "first word unknown+street phrase right", relation_to_number); feature_array_add(features, 2, "first word unknown+street phrase right", relation_to_number);
feature_array_add(features, 3, "first word unknown+street phrase right", relation_to_number, right_context_word); feature_array_add(features, 3, "first word unknown+street phrase right", relation_to_number, right_context_word);
if (right_context_affix != NULL && right_affix_type != NULL) { if (right_context_affix != NULL && right_affix_type != NULL) {
feature_array_add(features, 4, "first word unknown+street affix right", relation_to_number, right_affix_type, right_context_affix); feature_array_add(features, 4, "first word unknown+street affix right", relation_to_number, right_affix_type, right_context_affix);
} }
break; break;
} else if (right_context_components & ADDRESS_NAME && !(right_context_components & ADDRESS_STREET)) { } else if (right_context_components & LIBPOSTAL_ADDRESS_NAME && !(right_context_components & LIBPOSTAL_ADDRESS_STREET)) {
feature_array_add(features, 2, "first word unknown+venue phrase right", relation_to_number); feature_array_add(features, 2, "first word unknown+venue phrase right", relation_to_number);
feature_array_add(features, 3, "first word unknown+venue phrase right", relation_to_number, right_context_word); feature_array_add(features, 3, "first word unknown+venue phrase right", relation_to_number, right_context_word);
if (right_context_affix != NULL && right_affix_type != NULL) { if (right_context_affix != NULL && right_affix_type != NULL) {
feature_array_add(features, 4, "first word unknown+venue affix right", relation_to_number, right_affix_type, right_context_affix); feature_array_add(features, 4, "first word unknown+venue affix right", relation_to_number, right_affix_type, right_context_affix);
} }
} else if (right_context_components & (ADDRESS_NAME | ADDRESS_STREET)) { } else if (right_context_components & (LIBPOSTAL_ADDRESS_NAME | LIBPOSTAL_ADDRESS_STREET)) {
if (seen_number) { if (seen_number) {
feature_array_add(features, 1, "first word unknown+number+ambiguous phrase right"); feature_array_add(features, 1, "first word unknown+number+ambiguous phrase right");
feature_array_add(features, 2, "first word unknown+number+ambiguous phrase right", right_context_word); feature_array_add(features, 2, "first word unknown+number+ambiguous phrase right", right_context_word);
@@ -1637,12 +1637,12 @@ bool address_parser_predict(address_parser_t *self, address_parser_context_t *co
return false; return false;
} }
address_parser_response_t *address_parser_response_new(void) { libpostal_address_parser_response_t *address_parser_response_new(void) {
address_parser_response_t *response = malloc(sizeof(address_parser_response_t)); libpostal_address_parser_response_t *response = malloc(sizeof(libpostal_address_parser_response_t));
return response; return response;
} }
address_parser_response_t *address_parser_parse(char *address, char *language, char *country, address_parser_context_t *context) { libpostal_address_parser_response_t *address_parser_parse(char *address, char *language, char *country, address_parser_context_t *context) {
if (address == NULL || context == NULL) return NULL; if (address == NULL || context == NULL) return NULL;
address_parser_t *parser = get_address_parser(); address_parser_t *parser = get_address_parser();
@@ -1693,7 +1693,7 @@ address_parser_response_t *address_parser_parse(char *address, char *language, c
country = NULL; country = NULL;
address_parser_context_fill(context, parser, tokenized_str, language, country); address_parser_context_fill(context, parser, tokenized_str, language, country);
address_parser_response_t *response = NULL; libpostal_address_parser_response_t *response = NULL;
// If the whole input string is a single known phrase at the SUBURB level or higher, bypass sequence prediction altogether // If the whole input string is a single known phrase at the SUBURB level or higher, bypass sequence prediction altogether
phrase_t only_phrase = NULL_PHRASE; phrase_t only_phrase = NULL_PHRASE;

View File

@@ -214,7 +214,7 @@ address_parser_t *address_parser_new_options(parser_options_t options);
address_parser_t *get_address_parser(void); address_parser_t *get_address_parser(void);
bool address_parser_load(char *dir); bool address_parser_load(char *dir);
address_parser_response_t *address_parser_parse(char *address, char *language, char *country, address_parser_context_t *context); libpostal_address_parser_response_t *address_parser_parse(char *address, char *language, char *country, address_parser_context_t *context);
void address_parser_destroy(address_parser_t *self); void address_parser_destroy(address_parser_t *self);
char *address_parser_normalize_string(char *str); char *address_parser_normalize_string(char *str);

View File

@@ -117,10 +117,10 @@ int main(int argc, char **argv) {
goto next_input; goto next_input;
} }
address_parser_response_t *parsed; libpostal_address_parser_response_t *parsed;
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
if ((parsed = parse_address(input, options))) { if ((parsed = libpostal_parse_address(input, options))) {
printf("\n"); printf("\n");
printf("Result:\n\n"); printf("Result:\n\n");
printf("{\n"); printf("{\n");
@@ -134,7 +134,7 @@ int main(int argc, char **argv) {
printf("}\n"); printf("}\n");
printf("\n"); printf("\n");
address_parser_response_destroy(parsed); libpostal_address_parser_response_destroy(parsed);
} else { } else {
log_error("Error parsing address\n"); log_error("Error parsing address\n");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);

View File

@@ -98,8 +98,6 @@ bool address_parser_test(address_parser_t *parser, char *filename, address_parse
char *prev_label = NULL; char *prev_label = NULL;
address_parser_response_t *response = NULL;
size_t starting_errors = result->num_errors; size_t starting_errors = result->num_errors;
bool prediction_success = address_parser_predict(parser, context, token_labels, &address_parser_features, data_set->tokenized_str); bool prediction_success = address_parser_predict(parser, context, token_labels, &address_parser_features, data_set->tokenized_str);

View File

@@ -210,7 +210,7 @@ bool address_phrases_and_labels(address_parser_data_set_t *data_set, cstring_arr
if (sub_tokens->n > 1 && search_address_dictionaries_tokens_with_phrases(postal_code_normalized, sub_tokens, language, &postal_code_dictionary_phrases) && postal_code_dictionary_phrases->n > 0) { if (sub_tokens->n > 1 && search_address_dictionaries_tokens_with_phrases(postal_code_normalized, sub_tokens, language, &postal_code_dictionary_phrases) && postal_code_dictionary_phrases->n > 0) {
phrase_t first_postal_code_phrase = postal_code_dictionary_phrases->a[0]; phrase_t first_postal_code_phrase = postal_code_dictionary_phrases->a[0];
address_expansion_value_t *value = address_dictionary_get_expansions(first_postal_code_phrase.data); address_expansion_value_t *value = address_dictionary_get_expansions(first_postal_code_phrase.data);
if (value != NULL && value->components & ADDRESS_POSTAL_CODE) { if (value != NULL && value->components & LIBPOSTAL_ADDRESS_POSTAL_CODE) {
char_array_clear(token_builder); char_array_clear(token_builder);
size_t first_real_token_index = first_postal_code_phrase.start + first_postal_code_phrase.len; size_t first_real_token_index = first_postal_code_phrase.start + first_postal_code_phrase.len;
token_t first_real_token = sub_tokens->a[first_real_token_index]; token_t first_real_token = sub_tokens->a[first_real_token_index];
@@ -255,7 +255,7 @@ bool address_phrases_and_labels(address_parser_data_set_t *data_set, cstring_arr
address_expansion_value_t *phrase_value = address_dictionary_get_expansions(current_phrase.data); address_expansion_value_t *phrase_value = address_dictionary_get_expansions(current_phrase.data);
size_t current_phrase_end = current_phrase.start + current_phrase.len; size_t current_phrase_end = current_phrase.start + current_phrase.len;
if (phrase_value != NULL && phrase_value->components & ADDRESS_POSTAL_CODE) { if (phrase_value != NULL && phrase_value->components & LIBPOSTAL_ADDRESS_POSTAL_CODE) {
current_phrase_end = current_phrase.start; current_phrase_end = current_phrase.start;
} }

View File

@@ -28,8 +28,8 @@ int main(int argc, char **argv) {
char *languages[argc - 2]; char *languages[argc - 2];
for (int i = 0; i < argc - 2; i++) { for (int i = 0; i < argc - 2; i++) {
char *arg = argv[i + 2]; char *arg = argv[i + 2];
if (strlen(arg) >= MAX_LANGUAGE_LEN) { if (strlen(arg) >= LIBPOSTAL_MAX_LANGUAGE_LEN) {
printf("arg %d was longer than a language code (%d chars). Make sure to quote the input string\n", i + 2, MAX_LANGUAGE_LEN - 1); printf("arg %d was longer than a language code (%d chars). Make sure to quote the input string\n", i + 2, LIBPOSTAL_MAX_LANGUAGE_LEN - 1);
} }
languages[i] = arg; languages[i] = arg;
} }
@@ -38,7 +38,7 @@ int main(int argc, char **argv) {
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
normalize_options_t options = get_libpostal_default_options(); libpostal_normalize_options_t options = libpostal_get_default_options();
options.num_languages = 1; options.num_languages = 1;
options.languages = languages; options.languages = languages;
@@ -56,12 +56,8 @@ int main(int argc, char **argv) {
clock_t t1 = clock(); clock_t t1 = clock();
for (int i = 0; i < num_loops; i++) { for (int i = 0; i < num_loops; i++) {
strings = expand_address(str, options, &num_expansions); strings = libpostal_expand_address(str, options, &num_expansions);
for (uint64_t i = 0; i < num_expansions; i++) { libpostal_expansion_array_destroy(strings, num_expansions);
normalized = strings[i];
free(normalized);
}
free(strings);
} }
clock_t t2 = clock(); clock_t t2 = clock();

View File

@@ -1,46 +1,46 @@
// Only need these for the in-memory dictionaries // Only need these for the in-memory dictionaries
gazetteer_t gazetteer_config[] = { gazetteer_t gazetteer_config[] = {
{DICTIONARY_ACADEMIC_DEGREE, ADDRESS_NAME}, {DICTIONARY_ACADEMIC_DEGREE, LIBPOSTAL_ADDRESS_NAME},
{DICTIONARY_AMBIGUOUS_EXPANSION, ADDRESS_NONE}, {DICTIONARY_AMBIGUOUS_EXPANSION, LIBPOSTAL_ADDRESS_NONE},
{DICTIONARY_BUILDING_TYPE, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT}, {DICTIONARY_BUILDING_TYPE, LIBPOSTAL_ADDRESS_NAME | LIBPOSTAL_ADDRESS_HOUSE_NUMBER | LIBPOSTAL_ADDRESS_STREET | LIBPOSTAL_ADDRESS_UNIT},
{DICTIONARY_CATEGORY, ADDRESS_CATEGORY}, {DICTIONARY_CATEGORY, LIBPOSTAL_ADDRESS_CATEGORY},
{DICTIONARY_CHAIN, ADDRESS_NAME}, {DICTIONARY_CHAIN, LIBPOSTAL_ADDRESS_NAME},
{DICTIONARY_COMPANY_TYPE, ADDRESS_NAME}, {DICTIONARY_COMPANY_TYPE, LIBPOSTAL_ADDRESS_NAME},
{DICTIONARY_CONCATENATED_PREFIX_SEPARABLE, ADDRESS_ANY}, {DICTIONARY_CONCATENATED_PREFIX_SEPARABLE, LIBPOSTAL_ADDRESS_ANY},
{DICTIONARY_CONCATENATED_SUFFIX_INSEPARABLE, ADDRESS_ANY}, {DICTIONARY_CONCATENATED_SUFFIX_INSEPARABLE, LIBPOSTAL_ADDRESS_ANY},
{DICTIONARY_CONCATENATED_SUFFIX_SEPARABLE, ADDRESS_ANY}, {DICTIONARY_CONCATENATED_SUFFIX_SEPARABLE, LIBPOSTAL_ADDRESS_ANY},
{DICTIONARY_CROSS_STREET, ADDRESS_STREET}, {DICTIONARY_CROSS_STREET, LIBPOSTAL_ADDRESS_STREET},
{DICTIONARY_DIRECTIONAL, ADDRESS_ANY}, {DICTIONARY_DIRECTIONAL, LIBPOSTAL_ADDRESS_ANY},
{DICTIONARY_ELISION, ADDRESS_ANY}, {DICTIONARY_ELISION, LIBPOSTAL_ADDRESS_ANY},
{DICTIONARY_ENTRANCE, ADDRESS_ENTRANCE}, {DICTIONARY_ENTRANCE, LIBPOSTAL_ADDRESS_ENTRANCE},
{DICTIONARY_GIVEN_NAME, ADDRESS_STREET | ADDRESS_NAME}, {DICTIONARY_GIVEN_NAME, LIBPOSTAL_ADDRESS_STREET | LIBPOSTAL_ADDRESS_NAME},
{DICTIONARY_HOUSE_NUMBER, ADDRESS_HOUSE_NUMBER}, {DICTIONARY_HOUSE_NUMBER, LIBPOSTAL_ADDRESS_HOUSE_NUMBER},
{DICTIONARY_LEVEL_NUMBERED, ADDRESS_LEVEL}, {DICTIONARY_LEVEL_NUMBERED, LIBPOSTAL_ADDRESS_LEVEL},
{DICTIONARY_LEVEL_STANDALONE, ADDRESS_LEVEL}, {DICTIONARY_LEVEL_STANDALONE, LIBPOSTAL_ADDRESS_LEVEL},
{DICTIONARY_LEVEL_MEZZANINE, ADDRESS_LEVEL}, {DICTIONARY_LEVEL_MEZZANINE, LIBPOSTAL_ADDRESS_LEVEL},
{DICTIONARY_LEVEL_BASEMENT, ADDRESS_LEVEL}, {DICTIONARY_LEVEL_BASEMENT, LIBPOSTAL_ADDRESS_LEVEL},
{DICTIONARY_LEVEL_SUB_BASEMENT, ADDRESS_LEVEL}, {DICTIONARY_LEVEL_SUB_BASEMENT, LIBPOSTAL_ADDRESS_LEVEL},
{DICTIONARY_NEAR, ADDRESS_NEAR}, {DICTIONARY_NEAR, LIBPOSTAL_ADDRESS_NEAR},
{DICTIONARY_NULL, ADDRESS_ANY}, {DICTIONARY_NULL, LIBPOSTAL_ADDRESS_ANY},
{DICTIONARY_NAMED_ORGANIZATION, ADDRESS_NAME}, {DICTIONARY_NAMED_ORGANIZATION, LIBPOSTAL_ADDRESS_NAME},
{DICTIONARY_NAMED_PERSON, ADDRESS_NAME | ADDRESS_STREET}, {DICTIONARY_NAMED_PERSON, LIBPOSTAL_ADDRESS_NAME | LIBPOSTAL_ADDRESS_STREET},
{DICTIONARY_NO_NUMBER, ADDRESS_HOUSE_NUMBER}, {DICTIONARY_NO_NUMBER, LIBPOSTAL_ADDRESS_HOUSE_NUMBER},
{DICTIONARY_NUMBER, ADDRESS_HOUSE_NUMBER | ADDRESS_UNIT | ADDRESS_LEVEL | ADDRESS_STAIRCASE | ADDRESS_ENTRANCE}, {DICTIONARY_NUMBER, LIBPOSTAL_ADDRESS_HOUSE_NUMBER | LIBPOSTAL_ADDRESS_UNIT | LIBPOSTAL_ADDRESS_LEVEL | LIBPOSTAL_ADDRESS_STAIRCASE | LIBPOSTAL_ADDRESS_ENTRANCE},
{DICTIONARY_PERSONAL_SUFFIX, ADDRESS_NAME | ADDRESS_STREET}, {DICTIONARY_PERSONAL_SUFFIX, LIBPOSTAL_ADDRESS_NAME | LIBPOSTAL_ADDRESS_STREET},
{DICTIONARY_PERSONAL_TITLE, ADDRESS_NAME | ADDRESS_STREET}, {DICTIONARY_PERSONAL_TITLE, LIBPOSTAL_ADDRESS_NAME | LIBPOSTAL_ADDRESS_STREET},
{DICTIONARY_PLACE_NAME, ADDRESS_NAME | ADDRESS_STREET}, {DICTIONARY_PLACE_NAME, LIBPOSTAL_ADDRESS_NAME | LIBPOSTAL_ADDRESS_STREET},
{DICTIONARY_POST_OFFICE, ADDRESS_PO_BOX}, {DICTIONARY_POST_OFFICE, LIBPOSTAL_ADDRESS_PO_BOX},
{DICTIONARY_POSTAL_CODE, ADDRESS_POSTAL_CODE}, {DICTIONARY_POSTAL_CODE, LIBPOSTAL_ADDRESS_POSTAL_CODE},
{DICTIONARY_QUALIFIER, ADDRESS_STREET}, {DICTIONARY_QUALIFIER, LIBPOSTAL_ADDRESS_STREET},
{DICTIONARY_STAIRCASE, ADDRESS_STAIRCASE}, {DICTIONARY_STAIRCASE, LIBPOSTAL_ADDRESS_STAIRCASE},
{DICTIONARY_STOPWORD, ADDRESS_ANY}, {DICTIONARY_STOPWORD, LIBPOSTAL_ADDRESS_ANY},
{DICTIONARY_STREET_TYPE, ADDRESS_STREET}, {DICTIONARY_STREET_TYPE, LIBPOSTAL_ADDRESS_STREET},
{DICTIONARY_SURNAME, ADDRESS_STREET | ADDRESS_NAME}, {DICTIONARY_SURNAME, LIBPOSTAL_ADDRESS_STREET | LIBPOSTAL_ADDRESS_NAME},
{DICTIONARY_SYNONYM, ADDRESS_ANY}, {DICTIONARY_SYNONYM, LIBPOSTAL_ADDRESS_ANY},
{DICTIONARY_TOPONYM, ADDRESS_NAME | ADDRESS_STREET | ADDRESS_TOPONYM}, {DICTIONARY_TOPONYM, LIBPOSTAL_ADDRESS_NAME | LIBPOSTAL_ADDRESS_STREET | LIBPOSTAL_ADDRESS_TOPONYM},
{DICTIONARY_UNIT_NUMBERED, ADDRESS_UNIT}, {DICTIONARY_UNIT_NUMBERED, LIBPOSTAL_ADDRESS_UNIT},
{DICTIONARY_UNIT_STANDALONE, ADDRESS_UNIT}, {DICTIONARY_UNIT_STANDALONE, LIBPOSTAL_ADDRESS_UNIT},
{DICTIONARY_UNIT_DIRECTION, ADDRESS_UNIT} {DICTIONARY_UNIT_DIRECTION, LIBPOSTAL_ADDRESS_UNIT}
}; };

View File

@@ -27,7 +27,7 @@ static const size_t GAMMA_SCHEDULE_SIZE = sizeof(GAMMA_SCHEDULE) / sizeof(double
#define DEFAULT_GAMMA_0 10.0 #define DEFAULT_GAMMA_0 10.0
static double LAMBDA_SCHEDULE[] = {0.0, 1e-5, 1e-4, 0.001, 0.01, 0.1, \ static double LAMBDA_SCHEDULE[] = {0.0, 1e-5, 1e-4, 0.001, 0.01, 0.1, \
0.2, 0.5, 1.0, 2.0, 5.0, 10.0}; 0.2, 0.5, 1.0};
static const size_t LAMBDA_SCHEDULE_SIZE = sizeof(LAMBDA_SCHEDULE) / sizeof(double); static const size_t LAMBDA_SCHEDULE_SIZE = sizeof(LAMBDA_SCHEDULE) / sizeof(double);
#define DEFAULT_LAMBDA 0.0 #define DEFAULT_LAMBDA 0.0

View File

@@ -32,10 +32,10 @@ KSORT_INIT(phrase_language_array, phrase_language_t, ks_lt_phrase_language)
#define DEFAULT_KEY_LEN 32 #define DEFAULT_KEY_LEN 32
static normalize_options_t LIBPOSTAL_DEFAULT_OPTIONS = { static libpostal_normalize_options_t LIBPOSTAL_DEFAULT_OPTIONS = {
.languages = NULL, .languages = NULL,
.num_languages = 0, .num_languages = 0,
.address_components = ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_PO_BOX | ADDRESS_UNIT | ADDRESS_LEVEL | ADDRESS_ENTRANCE | ADDRESS_STAIRCASE | ADDRESS_POSTAL_CODE, .address_components = LIBPOSTAL_ADDRESS_NAME | LIBPOSTAL_ADDRESS_HOUSE_NUMBER | LIBPOSTAL_ADDRESS_STREET | LIBPOSTAL_ADDRESS_PO_BOX | LIBPOSTAL_ADDRESS_UNIT | LIBPOSTAL_ADDRESS_LEVEL | LIBPOSTAL_ADDRESS_ENTRANCE | LIBPOSTAL_ADDRESS_STAIRCASE | LIBPOSTAL_ADDRESS_POSTAL_CODE,
.latin_ascii = true, .latin_ascii = true,
.transliterate = true, .transliterate = true,
.strip_accents = true, .strip_accents = true,
@@ -56,11 +56,11 @@ static normalize_options_t LIBPOSTAL_DEFAULT_OPTIONS = {
.roman_numerals = true .roman_numerals = true
}; };
normalize_options_t get_libpostal_default_options(void) { libpostal_normalize_options_t libpostal_get_default_options(void) {
return LIBPOSTAL_DEFAULT_OPTIONS; return LIBPOSTAL_DEFAULT_OPTIONS;
} }
static inline uint64_t get_normalize_token_options(normalize_options_t options) { static inline uint64_t get_normalize_token_options(libpostal_normalize_options_t options) {
uint64_t normalize_token_options = 0; uint64_t normalize_token_options = 0;
normalize_token_options |= options.delete_final_periods ? NORMALIZE_TOKEN_DELETE_FINAL_PERIOD : 0; normalize_token_options |= options.delete_final_periods ? NORMALIZE_TOKEN_DELETE_FINAL_PERIOD : 0;
@@ -71,7 +71,7 @@ static inline uint64_t get_normalize_token_options(normalize_options_t options)
return normalize_token_options; return normalize_token_options;
} }
static inline uint64_t get_normalize_string_options(normalize_options_t options) { static inline uint64_t get_normalize_string_options(libpostal_normalize_options_t options) {
uint64_t normalize_string_options = 0; uint64_t normalize_string_options = 0;
normalize_string_options |= options.transliterate ? NORMALIZE_STRING_TRANSLITERATE : 0; normalize_string_options |= options.transliterate ? NORMALIZE_STRING_TRANSLITERATE : 0;
normalize_string_options |= options.latin_ascii ? NORMALIZE_STRING_LATIN_ASCII : 0; normalize_string_options |= options.latin_ascii ? NORMALIZE_STRING_LATIN_ASCII : 0;
@@ -83,7 +83,7 @@ static inline uint64_t get_normalize_string_options(normalize_options_t options)
return normalize_string_options; return normalize_string_options;
} }
static void add_normalized_strings_token(cstring_array *strings, char *str, token_t token, normalize_options_t options) { static void add_normalized_strings_token(cstring_array *strings, char *str, token_t token, libpostal_normalize_options_t options) {
uint64_t normalize_token_options = get_normalize_token_options(options); uint64_t normalize_token_options = get_normalize_token_options(options);
@@ -135,7 +135,7 @@ static void add_normalized_strings_token(cstring_array *strings, char *str, toke
} }
} }
static string_tree_t *add_string_alternatives(char *str, normalize_options_t options) { static string_tree_t *add_string_alternatives(char *str, libpostal_normalize_options_t options) {
char_array *key = NULL; char_array *key = NULL;
log_debug("input=%s\n", str); log_debug("input=%s\n", str);
@@ -500,7 +500,7 @@ static string_tree_t *add_string_alternatives(char *str, normalize_options_t opt
return tree; return tree;
} }
static void add_postprocessed_string(cstring_array *strings, char *str, normalize_options_t options) { static void add_postprocessed_string(cstring_array *strings, char *str, libpostal_normalize_options_t options) {
cstring_array_add_string(strings, str); cstring_array_add_string(strings, str);
if (options.roman_numerals) { if (options.roman_numerals) {
@@ -516,7 +516,7 @@ static void add_postprocessed_string(cstring_array *strings, char *str, normaliz
static address_expansion_array *get_affix_expansions(phrase_t phrase, normalize_options_t options) { static address_expansion_array *get_affix_expansions(phrase_t phrase, libpostal_normalize_options_t options) {
uint32_t expansion_index = phrase.data; uint32_t expansion_index = phrase.data;
address_expansion_value_t *value = address_dictionary_get_expansions(expansion_index); address_expansion_value_t *value = address_dictionary_get_expansions(expansion_index);
if (value != NULL && value->components & options.address_components) { if (value != NULL && value->components & options.address_components) {
@@ -526,7 +526,7 @@ static address_expansion_array *get_affix_expansions(phrase_t phrase, normalize_
return NULL; return NULL;
} }
static inline void cat_affix_expansion(char_array *key, char *str, address_expansion_t expansion, token_t token, phrase_t phrase, normalize_options_t options) { static inline void cat_affix_expansion(char_array *key, char *str, address_expansion_t expansion, token_t token, phrase_t phrase, libpostal_normalize_options_t options) {
if (expansion.canonical_index != NULL_CANONICAL_INDEX) { if (expansion.canonical_index != NULL_CANONICAL_INDEX) {
char *canonical = address_dictionary_get_canonical(expansion.canonical_index); char *canonical = address_dictionary_get_canonical(expansion.canonical_index);
uint64_t normalize_string_options = get_normalize_string_options(options); uint64_t normalize_string_options = get_normalize_string_options(options);
@@ -542,7 +542,7 @@ static inline void cat_affix_expansion(char_array *key, char *str, address_expan
} }
} }
static bool add_affix_expansions(string_tree_t *tree, char *str, char *lang, token_t token, phrase_t prefix, phrase_t suffix, normalize_options_t options) { static bool add_affix_expansions(string_tree_t *tree, char *str, char *lang, token_t token, phrase_t prefix, phrase_t suffix, libpostal_normalize_options_t options) {
cstring_array *strings = tree->strings; cstring_array *strings = tree->strings;
bool have_suffix = suffix.len > 0 && suffix.len < token.len; bool have_suffix = suffix.len > 0 && suffix.len < token.len;
@@ -753,7 +753,7 @@ static bool add_affix_expansions(string_tree_t *tree, char *str, char *lang, tok
} }
static inline bool expand_affixes(string_tree_t *tree, char *str, char *lang, token_t token, normalize_options_t options) { static inline bool expand_affixes(string_tree_t *tree, char *str, char *lang, token_t token, libpostal_normalize_options_t options) {
phrase_t suffix = search_address_dictionaries_suffix(str + token.offset, token.len, lang); phrase_t suffix = search_address_dictionaries_suffix(str + token.offset, token.len, lang);
phrase_t prefix = search_address_dictionaries_prefix(str + token.offset, token.len, lang); phrase_t prefix = search_address_dictionaries_prefix(str + token.offset, token.len, lang);
@@ -764,7 +764,7 @@ static inline bool expand_affixes(string_tree_t *tree, char *str, char *lang, to
return add_affix_expansions(tree, str, lang, token, prefix, suffix, options); return add_affix_expansions(tree, str, lang, token, prefix, suffix, options);
} }
static inline void add_normalized_strings_tokenized(string_tree_t *tree, char *str, token_array *tokens, normalize_options_t options) { static inline void add_normalized_strings_tokenized(string_tree_t *tree, char *str, token_array *tokens, libpostal_normalize_options_t options) {
cstring_array *strings = tree->strings; cstring_array *strings = tree->strings;
for (size_t i = 0; i < tokens->n; i++) { for (size_t i = 0; i < tokens->n; i++) {
@@ -795,7 +795,7 @@ static inline void add_normalized_strings_tokenized(string_tree_t *tree, char *s
} }
static void expand_alternative(cstring_array *strings, khash_t(str_set) *unique_strings, char *str, normalize_options_t options) { static void expand_alternative(cstring_array *strings, khash_t(str_set) *unique_strings, char *str, libpostal_normalize_options_t options) {
size_t len = strlen(str); size_t len = strlen(str);
token_array *tokens = tokenize_keep_whitespace(str); token_array *tokens = tokenize_keep_whitespace(str);
string_tree_t *token_tree = string_tree_new_size(len); string_tree_t *token_tree = string_tree_new_size(len);
@@ -901,8 +901,8 @@ static void expand_alternative(cstring_array *strings, khash_t(str_set) *unique_
char_array_destroy(temp_string); char_array_destroy(temp_string);
} }
char **expand_address(char *input, normalize_options_t options, size_t *n) { char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n) {
options.address_components |= ADDRESS_ANY; options.address_components |= LIBPOSTAL_ADDRESS_ANY;
uint64_t normalize_string_options = get_normalize_string_options(options); uint64_t normalize_string_options = get_normalize_string_options(options);
@@ -980,14 +980,14 @@ char **expand_address(char *input, normalize_options_t options, size_t *n) {
} }
void expansion_array_destroy(char **expansions, size_t n) { void libpostal_expansion_array_destroy(char **expansions, size_t n) {
for (size_t i = 0; i < n; i++) { for (size_t i = 0; i < n; i++) {
free(expansions[i]); free(expansions[i]);
} }
free(expansions); free(expansions);
} }
void address_parser_response_destroy(address_parser_response_t *self) { void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) {
if (self == NULL) return; if (self == NULL) return;
for (size_t i = 0; i < self->num_components; i++) { for (size_t i = 0; i < self->num_components; i++) {
@@ -1011,23 +1011,23 @@ void address_parser_response_destroy(address_parser_response_t *self) {
free(self); free(self);
} }
static address_parser_options_t LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS = { static libpostal_address_parser_options_t LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS = {
.language = NULL, .language = NULL,
.country = NULL .country = NULL
}; };
inline address_parser_options_t get_libpostal_address_parser_default_options(void) { inline libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void) {
return LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS; return LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS;
} }
address_parser_response_t *parse_address(char *address, address_parser_options_t options) { libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options) {
address_parser_context_t *context = address_parser_context_new(); address_parser_context_t *context = address_parser_context_new();
address_parser_response_t *parsed = address_parser_parse(address, options.language, options.country, context); libpostal_address_parser_response_t *parsed = address_parser_parse(address, options.language, options.country, context);
if (parsed == NULL) { if (parsed == NULL) {
log_error("Parser returned NULL\n"); log_error("Parser returned NULL\n");
address_parser_context_destroy(context); address_parser_context_destroy(context);
address_parser_response_destroy(parsed); libpostal_address_parser_response_destroy(parsed);
return NULL; return NULL;
} }

View File

@@ -10,31 +10,31 @@ extern "C" {
#include <stdint.h> #include <stdint.h>
#include <stdbool.h> #include <stdbool.h>
#define MAX_LANGUAGE_LEN 4 #define LIBPOSTAL_MAX_LANGUAGE_LEN 4
/* /*
Address dictionaries Address dictionaries
*/ */
// Bit set, should be able to keep it at a short (uint16_t) // Bit set, should be able to keep it at a short (uint16_t)
#define ADDRESS_NONE 0 #define LIBPOSTAL_ADDRESS_NONE 0
#define ADDRESS_ANY (1 << 0) #define LIBPOSTAL_ADDRESS_ANY (1 << 0)
#define ADDRESS_NAME (1 << 1) #define LIBPOSTAL_ADDRESS_NAME (1 << 1)
#define ADDRESS_HOUSE_NUMBER (1 << 2) #define LIBPOSTAL_ADDRESS_HOUSE_NUMBER (1 << 2)
#define ADDRESS_STREET (1 << 3) #define LIBPOSTAL_ADDRESS_STREET (1 << 3)
#define ADDRESS_UNIT (1 << 4) #define LIBPOSTAL_ADDRESS_UNIT (1 << 4)
#define ADDRESS_LEVEL (1 << 5) #define LIBPOSTAL_ADDRESS_LEVEL (1 << 5)
#define ADDRESS_STAIRCASE (1 << 6) #define LIBPOSTAL_ADDRESS_STAIRCASE (1 << 6)
#define ADDRESS_ENTRANCE (1 << 7) #define LIBPOSTAL_ADDRESS_ENTRANCE (1 << 7)
#define ADDRESS_CATEGORY (1 << 8) #define LIBPOSTAL_ADDRESS_CATEGORY (1 << 8)
#define ADDRESS_NEAR (1 << 9) #define LIBPOSTAL_ADDRESS_NEAR (1 << 9)
#define ADDRESS_TOPONYM (1 << 13) #define LIBPOSTAL_ADDRESS_TOPONYM (1 << 13)
#define ADDRESS_POSTAL_CODE (1 << 14) #define LIBPOSTAL_ADDRESS_POSTAL_CODE (1 << 14)
#define ADDRESS_PO_BOX (1 << 15) #define LIBPOSTAL_ADDRESS_PO_BOX (1 << 15)
#define ADDRESS_ALL ((1 << 16) - 1) #define LIBPOSTAL_ADDRESS_ALL ((1 << 16) - 1)
typedef struct normalize_options { typedef struct libpostal_normalize_options {
// List of language codes // List of language codes
char **languages; char **languages;
size_t num_languages; size_t num_languages;
@@ -60,34 +60,34 @@ typedef struct normalize_options {
bool expand_numex; bool expand_numex;
bool roman_numerals; bool roman_numerals;
} normalize_options_t; } libpostal_normalize_options_t;
normalize_options_t get_libpostal_default_options(void); libpostal_normalize_options_t libpostal_get_default_options(void);
char **expand_address(char *input, normalize_options_t options, size_t *n); char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n);
void expansion_array_destroy(char **expansions, size_t n); void libpostal_expansion_array_destroy(char **expansions, size_t n);
/* /*
Address parser Address parser
*/ */
typedef struct address_parser_response { typedef struct libpostal_address_parser_response {
size_t num_components; size_t num_components;
char **components; char **components;
char **labels; char **labels;
} address_parser_response_t; } libpostal_address_parser_response_t;
typedef struct address_parser_options { typedef struct libpostal_address_parser_options {
char *language; char *language;
char *country; char *country;
} address_parser_options_t; } libpostal_address_parser_options_t;
void address_parser_response_destroy(address_parser_response_t *self); void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self);
address_parser_options_t get_libpostal_address_parser_default_options(void); libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void);
address_parser_response_t *parse_address(char *address, address_parser_options_t options); libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options);
// Setup/teardown methods // Setup/teardown methods

View File

@@ -11,7 +11,6 @@ LIBPOSTAL_S3_BUCKET_NAME="libpostal"
LIBPOSTAL_S3_KEY="s3://$LIBPOSTAL_S3_BUCKET_NAME" LIBPOSTAL_S3_KEY="s3://$LIBPOSTAL_S3_BUCKET_NAME"
LIBPOSTAL_S3_BUCKET_URL="http://$LIBPOSTAL_S3_BUCKET_NAME.s3.amazonaws.com" LIBPOSTAL_S3_BUCKET_URL="http://$LIBPOSTAL_S3_BUCKET_NAME.s3.amazonaws.com"
LIBPOSTAL_DATA_FILE="libpostal_data.tar.gz" LIBPOSTAL_DATA_FILE="libpostal_data.tar.gz"
LIBPOSTAL_GEODB_FILE="geodb.tar.gz"
LIBPOSTAL_PARSER_FILE="parser.tar.gz" LIBPOSTAL_PARSER_FILE="parser.tar.gz"
LIBPOSTAL_LANG_CLASS_FILE="language_classifier.tar.gz" LIBPOSTAL_LANG_CLASS_FILE="language_classifier.tar.gz"
@@ -22,12 +21,10 @@ LIBPOSTAL_DATA_DIR=$3
mkdir -p $LIBPOSTAL_DATA_DIR mkdir -p $LIBPOSTAL_DATA_DIR
LIBPOSTAL_DATA_UPDATED_PATH=$LIBPOSTAL_DATA_DIR/last_updated LIBPOSTAL_DATA_UPDATED_PATH=$LIBPOSTAL_DATA_DIR/last_updated
LIBPOSTAL_GEO_UPDATED_PATH=$LIBPOSTAL_DATA_DIR/last_updated_geo
LIBPOSTAL_PARSER_UPDATED_PATH=$LIBPOSTAL_DATA_DIR/last_updated_parser LIBPOSTAL_PARSER_UPDATED_PATH=$LIBPOSTAL_DATA_DIR/last_updated_parser
LIBPOSTAL_LANG_CLASS_UPDATED_PATH=$LIBPOSTAL_DATA_DIR/last_updated_language_classifier LIBPOSTAL_LANG_CLASS_UPDATED_PATH=$LIBPOSTAL_DATA_DIR/last_updated_language_classifier
BASIC_MODULE_DIRS="address_expansions numex transliteration" BASIC_MODULE_DIRS="address_expansions numex transliteration"
GEODB_MODULE_DIR=geodb
PARSER_MODULE_DIR=address_parser PARSER_MODULE_DIR=address_parser
LANGUAGE_CLASSIFIER_MODULE_DIR=language_classifier LANGUAGE_CLASSIFIER_MODULE_DIR=language_classifier
@@ -133,11 +130,10 @@ if [ $COMMAND = "download" ]; then
if [ $FILE = "base" ] || [ $FILE = "all" ]; then if [ $FILE = "base" ] || [ $FILE = "all" ]; then
download_file $LIBPOSTAL_DATA_UPDATED_PATH $LIBPOSTAL_DATA_DIR $LIBPOSTAL_DATA_FILE "data file" download_file $LIBPOSTAL_DATA_UPDATED_PATH $LIBPOSTAL_DATA_DIR $LIBPOSTAL_DATA_FILE "data file"
fi fi
if [ $FILE = "geodb" ] || [ $FILE = "all" ]; then
download_file $LIBPOSTAL_GEO_UPDATED_PATH $LIBPOSTAL_DATA_DIR $LIBPOSTAL_GEODB_FILE "geodb data file"
fi
if [ $FILE = "parser" ] || [ $FILE = "all" ]; then if [ $FILE = "parser" ] || [ $FILE = "all" ]; then
download_file $LIBPOSTAL_PARSER_UPDATED_PATH $LIBPOSTAL_DATA_DIR $LIBPOSTAL_PARSER_FILE "parser data file" latest_parser=$(curl --silent $LIBPOSTAL_S3_BUCKET_URL/models/address_parser/latest)
parser_filename="models/address_parser/$latest_parser/$LIBPOSTAL_PARSER_FILE"
download_file $LIBPOSTAL_PARSER_UPDATED_PATH $LIBPOSTAL_DATA_DIR $parser_filename "parser data file"
fi fi
if [ $FILE = "language_classifier" ] || [ $FILE = "all" ]; then if [ $FILE = "language_classifier" ] || [ $FILE = "all" ]; then
download_file $LIBPOSTAL_LANG_CLASS_UPDATED_PATH $LIBPOSTAL_DATA_DIR $LIBPOSTAL_LANG_CLASS_FILE "language classifier data file" download_file $LIBPOSTAL_LANG_CLASS_UPDATED_PATH $LIBPOSTAL_DATA_DIR $LIBPOSTAL_LANG_CLASS_FILE "language classifier data file"
@@ -150,11 +146,6 @@ elif [ $COMMAND = "upload" ]; then
aws s3 cp --acl=public-read $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_DATA_FILE $LIBPOSTAL_S3_KEY aws s3 cp --acl=public-read $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_DATA_FILE $LIBPOSTAL_S3_KEY
fi fi
if [ $FILE = "geodb" ] || [ $FILE = "all" ]; then
tar -C $LIBPOSTAL_DATA_DIR -cvzf $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_GEODB_FILE $GEODB_MODULE_DIR
aws s3 cp --acl=public-read $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_GEODB_FILE $LIBPOSTAL_S3_KEY
fi
if [ $FILE = "parser" ] || [ $FILE = "all" ]; then if [ $FILE = "parser" ] || [ $FILE = "all" ]; then
tar -C $LIBPOSTAL_DATA_DIR -cvzf $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_PARSER_FILE $PARSER_MODULE_DIR tar -C $LIBPOSTAL_DATA_DIR -cvzf $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_PARSER_FILE $PARSER_MODULE_DIR
aws s3 cp --acl=public-read $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_PARSER_FILE $LIBPOSTAL_S3_KEY aws s3 cp --acl=public-read $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_PARSER_FILE $LIBPOSTAL_S3_KEY

View File

@@ -13,10 +13,10 @@
#define LIBPOSTAL_USAGE "Usage: ./libpostal address [...languages] [--json]\n" #define LIBPOSTAL_USAGE "Usage: ./libpostal address [...languages] [--json]\n"
static inline void print_output(char *address, normalize_options_t options, bool use_json) { static inline void print_output(char *address, libpostal_normalize_options_t options, bool use_json) {
size_t num_expansions; size_t num_expansions;
char **strings = expand_address(address, options, &num_expansions); char **strings = libpostal_expand_address(address, options, &num_expansions);
char *normalized; char *normalized;
@@ -79,7 +79,7 @@ int main(int argc, char **argv) {
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
normalize_options_t options = get_libpostal_default_options(); libpostal_normalize_options_t options = libpostal_get_default_options();
if (languages != NULL) { if (languages != NULL) {
options.languages = languages->a; options.languages = languages->a;

View File

@@ -8,9 +8,9 @@
SUITE(libpostal_expansion_tests); SUITE(libpostal_expansion_tests);
static greatest_test_res test_expansion_contains(char *input, char *output, normalize_options_t options) { static greatest_test_res test_expansion_contains(char *input, char *output, libpostal_normalize_options_t options) {
size_t num_expansions; size_t num_expansions;
char **expansions = expand_address(input, options, &num_expansions); char **expansions = libpostal_expand_address(input, options, &num_expansions);
bool contains_expansion = false; bool contains_expansion = false;
char *expansion; char *expansion;
@@ -23,6 +23,8 @@ static greatest_test_res test_expansion_contains(char *input, char *output, norm
} }
libpostal_expansion_array_destroy(expansions, num_expansions);
if (!contains_expansion) { if (!contains_expansion) {
printf("Expansions should contain %s, got {", output); printf("Expansions should contain %s, got {", output);
for (size_t i = 0; i < num_expansions; i++) { for (size_t i = 0; i < num_expansions; i++) {
@@ -36,7 +38,7 @@ static greatest_test_res test_expansion_contains(char *input, char *output, norm
PASS(); PASS();
} }
static greatest_test_res test_expansion_contains_with_languages(char *input, char *output, normalize_options_t options, size_t num_languages, ...) { static greatest_test_res test_expansion_contains_with_languages(char *input, char *output, libpostal_normalize_options_t options, size_t num_languages, ...) {
char **languages = NULL; char **languages = NULL;
size_t i; size_t i;
@@ -50,7 +52,7 @@ static greatest_test_res test_expansion_contains_with_languages(char *input, cha
for (i = 0; i < num_languages; i++) { for (i = 0; i < num_languages; i++) {
lang = va_arg(args, char *); lang = va_arg(args, char *);
ASSERT(strlen(lang) < MAX_LANGUAGE_LEN); ASSERT(strlen(lang) < LIBPOSTAL_MAX_LANGUAGE_LEN);
languages[i] = strdup(lang); languages[i] = strdup(lang);
} }
@@ -75,7 +77,7 @@ static greatest_test_res test_expansion_contains_with_languages(char *input, cha
TEST test_expansions(void) { TEST test_expansions(void) {
normalize_options_t options = get_libpostal_default_options(); libpostal_normalize_options_t options = libpostal_get_default_options();
CHECK_CALL(test_expansion_contains_with_languages("123 Main St. #2f", "123 main street number 2f", options, 1, "en")); CHECK_CALL(test_expansion_contains_with_languages("123 Main St. #2f", "123 main street number 2f", options, 1, "en"));
CHECK_CALL(test_expansion_contains_with_languages("S St. NW", "s street northwest", options, 1, "en")); CHECK_CALL(test_expansion_contains_with_languages("S St. NW", "s street northwest", options, 1, "en"));
@@ -86,7 +88,7 @@ TEST test_expansions(void) {
} }
TEST test_expansions_language_classifier(void) { TEST test_expansions_language_classifier(void) {
normalize_options_t options = get_libpostal_default_options(); libpostal_normalize_options_t options = libpostal_get_default_options();
CHECK_CALL(test_expansion_contains_with_languages("V XX Sett", "via 20 settembre", options, 0, NULL)); CHECK_CALL(test_expansion_contains_with_languages("V XX Sett", "via 20 settembre", options, 0, NULL));
CHECK_CALL(test_expansion_contains_with_languages("C/ Ocho", "calle 8", options, 0, NULL)); CHECK_CALL(test_expansion_contains_with_languages("C/ Ocho", "calle 8", options, 0, NULL));
@@ -94,7 +96,7 @@ TEST test_expansions_language_classifier(void) {
} }
TEST test_expansions_no_options(void) { TEST test_expansions_no_options(void) {
normalize_options_t options = get_libpostal_default_options(); libpostal_normalize_options_t options = libpostal_get_default_options();
options.lowercase = false; options.lowercase = false;
options.latin_ascii = false; options.latin_ascii = false;
options.transliterate = false; options.transliterate = false;

View File

@@ -14,8 +14,8 @@ typedef struct labeled_component {
char *component; char *component;
} labeled_component_t; } labeled_component_t;
static greatest_test_res test_parse_result_equals(char *input, address_parser_options_t options, size_t output_len, ...) { static greatest_test_res test_parse_result_equals(char *input, libpostal_address_parser_options_t options, size_t output_len, ...) {
address_parser_response_t *response = parse_address(input, options); libpostal_address_parser_response_t *response = libpostal_parse_address(input, options);
va_list args; va_list args;
@@ -56,11 +56,11 @@ static greatest_test_res test_parse_result_equals(char *input, address_parser_op
printf("%s: %s\n", response->labels[i], response->components[i]); printf("%s: %s\n", response->labels[i], response->components[i]);
} }
va_end(args); va_end(args);
address_parser_response_destroy(response); libpostal_address_parser_response_destroy(response);
FAIL(); FAIL();
} }
address_parser_response_destroy(response); libpostal_address_parser_response_destroy(response);
PASS(); PASS();
} }
@@ -68,7 +68,7 @@ static greatest_test_res test_parse_result_equals(char *input, address_parser_op
TEST test_us_parses(void) { TEST test_us_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
"Black Alliance for Just Immigration 660 Nostrand Ave, Brooklyn, N.Y., 11216", "Black Alliance for Just Immigration 660 Nostrand Ave, Brooklyn, N.Y., 11216",
@@ -631,7 +631,7 @@ TEST test_us_parses(void) {
} }
TEST test_ca_parses(void) { TEST test_ca_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// From: https://github.com/openvenues/libpostal/issues/55 // From: https://github.com/openvenues/libpostal/issues/55
@@ -694,7 +694,7 @@ TEST test_ca_parses(void) {
} }
TEST test_jm_parses(void) { TEST test_jm_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// From https://github.com/openvenues/libpostal/issues/113 // From https://github.com/openvenues/libpostal/issues/113
@@ -730,7 +730,7 @@ TEST test_jm_parses(void) {
TEST test_gb_parses(void) { TEST test_gb_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
@@ -1069,7 +1069,7 @@ TEST test_gb_parses(void) {
} }
TEST test_im_parses(void) { TEST test_im_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// Multiple house names // Multiple house names
@@ -1089,7 +1089,7 @@ TEST test_im_parses(void) {
} }
TEST test_nz_parses(void) { TEST test_nz_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
"wellington new zealand", "wellington new zealand",
@@ -1103,7 +1103,7 @@ TEST test_nz_parses(void) {
} }
TEST test_fr_parses(void) { TEST test_fr_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// From https://github.com/pelias/pelias/issues/426 // From https://github.com/pelias/pelias/issues/426
"Chambéry", "Chambéry",
@@ -1169,7 +1169,7 @@ TEST test_fr_parses(void) {
TEST test_es_parses(void) { TEST test_es_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
// Use Spanish toponym // Use Spanish toponym
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
@@ -1214,7 +1214,7 @@ TEST test_es_parses(void) {
} }
TEST test_co_parses(void) { TEST test_co_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
"Cra 18#63-64 B Chapinero Bogotá DC Colombia", "Cra 18#63-64 B Chapinero Bogotá DC Colombia",
@@ -1277,7 +1277,7 @@ TEST test_co_parses(void) {
} }
TEST test_mx_parses(void) { TEST test_mx_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
// From: https://github.com/openvenues/libpostal/issues/126 // From: https://github.com/openvenues/libpostal/issues/126
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
@@ -1309,7 +1309,7 @@ TEST test_mx_parses(void) {
TEST test_br_parses(void) { TEST test_br_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// Brazil address with sem número (s/n) and CEP used with postal code // Brazil address with sem número (s/n) and CEP used with postal code
@@ -1328,7 +1328,7 @@ TEST test_br_parses(void) {
} }
TEST test_cn_parses(void) { TEST test_cn_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// From https://github.com/openvenues/libpostal/issues/71 // From https://github.com/openvenues/libpostal/issues/71
@@ -1351,7 +1351,7 @@ TEST test_cn_parses(void) {
TEST test_jp_parses(void) { TEST test_jp_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// Example of a Kanji address // Example of a Kanji address
@@ -1410,7 +1410,7 @@ TEST test_jp_parses(void) {
} }
TEST test_kr_parses(void) { TEST test_kr_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// English/Romanized Korean, ro + gil address, English unit // English/Romanized Korean, ro + gil address, English unit
@@ -1429,7 +1429,7 @@ TEST test_kr_parses(void) {
} }
TEST test_my_parses(void) { TEST test_my_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// From https://github.com/openvenues/libpostal/issues/121 // From https://github.com/openvenues/libpostal/issues/121
@@ -1448,7 +1448,7 @@ TEST test_my_parses(void) {
} }
TEST test_za_parses(void) { TEST test_za_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// Contains HTML entity which should be normalized // Contains HTML entity which should be normalized
@@ -1469,7 +1469,7 @@ TEST test_za_parses(void) {
} }
TEST test_de_parses(void) { TEST test_de_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
/* Contains German concatenated street suffix /* Contains German concatenated street suffix
@@ -1518,7 +1518,7 @@ TEST test_de_parses(void) {
TEST test_at_parses(void) { TEST test_at_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
"Eduard Sueß Gasse 9", "Eduard Sueß Gasse 9",
@@ -1592,7 +1592,7 @@ TEST test_at_parses(void) {
TEST test_nl_parses(void) { TEST test_nl_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// From: https://github.com/openvenues/libpostal/issues/162 // From: https://github.com/openvenues/libpostal/issues/162
"Nieuwe Binnenweg 17-19, Oude Westen, Rotterdam NL", "Nieuwe Binnenweg 17-19, Oude Westen, Rotterdam NL",
@@ -1637,7 +1637,7 @@ TEST test_nl_parses(void) {
} }
TEST test_da_parses(void) { TEST test_da_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
"Valdemarsgade 42 4 t.v. København, 1665 Danmark", "Valdemarsgade 42 4 t.v. København, 1665 Danmark",
@@ -1655,7 +1655,7 @@ TEST test_da_parses(void) {
} }
TEST test_fi_parses(void) { TEST test_fi_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
"1 Hämeenkatu, Tampere, Finland", "1 Hämeenkatu, Tampere, Finland",
@@ -1679,7 +1679,7 @@ TEST test_fi_parses(void) {
} }
TEST test_no_parses(void) { TEST test_no_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// From: https://github.com/openvenues/libpostal/issues/39#issuecomment-221027220 // From: https://github.com/openvenues/libpostal/issues/39#issuecomment-221027220
@@ -1696,7 +1696,7 @@ TEST test_no_parses(void) {
} }
TEST test_se_parses(void) { TEST test_se_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// Uses the "en trappa upp" (one floor up) form in Swedish addresses // Uses the "en trappa upp" (one floor up) form in Swedish addresses
@@ -1714,7 +1714,7 @@ TEST test_se_parses(void) {
} }
TEST test_hu_parses(void) { TEST test_hu_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// Hungarian, 4-digit postal code // Hungarian, 4-digit postal code
@@ -1730,7 +1730,7 @@ TEST test_hu_parses(void) {
} }
TEST test_ro_parses(void) { TEST test_ro_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// Romanian address with staircase // Romanian address with staircase
@@ -1751,7 +1751,7 @@ TEST test_ro_parses(void) {
TEST test_ru_parses(void) { TEST test_ru_parses(void) {
address_parser_options_t options = get_libpostal_address_parser_default_options(); libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
CHECK_CALL(test_parse_result_equals( CHECK_CALL(test_parse_result_equals(
// Contains Cyrillic with abbreviations // Contains Cyrillic with abbreviations