[test] Adding automated parser tests to the C library

This commit is contained in:
Al
2016-02-17 17:19:03 -05:00
parent 43f41230b6
commit 37cfe8ab3b
3 changed files with 250 additions and 1 deletions

View File

@@ -9,6 +9,6 @@ CFLAGS = $(CFLAGS_BASE)
TESTS = test_libpostal
noinst_PROGRAMS = test_libpostal
test_libpostal_SOURCES = test.c test_expand.c test_transliterate.c test_numex.c test_trie.c
test_libpostal_SOURCES = test.c test_expand.c test_parser.c test_transliterate.c test_numex.c test_trie.c
test_libpostal_LDADD = ../src/libpostal.la
test_libpostal_CFLAGS = $(CFLAGS_O3)

View File

@@ -1,6 +1,7 @@
#include "greatest.h"
SUITE_EXTERN(libpostal_expansion_tests);
SUITE_EXTERN(libpostal_parser_tests);
SUITE_EXTERN(libpostal_transliteration_tests);
SUITE_EXTERN(libpostal_numex_tests);
SUITE_EXTERN(libpostal_trie_tests);
@@ -12,6 +13,7 @@ int main(int argc, char **argv) {
GREATEST_MAIN_BEGIN();
RUN_SUITE(libpostal_expansion_tests);
RUN_SUITE(libpostal_parser_tests);
RUN_SUITE(libpostal_transliteration_tests);
RUN_SUITE(libpostal_numex_tests);
RUN_SUITE(libpostal_trie_tests);

247
test/test_parser.c Normal file
View File

@@ -0,0 +1,247 @@
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "greatest.h"
#include "../src/libpostal.h"
SUITE(libpostal_parser_tests);
/*
 * One expected (label, component) pair of a parsed address, passed by value
 * through the variadic tail of test_parse_result_equals().
 *
 * Both members are const-qualified because every test in this file
 * initializes them from string literals, which must never be written through.
 */
typedef struct labeled_component {
    const char *label;     /* expected label, e.g. "road" or "postcode" */
    const char *component; /* expected text of the component carrying that label */
} labeled_component_t;
/*
 * Parses `input` with `options` and compares the result against the expected
 * labeled components supplied as variadic arguments.
 *
 * Parameters:
 *   input      - address string handed to parse_address().
 *   options    - parser options handed to parse_address().
 *   output_len - number of labeled_component_t values that follow.
 *   ...        - exactly `output_len` labeled_component_t values, in the order
 *                the parser is expected to emit them.
 *
 * Returns a greatest test result: PASS when the number of components and
 * every label/component string match, FAIL otherwise. On failure a diagnostic
 * is printed to stdout. On every path the va_list is closed and the parser
 * response is destroyed before returning.
 */
static greatest_test_res test_parse_result_equals(char *input, address_parser_options_t options, size_t output_len, ...) {
    address_parser_response_t *response = parse_address(input, options);
    va_list args;
    size_t i;

    /* Guard against a missing response so the checks below never dereference NULL. */
    if (response == NULL) {
        printf("parse_address returned NULL for input: %s\n", input);
        FAIL();
    }

    if (output_len != response->num_components) {
        va_start(args, output_len);
        printf("Expected\n\n");
        for (i = 0; i < output_len; i++) {
            labeled_component_t lc = va_arg(args, labeled_component_t);
            printf("%s: %s\n", lc.label, lc.component);
        }
        va_end(args);

        printf("\n\n");
        printf("Got\n\n");
        for (i = 0; i < response->num_components; i++) {
            printf("%s: %s\n", response->labels[i], response->components[i]);
        }

        address_parser_response_destroy(response);
        FAIL();
    }

    /*
     * Compare by hand rather than with ASSERT_STR_EQ: the assertion macros
     * return from this function on mismatch, which would skip va_end() and
     * leak the response.
     */
    bool matched = true;

    va_start(args, output_len);
    for (i = 0; i < response->num_components; i++) {
        labeled_component_t lc = va_arg(args, labeled_component_t);

        if (strcmp(lc.label, response->labels[i]) != 0 ||
            strcmp(lc.component, response->components[i]) != 0) {
            printf("Mismatch at component %zu\n\n", i);
            printf("Expected\n\n%s: %s\n\n", lc.label, lc.component);
            printf("Got\n\n%s: %s\n", response->labels[i], response->components[i]);
            matched = false;
            break;
        }
    }
    va_end(args);

    address_parser_response_destroy(response);

    if (!matched) {
        FAIL();
    }

    PASS();
}
/*
 * US address: expects the default-options parser to yield nine labeled
 * components (house through country) in the order listed below.
 */
TEST test_us_parses(void) {
    address_parser_options_t parser_opts = get_libpostal_address_parser_default_options();
    char *address = "Barboncino 781 Franklin Ave Crown Heights Brooklyn NYC NY 11216 USA";

    CHECK_CALL(test_parse_result_equals(
        address,
        parser_opts,
        9,
        (labeled_component_t){.label = "house", .component = "barboncino"},
        (labeled_component_t){.label = "house_number", .component = "781"},
        (labeled_component_t){.label = "road", .component = "franklin ave"},
        (labeled_component_t){.label = "suburb", .component = "crown heights"},
        (labeled_component_t){.label = "city_district", .component = "brooklyn"},
        (labeled_component_t){.label = "city", .component = "nyc"},
        (labeled_component_t){.label = "state", .component = "ny"},
        (labeled_component_t){.label = "postcode", .component = "11216"},
        (labeled_component_t){.label = "country", .component = "usa"}
    ));

    PASS();
}
/*
 * UK address, checked twice with the default options: once comma-separated
 * with state_district and state present (nine components), and once without
 * commas or those two fields (seven components).
 */
TEST test_uk_parses(void) {
    address_parser_options_t parser_opts = get_libpostal_address_parser_default_options();

    char *with_commas = "The Book Club 100-106 Leonard St, Shoreditch, London, Greater London, England, EC2A 4RH, United Kingdom";
    char *without_commas = "The Book Club 100-106 Leonard St Shoreditch London EC2A 4RH United Kingdom";

    /* Fully qualified, comma-separated form. */
    CHECK_CALL(test_parse_result_equals(
        with_commas,
        parser_opts,
        9,
        (labeled_component_t){.label = "house", .component = "the book club"},
        (labeled_component_t){.label = "house_number", .component = "100-106"},
        (labeled_component_t){.label = "road", .component = "leonard st"},
        (labeled_component_t){.label = "suburb", .component = "shoreditch"},
        (labeled_component_t){.label = "city", .component = "london"},
        (labeled_component_t){.label = "state_district", .component = "greater london"},
        (labeled_component_t){.label = "state", .component = "england"},
        (labeled_component_t){.label = "postcode", .component = "ec2a 4rh"},
        (labeled_component_t){.label = "country", .component = "united kingdom"}
    ));

    /* Shorter form with no separators. */
    CHECK_CALL(test_parse_result_equals(
        without_commas,
        parser_opts,
        7,
        (labeled_component_t){.label = "house", .component = "the book club"},
        (labeled_component_t){.label = "house_number", .component = "100-106"},
        (labeled_component_t){.label = "road", .component = "leonard st"},
        (labeled_component_t){.label = "suburb", .component = "shoreditch"},
        (labeled_component_t){.label = "city", .component = "london"},
        (labeled_component_t){.label = "postcode", .component = "ec2a 4rh"},
        (labeled_component_t){.label = "country", .component = "united kingdom"}
    ));

    PASS();
}
/*
 * Spanish address, checked with the country written in Spanish and then in
 * English; the remaining expected components are the same in both cases.
 */
TEST test_es_parses(void) {
    address_parser_options_t parser_opts = get_libpostal_address_parser_default_options();

    char *spanish_toponym = "Museo del Prado C. de Ruiz de Alarcón, 23 28014 Madrid, España";
    char *english_toponym = "Museo del Prado C. de Ruiz de Alarcón, 23 28014 Madrid, Spain";

    /* Country given as the Spanish toponym. */
    CHECK_CALL(test_parse_result_equals(
        spanish_toponym,
        parser_opts,
        6,
        (labeled_component_t){.label = "house", .component = "museo del prado"},
        (labeled_component_t){.label = "road", .component = "c. de ruiz de alarcón"},
        (labeled_component_t){.label = "house_number", .component = "23"},
        (labeled_component_t){.label = "postcode", .component = "28014"},
        (labeled_component_t){.label = "city", .component = "madrid"},
        (labeled_component_t){.label = "country", .component = "españa"}
    ));

    /* Country given as the English toponym. */
    CHECK_CALL(test_parse_result_equals(
        english_toponym,
        parser_opts,
        6,
        (labeled_component_t){.label = "house", .component = "museo del prado"},
        (labeled_component_t){.label = "road", .component = "c. de ruiz de alarcón"},
        (labeled_component_t){.label = "house_number", .component = "23"},
        (labeled_component_t){.label = "postcode", .component = "28014"},
        (labeled_component_t){.label = "city", .component = "madrid"},
        (labeled_component_t){.label = "country", .component = "spain"}
    ));

    PASS();
}
/*
 * South African address. The input:
 *   - contains an HTML entity, which should be normalized in the output;
 *   - contains a 4-digit postcode, which can be confused with a house number.
 */
TEST test_za_parses(void) {
    address_parser_options_t parser_opts = get_libpostal_address_parser_default_options();
    char *address = "Double Shot Tea &amp; Coffee 15 Melle St. Braamfontein Johannesburg, 2000, South Africa";

    CHECK_CALL(test_parse_result_equals(
        address,
        parser_opts,
        7,
        (labeled_component_t){.label = "house", .component = "double shot tea & coffee"},
        (labeled_component_t){.label = "house_number", .component = "15"},
        (labeled_component_t){.label = "road", .component = "melle st."},
        (labeled_component_t){.label = "suburb", .component = "braamfontein"},
        (labeled_component_t){.label = "city", .component = "johannesburg"},
        (labeled_component_t){.label = "postcode", .component = "2000"},
        (labeled_component_t){.label = "country", .component = "south africa"}
    ));

    PASS();
}
/*
 * German address. The input:
 *   - exercises Latin-ASCII normalizations;
 *   - contains a German concatenated street suffix.
 *
 * N.B. We may want to move ä => ae out of the Latin-ASCII transliterator,
 * which would change the expected output of this test to e.g.
 * "eschenbräu bräurei".
 */
TEST test_de_parses(void) {
    address_parser_options_t parser_opts = get_libpostal_address_parser_default_options();
    char *address = "Eschenbräu Bräurei Triftstraße 67 13353 Berlin Deutschland";

    CHECK_CALL(test_parse_result_equals(
        address,
        parser_opts,
        6,
        (labeled_component_t){.label = "house", .component = "eschenbraeu braeurei"},
        (labeled_component_t){.label = "road", .component = "triftstrasse"},
        (labeled_component_t){.label = "house_number", .component = "67"},
        (labeled_component_t){.label = "postcode", .component = "13353"},
        (labeled_component_t){.label = "city", .component = "berlin"},
        (labeled_component_t){.label = "country", .component = "deutschland"}
    ));

    PASS();
}
/*
 * Hungarian address with a 4-digit postal code that follows the city name.
 */
TEST test_hu_parses(void) {
    address_parser_options_t parser_opts = get_libpostal_address_parser_default_options();
    char *address = "Szimpla Kert Kazinczy utca 14 Budapest 1075, Magyarország";

    CHECK_CALL(test_parse_result_equals(
        address,
        parser_opts,
        6,
        (labeled_component_t){.label = "house", .component = "szimpla kert"},
        (labeled_component_t){.label = "road", .component = "kazinczy utca"},
        (labeled_component_t){.label = "house_number", .component = "14"},
        (labeled_component_t){.label = "city", .component = "budapest"},
        (labeled_component_t){.label = "postcode", .component = "1075"},
        (labeled_component_t){.label = "country", .component = "magyarország"}
    ));

    PASS();
}
/*
 * Russian address. The input:
 *   - contains Cyrillic text with abbreviations;
 *   - contains a 6-digit postcode;
 *   - switches script, using English toponyms for the city and country.
 */
TEST test_ru_parses(void) {
    address_parser_options_t parser_opts = get_libpostal_address_parser_default_options();
    char *address = "Государственный Эрмитаж Дворцовая наб., 34 191186, St. Petersburg, Russia";

    CHECK_CALL(test_parse_result_equals(
        address,
        parser_opts,
        6,
        (labeled_component_t){.label = "house", .component = "государственный эрмитаж"},
        (labeled_component_t){.label = "road", .component = "дворцовая наб."},
        (labeled_component_t){.label = "house_number", .component = "34"},
        (labeled_component_t){.label = "postcode", .component = "191186"},
        (labeled_component_t){.label = "city", .component = "st. petersburg"},
        (labeled_component_t){.label = "country", .component = "russia"}
    ));

    PASS();
}
/*
 * Parser test suite: sets up libpostal and its address parser, runs every
 * per-country parse test, then tears both down. If either setup call fails
 * the process exits with EXIT_FAILURE before any test runs.
 */
SUITE(libpostal_parser_tests) {
    /* Short-circuits: the parser setup is only attempted once the base setup succeeded. */
    bool setup_ok = libpostal_setup() && libpostal_setup_parser();

    if (!setup_ok) {
        printf("Could not setup libpostal\n");
        exit(EXIT_FAILURE);
    }

    RUN_TEST(test_us_parses);
    RUN_TEST(test_uk_parses);
    RUN_TEST(test_es_parses);
    RUN_TEST(test_za_parses);
    RUN_TEST(test_de_parses);
    RUN_TEST(test_hu_parses);
    RUN_TEST(test_ru_parses);

    libpostal_teardown();
    libpostal_teardown_parser();
}