diff --git a/test/test_parser.c b/test/test_parser.c index db6a11cf..2a7b7446 100644 --- a/test/test_parser.c +++ b/test/test_parser.c @@ -5,6 +5,7 @@ #include "greatest.h" #include "../src/libpostal.h" +#include "../src/address_parser.h" SUITE(libpostal_parser_tests); @@ -19,7 +20,30 @@ static greatest_test_res test_parse_result_equals(char *input, address_parser_op va_list args; size_t i; - if (output_len != response->num_components) { + + + bool valid = output_len == response->num_components; + + if (valid) { + va_start(args, output_len); + + for (i = 0; i < response->num_components; i++) { + labeled_component_t lc = va_arg(args, labeled_component_t); + + if (!string_equals(lc.label, response->labels[i])) { + valid = false; + break; + } + if (!string_equals(lc.component, response->components[i])) { + valid = false; + break; + } + } + + va_end(args); + } + + if (!valid) { va_start(args, output_len); printf("Expected\n\n"); for (i = 0; i < output_len; i++) { @@ -36,17 +60,6 @@ static greatest_test_res test_parse_result_equals(char *input, address_parser_op FAIL(); } - va_start(args, output_len); - - for (i = 0; i < response->num_components; i++) { - labeled_component_t lc = va_arg(args, labeled_component_t); - - ASSERT_STR_EQ(lc.label, response->labels[i]); - ASSERT_STR_EQ(lc.component, response->components[i]); - } - - va_end(args); - address_parser_response_destroy(response); PASS(); @@ -58,6 +71,8 @@ TEST test_us_parses(void) { address_parser_options_t options = get_libpostal_address_parser_default_options(); CHECK_CALL(test_parse_result_equals( + // Rare venue name without any common venue tokens following it + // Neighborhood name "Barboncino 781 Franklin Ave Crown Heights Brooklyn NYC NY 11216 USA", options, 9, @@ -71,11 +86,404 @@ TEST test_us_parses(void) { (labeled_component_t){"postcode", "11216"}, (labeled_component_t){"country", "usa"} )); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/pelias/pelias/issues/464 + "103 BEAL PKWY SE, FT WALTON BEACH, FL", + options, + 4, + (labeled_component_t){"house_number", "103"}, + (labeled_component_t){"road", "beal pkwy se"}, + (labeled_component_t){"city", "ft walton beach"}, + (labeled_component_t){"state", "fl"} + )); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/pelias/pelias/issues/463 + "Canal Rd, Deltona FL", + options, + 3, + (labeled_component_t){"road", "canal rd"}, + (labeled_component_t){"city", "deltona"}, + (labeled_component_t){"state", "fl"} + )); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/openvenues/libpostal/issues/125 + "123 Main St # 456 Oakland CA 94789", + options, + 6, + (labeled_component_t){"house_number", "123"}, + (labeled_component_t){"road", "main st"}, + (labeled_component_t){"unit", "# 456"}, + (labeled_component_t){"city", "oakland"}, + (labeled_component_t){"state", "ca"}, + (labeled_component_t){"postcode", "94789"} + + )); + + CHECK_CALL(test_parse_result_equals( + "123 Main St Apt 456 Oakland CA 94789", + options, + 6, + (labeled_component_t){"house_number", "123"}, + (labeled_component_t){"road", "main st"}, + (labeled_component_t){"unit", "apt 456"}, + (labeled_component_t){"city", "oakland"}, + (labeled_component_t){"state", "ca"}, + (labeled_component_t){"postcode", "94789"} + )); + + CHECK_CALL(test_parse_result_equals( + "123 Main St Apt #456 Oakland CA 94789", + options, + 6, + (labeled_component_t){"house_number", "123"}, + (labeled_component_t){"road", "main st"}, + (labeled_component_t){"unit", "apt #456"}, + (labeled_component_t){"city", "oakland"}, + (labeled_component_t){"state", "ca"}, + (labeled_component_t){"postcode", "94789"} + )); + + CHECK_CALL(test_parse_result_equals( + "123 Main St Apt No. 456 Oakland CA 94789", + options, + 6, + (labeled_component_t){"house_number", "123"}, + (labeled_component_t){"road", "main st"}, + (labeled_component_t){"unit", "apt no. 456"}, + (labeled_component_t){"city", "oakland"}, + (labeled_component_t){"state", "ca"}, + (labeled_component_t){"postcode", "94789"} + )); + + CHECK_CALL(test_parse_result_equals( + "whole foods nyc", + options, + 2, + (labeled_component_t){"house", "whole foods"}, + (labeled_component_t){"city", "nyc"} + )); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/pelias/pelias/issues/427 + "921 83 street, nyc", + options, + 3, + (labeled_component_t){"house_number", "921"}, + (labeled_component_t){"road", "83 street"}, + (labeled_component_t){"city", "nyc"} + )); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/pelias/pelias/issues/424 + "30 w 26 st", + options, + 2, + (labeled_component_t){"house_number", "30"}, + (labeled_component_t){"road", "w 26 st"} + )); + + CHECK_CALL(test_parse_result_equals( + "30 West 26th Street Sixth Floor", + options, + 3, + (labeled_component_t){"house_number", "30"}, + (labeled_component_t){"road", "west 26th street"}, + (labeled_component_t){"level", "sixth floor"} + )); + + CHECK_CALL(test_parse_result_equals( + "30 W 26th St 6th Fl", + options, + 3, + (labeled_component_t){"house_number", "30"}, + (labeled_component_t){"road", "w 26th st"}, + (labeled_component_t){"level", "6th fl"} + )); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/pelias/pelias/issues/440 + "301 Commons Park S, Stamford, CT 06902", + options, + 5, + (labeled_component_t){"house_number", "301"}, + (labeled_component_t){"road", "commons park s"}, + (labeled_component_t){"city", "stamford"}, + (labeled_component_t){"state", "ct"}, + (labeled_component_t){"postcode", "06902"} + )); + + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/151 + // House number range + "912-914 8TH ST, CLARKSTON, WA 99403", + options, + 5, + (labeled_component_t){"house_number", "912-914"}, + (labeled_component_t){"road", "8th st"}, + (labeled_component_t){"city", "clarkston"}, + (labeled_component_t){"state", "wa"}, + (labeled_component_t){"postcode", "99403"} + )); + + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/151 + "2120 E Hill Street #104 Signal Hill CA 90755", + options, + 6, + (labeled_component_t){"house_number", "2120"}, + (labeled_component_t){"road", "e hill street"}, + (labeled_component_t){"unit", "#104"}, + (labeled_component_t){"city", "signal hill"}, + (labeled_component_t){"state", "ca"}, + (labeled_component_t){"postcode", "90755"} + )); + + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/151 + // 5-digit house number + "12200 Montecito Road #H206 Seal Beach 90740", + options, + 5, + (labeled_component_t){"house_number", "12200"}, + (labeled_component_t){"road", "montecito road"}, + (labeled_component_t){"unit", "#h206"}, + (labeled_component_t){"city", "seal beach"}, + (labeled_component_t){"postcode", "90740"} + )); + + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/151 + // space between mc and carroll + "1036-1038 MC CARROLL ST CLARKSTON WA 99403", + options, + 5, + (labeled_component_t){"house_number", "1036-1038"}, + (labeled_component_t){"road", "mc carroll st"}, + (labeled_component_t){"city", "clarkston"}, + (labeled_component_t){"state", "wa"}, + (labeled_component_t){"postcode", "99403"} + )); + + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/151 + // hyphenated house number + "2455-B W BENCH RD OTHELLO WA 99344", + options, + 5, + (labeled_component_t){"house_number", "2455-b"}, + (labeled_component_t){"road", "w bench rd"}, + (labeled_component_t){"city", "othello"}, + (labeled_component_t){"state", "wa"}, + (labeled_component_t){"postcode", "99344"} + )); + + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/151 + // city name is part of street + "473 Boston Rd, Wilbraham, MA", + options, + 4, + (labeled_component_t){"house_number", "473"}, + (labeled_component_t){"road", "boston rd"}, + (labeled_component_t){"city", "wilbraham"}, + (labeled_component_t){"state", "ma"} + )); + + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/45 + // House number is a valid postcode but not in context + // Postcode is a ZIP+4 so have to rely on masked digits + "25050 ALESSANDRO BLVD, STE B, MORENO VALLEY, CA, 92553-4313", + options, + 6, + (labeled_component_t){"house_number", "25050"}, + (labeled_component_t){"road", "alessandro blvd"}, + (labeled_component_t){"unit", "ste b"}, + (labeled_component_t){"city", "moreno valley"}, + (labeled_component_t){"state", "ca"}, + (labeled_component_t){"postcode", "92553-4313"} + )); + + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/21 + // PO box example + "PO Box 1, Seattle, WA 98103", + options, + 4, + (labeled_component_t){"po_box", "po box 1"}, + (labeled_component_t){"city", "seattle"}, + (labeled_component_t){"state", "wa"}, + (labeled_component_t){"postcode", "98103"} + )); + + + CHECK_CALL(test_parse_result_equals( + "4411 Stone Way North Seattle, King County, WA 98103", + options, + 6, + (labeled_component_t){"house_number", "4411"}, + (labeled_component_t){"road", "stone way north"}, + (labeled_component_t){"city", "seattle"}, + (labeled_component_t){"state_district", "king county"}, + (labeled_component_t){"state", "wa"}, + (labeled_component_t){"postcode", "98103"} + )); + + + // Tests of simple place names + CHECK_CALL(test_parse_result_equals( + // From https://github.com/openvenues/libpostal/issues/114 + "Columbus, OH", + options, + 2, + (labeled_component_t){"city", "columbus"}, + (labeled_component_t){"state", "oh"} + )); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/openvenues/libpostal/issues/114 + "San Francisco CA", + options, + 2, + (labeled_component_t){"city", "san francisco"}, + (labeled_component_t){"state", "ca"} + )); + + CHECK_CALL(test_parse_result_equals( + // Common alternative name for San Francicso + "SF CA", + options, + 2, + (labeled_component_t){"city", "sf"}, + (labeled_component_t){"state", "ca"} + )); + + CHECK_CALL(test_parse_result_equals( + // Carmel-by-the-Sea hyphenated + "Carmel-by-the-Sea, CA", + options, + 2, + (labeled_component_t){"city", "carmel-by-the-sea"}, + (labeled_component_t){"state", "ca"} + )); + + CHECK_CALL(test_parse_result_equals( + // Carmel-by-the-Sea de-hyphenated + "Carmel by the Sea, CA", + options, + 2, + (labeled_component_t){"city", "carmel by the sea"}, + (labeled_component_t){"state", "ca"} + )); + + // Disambiguation tests + + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/53 + // Manhattan as city_district + "Manhattan, NY", + options, + 2, + (labeled_component_t){"city_district", "manhattan"}, + (labeled_component_t){"state", "ny"} + )); + + CHECK_CALL(test_parse_result_equals( + // Manhattan, Kansas - city + "Manhattan, KS", + options, + 2, + (labeled_component_t){"city", "manhattan"}, + (labeled_component_t){"state", "ks"} + )); + + CHECK_CALL(test_parse_result_equals( + // Brooklyn, NY - city_district + "Brooklyn, NY", + options, + 2, + (labeled_component_t){"city_district", "brooklyn"}, + (labeled_component_t){"state", "ny"} + )); + + CHECK_CALL(test_parse_result_equals( + // Brooklyn, Connecticut - city + "Brooklyn CT", + options, + 2, + (labeled_component_t){"city", "brooklyn"}, + (labeled_component_t){"state", "ct"} + )); + + CHECK_CALL(test_parse_result_equals( + // Here Brooklyn CT means "Brooklyn Court", a small street in Oregon + "18312 SE Brooklyn CT Gresham OR", + options, + 4, + (labeled_component_t){"house_number", "18312"}, + (labeled_component_t){"road", "se brooklyn ct"}, + (labeled_component_t){"city", "gresham"}, + (labeled_component_t){"state", "or"} + )); + PASS(); } +TEST test_ca_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); -TEST test_uk_parses(void) { + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/55 + "332 Menzies Street, Victoria, BC V8V 2G9", + options, + 5, + (labeled_component_t){"house_number", "332"}, + (labeled_component_t){"road", "menzies street"}, + (labeled_component_t){"city", "victoria"}, + (labeled_component_t){"state", "bc"}, + (labeled_component_t){"postcode", "v8v 2g9"} + )); +} + +TEST test_jm_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/openvenues/libpostal/issues/113 + // Kingston postcode, rare case where single-digit number is a postcode + // Uses W.I for "West Indies" + "237 Old Hope Road, Kingston 6, Jamaica W.I", + options, + 6, + (labeled_component_t){"house_number", "237"}, + (labeled_component_t){"road", "old hope road"}, + (labeled_component_t){"city", "kingston"}, + (labeled_component_t){"postcode", "6"}, + (labeled_component_t){"country", "jamaica"}, + (labeled_component_t){"world_region", "w.i"} + )); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/openvenues/libpostal/issues/113 + // Fractional house number + "16 1/2 Windward Road, Kingston 2, Jamaica", + options, + 5, + (labeled_component_t){"house_number", "16 1/2"}, + (labeled_component_t){"road", "windward road"}, + (labeled_component_t){"city", "kingston"}, + (labeled_component_t){"postcode", "2"}, + (labeled_component_t){"country", "jamaica"} + + )); + +} + + +TEST test_gb_parses(void) { address_parser_options_t options = get_libpostal_address_parser_default_options(); @@ -107,6 +515,139 @@ TEST test_uk_parses(void) { (labeled_component_t){"country", "united kingdom"} )); + CHECK_CALL(test_parse_result_equals( + // From https://github.com/openownership/data-standard/issues/18 + "Aston House, Cornwall Avenue, London, N3 1LF", + options, + 4, + (labeled_component_t){"house", "aston house"}, + (labeled_component_t){"road", "cornwall avenue"}, + (labeled_component_t){"city", "london"}, + (labeled_component_t){"postcode", "n3 1lf"} + )); + + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/39 + "318 Upper Street, N1 2XQ London", + options, + 4, + (labeled_component_t){"house_number", "318"}, + (labeled_component_t){"road", "upper street"}, + (labeled_component_t){"postcode", "n1 2xq"}, + (labeled_component_t){"city", "london"} + )); + + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/39 + "21, Kingswood Road SW2 4JE, London", + options, + 4, + (labeled_component_t){"house_number", "21"}, + (labeled_component_t){"road", "kingswood road"}, + (labeled_component_t){"postcode", "sw2 4je"}, + (labeled_component_t){"city", "london"} + )); + + CHECK_CALL(test_parse_result_equals( + // From Moz tests + "1 Riverside Dr Liverpool, Merseyside L3 4EN", + options, + 5, + (labeled_component_t){"house_number", "1"}, + (labeled_component_t){"road", "riverside dr"}, + (labeled_component_t){"city", "liverpool"}, + (labeled_component_t){"state_district", "merseyside"}, + (labeled_component_t){"postcode", "l3 4en"} + )); + + CHECK_CALL(test_parse_result_equals( + "Knutsford, Cheshire East WA16 9EX, UK", + options, + 4, + (labeled_component_t){"city", "knutsford"}, + (labeled_component_t){"state_district", "cheshire east"}, + (labeled_component_t){"postcode", "wa16 9ex"}, + (labeled_component_t){"country", "uk"} + )); + + PASS(); +} + +TEST test_nz_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + + CHECK_CALL(test_parse_result_equals( + "wellington new zealand", + options, + 2, + (labeled_component_t){"city", "wellington"}, + (labeled_component_t){"country", "new zealand"} + )); + + PASS(); +} + +TEST test_fr_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + CHECK_CALL(test_parse_result_equals( + // From https://github.com/pelias/pelias/issues/426 + "Chambéry", + options, + 1, + (labeled_component_t){"city", "chambéry"} + )); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/pelias/pelias/issues/426 + "Chambery", + options, + 1, + (labeled_component_t){"city", "chambery"} + )); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/openvenues/libpostal/issues/114 + "Paris, France", + options, + 2, + (labeled_component_t){"city", "paris"}, + (labeled_component_t){"country", "france"} + )); + + CHECK_CALL(test_parse_result_equals( + // Variant of above + "Paris", + options, + 1, + (labeled_component_t){"city", "paris"} + )); + + CHECK_CALL(test_parse_result_equals( + // Variant of above + "Paris, FR", + options, + 2, + (labeled_component_t){"city", "paris"}, + (labeled_component_t){"country", "fr"} + )); + + CHECK_CALL(test_parse_result_equals( + // Arrondissement Roman numerals + "IXe arrondissement Paris", + options, + 2, + (labeled_component_t){"city_district", "ixe arrondissement"}, + (labeled_component_t){"city", "paris"} + )); + + CHECK_CALL(test_parse_result_equals( + // Arrondissement Arabic numerals + "9e arrondissement Paris", + options, + 2, + (labeled_component_t){"city_district", "9e arrondissement"}, + (labeled_component_t){"city", "paris"} + )); PASS(); } @@ -121,11 +662,11 @@ TEST test_es_parses(void) { options, 6, (labeled_component_t){"house", "museo del prado"}, - (labeled_component_t){"road", "c. de ruiz de alarcón"}, + (labeled_component_t){"road", "c. de ruiz de alarcón"}, (labeled_component_t){"house_number", "23"}, (labeled_component_t){"postcode", "28014"}, (labeled_component_t){"city", "madrid"}, - (labeled_component_t){"country", "españa"} + (labeled_component_t){"country", "españa"} )); // Use English toponym @@ -134,13 +675,253 @@ TEST test_es_parses(void) { options, 6, (labeled_component_t){"house", "museo del prado"}, - (labeled_component_t){"road", "c. de ruiz de alarcón"}, + (labeled_component_t){"road", "c. de ruiz de alarcón"}, (labeled_component_t){"house_number", "23"}, (labeled_component_t){"postcode", "28014"}, (labeled_component_t){"city", "madrid"}, (labeled_component_t){"country", "spain"} )); + CHECK_CALL(test_parse_result_equals( + // Spanish-style floor number + "Paseo de la Castellana, 185 - 5º, 28046 Madrid Madrid", + options, + 6, + (labeled_component_t){"road", "paseo de la castellana"}, + (labeled_component_t){"house_number", "185"}, + (labeled_component_t){"level", "5º"}, + (labeled_component_t){"postcode", "28046"}, + (labeled_component_t){"city", "madrid"}, + (labeled_component_t){"state", "madrid"} + )); + + CHECK_CALL(test_parse_result_equals( + // Spanish-style floor number + side (unit) + "Av. de las Delicias, 14, 1º Dcha, 28045 Madrid", + options, + 6, + (labeled_component_t){"road", "av. de las delicias"}, + (labeled_component_t){"house_number", "14"}, + (labeled_component_t){"level", "1º"}, + (labeled_component_t){"unit", "dcha"}, + (labeled_component_t){"postcode", "28045"}, + (labeled_component_t){"city", "madrid"} + )); + PASS(); +} + +TEST test_co_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + + CHECK_CALL(test_parse_result_equals( + "Cra 18#63-64 B Chapinero Bogotá DC Colombia", + options, + 5, + (labeled_component_t){"road", "cra 18"}, + (labeled_component_t){"house_number", "#63-64 b"}, + (labeled_component_t){"city_district", "chapinero"}, + (labeled_component_t){"city", "bogotá dc"}, + (labeled_component_t){"country", "colombia"} + )); + + CHECK_CALL(test_parse_result_equals( + "Bogotá Colombia", + options, + 2, + (labeled_component_t){"city", "bogotá"}, + (labeled_component_t){"country", "colombia"} + )); + + // Test with country code (could also be Colorado, company, etc.) + CHECK_CALL(test_parse_result_equals( + "Bogotá CO", + options, + 2, + (labeled_component_t){"city", "bogotá"}, + (labeled_component_t){"country", "co"} + )); + + // Same tests without accent + CHECK_CALL(test_parse_result_equals( + "Cra 18#63-64 B Chapinero Bogota DC Colombia", + options, + 5, + (labeled_component_t){"road", "cra 18"}, + (labeled_component_t){"house_number", "#63-64 b"}, + (labeled_component_t){"city_district", "chapinero"}, + (labeled_component_t){"city", "bogota dc"}, + (labeled_component_t){"country", "colombia"} + )); + + CHECK_CALL(test_parse_result_equals( + "Bogota Colombia", + options, + 2, + (labeled_component_t){"city", "bogota"}, + (labeled_component_t){"country", "colombia"} + )); + + CHECK_CALL(test_parse_result_equals( + "Bogota CO", + options, + 2, + (labeled_component_t){"city", "bogota"}, + (labeled_component_t){"country", "co"} + )); + + + PASS(); +} + +TEST test_mx_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + + // From: https://github.com/openvenues/libpostal/issues/126 + CHECK_CALL(test_parse_result_equals( + "LÓPEZ MATEOS, 106, 21840, MEXICALI, baja-california, mx", + options, + 6, + (labeled_component_t){"road", "lópez mateos"}, + (labeled_component_t){"house_number", "106"}, + (labeled_component_t){"postcode", "21840"}, + (labeled_component_t){"city", "mexicali"}, + (labeled_component_t){"state", "baja-california"}, + (labeled_component_t){"country", "mx"} + )); + + CHECK_CALL(test_parse_result_equals( + "LORENZO DE ZOVELA, 1126, 22715, PLAYAS DE ROSARITO, baja-california, mx", + options, + 6, + (labeled_component_t){"road", "lorenzo de zovela"}, + (labeled_component_t){"house_number", "1126"}, + (labeled_component_t){"postcode", "22715"}, + (labeled_component_t){"city", "playas de rosarito"}, + (labeled_component_t){"state", "baja-california"}, + (labeled_component_t){"country", "mx"} + )); + + PASS(); +} + + +TEST test_cn_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/openvenues/libpostal/issues/71 + // Level, unit, road name containing a city (Hong Kong) + "中国,山东省,青岛市 香港东路6号,5号楼,8号室 李小方 先生收", + options, + 8, + (labeled_component_t){"country", "中国"}, + (labeled_component_t){"state", "山东省"}, + (labeled_component_t){"city", "青岛市"}, + (labeled_component_t){"road", "香港东路"}, + (labeled_component_t){"house_number", "6号"}, + (labeled_component_t){"level", "5号楼"}, + (labeled_component_t){"unit", "8号室"}, + (labeled_component_t){"house", "李小方 先生收"} + )); + PASS(); +} + + + +TEST test_jp_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + + CHECK_CALL(test_parse_result_equals( + // Example of a Kanji address + "〒601-8446京都市南区西九条高畠町25-1京都醸造株式会社", + options, + 6, + (labeled_component_t){"postcode", "〒601-8446"}, + (labeled_component_t){"city", "京都市"}, + (labeled_component_t){"city_district", "南区"}, + (labeled_component_t){"suburb", "西九条高畠町"}, + (labeled_component_t){"house_number", "25-1"}, + (labeled_component_t){"house", "京都醸造株式会社"} + )); + + CHECK_CALL(test_parse_result_equals( + // Ban-go style house number, level and unit + "日本国〒113-0001文京区4丁目3番2号3階323号室", + options, + 7, + (labeled_component_t){"country", "日本国"}, + (labeled_component_t){"postcode", "〒113-0001"}, + (labeled_component_t){"city", "文京区"}, + (labeled_component_t){"suburb", "4丁目"}, + (labeled_component_t){"house_number", "3番2号"}, + (labeled_component_t){"level", "3階"}, + (labeled_component_t){"unit", "323号室"} + )); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/openvenues/libpostal/issues/123 + // University (slightly ambiguous i.e. the 2nd "Osaka" can be part of a campus name) + // English toponyms + "osaka university, osaka, japan, 565-0871", + options, + 4, + (labeled_component_t){"house", "osaka university"}, + (labeled_component_t){"city", "osaka"}, + (labeled_component_t){"country", "japan"}, + (labeled_component_t){"postcode", "565-0871"} + )); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/openvenues/libpostal/issues/62 + // Romaji + // Has road name (I think?) + "135, Higashifunahashi 2 Chome, Hirakata-shi Osaka-fu", + options, + 5, + (labeled_component_t){"house_number", "135"}, + (labeled_component_t){"road", "higashifunahashi"}, + (labeled_component_t){"suburb", "2 chome"}, + (labeled_component_t){"city", "hirakata-shi"}, + (labeled_component_t){"state", "osaka-fu"} + + )); + PASS(); +} + +TEST test_kr_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + + CHECK_CALL(test_parse_result_equals( + // English/Romanized Korean, ro + gil address, English unit + "Suite 1005, 36, Teheran-ro 87-gil, Gangnam-gu Seoul 06164 Republic of Korea", + options, + 7, + (labeled_component_t){"unit", "suite 1005"}, + (labeled_component_t){"house_number", "36"}, + (labeled_component_t){"road", "teheran-ro 87-gil"}, + (labeled_component_t){"city_district", "gangnam-gu"}, + (labeled_component_t){"city", "seoul"}, + (labeled_component_t){"postcode", "06164"}, + (labeled_component_t){"country", "republic of korea"} + )); + PASS(); +} + +TEST test_my_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/openvenues/libpostal/issues/121 + // Not adding the block format yet in case we change how it's parsed + "IBS Centre Jalan Chan Sow Lin, 55200 Kuala Lumpur, Malaysia", + options, + 5, + (labeled_component_t){"house", "ibs centre"}, + (labeled_component_t){"road", "jalan chan sow lin"}, + (labeled_component_t){"postcode", "55200"}, + (labeled_component_t){"city", "kuala lumpur"}, + (labeled_component_t){"country", "malaysia"} + )); PASS(); } @@ -150,7 +931,7 @@ TEST test_za_parses(void) { CHECK_CALL(test_parse_result_equals( // Contains HTML entity which should be normalized // Contains 4-digit postcode, which can be confusable with a house number - "Double Shot Tea & Coffee 15 Melle St. Braamfontein Johannesburg, 2000, South Africa", + "Double Shot Tea & Coffee 15 Melle St. Braamfontein Johannesburg, 2001, South Africa", options, 7, (labeled_component_t){"house", "double shot tea & coffee"}, @@ -158,7 +939,7 @@ TEST test_za_parses(void) { (labeled_component_t){"road", "melle st."}, (labeled_component_t){"suburb", "braamfontein"}, (labeled_component_t){"city", "johannesburg"}, - (labeled_component_t){"postcode", "2000"}, + (labeled_component_t){"postcode", "2001"}, (labeled_component_t){"country", "south africa"} )); PASS(); @@ -169,8 +950,7 @@ TEST test_de_parses(void) { address_parser_options_t options = get_libpostal_address_parser_default_options(); CHECK_CALL(test_parse_result_equals( - /* Contains Latin-ASCII normalizations - Contains German concatenated street suffix + /* Contains German concatenated street suffix N.B. We may want to move ä => ae out of the Latin-ASCII transliterator which will change the output of this test to e.g. eschenbräu bräurei @@ -178,13 +958,235 @@ TEST test_de_parses(void) { "Eschenbräu Bräurei Triftstraße 67 13353 Berlin Deutschland", options, 6, - (labeled_component_t){"house", "eschenbraeu braeurei"}, + (labeled_component_t){"house", "eschenbräu bräurei"}, + (labeled_component_t){"road", "triftstraße"}, + (labeled_component_t){"house_number", "67"}, + (labeled_component_t){"postcode", "13353"}, + (labeled_component_t){"city", "berlin"}, + (labeled_component_t){"country", "deutschland"} + )); + + // Test transliterated versions + CHECK_CALL(test_parse_result_equals( + "Eschenbrau Braurei Triftstrasse 67 13353 Berlin Deutschland", + options, + 6, + (labeled_component_t){"house", "eschenbrau braurei"}, (labeled_component_t){"road", "triftstrasse"}, (labeled_component_t){"house_number", "67"}, (labeled_component_t){"postcode", "13353"}, (labeled_component_t){"city", "berlin"}, (labeled_component_t){"country", "deutschland"} )); + + CHECK_CALL(test_parse_result_equals( + "Eschenbraeu Braeurei Triftstrasse 67 13353 Berlin DE", + options, + 6, + (labeled_component_t){"house", "eschenbraeu braeurei"}, + (labeled_component_t){"road", "triftstrasse"}, + (labeled_component_t){"house_number", "67"}, + (labeled_component_t){"postcode", "13353"}, + (labeled_component_t){"city", "berlin"}, + (labeled_component_t){"country", "de"} + )); + + PASS(); +} + + +TEST test_at_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + + CHECK_CALL(test_parse_result_equals( + "Eduard Sueß Gasse 9", + options, + 2, + (labeled_component_t){"road", "eduard sueß gasse"}, + (labeled_component_t){"house_number", "9"} + )); + + CHECK_CALL(test_parse_result_equals( + "Eduard-Sueß Gasse 9", + options, + 2, + (labeled_component_t){"road", "eduard-sueß gasse"}, + (labeled_component_t){"house_number", "9"} + )); + + CHECK_CALL(test_parse_result_equals( + "Eduard-Sueß-Gasse 9", + options, + 2, + (labeled_component_t){"road", "eduard-sueß-gasse"}, + (labeled_component_t){"house_number", "9"} + )); + + CHECK_CALL(test_parse_result_equals( + "Eduard Sueß-Gasse 9", + options, + 2, + (labeled_component_t){"road", "eduard sueß-gasse"}, + (labeled_component_t){"house_number", "9"} + )); + + // From https://github.com/openvenues/libpostal/issues/128 + CHECK_CALL(test_parse_result_equals( + "Wien, Österreich", + options, + 2, + (labeled_component_t){"city", "wien"}, + (labeled_component_t){"country", "österreich"} + )); + + // Transliterations + CHECK_CALL(test_parse_result_equals( + "Wien, Osterreich", + options, + 2, + (labeled_component_t){"city", "wien"}, + (labeled_component_t){"country", "osterreich"} + )); + + CHECK_CALL(test_parse_result_equals( + "Wien, Oesterreich", + options, + 2, + (labeled_component_t){"city", "wien"}, + (labeled_component_t){"country", "oesterreich"} + )); + + CHECK_CALL(test_parse_result_equals( + // English names + "Vienna, Austria", + options, + 2, + (labeled_component_t){"city", "vienna"}, + (labeled_component_t){"country", "austria"} + )); + + PASS(); +} + + +TEST test_nl_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/162 + "Nieuwe Binnenweg 17-19, Oude Westen, Rotterdam NL", + options, + 5, + (labeled_component_t){"road", "nieuwe binnenweg"}, + (labeled_component_t){"house_number", "17-19"}, + (labeled_component_t){"suburb", "oude westen"}, + (labeled_component_t){"city", "rotterdam"}, + (labeled_component_t){"country", "nl"} + )); + + CHECK_CALL(test_parse_result_equals( + "Nieuwe Binnenweg 17-19, Oude Westen, Rotterdam", + options, + 4, + (labeled_component_t){"road", "nieuwe binnenweg"}, + (labeled_component_t){"house_number", "17-19"}, + (labeled_component_t){"suburb", "oude westen"}, + (labeled_component_t){"city", "rotterdam"} + )); + + CHECK_CALL(test_parse_result_equals( + "Oude Westen, Rotterdam", + options, + 2, + (labeled_component_t){"suburb", "oude westen"}, + (labeled_component_t){"city", "rotterdam"} + )); + + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/75 + "Olympia 1 A begane gro", + options, + 3, + (labeled_component_t){"road", "olympia"}, + (labeled_component_t){"house_number", "1 a"}, + (labeled_component_t){"level", "begane gro"} + )); + + PASS(); +} + +TEST test_da_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + + CHECK_CALL(test_parse_result_equals( + "Valdemarsgade 42 4 t.v. København, 1665 Danmark", + options, + 6, + (labeled_component_t){"road", "valdemarsgade"}, + (labeled_component_t){"house_number", "42"}, + (labeled_component_t){"unit", "4 t.v."}, + (labeled_component_t){"city", "københavn"}, + (labeled_component_t){"postcode", "1665"}, + (labeled_component_t){"country", "danmark"} + )); + + PASS(); +} + +TEST test_fi_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + + CHECK_CALL(test_parse_result_equals( + "1 Hämeenkatu, Tampere, Finland", + options, + 4, + (labeled_component_t){"house_number", "1"}, + (labeled_component_t){"road", "hämeenkatu"}, + (labeled_component_t){"city", "tampere"}, + (labeled_component_t){"country", "finland"} + )); + + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/111 + "Pitkämäentie", + options, + 1, + (labeled_component_t){"road", "pitkämäentie"} + )); + + PASS(); +} + +TEST test_no_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + + CHECK_CALL(test_parse_result_equals( + // From: https://github.com/openvenues/libpostal/issues/39#issuecomment-221027220 + "Sars gate 2 A, 562 OSLO", + options, + 4, + (labeled_component_t){"road", "sars gate"}, + (labeled_component_t){"house_number", "2 a"}, + (labeled_component_t){"postcode", "562"}, + (labeled_component_t){"city", "oslo"} + )); + PASS(); +} + +TEST test_se_parses(void) { + address_parser_options_t options = get_libpostal_address_parser_default_options(); + + CHECK_CALL(test_parse_result_equals( + // Uses the "en trappa upp" (one floor up) form in Swedish addresses + "Storgatan 1 en trappa upp 112 01 Stockholm Sweden", + options, + 6, + (labeled_component_t){"road", "storgatan"}, + (labeled_component_t){"house_number", "1"}, + (labeled_component_t){"level", "en trappa upp"}, + (labeled_component_t){"postcode", "112 01"}, + (labeled_component_t){"city", "stockholm"}, + (labeled_component_t){"country", "sweden"} + )); PASS(); } @@ -193,20 +1195,17 @@ TEST test_hu_parses(void) { CHECK_CALL(test_parse_result_equals( // Hungarian, 4-digit postal code - "Szimpla Kert Kazinczy utca 14 Budapest 1075, Magyarország", + "1075 Budapest kazinczy utca, 14", options, - 6, - (labeled_component_t){"house", "szimpla kert"}, - (labeled_component_t){"road", "kazinczy utca"}, - (labeled_component_t){"house_number", "14"}, - (labeled_component_t){"city", "budapest"}, + 4, (labeled_component_t){"postcode", "1075"}, - (labeled_component_t){"country", "magyarország"} + (labeled_component_t){"city", "budapest"}, + (labeled_component_t){"road", "kazinczy utca"}, + (labeled_component_t){"house_number", "14"} )); PASS(); } - TEST test_ru_parses(void) { address_parser_options_t options = get_libpostal_address_parser_default_options(); @@ -217,13 +1216,68 @@ TEST test_ru_parses(void) { "Государственный Эрмитаж Дворцовая наб., 34 191186, St. Petersburg, Russia", options, 6, - (labeled_component_t){"house", "государственный эрмитаж"}, + (labeled_component_t){"house", "государственный эрмитаж"}, (labeled_component_t){"road", "дворцовая наб."}, (labeled_component_t){"house_number", "34"}, (labeled_component_t){"postcode", "191186"}, (labeled_component_t){"city", "st. petersburg"}, (labeled_component_t){"country", "russia"} )); + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/openvenues/libpostal/issues/138 + "Петрозаводск Карелия Российская Федерация", + options, + 3, + (labeled_component_t){"city", "петрозаводск"}, + (labeled_component_t){"state", "карелия"}, + (labeled_component_t){"country", "российская федерация"} + )); + + + CHECK_CALL(test_parse_result_equals( + // From https://github.com/openvenues/libpostal/issues/138 + "Автолюбителейроезд 24 Петрозаводск Карелия Российская Федерация 185013", + options, + 6, + (labeled_component_t){"road", "автолюбителейроезд"}, + (labeled_component_t){"house_number", "24"}, + (labeled_component_t){"city", "петрозаводск"}, + (labeled_component_t){"state", "карелия"}, + (labeled_component_t){"country", "российская федерация"}, + (labeled_component_t){"postcode", "185013"} + )); + + CHECK_CALL(test_parse_result_equals( + // Old Soviet format, from https://github.com/openvenues/libpostal/issues/125#issuecomment-269319652 + // Uses "г."" prefix for the city + // Uses "д." for house number + // Has apartment number with "кв." + "197198, г. Санкт-Петербург, ул. Съезжинская д. 10 кв. 40", + options, + 5, + (labeled_component_t){"postcode", "197198"}, + (labeled_component_t){"city", "г. санкт-петербург"}, + (labeled_component_t){"road", "ул. съезжинская"}, + (labeled_component_t){"house_number", "д. 10"}, + (labeled_component_t){"unit", "кв. 40"} + )); + + /* + CHECK_CALL(test_parse_result_equals( + // Uses genitive place names, see https://github.com/openvenues/libpostal/issues/125#issuecomment-269438636 + "188541, г. Сосновый Бор Ленинградской области, пр. Героев 40, кв. 400", + options, + 6, + (labeled_component_t){"postcode", "188541"}, + (labeled_component_t){"city", "г. сосновый бор"}, + (labeled_component_t){"state", "ленинградской области"}, + (labeled_component_t){"road", "пр. героев"}, + (labeled_component_t){"house_number", "40"}, + (labeled_component_t){"unit", "кв. 400"} + )); + */ + PASS(); } @@ -234,14 +1288,28 @@ SUITE(libpostal_parser_tests) { } RUN_TEST(test_us_parses); - RUN_TEST(test_uk_parses); + RUN_TEST(test_jm_parses); + RUN_TEST(test_gb_parses); + RUN_TEST(test_nz_parses); + RUN_TEST(test_fr_parses); RUN_TEST(test_es_parses); + RUN_TEST(test_co_parses); + RUN_TEST(test_mx_parses); + RUN_TEST(test_cn_parses); + RUN_TEST(test_jp_parses); + RUN_TEST(test_kr_parses); + RUN_TEST(test_my_parses); RUN_TEST(test_za_parses); RUN_TEST(test_de_parses); + RUN_TEST(test_at_parses); + RUN_TEST(test_nl_parses); + RUN_TEST(test_da_parses); + RUN_TEST(test_fi_parses); + RUN_TEST(test_no_parses); + RUN_TEST(test_se_parses); RUN_TEST(test_hu_parses); RUN_TEST(test_ru_parses); libpostal_teardown(); libpostal_teardown_parser(); } -