Initial fork commit

This commit is contained in:
2025-09-06 22:03:29 -04:00
commit 2d238cd339
1748 changed files with 932506 additions and 0 deletions

BIN
test/.libs/test_libpostal Executable file

Binary file not shown.

14
test/Makefile.am Normal file
View File

@@ -0,0 +1,14 @@
# Flags shared by every optimization variant: extra warnings, GNU C99,
# debug info, and LIBPOSTAL_DATA_DIR defined as a C string literal that
# points at $(datadir)/libpostal.
CFLAGS_BASE = -Wfloat-equal -Wpointer-arith -std=gnu99 -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g
# Base flags combined with each optimization level. Only CFLAGS_O3 is
# referenced in this file (by test_libpostal_CFLAGS below).
CFLAGS_O0 = $(CFLAGS_BASE) -O0
CFLAGS_O1 = $(CFLAGS_BASE) -O1
CFLAGS_O2 = $(CFLAGS_BASE) -O2
CFLAGS_O3 = $(CFLAGS_BASE) -O3
# Include search path: the parent directory plus /usr/local/include.
DEFAULT_INCLUDES = -I.. -I/usr/local/include
# NOTE(review): Automake convention leaves CFLAGS to the user and uses
# AM_CFLAGS for maintainer flags -- confirm this override is intentional.
CFLAGS = $(CFLAGS_BASE)
# test_libpostal is both a test to run and a program that is built but
# not installed.
TESTS = test_libpostal
noinst_PROGRAMS = test_libpostal
# The test sources are compiled together with a selection of library
# sources from ../src, in addition to linking the libraries below.
test_libpostal_SOURCES = test.c test_expand.c test_parser.c test_transliterate.c test_numex.c test_trie.c test_string_utils.c test_crf_context.c ../src/strndup.c ../src/file_utils.c ../src/string_utils.c ../src/utf8proc/utf8proc.c ../src/trie.c ../src/trie_search.c ../src/transliterate.c ../src/numex.c ../src/features.c
test_libpostal_LDADD = ../src/libpostal.la ../src/libscanner.la $(CBLAS_LIBS)
# The test binary is built with the -O3 variant of the flags.
test_libpostal_CFLAGS = $(CFLAGS_O3)

1356
test/Makefile.in Normal file

File diff suppressed because it is too large Load Diff

908
test/greatest.h Normal file
View File

@@ -0,0 +1,908 @@
/*
* Copyright (c) 2011-2015 Scott Vokes <vokes.s@gmail.com>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef GREATEST_H
#define GREATEST_H
/* 1.1.0 */
#define GREATEST_VERSION_MAJOR 1
#define GREATEST_VERSION_MINOR 1
#define GREATEST_VERSION_PATCH 0
/* A unit testing system for C, contained in 1 file.
* It doesn't use dynamic allocation or depend on anything
* beyond ANSI C89.
*
* An up-to-date version can be found at:
* https://github.com/silentbicycle/greatest/
*/
/*********************************************************************
* Minimal test runner template
*********************************************************************/
#if 0
#include "greatest.h"
TEST foo_should_foo(void) {
PASS();
}
static void setup_cb(void *data) {
printf("setup callback for each test case\n");
}
static void teardown_cb(void *data) {
printf("teardown callback for each test case\n");
}
SUITE(suite) {
/* Optional setup/teardown callbacks which will be run before/after
* every test case. If using a test suite, they will be cleared when
* the suite finishes. */
SET_SETUP(setup_cb, voidp_to_callback_data);
SET_TEARDOWN(teardown_cb, voidp_to_callback_data);
RUN_TEST(foo_should_foo);
}
/* Add definitions that need to be in the test runner's main file. */
GREATEST_MAIN_DEFS();
/* Set up, run suite(s) of tests, report pass/fail/skip stats. */
int run_tests(void) {
GREATEST_INIT(); /* init. greatest internals */
/* List of suites to run (if any). */
RUN_SUITE(suite);
/* Tests can also be run directly, without using test suites. */
RUN_TEST(foo_should_foo);
GREATEST_PRINT_REPORT(); /* display results */
return greatest_all_passed();
}
/* main(), for a standalone command-line test runner.
* This replaces run_tests above, and adds command line option
* handling and exiting with a pass/fail status. */
int main(int argc, char **argv) {
GREATEST_MAIN_BEGIN(); /* init & parse command-line args */
RUN_SUITE(suite);
GREATEST_MAIN_END(); /* display results */
}
#endif
/*********************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/***********
* Options *
***********/
/* Every option below is only defaulted when it is not already defined,
 * so each one can be overridden before this header is included. */
/* Default column width for non-verbose output. */
#ifndef GREATEST_DEFAULT_WIDTH
#define GREATEST_DEFAULT_WIDTH 72
#endif
/* FILE *, for test logging. */
#ifndef GREATEST_STDOUT
#define GREATEST_STDOUT stdout
#endif
/* Remove GREATEST_ prefix from most commonly used symbols? */
#ifndef GREATEST_USE_ABBREVS
#define GREATEST_USE_ABBREVS 1
#endif
/* Set to 0 to disable all use of setjmp/longjmp. */
#ifndef GREATEST_USE_LONGJMP
#define GREATEST_USE_LONGJMP 1
#endif
#if GREATEST_USE_LONGJMP
#include <setjmp.h>
#endif
/* Set to 0 to disable all use of time.h / clock(). */
#ifndef GREATEST_USE_TIME
#define GREATEST_USE_TIME 1
#endif
#if GREATEST_USE_TIME
#include <time.h>
#endif
/* Floating point type, for ASSERT_IN_RANGE.
 * GREATEST_FLOAT_FMT is only defaulted together with GREATEST_FLOAT, so a
 * build that overrides GREATEST_FLOAT must also define GREATEST_FLOAT_FMT
 * (the printf conversion used to print values of that type). */
#ifndef GREATEST_FLOAT
#define GREATEST_FLOAT double
#define GREATEST_FLOAT_FMT "%g"
#endif
/*********
* Types *
*********/
/* Info for the current running suite.
 * The whole struct is zeroed with memset() when its counts are folded
 * into the run totals (see update_counts_and_reset_suite). */
typedef struct greatest_suite_info {
unsigned int tests_run; /* tests executed in this suite so far */
unsigned int passed; /* incremented by greatest_do_pass */
unsigned int failed; /* incremented by greatest_do_fail */
unsigned int skipped; /* incremented by greatest_do_skip */
#if GREATEST_USE_TIME
/* timers, pre/post running suite and individual tests */
clock_t pre_suite;
clock_t post_suite;
clock_t pre_test;
clock_t post_test;
#endif
} greatest_suite_info;
/* Type for a suite function. */
typedef void (greatest_suite_cb)(void);
/* Types for setup/teardown callbacks. If non-NULL, these will be run
* and passed the pointer to their additional data. */
typedef void (greatest_setup_cb)(void *udata);
typedef void (greatest_teardown_cb)(void *udata);
/* Type for an equality comparison between two pointers of the same type.
* Should return non-0 if equal, otherwise 0.
* UDATA is a closure value, passed through from ASSERT_EQUAL_T[m]. */
typedef int greatest_equal_cb(const void *exp, const void *got, void *udata);
/* Type for a callback that prints a value pointed to by T.
* Return value has the same meaning as printf's.
* UDATA is a closure value, passed through from ASSERT_EQUAL_T[m]. */
typedef int greatest_printf_cb(const void *t, void *udata);
/* Callbacks for an arbitrary type; needed for type-specific
* comparisons via GREATEST_ASSERT_EQUAL_T[m].
* A NULL `equal` makes the assertion fail; a NULL `print` only suppresses
* the Expected/Got dump (see greatest_do_assert_equal_t). */
typedef struct greatest_type_info {
greatest_equal_cb *equal;
greatest_printf_cb *print;
} greatest_type_info;
/* Callbacks for string type. Defined by GREATEST_MAIN_DEFS(). */
extern greatest_type_info greatest_type_info_string;
/* Bit flags stored in greatest_run_info.flags. The command-line parser
 * sets FIRST_FAIL for "-f" and LIST_ONLY for "-l". */
typedef enum {
GREATEST_FLAG_FIRST_FAIL = 0x01,
GREATEST_FLAG_LIST_ONLY = 0x02
} greatest_flag_t;
/* Struct containing all test runner state.
 * GREATEST_INIT() zeroes the whole struct and then sets `width`. */
typedef struct greatest_run_info {
unsigned char flags; /* bitwise OR of greatest_flag_t values */
unsigned char verbosity; /* > 0 selects verbose output */
unsigned int tests_run; /* total test count */
/* overall pass/fail/skip counts */
unsigned int passed;
unsigned int failed;
unsigned int skipped;
unsigned int assertions;
/* currently running test suite */
greatest_suite_info suite;
/* info to print about the most recent failure */
const char *fail_file;
unsigned int fail_line;
const char *msg;
/* current setup/teardown hooks and userdata */
greatest_setup_cb *setup;
void *setup_udata;
greatest_teardown_cb *teardown;
void *teardown_udata;
/* formatting info for ".....s...F"-style output */
unsigned int col;
unsigned int width;
/* only run a specific suite or test */
const char *suite_filter;
const char *test_filter;
#if GREATEST_USE_TIME
/* overall timers */
clock_t begin;
clock_t end;
#endif
#if GREATEST_USE_LONGJMP
/* target of GREATEST_FAIL_WITH_LONGJMPm, set by GREATEST_SAVE_CONTEXT */
jmp_buf jump_dest;
#endif
} greatest_run_info;
/* Snapshot of the overall counters, filled in by greatest_get_report(). */
struct greatest_report_t {
/* overall pass/fail/skip counts */
unsigned int passed;
unsigned int failed;
unsigned int skipped;
unsigned int assertions;
};
/* Global var for the current testing context.
* Defined by GREATEST_MAIN_DEFS(); its contents are set up by
* GREATEST_INIT(). */
extern greatest_run_info greatest_info;
/**********************
* Exported functions *
**********************/
/* These are used internally by greatest.
 * All of them are defined by GREATEST_MAIN_DEFS(). */
void greatest_do_pass(const char *name);
void greatest_do_fail(const char *name);
void greatest_do_skip(const char *name);
/* Returns 1 if the named test should run, 0 otherwise. */
int greatest_pre_test(const char *name);
void greatest_post_test(const char *name, int res);
void greatest_usage(const char *name);
/* Returns the equal callback's result, or 0 if no callback is available. */
int greatest_do_assert_equal_t(const void *exp, const void *got,
greatest_type_info *type_info, void *udata);
/* These are part of the public greatest API. */
void GREATEST_SET_SETUP_CB(greatest_setup_cb *cb, void *udata);
void GREATEST_SET_TEARDOWN_CB(greatest_teardown_cb *cb, void *udata);
/* Non-zero when the overall failure count is 0. */
int greatest_all_passed(void);
void greatest_set_test_filter(const char *name);
void greatest_set_suite_filter(const char *name);
void greatest_get_report(struct greatest_report_t *report);
unsigned int greatest_get_verbosity(void);
void greatest_set_verbosity(unsigned int verbosity);
void greatest_set_flag(greatest_flag_t flag);
/********************
* Language Support *
********************/
/* If __VA_ARGS__ (C99) is supported, allow parametric testing
 * without needing to manually manage the argument struct.
 * C99 is identified by __STDC_VERSION__ >= 199901L; both macros are
 * checked with defined() so that compilers which do not provide them
 * (C89 mode, non-MSVC) do not rely on the implicit-zero rule. */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
    (defined(_MSC_VER) && _MSC_VER >= 1800)
#define GREATEST_VA_ARGS
#endif
/**********
* Macros *
**********/
/* Define a suite. Expands to a prototype followed by the start of the
 * definition, so the suite body follows the macro invocation. */
#define GREATEST_SUITE(NAME) void NAME(void); void NAME(void)
/* Declare a suite, provided by another compilation unit. */
#define GREATEST_SUITE_EXTERN(NAME) void NAME(void)
/* Start defining a test function.
* The arguments are not included, to allow parametric testing. */
#define GREATEST_TEST static greatest_test_res
/* PASS/FAIL/SKIP result from a test. Used internally.
 * PASS is 0 so that it matches the value setjmp() returns on its initial
 * call (see GREATEST_SAVE_CONTEXT). */
typedef enum {
GREATEST_TEST_RES_PASS = 0,
GREATEST_TEST_RES_FAIL = -1,
GREATEST_TEST_RES_SKIP = 1
} greatest_test_res;
/* Run a suite. The suite's name is stringified for reporting and for
 * matching against the suite filter. */
#define GREATEST_RUN_SUITE(S_NAME) greatest_run_suite(S_NAME, #S_NAME)
/* Run a test in the current suite.
 * greatest_pre_test() returns 1 when the test should run. The saved
 * context yields GREATEST_TEST_RES_PASS on the initial pass, so TEST() is
 * called; if the test fails via GREATEST_FAIL_WITH_LONGJMPm, control
 * resumes here with GREATEST_TEST_RES_FAIL and TEST() is not called again.
 * In list-only mode the test name is printed instead. */
#define GREATEST_RUN_TEST(TEST) \
do { \
if (greatest_pre_test(#TEST) == 1) { \
greatest_test_res res = GREATEST_SAVE_CONTEXT(); \
if (res == GREATEST_TEST_RES_PASS) { \
res = TEST(); \
} \
greatest_post_test(#TEST, res); \
} else if (GREATEST_LIST_ONLY()) { \
fprintf(GREATEST_STDOUT, " %s\n", #TEST); \
} \
} while (0)
/* Run a test in the current suite with one void * argument,
 * which can be a pointer to a struct with multiple arguments.
 * Like GREATEST_RUN_TEST, the jump context is saved before the test is
 * called, so GREATEST_FAIL_WITH_LONGJMPm inside the test (or a function it
 * calls) returns here with GREATEST_TEST_RES_FAIL instead of jumping to a
 * context saved by some earlier test. */
#define GREATEST_RUN_TEST1(TEST, ENV)                                   \
    do {                                                                \
        if (greatest_pre_test(#TEST) == 1) {                            \
            greatest_test_res res = GREATEST_SAVE_CONTEXT();            \
            if (res == GREATEST_TEST_RES_PASS) {                        \
                res = TEST(ENV);                                        \
            }                                                           \
            greatest_post_test(#TEST, res);                             \
        } else if (GREATEST_LIST_ONLY()) {                              \
            fprintf(GREATEST_STDOUT, " %s\n", #TEST);                   \
        }                                                               \
    } while (0)
#ifdef GREATEST_VA_ARGS
/* Run a test in the current suite, forwarding any number of arguments
 * to it. Only available when variadic macros are supported. */
#define GREATEST_RUN_TESTp(TEST, ...)                                   \
    do {                                                                \
        if (greatest_pre_test(#TEST) == 1) {                            \
            greatest_test_res res = GREATEST_SAVE_CONTEXT();            \
            if (res == GREATEST_TEST_RES_PASS) {                        \
                res = TEST(__VA_ARGS__);                                \
            }                                                           \
            greatest_post_test(#TEST, res);                             \
        } else if (GREATEST_LIST_ONLY()) {                              \
            fprintf(GREATEST_STDOUT, " %s\n", #TEST);                   \
        }                                                               \
    } while (0)
#endif
/* Check if the test runner is in verbose mode. */
#define GREATEST_IS_VERBOSE() ((greatest_info.verbosity) > 0)
/* Non-zero when the list-only flag is set: tests are listed, not run. */
#define GREATEST_LIST_ONLY() \
(greatest_info.flags & GREATEST_FLAG_LIST_ONLY)
/* Non-zero when the stop-after-first-failure flag is set. */
#define GREATEST_FIRST_FAIL() \
(greatest_info.flags & GREATEST_FLAG_FIRST_FAIL)
/* True when the current suite has a failure and first-fail mode is on. */
#define GREATEST_FAILURE_ABORT() \
(greatest_info.suite.failed > 0 && GREATEST_FIRST_FAIL())
/* Message-less forms of tests defined below.
 * The assertion forms build their failure message by stringifying their
 * arguments, so the reported text is the source spelling of the
 * expressions. */
#define GREATEST_PASS() GREATEST_PASSm(NULL)
#define GREATEST_FAIL() GREATEST_FAILm(NULL)
#define GREATEST_SKIP() GREATEST_SKIPm(NULL)
#define GREATEST_ASSERT(COND) \
GREATEST_ASSERTm(#COND, COND)
#define GREATEST_ASSERT_OR_LONGJMP(COND) \
GREATEST_ASSERT_OR_LONGJMPm(#COND, COND)
#define GREATEST_ASSERT_FALSE(COND) \
GREATEST_ASSERT_FALSEm(#COND, COND)
#define GREATEST_ASSERT_EQ(EXP, GOT) \
GREATEST_ASSERT_EQm(#EXP " != " #GOT, EXP, GOT)
#define GREATEST_ASSERT_EQ_FMT(EXP, GOT, FMT) \
GREATEST_ASSERT_EQ_FMTm(#EXP " != " #GOT, EXP, GOT, FMT)
#define GREATEST_ASSERT_IN_RANGE(EXP, GOT, TOL) \
GREATEST_ASSERT_IN_RANGEm(#EXP " != " #GOT " +/- " #TOL, EXP, GOT, TOL)
#define GREATEST_ASSERT_EQUAL_T(EXP, GOT, TYPE_INFO, UDATA) \
GREATEST_ASSERT_EQUAL_Tm(#EXP " != " #GOT, EXP, GOT, TYPE_INFO, UDATA)
#define GREATEST_ASSERT_STR_EQ(EXP, GOT) \
GREATEST_ASSERT_STR_EQm(#EXP " != " #GOT, EXP, GOT)
/* The following forms take an additional message argument first,
* to be displayed by the test runner.
* Each one counts itself in greatest_info.assertions before checking, and
* on failure returns from the enclosing function via GREATEST_FAILm (or
* longjmps, for the _OR_LONGJMP form). */
/* Fail if a condition is not true, with message. */
#define GREATEST_ASSERTm(MSG, COND) \
do { \
greatest_info.assertions++; \
if (!(COND)) { GREATEST_FAILm(MSG); } \
} while (0)
/* Fail if a condition is not true, longjmping out of test.
 * Only usable when GREATEST_USE_LONGJMP is enabled, because
 * GREATEST_FAIL_WITH_LONGJMPm is not defined otherwise. */
#define GREATEST_ASSERT_OR_LONGJMPm(MSG, COND) \
do { \
greatest_info.assertions++; \
if (!(COND)) { GREATEST_FAIL_WITH_LONGJMPm(MSG); } \
} while (0)
/* Fail if a condition is not false, with message. */
#define GREATEST_ASSERT_FALSEm(MSG, COND) \
do { \
greatest_info.assertions++; \
if ((COND)) { GREATEST_FAILm(MSG); } \
} while (0)
/* Fail if EXP != GOT (equality comparison by ==). */
#define GREATEST_ASSERT_EQm(MSG, EXP, GOT) \
do { \
greatest_info.assertions++; \
if ((EXP) != (GOT)) { GREATEST_FAILm(MSG); } \
} while (0)
/* Fail if EXP != GOT (equality comparison by ==), printing both values
 * with the printf-style conversion FMT before failing.
 * The format is held in a greatest_-prefixed local so that it cannot
 * capture an identifier (such as a caller's own `fmt`) appearing in the
 * macro arguments.
 * NOTE: EXP and GOT are expanded once for the comparison and once more
 * for printing on failure, so they should be free of side effects. */
#define GREATEST_ASSERT_EQ_FMTm(MSG, EXP, GOT, FMT)                     \
    do {                                                                \
        const char *greatest_FMT = ( FMT );                             \
        greatest_info.assertions++;                                     \
        if ((EXP) != (GOT)) {                                           \
            fprintf(GREATEST_STDOUT, "\nExpected: ");                   \
            fprintf(GREATEST_STDOUT, greatest_FMT, EXP);                \
            fprintf(GREATEST_STDOUT, "\nGot: ");                        \
            fprintf(GREATEST_STDOUT, greatest_FMT, GOT);                \
            fprintf(GREATEST_STDOUT, "\n");                             \
            GREATEST_FAILm(MSG);                                        \
        }                                                               \
    } while (0)
/* Fail if GOT not in range of EXP +|- TOL.
 * Each argument is evaluated exactly once into a GREATEST_FLOAT local.
 * The locals carry a greatest_ prefix so that they cannot capture
 * identifiers used in the arguments: a local named `exp`, for instance,
 * would break ASSERT_IN_RANGE(exp(x), y, tol) by hiding math.h's exp(). */
#define GREATEST_ASSERT_IN_RANGEm(MSG, EXP, GOT, TOL)                   \
    do {                                                                \
        GREATEST_FLOAT greatest_EXP = (EXP);                            \
        GREATEST_FLOAT greatest_GOT = (GOT);                            \
        GREATEST_FLOAT greatest_TOL = (TOL);                            \
        greatest_info.assertions++;                                     \
        if ((greatest_EXP > greatest_GOT &&                             \
                greatest_EXP - greatest_GOT > greatest_TOL) ||          \
            (greatest_EXP < greatest_GOT &&                             \
                greatest_GOT - greatest_EXP > greatest_TOL)) {          \
            fprintf(GREATEST_STDOUT,                                    \
                "\nExpected: " GREATEST_FLOAT_FMT                       \
                " +/- " GREATEST_FLOAT_FMT "\n"                         \
                "Got: " GREATEST_FLOAT_FMT "\n",                        \
                greatest_EXP, greatest_TOL, greatest_GOT);              \
            GREATEST_FAILm(MSG);                                        \
        }                                                               \
    } while (0)
/* Fail if EXP is not equal to GOT, according to strcmp.
 * Delegates to GREATEST_ASSERT_EQUAL_Tm with the built-in string
 * callbacks. The definition ends at `while (0)` with no trailing line
 * continuation, so it cannot absorb whatever line follows it. */
#define GREATEST_ASSERT_STR_EQm(MSG, EXP, GOT)                          \
    do {                                                                \
        GREATEST_ASSERT_EQUAL_Tm(MSG, EXP, GOT,                         \
            &greatest_type_info_string, NULL);                          \
    } while (0)
/* Fail if EXP is not equal to GOT, according to a comparison
 * callback in TYPE_INFO. If they are not equal, optionally use a
 * print callback in TYPE_INFO to print them.
 * TYPE_INFO is evaluated once into a greatest_-prefixed local so that it
 * cannot capture a caller variable that happens to be named `type_info`.
 * A missing TYPE_INFO or equal callback is reported as its own failure.
 * The definition ends at `while (0)` with no trailing line continuation. */
#define GREATEST_ASSERT_EQUAL_Tm(MSG, EXP, GOT, TYPE_INFO, UDATA)       \
    do {                                                                \
        greatest_type_info *greatest_TYPE_INFO = (TYPE_INFO);           \
        greatest_info.assertions++;                                     \
        if (!greatest_do_assert_equal_t(EXP, GOT,                       \
                greatest_TYPE_INFO, UDATA)) {                           \
            if (greatest_TYPE_INFO == NULL ||                           \
                greatest_TYPE_INFO->equal == NULL) {                    \
                GREATEST_FAILm("type_info->equal callback missing!");   \
            } else {                                                    \
                GREATEST_FAILm(MSG);                                    \
            }                                                           \
        }                                                               \
    } while (0)
/* Pass. Records MSG and returns GREATEST_TEST_RES_PASS from the
 * enclosing function. */
#define GREATEST_PASSm(MSG) \
do { \
greatest_info.msg = MSG; \
return GREATEST_TEST_RES_PASS; \
} while (0)
/* Fail. Records the file and line of the macro expansion together with
 * MSG, then returns GREATEST_TEST_RES_FAIL from the enclosing function. */
#define GREATEST_FAILm(MSG) \
do { \
greatest_info.fail_file = __FILE__; \
greatest_info.fail_line = __LINE__; \
greatest_info.msg = MSG; \
return GREATEST_TEST_RES_FAIL; \
} while (0)
/* Optional GREATEST_FAILm variant that longjmps.
 * It jumps to greatest_info.jump_dest instead of returning, so it can be
 * used from functions that do not return greatest_test_res. */
#if GREATEST_USE_LONGJMP
#define GREATEST_FAIL_WITH_LONGJMP() GREATEST_FAIL_WITH_LONGJMPm(NULL)
#define GREATEST_FAIL_WITH_LONGJMPm(MSG) \
do { \
greatest_info.fail_file = __FILE__; \
greatest_info.fail_line = __LINE__; \
greatest_info.msg = MSG; \
longjmp(greatest_info.jump_dest, GREATEST_TEST_RES_FAIL); \
} while (0)
#endif
/* Skip the current test. Records MSG and returns GREATEST_TEST_RES_SKIP
 * from the enclosing function. */
#define GREATEST_SKIPm(MSG) \
do { \
greatest_info.msg = MSG; \
return GREATEST_TEST_RES_SKIP; \
} while (0)
/* Check the result of a subfunction using ASSERT, etc.
 * RES is evaluated once; any result other than GREATEST_TEST_RES_PASS is
 * returned from the enclosing function, so a failure or skip in a helper
 * propagates to the calling test.
 * The definition ends at `while (0)` with no trailing line continuation,
 * so the preprocessor directive that follows it is not spliced into the
 * macro body. */
#define GREATEST_CHECK_CALL(RES)                                        \
    do {                                                                \
        int greatest_RES = (RES);                                       \
        if (greatest_RES != GREATEST_TEST_RES_PASS) {                   \
            return greatest_RES;                                        \
        }                                                               \
    } while (0)
/* Timing helpers.
 * GREATEST_SET_TIME stores clock() in NAME and exits the process if
 * clock() reports an error. It is wrapped in do/while(0) so that it acts
 * as a single statement (safe in an unbraced if/else).
 * GREATEST_CLOCK_DIFF prints the elapsed ticks and seconds between two
 * clock_t samples. Both expand to nothing when GREATEST_USE_TIME is 0. */
#if GREATEST_USE_TIME
#define GREATEST_SET_TIME(NAME)                                         \
    do {                                                                \
        NAME = clock();                                                 \
        if (NAME == (clock_t) -1) {                                     \
            fprintf(GREATEST_STDOUT,                                    \
                "clock error: %s\n", #NAME);                            \
            exit(EXIT_FAILURE);                                         \
        }                                                               \
    } while (0)
#define GREATEST_CLOCK_DIFF(C1, C2)                                     \
    fprintf(GREATEST_STDOUT, " (%lu ticks, %.3f sec)",                  \
        (long unsigned int) (C2) - (long unsigned int)(C1),             \
        (double)((C2) - (C1)) / (1.0 * (double)CLOCKS_PER_SEC))
#else
#define GREATEST_SET_TIME(UNUSED)
#define GREATEST_CLOCK_DIFF(UNUSED1, UNUSED2)
#endif
/* Save the point that GREATEST_FAIL_WITH_LONGJMPm jumps back to.
 * The expression evaluates to the test result with which execution
 * (re)starts at this point. */
#if GREATEST_USE_LONGJMP
#define GREATEST_SAVE_CONTEXT() \
/* setjmp returns 0 (GREATEST_TEST_RES_PASS) on first call */ \
/* so the test runs, then RES_FAIL from FAIL_WITH_LONGJMP. */ \
((greatest_test_res)(setjmp(greatest_info.jump_dest)))
#else
#define GREATEST_SAVE_CONTEXT() \
/*a no-op, since setjmp/longjmp aren't being used */ \
GREATEST_TEST_RES_PASS
#endif
/* Include several function definitions in the main test file. */
#define GREATEST_MAIN_DEFS() \
\
/* Does NAME contain FILTER as a substring? */                          \
/* Returns 1 on a match, 0 otherwise; an empty FILTER never matches. */ \
static int greatest_name_match(const char *name,                        \
        const char *filter) {                                           \
    if (filter[0] == '\0') {                                            \
        return 0;                                                       \
    }                                                                   \
    return strstr(name, filter) != NULL;                                \
}                                                                       \
\
/* Decide whether the test NAME should run; if so, start its timer */   \
/* and call the setup hook. Returns 1 to run the test, 0 to skip it. */ \
int greatest_pre_test(const char *name) {                               \
    /* Listing mode never executes tests. */                            \
    if (GREATEST_LIST_ONLY()) {                                         \
        return 0;                                                       \
    }                                                                   \
    /* First-fail mode: stop once this suite has a failure. */          \
    if (GREATEST_FIRST_FAIL() && greatest_info.suite.failed != 0) {     \
        return 0;                                                       \
    }                                                                   \
    /* A test filter, when set, must occur within the test name. */     \
    if (greatest_info.test_filter != NULL &&                            \
        !greatest_name_match(name, greatest_info.test_filter)) {        \
        return 0;                                                       \
    }                                                                   \
    GREATEST_SET_TIME(greatest_info.suite.pre_test);                    \
    if (greatest_info.setup) {                                          \
        greatest_info.setup(greatest_info.setup_udata);                 \
    }                                                                   \
    return 1;                                                           \
}                                                                       \
\
/* Finish the test NAME: stop its timer, call the teardown hook, */     \
/* record the result RES, and advance the progress output. */           \
void greatest_post_test(const char *name, int res) {                    \
    GREATEST_SET_TIME(greatest_info.suite.post_test);                   \
    if (greatest_info.teardown) {                                       \
        greatest_info.teardown(greatest_info.teardown_udata);           \
    }                                                                   \
                                                                        \
    /* FAIL is negative, SKIP is positive, PASS is zero. */             \
    if (res <= GREATEST_TEST_RES_FAIL) {                                \
        greatest_do_fail(name);                                         \
    } else if (res >= GREATEST_TEST_RES_SKIP) {                         \
        greatest_do_skip(name);                                         \
    } else {                                                            \
        greatest_do_pass(name);                                         \
    }                                                                   \
    greatest_info.suite.tests_run += 1;                                 \
    greatest_info.col += 1;                                             \
    if (GREATEST_IS_VERBOSE()) {                                        \
        /* Verbose: one line per test, ending with its timing. */       \
        GREATEST_CLOCK_DIFF(greatest_info.suite.pre_test,               \
            greatest_info.suite.post_test);                             \
        fprintf(GREATEST_STDOUT, "\n");                                 \
    } else if (greatest_info.col % greatest_info.width == 0) {          \
        /* Compact: wrap the progress row at the configured width. */   \
        fprintf(GREATEST_STDOUT, "\n");                                 \
        greatest_info.col = 0;                                          \
    }                                                                   \
    if (GREATEST_STDOUT == stdout) fflush(stdout);                      \
}                                                                       \
\
/* Print the per-suite summary line; prints nothing if the suite */     \
/* ran no tests. */                                                     \
static void report_suite(void) {                                        \
    const unsigned int ran = greatest_info.suite.tests_run;             \
    if (ran == 0) {                                                     \
        return;                                                         \
    }                                                                   \
    fprintf(GREATEST_STDOUT,                                            \
        "\n%u test%s - %u passed, %u failed, %u skipped",               \
        ran,                                                            \
        ran == 1 ? "" : "s",                                            \
        greatest_info.suite.passed,                                     \
        greatest_info.suite.failed,                                     \
        greatest_info.suite.skipped);                                   \
    GREATEST_CLOCK_DIFF(greatest_info.suite.pre_suite,                  \
        greatest_info.suite.post_suite);                                \
    fprintf(GREATEST_STDOUT, "\n");                                     \
}                                                                       \
\
/* Fold the current suite's counters into the overall totals, then */   \
/* clear the suite state, the setup/teardown hooks and the column. */   \
static void update_counts_and_reset_suite(void) {                       \
    /* Accumulate before the suite struct is wiped below. */            \
    greatest_info.tests_run += greatest_info.suite.tests_run;           \
    greatest_info.passed += greatest_info.suite.passed;                 \
    greatest_info.failed += greatest_info.suite.failed;                 \
    greatest_info.skipped += greatest_info.suite.skipped;               \
    memset(&greatest_info.suite, 0, sizeof(greatest_info.suite));       \
                                                                        \
    /* Hooks do not carry over to whatever runs next. */                \
    greatest_info.setup = NULL;                                         \
    greatest_info.setup_udata = NULL;                                   \
    greatest_info.teardown = NULL;                                      \
    greatest_info.teardown_udata = NULL;                                \
    greatest_info.col = 0;                                              \
}                                                                       \
\
/* Run SUITE_CB under the name SUITE_NAME, unless the suite filter */   \
/* or first-fail mode rules it out, then print its summary. */          \
static void greatest_run_suite(greatest_suite_cb *suite_cb,             \
        const char *suite_name) {                                       \
    const char *wanted = greatest_info.suite_filter;                    \
    if (wanted && !greatest_name_match(suite_name, wanted)) {           \
        return;                                                         \
    }                                                                   \
    if (GREATEST_FIRST_FAIL() && greatest_info.failed > 0) {            \
        return;                                                         \
    }                                                                   \
    /* Tests run outside any suite are folded into the totals first. */ \
    if (greatest_info.suite.tests_run > 0) {                            \
        update_counts_and_reset_suite();                                \
    }                                                                   \
    fprintf(GREATEST_STDOUT, "\n* Suite %s:\n", suite_name);            \
    GREATEST_SET_TIME(greatest_info.suite.pre_suite);                   \
    suite_cb();                                                         \
    GREATEST_SET_TIME(greatest_info.suite.post_suite);                  \
    report_suite();                                                     \
}                                                                       \
\
/* Record a passing test: "." in compact mode, or a PASS line with */   \
/* the optional message in verbose mode. */                             \
void greatest_do_pass(const char *name) {                               \
    if (!GREATEST_IS_VERBOSE()) {                                       \
        fprintf(GREATEST_STDOUT, ".");                                  \
    } else {                                                            \
        const char *note = greatest_info.msg ? greatest_info.msg : "";  \
        fprintf(GREATEST_STDOUT, "PASS %s: %s", name, note);            \
    }                                                                   \
    greatest_info.suite.passed += 1;                                    \
}                                                                       \
\
/* Record a failing test and print where it failed. Compact mode */     \
/* prints "F", ends the progress row, then prints the FAIL line. */     \
void greatest_do_fail(const char *name) {                               \
    const char *note = greatest_info.msg ? greatest_info.msg : "";      \
    if (!GREATEST_IS_VERBOSE()) {                                       \
        fprintf(GREATEST_STDOUT, "F");                                  \
        greatest_info.col += 1;                                         \
        /* add linebreak if in line of '.'s */                          \
        if (greatest_info.col != 0) {                                   \
            fprintf(GREATEST_STDOUT, "\n");                             \
            greatest_info.col = 0;                                      \
        }                                                               \
        fprintf(GREATEST_STDOUT, "FAIL %s: %s (%s:%u)\n",               \
            name, note,                                                 \
            greatest_info.fail_file, greatest_info.fail_line);          \
    } else {                                                            \
        fprintf(GREATEST_STDOUT,                                        \
            "FAIL %s: %s (%s:%u)",                                      \
            name, note,                                                 \
            greatest_info.fail_file, greatest_info.fail_line);          \
    }                                                                   \
    greatest_info.suite.failed += 1;                                    \
}                                                                       \
\
/* Record a skipped test: "s" in compact mode, or a SKIP line with */   \
/* the optional message in verbose mode. */                             \
void greatest_do_skip(const char *name) {                               \
    if (!GREATEST_IS_VERBOSE()) {                                       \
        fprintf(GREATEST_STDOUT, "s");                                  \
    } else {                                                            \
        const char *note = greatest_info.msg ? greatest_info.msg : "";  \
        fprintf(GREATEST_STDOUT, "SKIP %s: %s", name, note);            \
    }                                                                   \
    greatest_info.suite.skipped += 1;                                   \
}                                                                       \
\
/* Compare EXP and GOT with TYPE_INFO's equal callback and return */    \
/* its result; returns 0 when TYPE_INFO or the callback is NULL. */     \
/* On a mismatch, both values are printed with the print callback */    \
/* if there is one, otherwise a generic failure line is printed. */     \
int greatest_do_assert_equal_t(const void *exp, const void *got,        \
        greatest_type_info *type_info, void *udata) {                   \
    int matched;                                                        \
    if (type_info == NULL || type_info->equal == NULL) {                \
        return 0;                                                       \
    }                                                                   \
    matched = type_info->equal(exp, got, udata);                        \
    if (matched) {                                                      \
        return matched;                                                 \
    }                                                                   \
    if (type_info->print == NULL) {                                     \
        fprintf(GREATEST_STDOUT,                                        \
            "GREATEST_ASSERT_EQUAL_T failure at %s:%u\n",               \
            greatest_info.fail_file,                                    \
            greatest_info.fail_line);                                   \
        return matched;                                                 \
    }                                                                   \
    fprintf(GREATEST_STDOUT, "\nExpected: ");                           \
    (void)type_info->print(exp, udata);                                 \
    fprintf(GREATEST_STDOUT, "\nGot: ");                                \
    (void)type_info->print(got, udata);                                 \
    fprintf(GREATEST_STDOUT, "\n");                                     \
    return matched;                                                     \
}                                                                       \
\
/* Print the command-line help to GREATEST_STDOUT. */ \
/* NAME is shown as the program name in the usage line. */ \
void greatest_usage(const char *name) { \
fprintf(GREATEST_STDOUT, \
"Usage: %s [-hlfv] [-s SUITE] [-t TEST]\n" \
" -h print this Help\n" \
" -l List suites and their tests, then exit\n" \
" -f Stop runner after first failure\n" \
" -v Verbose output\n" \
" -s SUITE only run suites containing string SUITE\n" \
" -t TEST only run tests containing string TEST\n", \
name); \
} \
\
/* Apply the command-line options to greatest_info. */                  \
/* Only the first two characters of each argument are significant. */   \
/* An unknown argument, or -t/-s without a value, prints the usage */   \
/* text and exits with a failure status; -h prints it and exits */      \
/* successfully; "--" stops option processing. */                       \
static void greatest_parse_args(int argc, char **argv) {                \
    int idx;                                                            \
    for (idx = 1; idx < argc; idx++) {                                  \
        const char *arg = argv[idx];                                    \
        /* Option letter, or NUL if ARG does not start with '-'. */     \
        const char opt = (arg[0] == '-') ? arg[1] : '\0';               \
        if (opt == 't' || opt == 's') {                                 \
            /* These options consume the following argument. */         \
            if (idx + 1 >= argc) {                                      \
                greatest_usage(argv[0]);                                \
                exit(EXIT_FAILURE);                                     \
            }                                                           \
            idx++;                                                      \
            if (opt == 't') {                                           \
                greatest_info.test_filter = argv[idx];                  \
            } else {                                                    \
                greatest_info.suite_filter = argv[idx];                 \
            }                                                           \
        } else if (opt == 'f') {                                        \
            greatest_info.flags |= GREATEST_FLAG_FIRST_FAIL;            \
        } else if (opt == 'v') {                                        \
            greatest_info.verbosity++;                                  \
        } else if (opt == 'l') {                                        \
            greatest_info.flags |= GREATEST_FLAG_LIST_ONLY;             \
        } else if (opt == 'h') {                                        \
            greatest_usage(argv[0]);                                    \
            exit(EXIT_SUCCESS);                                         \
        } else if (opt == '-') {                                        \
            break;                                                      \
        } else {                                                        \
            fprintf(GREATEST_STDOUT,                                    \
                "Unknown argument '%s'\n", arg);                        \
            greatest_usage(argv[0]);                                    \
            exit(EXIT_FAILURE);                                         \
        }                                                               \
    }                                                                   \
}                                                                       \
\
/* Returns 1 when the overall failure count is zero, otherwise 0. */    \
int greatest_all_passed(void) {                                         \
    return greatest_info.failed == 0;                                   \
}                                                                       \
                                                                        \
/* Only run tests whose name contains NAME (pointer is stored). */      \
void greatest_set_test_filter(const char *name) {                       \
    greatest_info.test_filter = name;                                   \
}                                                                       \
                                                                        \
/* Only run suites whose name contains NAME (pointer is stored). */     \
void greatest_set_suite_filter(const char *name) {                      \
    greatest_info.suite_filter = name;                                  \
}                                                                       \
                                                                        \
/* Copy the overall counters into REPORT; a NULL REPORT is ignored. */  \
void greatest_get_report(struct greatest_report_t *report) {            \
    if (!report) {                                                      \
        return;                                                         \
    }                                                                   \
    report->passed = greatest_info.passed;                              \
    report->failed = greatest_info.failed;                              \
    report->skipped = greatest_info.skipped;                            \
    report->assertions = greatest_info.assertions;                      \
}                                                                       \
                                                                        \
/* Current verbosity level. */                                          \
unsigned int greatest_get_verbosity(void) {                             \
    return (unsigned int)greatest_info.verbosity;                       \
}                                                                       \
                                                                        \
/* Set the verbosity level; the value is stored as unsigned char. */    \
void greatest_set_verbosity(unsigned int verbosity) {                   \
    greatest_info.verbosity = (unsigned char)verbosity;                 \
}                                                                       \
                                                                        \
/* Turn on FLAG in addition to any flags already set. */                \
void greatest_set_flag(greatest_flag_t flag) {                          \
    greatest_info.flags = (unsigned char)(greatest_info.flags | flag);  \
}                                                                       \
\
/* Install CB as the setup hook; UDATA is passed to it on each call. */ \
void GREATEST_SET_SETUP_CB(greatest_setup_cb *cb, void *udata) {        \
    greatest_info.setup_udata = udata;                                  \
    greatest_info.setup = cb;                                           \
}                                                                       \
                                                                        \
/* Install CB as the teardown hook; UDATA is passed to it on each */    \
/* call. */                                                             \
void GREATEST_SET_TEARDOWN_CB(greatest_teardown_cb *cb,                 \
        void *udata) {                                                  \
    greatest_info.teardown_udata = udata;                               \
    greatest_info.teardown = cb;                                        \
}                                                                       \
\
/* Equality callback for C strings: non-zero if EXP and GOT are equal. */ \
/* A NULL pointer is only equal to another NULL pointer; strcmp() is */ \
/* never called with a NULL argument, which would be undefined. */      \
static int greatest_string_equal_cb(const void *exp, const void *got,   \
        void *udata) {                                                  \
    (void)udata;                                                        \
    if (exp == NULL || got == NULL) {                                   \
        return exp == got;                                              \
    }                                                                   \
    return (0 == strcmp((const char *)exp, (const char *)got));         \
}                                                                       \
\
/* Print callback for C strings; returns fprintf's result. */           \
/* A NULL pointer is printed as "(null)" because passing NULL to */     \
/* the %s conversion is undefined behavior. */                          \
static int greatest_string_printf_cb(const void *t, void *udata) {      \
    (void)udata;                                                        \
    return fprintf(GREATEST_STDOUT, "%s",                               \
        t != NULL ? (const char *)t : "(null)");                        \
}                                                                       \
\
/* Type info used by the string assertions: equal, then print. */ \
greatest_type_info greatest_type_info_string = { \
greatest_string_equal_cb, \
greatest_string_printf_cb, \
}; \
\
/* Definition of the global run state. The macro ends without a */ \
/* semicolon; the one written after GREATEST_MAIN_DEFS() supplies it. */ \
greatest_run_info greatest_info
/* Init internals: zero the global run state, set the default output
 * width, and record the start time of the run.
 * The definition ends at `while (0)` with no trailing line continuation,
 * so it cannot absorb whatever line follows it. */
#define GREATEST_INIT()                                                 \
    do {                                                                \
        /* Suppress unused function warning if features aren't used */  \
        (void)greatest_run_suite;                                       \
        (void)greatest_parse_args;                                      \
                                                                        \
        memset(&greatest_info, 0, sizeof(greatest_info));               \
        greatest_info.width = GREATEST_DEFAULT_WIDTH;                   \
        GREATEST_SET_TIME(greatest_info.begin);                         \
    } while (0)
/* Handle command-line arguments, etc.
 * Expects `argc` and `argv` to be in scope at the point of use. */
#define GREATEST_MAIN_BEGIN() \
do { \
GREATEST_INIT(); \
greatest_parse_args(argc, argv); \
} while (0)
/* Report passes, failures, skipped tests, the number of
* assertions, and the overall run time.
* Nothing is printed in list-only mode. The counts of the last suite (or
* of tests run outside a suite) are folded into the totals first. */
#define GREATEST_PRINT_REPORT() \
do { \
if (!GREATEST_LIST_ONLY()) { \
update_counts_and_reset_suite(); \
GREATEST_SET_TIME(greatest_info.end); \
fprintf(GREATEST_STDOUT, \
"\nTotal: %u test%s", \
greatest_info.tests_run, \
greatest_info.tests_run == 1 ? "" : "s"); \
GREATEST_CLOCK_DIFF(greatest_info.begin, \
greatest_info.end); \
fprintf(GREATEST_STDOUT, ", %u assertion%s\n", \
greatest_info.assertions, \
greatest_info.assertions == 1 ? "" : "s"); \
fprintf(GREATEST_STDOUT, \
"Pass: %u, fail: %u, skip: %u.\n", \
greatest_info.passed, \
greatest_info.failed, greatest_info.skipped); \
} \
} while (0)
/* Report results, exit with exit status based on results.
 * Contains a `return`, so it must be the last statement of main(). */
#define GREATEST_MAIN_END() \
do { \
GREATEST_PRINT_REPORT(); \
return (greatest_all_passed() ? EXIT_SUCCESS : EXIT_FAILURE); \
} while (0)
/* Make abbreviations without the GREATEST_ prefix for the
* most commonly used symbols.
* Define GREATEST_USE_ABBREVS as 0 before including this header to keep
* these short names out of the including file. */
#if GREATEST_USE_ABBREVS
/* Defining and running tests and suites. */
#define TEST GREATEST_TEST
#define SUITE GREATEST_SUITE
#define SUITE_EXTERN GREATEST_SUITE_EXTERN
#define RUN_TEST GREATEST_RUN_TEST
#define RUN_TEST1 GREATEST_RUN_TEST1
#define RUN_SUITE GREATEST_RUN_SUITE
/* Assertions, without and with (m suffix) a custom message. */
#define ASSERT GREATEST_ASSERT
#define ASSERTm GREATEST_ASSERTm
#define ASSERT_FALSE GREATEST_ASSERT_FALSE
#define ASSERT_EQ GREATEST_ASSERT_EQ
#define ASSERT_EQ_FMT GREATEST_ASSERT_EQ_FMT
#define ASSERT_IN_RANGE GREATEST_ASSERT_IN_RANGE
#define ASSERT_EQUAL_T GREATEST_ASSERT_EQUAL_T
#define ASSERT_STR_EQ GREATEST_ASSERT_STR_EQ
#define ASSERT_FALSEm GREATEST_ASSERT_FALSEm
#define ASSERT_EQm GREATEST_ASSERT_EQm
#define ASSERT_EQ_FMTm GREATEST_ASSERT_EQ_FMTm
#define ASSERT_IN_RANGEm GREATEST_ASSERT_IN_RANGEm
#define ASSERT_EQUAL_Tm GREATEST_ASSERT_EQUAL_Tm
#define ASSERT_STR_EQm GREATEST_ASSERT_STR_EQm
/* Test results. */
#define PASS GREATEST_PASS
#define FAIL GREATEST_FAIL
#define SKIP GREATEST_SKIP
#define PASSm GREATEST_PASSm
#define FAILm GREATEST_FAILm
#define SKIPm GREATEST_SKIPm
/* Hooks and helper-call checking. */
#define SET_SETUP GREATEST_SET_SETUP_CB
#define SET_TEARDOWN GREATEST_SET_TEARDOWN_CB
#define CHECK_CALL GREATEST_CHECK_CALL
#ifdef GREATEST_VA_ARGS
#define RUN_TESTp GREATEST_RUN_TESTp
#endif
#if GREATEST_USE_LONGJMP
#define ASSERT_OR_LONGJMP GREATEST_ASSERT_OR_LONGJMP
#define ASSERT_OR_LONGJMPm GREATEST_ASSERT_OR_LONGJMPm
#define FAIL_WITH_LONGJMP GREATEST_FAIL_WITH_LONGJMP
#define FAIL_WITH_LONGJMPm GREATEST_FAIL_WITH_LONGJMPm
#endif
#endif /* USE_ABBREVS */
#endif

25
test/test.c Normal file
View File

@@ -0,0 +1,25 @@
#include "greatest.h"
/* Test suites defined in other source files. */
SUITE_EXTERN(libpostal_expansion_tests);
SUITE_EXTERN(libpostal_parser_tests);
SUITE_EXTERN(libpostal_transliteration_tests);
SUITE_EXTERN(libpostal_numex_tests);
SUITE_EXTERN(libpostal_string_utils_tests);
SUITE_EXTERN(libpostal_trie_tests);
SUITE_EXTERN(libpostal_crf_context_tests);
/* Emit the test runner's function and global definitions in this file. */
GREATEST_MAIN_DEFS();
/* Test runner entry point: parses the greatest command-line options,
 * runs every suite in the order listed, prints the report, and returns
 * EXIT_SUCCESS only if no test failed. */
int main(int argc, char **argv) {
GREATEST_MAIN_BEGIN();
RUN_SUITE(libpostal_expansion_tests);
RUN_SUITE(libpostal_parser_tests);
RUN_SUITE(libpostal_transliteration_tests);
RUN_SUITE(libpostal_numex_tests);
RUN_SUITE(libpostal_string_utils_tests);
RUN_SUITE(libpostal_trie_tests);
RUN_SUITE(libpostal_crf_context_tests);
/* Contains the return statement for main(). */
GREATEST_MAIN_END();
}

268
test/test_crf_context.c Normal file
View File

@@ -0,0 +1,268 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdarg.h>
#include "greatest.h"
#include "../src/float_utils.c"
#include "../src/crf_context.c"
/* Forward declaration of this file's suite. SUITE() expands to a
 * prototype followed by a second declarator, so ending it with ';'
 * declares the function twice and defines nothing here. */
SUITE(libpostal_crf_context_tests);
/* Helper assertion: passes when cv and tv differ by at most 1e-9.
 * On failure the message reports cv as "Expected" and tv as "Got".
 * The result must be propagated by the caller (e.g. via CHECK_CALL) for
 * a failure to affect the calling test. */
static greatest_test_res check_values(double cv, double tv) {
ASSERT_IN_RANGE(cv, tv, 1e-9);
PASS();
}
/* Helper assertion: passes when x is non-NULL and its m and n fields
 * equal the given m and n.
 * The assertion macros only return a result from this function; a caller
 * that discards the return value (rather than wrapping the call in
 * CHECK_CALL) will not register a failure. */
static greatest_test_res check_matrix_size(double_matrix_t *x, size_t m, size_t n) {
ASSERT(x);
ASSERT_EQ(x->m, m);
ASSERT_EQ(x->n, n);
PASS();
}
TEST test_crf_context(void) {
int y1, y2, y3;
double norm = 0;
const size_t L = 3;
const size_t T = 3;
crf_context_t *ctx = crf_context_new(CRF_CONTEXT_ALL, L, 1);
ASSERT(ctx != NULL);
const size_t T_large = 100;
bool ret = crf_context_set_num_items(ctx, T_large);
ASSERT(ret);
check_matrix_size(ctx->state, T_large, L);
check_matrix_size(ctx->exp_state, T_large, L);
check_matrix_size(ctx->state_trans, T_large, L * L);
check_matrix_size(ctx->exp_state_trans, T_large, L * L);
check_matrix_size(ctx->trans, L, L);
check_matrix_size(ctx->exp_trans, L, L);
ret = crf_context_set_num_items(ctx, T);
ASSERT(ret);
check_matrix_size(ctx->state, T, L);
check_matrix_size(ctx->exp_state, T, L);
check_matrix_size(ctx->state_trans, T, L * L);
check_matrix_size(ctx->exp_state_trans, T, L * L);
check_matrix_size(ctx->trans, L, L);
check_matrix_size(ctx->exp_trans, L, L);
double *state_trans = NULL;
double *state = NULL;
double *trans = NULL;
double scores[T][L][L];
uint32_t labels[L];
/* Initialize the state scores. */
state = state_score(ctx, 0);
state[0] = .4; state[1] = .5; state[2] = .1;
state = state_score(ctx, 1);
state[0] = .4; state[1] = .1; state[2] = .5;
state = state_score(ctx, 2);
state[0] = .4; state[1] = .1; state[2] = .5;
printf("state\n");
/* Initialize the state-transition scores. */
state_trans = state_trans_score(ctx, 0, 0);
state_trans[0] = .4; state_trans[1] = .2; state_trans[2] = .5;
state_trans = state_trans_score(ctx, 0, 1);
state_trans[0] = .4; state_trans[1] = .2; state_trans[2] = .5;
state_trans = state_trans_score(ctx, 0, 2);
state_trans[0] = .4; state_trans[1] = .2; state_trans[2] = .5;
state_trans = state_trans_score(ctx, 1, 0);
state_trans[0] = .3; state_trans[1] = .1; state_trans[2] = .6;
state_trans = state_trans_score(ctx, 1, 1);
state_trans[0] = .5; state_trans[1] = .1; state_trans[2] = .3;
state_trans = state_trans_score(ctx, 1, 2);
state_trans[0] = .4; state_trans[1] = .2; state_trans[2] = .4;
state_trans = state_trans_score(ctx, 2, 0);
state_trans[0] = .3; state_trans[1] = .1; state_trans[2] = .6;
state_trans = state_trans_score(ctx, 2, 1);
state_trans[0] = .5; state_trans[1] = .1; state_trans[2] = .3;
state_trans = state_trans_score(ctx, 2, 2);
state_trans[0] = .4; state_trans[1] = .2; state_trans[2] = .4;
printf("state_trans\n");
trans = trans_score(ctx, 0);
trans[0] = .3; trans[1] = .1; trans[2] = .4;
trans = trans_score(ctx, 1);
trans[0] = .6; trans[1] = .2; trans[2] = .1;
trans = trans_score(ctx, 2);
trans[0] = .5; trans[1] = .2; trans[2] = .1;
printf("trans\n");
crf_context_exp_state(ctx);
printf("exp state\n");
crf_context_exp_state_trans(ctx);
printf("exp state_trans\n");
crf_context_exp_trans(ctx);
printf("exp trans\n");
crf_context_alpha_score(ctx);
printf("alpha\n");
crf_context_beta_score(ctx);
printf("beta\n");
/* Compute the score of every label sequence. */
for (y1 = 0; y1 < T; y1++) {
double s1 = exp_state_score(ctx, 0)[y1];
for (y2 = 0; y2 < L; y2++) {
double s2 = s1;
s2 *= exp_state_trans_score(ctx, 1, y1)[y2];
s2 *= exp_trans_score(ctx, y1)[y2];
s2 *= exp_state_score(ctx, 1)[y2];
for (y3 = 0; y3 < L; y3++) {
double s3 = s2;
s3 *= exp_state_trans_score(ctx, 2, y2)[y3];
s3 *= exp_trans_score(ctx, y2)[y3];
s3 *= exp_state_score(ctx, 2)[y3];
scores[y1][y2][y3] = s3;
}
}
}
/* Compute the partition factor. */
norm = 0.;
for (y1 = 0; y1 < T; y1++) {
for (y2 = 0; y2 < L; y2++) {
for (y3 = 0; y3 < L; y3++) {
norm += scores[y1][y2][y3];
}
}
}
/* Check the partition factor. */
printf("Check for the partition factor...\n");
CHECK_CALL(check_values(exp(ctx->log_norm), norm));
/* Compute the sequence probabilities. */
for (y1 = 0; y1 < T; y1++) {
for (y2 = 0; y2 < L; y2++) {
for (y3 = 0; y3 < L; y3++) {
double logp;
labels[0] = y1;
labels[1] = y2;
labels[2] = y3;
logp = crf_context_score(ctx, labels) - crf_context_lognorm(ctx);
printf("Check for the sequence %d-%d-%d...\n", y1, y2, y3);
CHECK_CALL(check_values(exp(logp), scores[y1][y2][y3] / norm));
}
}
}
/* Compute the marginal probability at t=0 */
for (y1 = 0; y1 < T; y1++) {
double a, b, c, s = 0.;
for (y2 = 0; y2 < L; y2++) {
for (y3 = 0; y3 < L; y3++) {
s += scores[y1][y2][y3];
}
}
a = alpha_score(ctx, 0)[y1];
b = beta_score(ctx, 0)[y1];
c = 1. / ctx->scale_factor->a[0];
printf("Check for the marginal probability (0,%d)...\n", y1);
CHECK_CALL(check_values(a * b * c, s / norm));
}
/* Compute the marginal probability at t=1 */
for (y2 = 0; y2 < L; y2++) {
double a, b, c, s = 0.;
for (y1 = 0; y1 < T; y1++) {
for (y3 = 0; y3 < L; y3++) {
s += scores[y1][y2][y3];
}
}
a = alpha_score(ctx, 1)[y2];
b = beta_score(ctx, 1)[y2];
c = 1. / ctx->scale_factor->a[1];
printf("Check for the marginal probability (1,%d)...\n", y2);
CHECK_CALL(check_values(a * b * c, s / norm));
}
/* Compute the marginal probability at t=2 */
for (y3 = 0; y3 < L; y3++) {
double a, b, c, s = 0.;
for (y1 = 0; y1 < T; y1++) {
for (y2 = 0; y2 < L; y2++) {
s += scores[y1][y2][y3];
}
}
a = alpha_score(ctx, 2)[y3];
b = beta_score(ctx, 2)[y3];
c = 1. / ctx->scale_factor->a[2];
printf("Check for the marginal probability (2,%d)...\n", y3);
CHECK_CALL(check_values(a * b * c, s / norm));
}
/* Compute the marginal probabilities of transitions. */
for (y1 = 0; y1 < T; y1++) {
for (y2 = 0; y2 < L; y2++) {
double a, b, s, st, t, p = 0.;
for (y3 = 0; y3 < L; y3++) {
p += scores[y1][y2][y3];
}
a = alpha_score(ctx, 0)[y1];
b = beta_score(ctx, 1)[y2];
s = exp_state_score(ctx, 1)[y2];
st = exp_state_trans_score(ctx, 1, y1)[y2];
t = exp_trans_score(ctx, y1)[y2];
printf("Check for the marginal probability (0,%d)-(1,%d)...\n", y1, y2);
CHECK_CALL(check_values(a * t * st * s * b, p / norm));
}
}
for (y2 = 0; y2 < L; y2++) {
for (y3 = 0; y3 < L; y3++) {
double a, b, s, st, t, p = 0.;
for (y1 = 0; y1 < T; y1++) {
p += scores[y1][y2][y3];
}
a = alpha_score(ctx, 1)[y2];
b = beta_score(ctx, 2)[y3];
s = exp_state_score(ctx, 2)[y3];
st = exp_state_trans_score(ctx, 2, y2)[y3];
t = exp_trans_score(ctx, y2)[y3];
printf("Check for the marginal probability (1,%d)-(2,%d)...\n", y2, y3);
CHECK_CALL(check_values(a * t * st * s * b, p / norm));
}
}
double viterbi = crf_context_viterbi(ctx, labels);
printf("viterbi score=%f\n", viterbi);
for (int i = 0; i < L; i++) {
printf("label[%d]=%d\n", i, labels[i]);
}
crf_context_destroy(ctx);
PASS();
}
/* Suite entry point for the CRF context tests; runs test_crf_context. */
SUITE(libpostal_crf_context_tests) {
RUN_TEST(test_crf_context);
}

339
test/test_expand.c Normal file
View File

@@ -0,0 +1,339 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdarg.h>
#include "greatest.h"
#include "../src/string_utils.h"
#include "../src/libpostal.h"
SUITE(libpostal_expansion_tests);
/*
 * Expands `input` and checks that `output` is one of the resulting strings.
 *
 * input   - raw text to expand
 * output  - expansion that must appear in the result set
 * options - normalization options forwarded to libpostal
 * root    - when true use libpostal_expand_address_root, otherwise
 *           libpostal_expand_address
 *
 * Every inspected expansion is printed; on a miss the full result set is
 * printed as well. The expansion array is released on both the passing and
 * the failing path before the test result is reported.
 */
static greatest_test_res test_expansion_contains_phrase_option(char *input, char *output, libpostal_normalize_options_t options, bool root) {
    size_t num_expansions = 0;
    char **expansions = root
        ? libpostal_expand_address_root(input, options, &num_expansions)
        : libpostal_expand_address(input, options, &num_expansions);

    /* Never index into a missing array, whatever count was reported. */
    if (expansions == NULL) {
        num_expansions = 0;
    }

    bool contains_expansion = false;
    for (size_t i = 0; i < num_expansions; i++) {
        char *expansion = expansions[i];
        printf("expansion = %s\n", expansion);
        if (string_equals(output, expansion)) {
            contains_expansion = true;
            break;
        }
    }

    if (!contains_expansion) {
        printf("Expansions should contain %s, got {", output);
        for (size_t i = 0; i < num_expansions; i++) {
            printf("%s%s", expansions[i], i + 1 < num_expansions ? "," : "");
        }
        printf("}\n");
    }

    /* Release before FAIL(): FAIL() returns from this function immediately. */
    if (expansions != NULL) {
        libpostal_expansion_array_destroy(expansions, num_expansions);
    }

    if (!contains_expansion) {
        FAIL();
    }
    PASS();
}
/* Checks that the regular (non-root) expansions of `input` include `output`. */
static greatest_test_res test_expansion_contains(char *input, char *output, libpostal_normalize_options_t options) {
    CHECK_CALL(test_expansion_contains_phrase_option(input, output, options, false));
    PASS();
}
/* Checks that the root expansions of `input` include `output`. */
static greatest_test_res test_root_expansion_contains(char *input, char *output, libpostal_normalize_options_t options) {
    CHECK_CALL(test_expansion_contains_phrase_option(input, output, options, true));
    PASS();
}
/* Frees the first `n` strings of `languages` and then the array itself. */
static void free_test_languages(char **languages, size_t n) {
    if (languages == NULL) {
        return;
    }
    for (size_t i = 0; i < n; i++) {
        free(languages[i]);
    }
    free(languages);
}

/*
 * Runs the expansion check with an explicit language list.
 *
 * input, output, options, root - forwarded to
 *                                test_expansion_contains_phrase_option
 * num_languages - number of `char *` language codes to read from `args`
 * args          - already-started va_list holding the language codes; it is
 *                 not read at all when num_languages is 0
 *
 * The codes are copied into a temporary array that is installed in
 * options.languages for the duration of the call. The copies are released
 * on every path, including validation failures and a failing inner check.
 */
static greatest_test_res test_expansion_contains_phrase_option_with_languages(char *input, char *output, libpostal_normalize_options_t options, bool root, size_t num_languages, va_list args) {
    char **languages = NULL;

    if (num_languages > 0) {
        languages = malloc(num_languages * sizeof *languages);
        if (languages == NULL) {
            FAILm("could not allocate language list");
        }

        for (size_t i = 0; i < num_languages; i++) {
            char *lang = va_arg(args, char *);
            bool usable = lang != NULL && strlen(lang) < LIBPOSTAL_MAX_LANGUAGE_LEN;
            languages[i] = usable ? strdup(lang) : NULL;
            if (languages[i] == NULL) {
                /* Only entries [0, i) were successfully copied. */
                free_test_languages(languages, i);
                FAILm("invalid language code or out of memory");
            }
        }
    }

    /* With no languages this leaves languages == NULL and the count at 0. */
    options.languages = languages;
    options.num_languages = num_languages;

    /* Capture the result so the copies can be freed before returning it. */
    greatest_test_res result = test_expansion_contains_phrase_option(input, output, options, root);
    free_test_languages(languages, num_languages);

    CHECK_CALL(result);
    PASS();
}
/*
 * Variadic front end for the non-root expansion check.
 *
 * The trailing arguments are `num_languages` language codes (`char *`).
 * With num_languages == 0 any trailing arguments are ignored.
 *
 * The va_list is always started before it is handed on and always ended
 * before this function returns, including when the inner check fails.
 */
static greatest_test_res test_expansion_contains_with_languages(char *input, char *output, libpostal_normalize_options_t options, size_t num_languages, ...) {
    va_list args;
    va_start(args, num_languages);
    greatest_test_res result = test_expansion_contains_phrase_option_with_languages(input, output, options, false, num_languages, args);
    va_end(args);

    CHECK_CALL(result);
    PASS();
}
/*
 * Variadic front end for the root expansion check.
 *
 * The trailing arguments are `num_languages` language codes (`char *`).
 * With num_languages == 0 any trailing arguments are ignored.
 *
 * The va_list is always started before it is handed on and always ended
 * before this function returns, including when the inner check fails.
 */
static greatest_test_res test_root_expansion_contains_with_languages(char *input, char *output, libpostal_normalize_options_t options, size_t num_languages, ...) {
    va_list args;
    va_start(args, num_languages);
    greatest_test_res result = test_expansion_contains_phrase_option_with_languages(input, output, options, true, num_languages, args);
    va_end(args);

    CHECK_CALL(result);
    PASS();
}
/*
 * Full-address expansions with an explicit single language and default
 * options. Rows are checked in order; the first failing row ends the test.
 */
TEST test_expansions(void) {
    /* input, expansion that must be present, language code */
    static const struct {
        char *input;
        char *expected;
        char *lang;
    } cases[] = {
        {"123 Main St. #2f", "123 main street number 2f", "en"},
        {"120 E 96th St", "120 east 96 street", "en"},
        {"120 E Ninety-sixth St", "120 east 96 street", "en"},
        {"4998 Vanderbilt Dr, Columbus, OH 43213", "4998 vanderbilt drive columbus ohio 43213", "en"},
        {"Nineteen oh one W El Segundo Blvd", "1901 west el segundo boulevard", "en"},
        {"S St. NW", "s street northwest", "en"},
        {"Quatre vingt douze Ave des Champs-Élysées", "92 avenue des champs-elysees", "fr"},
        {"Quatre vingt douze Ave des Champs-Élysées", "92 avenue des champs elysees", "fr"},
        {"Quatre vingt douze Ave des Champs-Élysées", "92 avenue des champselysees", "fr"},
        {"Marktstrasse", "markt strasse", "de"},
        {"Hoofdstraat", "hoofdstraat", "nl"},
        {"มงแตร", "มงแตร", "th"},
    };
    const size_t num_cases = sizeof(cases) / sizeof(cases[0]);

    libpostal_normalize_options_t options = libpostal_get_default_options();
    for (size_t idx = 0; idx < num_cases; idx++) {
        CHECK_CALL(test_expansion_contains_with_languages(cases[idx].input, cases[idx].expected, options, 1, cases[idx].lang));
    }
    PASS();
}
TEST test_expansion_for_non_address_input(void) {
size_t num_expansions;
// This is tested as the input caused a segfault in expand_alternative_phrase_option
char **expansions = libpostal_expand_address("ida-b@wells.co", libpostal_get_default_options(), &num_expansions);
libpostal_expansion_array_destroy(expansions, num_expansions);
PASS();
}
/*
 * Root expansions for street names, with address_components set to
 * STREET | ANY. Rows run in order and the first failure ends the test.
 */
TEST test_street_root_expansions(void) {
    /* input, root expansion that must be present (no explicit language) */
    static const struct {
        char *input;
        char *expected;
    } cases[] = {
        /* English - normal cases */
        {"Malcolm X Blvd", "malcolm x"},
        {"E 106th St", "106"},
        {"S Park Ave", "park"},
        {"Park South", "park"},
        {"Rev Dr. MLK Dr S", "martin luther king junior"},
        {"Rev Dr. Martin Luther King Jr Dr S", "martin luther king junior"},
        {"East 6th Street", "6th"},
        /* English - edge cases */
        {"Avenue B", "b"},
        {"Avenue C", "c"},
        {"Avenue D", "d"},
        {"Avenue E", "e"},
        {"Avenue N", "n"},
        {"U St SE", "u"},
        {"S Park", "park"},
        {"Park S", "park"},
        {"Avenue Rd", "avenue"},
        {"Broadway", "broadway"},
        {"E Broadway", "broadway"},
        {"E Center St", "center"},
        {"E Ctr St", "center"},
        {"E Center Street", "center"},
        {"E Ctr Street", "center"},
        {"Center St E", "center"},
        {"Ctr St E", "center"},
        {"Center Street E", "center"},
        {"Ctr Street E", "center"},
    };
    const size_t num_cases = sizeof(cases) / sizeof(cases[0]);

    libpostal_normalize_options_t options = libpostal_get_default_options();
    options.address_components = LIBPOSTAL_ADDRESS_STREET | LIBPOSTAL_ADDRESS_ANY;

    for (size_t idx = 0; idx < num_cases; idx++) {
        CHECK_CALL(test_root_expansion_contains(cases[idx].input, cases[idx].expected, options));
    }

    /* English edge case with two candidate languages supplied explicitly. */
    CHECK_CALL(test_root_expansion_contains_with_languages("W. UNION STREET", "union", options, 2, "en", "es"));

    /* Spanish */
    CHECK_CALL(test_root_expansion_contains("C/ Ocho", "8", options));
    PASS();
}
/*
 * Root expansions for house numbers, with address_components set to
 * HOUSE_NUMBER | ANY.
 */
TEST test_house_number_root_expansions(void) {
    /*
     * input, expected root expansion, language code. A NULL language means
     * the row is checked without an explicit language list.
     */
    static const struct {
        char *input;
        char *expected;
        char *lang;
    } cases[] = {
        /* English - normal cases */
        {"1A", "1 a", NULL},
        {"A1", "a 1", NULL},
        {"1", "1", NULL},
        {"# 1", "1", "en"},
        {"No. 1", "1", "en"},
        {"House No. 1", "1", "en"},
        {"House #1", "1", "en"},
    };
    const size_t num_cases = sizeof(cases) / sizeof(cases[0]);

    libpostal_normalize_options_t options = libpostal_get_default_options();
    options.address_components = LIBPOSTAL_ADDRESS_HOUSE_NUMBER | LIBPOSTAL_ADDRESS_ANY;

    for (size_t idx = 0; idx < num_cases; idx++) {
        if (cases[idx].lang == NULL) {
            CHECK_CALL(test_root_expansion_contains(cases[idx].input, cases[idx].expected, options));
        } else {
            CHECK_CALL(test_root_expansion_contains_with_languages(cases[idx].input, cases[idx].expected, options, 1, cases[idx].lang));
        }
    }
    PASS();
}
/*
 * Root expansions for floor/level strings, with address_components set to
 * LEVEL | ANY. Every row is checked with the single language "en".
 */
TEST test_level_root_expansions(void) {
    static const struct {
        char *input;
        char *expected;
    } cases[] = {
        /* English - normal cases */
        {"1st Fl", "1"},
        {"1st Floor", "1"},
        {"First Fl", "1"},
        {"First Floor", "1"},
        {"2nd Fl", "2"},
        {"2nd Floor", "2"},
        {"Second Fl", "2"},
        {"Second Floor", "2"},
        {"Fl #1", "1"},
        {"Fl No. 1", "1"},
        {"Floor No. 1", "1"},
        /* Specifiers */
        {"SB 1", "sub basement 1"},
        {"Bsmt", "basement"},
        {"Bsmt 1", "basement 1"},
        {"1G", "1 ground"},
        {"G", "ground"},
    };
    const size_t num_cases = sizeof(cases) / sizeof(cases[0]);

    libpostal_normalize_options_t options = libpostal_get_default_options();
    options.address_components = LIBPOSTAL_ADDRESS_LEVEL | LIBPOSTAL_ADDRESS_ANY;

    for (size_t idx = 0; idx < num_cases; idx++) {
        CHECK_CALL(test_root_expansion_contains_with_languages(cases[idx].input, cases[idx].expected, options, 1, "en"));
    }
    PASS();
}
/*
 * Root expansions for unit/apartment strings, with address_components set
 * to UNIT | ANY. Every row is checked with the single language "en".
 */
TEST test_unit_root_expansions(void) {
    static const struct {
        char *input;
        char *expected;
    } cases[] = {
        /* English - normal cases */
        {"1A", "1 a"},
        {"A1", "a 1"},
        {"Apt 101", "101"},
        {"Apt No 101", "101"},
        {"Apt #101", "101"},
        {"Apartment 101", "101"},
        {"Apartment #101", "101"},
        {"Ste 101", "101"},
        {"Ste No 101", "101"},
        {"Ste #101", "101"},
        {"Suite 101", "101"},
        {"Suite #101", "101"},
        /* Specifiers */
        {"PH 1", "penthouse 1"},
        {"PH1", "penthouse 1"},
        {"Penthouse 1", "penthouse 1"},
        {"1L", "1l"},
        {"1L", "1 left"},
        /* NOTE(review): the 1F and 1R rows are exact duplicates; a second
         * expected form may have been intended - confirm. */
        {"1F", "1f"},
        {"1F", "1f"},
        {"1R", "1r"},
        {"1R", "1r"},
    };
    const size_t num_cases = sizeof(cases) / sizeof(cases[0]);

    libpostal_normalize_options_t options = libpostal_get_default_options();
    options.address_components = LIBPOSTAL_ADDRESS_UNIT | LIBPOSTAL_ADDRESS_ANY;

    for (size_t idx = 0; idx < num_cases; idx++) {
        CHECK_CALL(test_root_expansion_contains_with_languages(cases[idx].input, cases[idx].expected, options, 1, "en"));
    }
    PASS();
}
/*
 * Root expansions for PO box strings, with address_components set to
 * PO_BOX | ANY: both spellings must yield the bare box number.
 */
TEST test_po_box_root_expansions(void) {
    static char *const inputs[] = {"PO Box 1234", "PO Box #1234"};
    const size_t num_inputs = sizeof(inputs) / sizeof(inputs[0]);

    libpostal_normalize_options_t options = libpostal_get_default_options();
    options.address_components = LIBPOSTAL_ADDRESS_PO_BOX | LIBPOSTAL_ADDRESS_ANY;

    for (size_t idx = 0; idx < num_inputs; idx++) {
        CHECK_CALL(test_root_expansion_contains_with_languages(inputs[idx], "1234", options, 1, "en"));
    }
    PASS();
}
/*
 * Expansions with no language list supplied (num_languages == 0), so the
 * options passed down carry languages == NULL.
 */
TEST test_expansions_language_classifier(void) {
    static const struct {
        char *input;
        char *expected;
    } cases[] = {
        {"V XX Sett", "via 20 settembre"},
        {"C/ Ocho", "calle 8"},
    };
    const size_t num_cases = sizeof(cases) / sizeof(cases[0]);

    libpostal_normalize_options_t options = libpostal_get_default_options();
    for (size_t idx = 0; idx < num_cases; idx++) {
        CHECK_CALL(test_expansion_contains_with_languages(cases[idx].input, cases[idx].expected, options, 0, NULL));
    }
    PASS();
}
/*
 * With every normalization flag below switched off, the input must appear
 * unchanged among its own expansions.
 */
TEST test_expansions_no_options(void) {
    char *address = "120 E 96th St New York";
    libpostal_normalize_options_t options = libpostal_get_default_options();

    /* Case, script and accent handling */
    options.lowercase = false;
    options.latin_ascii = false;
    options.transliterate = false;
    options.strip_accents = false;
    options.decompose = false;

    /* Whitespace and parentheticals */
    options.trim_string = false;
    options.drop_parentheticals = false;

    /* Hyphens and alpha/numeric splitting */
    options.replace_numeric_hyphens = false;
    options.delete_numeric_hyphens = false;
    options.split_alpha_from_numeric = false;
    options.replace_word_hyphens = false;
    options.delete_word_hyphens = false;

    /* Periods, possessives and apostrophes */
    options.delete_final_periods = false;
    options.delete_acronym_periods = false;
    options.drop_english_possessives = false;
    options.delete_apostrophes = false;

    /* Numeric expressions */
    options.expand_numex = false;
    options.roman_numerals = false;

    CHECK_CALL(test_expansion_contains_with_languages(address, address, options, 0, NULL));
    PASS();
}
/*
 * Suite entry point for the expansion tests. Sets up libpostal and then the
 * language classifier (the classifier setup is only attempted when the
 * first setup succeeded), exits the process if either fails, runs the
 * tests, and tears both down afterwards.
 */
SUITE(libpostal_expansion_tests) {
    bool ready = libpostal_setup();
    if (ready) {
        ready = libpostal_setup_language_classifier();
    }
    if (!ready) {
        printf("Could not setup libpostal\n");
        exit(EXIT_FAILURE);
    }

    RUN_TEST(test_expansions);
    RUN_TEST(test_street_root_expansions);
    RUN_TEST(test_house_number_root_expansions);
    RUN_TEST(test_level_root_expansions);
    RUN_TEST(test_unit_root_expansions);
    RUN_TEST(test_po_box_root_expansions);
    RUN_TEST(test_expansions_language_classifier);
    RUN_TEST(test_expansions_no_options);
    RUN_TEST(test_expansion_for_non_address_input);

    libpostal_teardown();
    libpostal_teardown_language_classifier();
}

87
test/test_numex.c Normal file
View File

@@ -0,0 +1,87 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdarg.h>
#include "greatest.h"
#include "../src/numex.h"
SUITE(libpostal_numex_tests);
/*
 * Checks that replacing numeric expressions in `input` for language `lang`
 * yields `output`.
 *
 * A NULL result from replace_numeric_expressions is treated as "input left
 * as-is", so in that case `output` is compared against `input` itself.
 *
 * The comparison is done before the assertion so that the returned buffer
 * is freed even when the test fails; on a mismatch the input, language,
 * expected and actual strings are printed.
 */
static greatest_test_res test_numex(char *input, char *output, char *lang) {
    char *normalized = replace_numeric_expressions(input, lang);
    const char *actual = (normalized != NULL) ? normalized : input;

    bool matches = strcmp(output, actual) == 0;
    if (!matches) {
        printf("numex(\"%s\", lang=%s): expected \"%s\", got \"%s\"\n", input, lang, output, actual);
    }

    /* `actual` may alias `normalized`, so free only after printing. */
    free(normalized);

    ASSERT(matches);
    PASS();
}
/*
 * Numeric-expression replacement across several languages. Rows run in
 * order and the first failing row ends the test.
 */
TEST test_numeric_expressions(void) {
    /* input, expected output, language code */
    static const struct {
        char *input;
        char *expected;
        char *lang;
    } cases[] = {
        /* English numbers */
        {"five hundred ninety-three", "593", "en"},
        {"five hundred and ninety-three", "593", "en"},
        {"fourth and a", "4th and a", "en"},
        {"foo and bar", "foo and bar", "en"},
        {"thirty west twenty-sixth street", "30 west 26th street", "en"},
        {"five and sixth", "5 and 6th", "en"},
        {"three hundred thousand nineteenhundred and forty-fifth", "301945th", "en"},
        {"seventeen eighty", "1780", "en"},
        {"ten oh four", "1004", "en"},
        {"ten and four", "10 and 4", "en"},
        /* French (Celtic-style) numbers */
        {"quatre-vingt-douze", "92", "fr"},
        {"quatre vingt douze", "92", "fr"},
        {"quatre vingts", "80", "fr"},
        {"soixante-et-onze", "71", "fr"},
        {"soixante-cinq", "65", "fr"},
        /* French (Belgian/Swiss) numbers */
        {"nonante-deux", "92", "fr"},
        {"septante-cinq", "75", "fr"},
        /* German numbers */
        {"sechs-und-fünfzig", "56", "de"},
        {"eins", "1", "de"},
        {"dreiundzwanzigste strasse", "23. strasse", "de"},
        /* Italian numbers */
        {"millenovecentonovantadue", "1992", "it"},
        {"ventiquattro", "24", "it"},
        /* Spanish numbers */
        {"tricentesima primera", "301.ª", "es"},
        /* Roman numerals (la=Latin) */
        {"via xx settembre", "via 20 settembre", "la"},
        {"mcccxlix anno domini", "1349 anno domini", "la"},
        {"str. st. nazionale dei giovi, milano", "str. st. nazionale dei giovi, milano", "la"},
        /* Japanese numbers */
        {"百二十", "120", "ja"},
        /* Korean numbers */
        {"천구백구십이", "1992", "ko"},
    };
    const size_t num_cases = sizeof(cases) / sizeof(cases[0]);

    for (size_t idx = 0; idx < num_cases; idx++) {
        CHECK_CALL(test_numex(cases[idx].input, cases[idx].expected, cases[idx].lang));
    }
    PASS();
}
/*
 * Suite entry point for the numex tests: loads the numex module from
 * DEFAULT_NUMEX_PATH (exiting the process on failure), runs the tests and
 * tears the module down.
 */
GREATEST_SUITE(libpostal_numex_tests) {
    bool loaded = numex_module_setup(DEFAULT_NUMEX_PATH);
    if (!loaded) {
        printf("Could not load numex module\n");
        exit(EXIT_FAILURE);
    }

    RUN_TEST(test_numeric_expressions);

    numex_module_teardown();
}

1913
test/test_parser.c Normal file

File diff suppressed because it is too large Load Diff

342
test/test_string_utils.c Normal file
View File

@@ -0,0 +1,342 @@
#include <stdio.h>
#include "greatest.h"
#include "../src/features.h"
#include "../src/scanner.h"
#include "../src/string_utils.h"
SUITE(libpostal_string_utils_tests);
/*
 * Reversing a UTF-8 string must reverse it character by character (the
 * multi-byte characters stay intact) rather than byte by byte.
 */
TEST test_utf8_reverse(void) {
    char *s = "Bünderstraße";
    char *rev = utf8_reversed_string(s);
    if (rev == NULL) {
        FAIL();
    }

    /* Expected value first, so greatest labels Expected/Got correctly. */
    ASSERT_STR_EQ("eßartsrednüB", rev);

    free(rev);
    PASS();
}
/*
 * Walks a three-character UTF-8 string from its end towards its start.
 * Each step must report a 2-byte character (code points 837, 787, 937, in
 * that order); a final call at offset 0 must report length 0 and ch == -1.
 */
TEST test_utf8proc_iterate_reversed(void) {
    /* Bytes of U+03A9 U+0313 U+0345, two bytes each. */
    char *encoded = "\xce\xa9\xcc\x93\xcd\x85";
    uint8_t *bytes = (uint8_t *)encoded;
    size_t offset = strlen(encoded);

    int32_t ch;
    ssize_t char_len;

    /* Last character */
    char_len = utf8proc_iterate_reversed(bytes, offset, &ch);
    ASSERT_EQ(char_len, 2);
    ASSERT_EQ(ch, 837);
    offset -= char_len;

    /* Middle character */
    char_len = utf8proc_iterate_reversed(bytes, offset, &ch);
    ASSERT_EQ(char_len, 2);
    ASSERT_EQ(ch, 787);
    offset -= char_len;

    /* First character */
    char_len = utf8proc_iterate_reversed(bytes, offset, &ch);
    ASSERT_EQ(char_len, 2);
    ASSERT_EQ(ch, 937);
    offset -= char_len;

    /* Nothing left to read */
    char_len = utf8proc_iterate_reversed(bytes, offset, &ch);
    ASSERT_EQ(char_len, 0);
    ASSERT_EQ(ch, -1);

    PASS();
}
/*
 * The common prefix of a word and its hyphenated spelling, ignoring
 * separators, is expected to be 14.
 */
TEST test_utf8_compare_ignore_separators(void) {
    char *joined = "Bünderstraße";
    char *hyphenated = "Bünder-straße";

    size_t prefix = utf8_common_prefix_ignore_separators(joined, hyphenated);
    ASSERT_EQ(prefix, 14);

    PASS();
}
/*
 * Compares pairs of strings that differ only in separators (spaces and
 * hyphens) and one pair that also differs in its last characters.
 *
 * NOTE(review): despite the test's name, every call below goes to
 * utf8_common_prefix_ignore_separators, and its result is stored in a bool,
 * so `equal` only records whether that result was non-zero. A dedicated
 * equality helper may have been intended - confirm against string_utils.h.
 * NOTE(review): this test is not listed among the RUN_TEST calls in
 * SUITE(libpostal_string_utils_tests) in this file, so it appears not to run.
 */
TEST test_utf8_equal_ignore_separators(void) {
/* Trailing space on one side, hyphen on the other. */
char *str1 = "Bünderstraße ";
char *str2 = "Bünder-straße";
bool equal = utf8_common_prefix_ignore_separators(str1, str2);
ASSERT(equal);
/* Leading/trailing spaces and hyphen vs. inner space. */
str1 = " Bünder-straße ";
str2 = "Bünder straße";
equal = utf8_common_prefix_ignore_separators(str1, str2);
ASSERT(equal);
/* Same words but a different final token ("a" vs. "aa"). */
str1 = "Bünder-straße-a";
str2 = "Bünder straße aa";
equal = utf8_common_prefix_ignore_separators(str1, str2);
ASSERT_FALSE(equal);
PASS();
}
/*
 * feature_array_add must store one string per call, with the call's pieces
 * joined by '|' (see the expected values below), and the reported token
 * length must match strlen of the stored string.
 */
TEST test_feature_array_add(void) {
    cstring_array *features = cstring_array_new();
    if (features == NULL) {
        FAIL();
    }

    feature_array_add(features, 3, "a", "foo", "blee");
    feature_array_add(features, 1, "b");
    ASSERT_EQ(cstring_array_num_strings(features), 2);

    /* First feature: three pieces. */
    char *feature = cstring_array_get_string(features, 0);
    size_t len = cstring_array_token_length(features, 0);
    if (feature == NULL) {
        cstring_array_destroy(features);
        FAIL();
    }
    /* Expected value first, so greatest labels Expected/Got correctly. */
    ASSERT_STR_EQ("a|foo|blee", feature);
    ASSERT_EQ(len, strlen(feature));

    /* Second feature: a single piece. */
    feature = cstring_array_get_string(features, 1);
    len = cstring_array_token_length(features, 1);
    if (feature == NULL) {
        cstring_array_destroy(features);
        FAIL();
    }
    ASSERT_STR_EQ("b", feature);
    ASSERT_EQ(len, strlen(feature));

    /*
     * NOTE(review): `features` is not destroyed after this call; presumably
     * cstring_array_to_strings takes ownership of it - confirm.
     */
    char **strings = cstring_array_to_strings(features);
    if (strings == NULL) {
        FAIL();
    }
    ASSERT_STR_EQ("a|foo|blee", strings[0]);
    free(strings[0]);
    ASSERT_STR_EQ("b", strings[1]);
    free(strings[1]);
    free(strings);

    PASS();
}
/*
 * Exercises the char_array builders: plain and reversed concatenation,
 * printf-style appends, conversion to a C string, trimming, and joining
 * pieces with a separator.
 */
TEST test_char_array(void) {
    char_array *str = char_array_new();
    if (str == NULL) {
        FAIL();
    }

    char_array_cat(str, "Bürgermeister");
    char_array_cat(str, "|");
    char_array_cat_reversed(str, "straße");
    /* Expected value first, so greatest labels Expected/Got correctly. */
    ASSERT_STR_EQ("Bürgermeister|eßarts", str->a);

    char_array_cat_printf(str, " %d %s %.2f \t ", 1234, "onetwothreefour", 12.34);
    char *expected_output = "Bürgermeister|eßarts 1234 onetwothreefour 12.34 \t ";
    ASSERT_STR_EQ(expected_output, str->a);

    /*
     * NOTE(review): the first array is not destroyed below; presumably
     * char_array_to_string hands its buffer over to the caller - confirm.
     */
    char *a = char_array_to_string(str);
    ASSERT_STR_EQ(expected_output, a);

    /* Trimming drops the trailing " \t " and returns a separate buffer. */
    char *b = string_trim(a);
    ASSERT_STR_EQ("Bürgermeister|eßarts 1234 onetwothreefour 12.34", b);
    free(a);
    free(b);

    str = char_array_new();
    if (str == NULL) {
        FAIL();
    }

    #define SEPARATOR "|*|*|*|"
    /*
     * The first piece already ends with the separator; the expected result
     * contains the separator only once between consecutive pieces.
     */
    char_array_add_joined(str, SEPARATOR, true, 3, "dictionaries" SEPARATOR, "foo", "bar");
    a = char_array_get_string(str);
    ASSERT_STR_EQ("dictionaries|*|*|*|foo|*|*|*|bar", a);

    char_array_destroy(str);
    PASS();
}
/*
 * Splitting "The|Low|End|Theory" on '|' must produce exactly four tokens,
 * in order. The array is destroyed before failing on a missing token so
 * that this path does not leak it.
 */
TEST test_cstring_array(void) {
    static const char *const expected[] = {"The", "Low", "End", "Theory"};
    const size_t num_expected = sizeof(expected) / sizeof(expected[0]);

    size_t count = 0;
    /*
     * NOTE(review): the strdup'd buffer is never freed directly here;
     * presumably the "no_copy" split adopts it and cstring_array_destroy
     * releases it - confirm.
     */
    cstring_array *array = cstring_array_split_no_copy(strdup("The|Low|End|Theory"), '|', &count);
    if (array == NULL) {
        FAIL();
    }
    ASSERT_EQ(count, num_expected);

    for (size_t i = 0; i < num_expected; i++) {
        char *str = cstring_array_get_string(array, i);
        if (str == NULL) {
            cstring_array_destroy(array);
            FAIL();
        }
        /* Expected value first, so greatest labels Expected/Got correctly. */
        ASSERT_STR_EQ(expected[i], str);
    }

    cstring_array_destroy(array);
    PASS();
}
/*
 * Builds a string tree of four tokens with 1, 2, 3 and 2 alternatives
 * (as asserted below) and walks its iterator through all 12 combinations,
 * checking the per-token alternative index in iter->path at every step.
 * The expected sequence shows the last token's index changing fastest.
 */
TEST test_string_tree(void) {
string_tree_t *tree = string_tree_new();
if (tree == NULL) {
FAIL();
}
/* Token 0: finalized without adding any string. */
string_tree_finalize_token(tree);
/* Token 1: two alternatives. */
string_tree_add_string(tree, "Twenty-fifth");
string_tree_add_string(tree, "Twentyfifth");
string_tree_finalize_token(tree);
/* Token 2: three alternatives. */
string_tree_add_string(tree, "Bürgermeister");
string_tree_add_string(tree, "Buergermeister");
string_tree_add_string(tree, "Burgermeister");
string_tree_finalize_token(tree);
/* Token 3: two alternatives. */
string_tree_add_string(tree, "Straße");
string_tree_add_string(tree, "Strasse");
string_tree_finalize_token(tree);
/* token_indices holds one more entry than there are tokens. */
ASSERT_EQ(tree->token_indices->n - 1, 4);
/* The token finalized with no strings still counts as one alternative. */
ASSERT_EQ(string_tree_num_alternatives(tree, 0), 1);
ASSERT_EQ(string_tree_num_alternatives(tree, 1), 2);
ASSERT_EQ(string_tree_num_alternatives(tree, 2), 3);
ASSERT_EQ(string_tree_num_alternatives(tree, 3), 2);
string_tree_iterator_t *iter = string_tree_iterator_new(tree);
if (iter == NULL) {
string_tree_destroy(tree);
FAIL();
}
size_t expected_num_tokens = 4;
ASSERT_EQ(iter->num_tokens, expected_num_tokens);
/* 1 * 2 * 3 * 2 combinations in total. */
ASSERT_EQ(iter->remaining, 12);
/* Combination 1 of 12: path 0,0,0,0 */
ASSERT_FALSE(string_tree_iterator_done(iter));
ASSERT_EQ(iter->path[0], 0);
ASSERT_EQ(iter->path[1], 0);
ASSERT_EQ(iter->path[2], 0);
ASSERT_EQ(iter->path[3], 0);
/* Combination 2 of 12: path 0,0,0,1 */
string_tree_iterator_next(iter);
ASSERT_FALSE(string_tree_iterator_done(iter));
ASSERT_EQ(iter->path[0], 0);
ASSERT_EQ(iter->path[1], 0);
ASSERT_EQ(iter->path[2], 0);
ASSERT_EQ(iter->path[3], 1);
/* Combination 3 of 12: path 0,0,1,0 */
string_tree_iterator_next(iter);
ASSERT_FALSE(string_tree_iterator_done(iter));
ASSERT_EQ(iter->path[0], 0);
ASSERT_EQ(iter->path[1], 0);
ASSERT_EQ(iter->path[2], 1);
ASSERT_EQ(iter->path[3], 0);
/* Combination 4 of 12: path 0,0,1,1 */
string_tree_iterator_next(iter);
ASSERT_FALSE(string_tree_iterator_done(iter));
ASSERT_EQ(iter->path[0], 0);
ASSERT_EQ(iter->path[1], 0);
ASSERT_EQ(iter->path[2], 1);
ASSERT_EQ(iter->path[3], 1);
/* Combination 5 of 12: path 0,0,2,0 */
string_tree_iterator_next(iter);
ASSERT_FALSE(string_tree_iterator_done(iter));
ASSERT_EQ(iter->path[0], 0);
ASSERT_EQ(iter->path[1], 0);
ASSERT_EQ(iter->path[2], 2);
ASSERT_EQ(iter->path[3], 0);
/* Combination 6 of 12: path 0,0,2,1 */
string_tree_iterator_next(iter);
ASSERT_FALSE(string_tree_iterator_done(iter));
ASSERT_EQ(iter->path[0], 0);
ASSERT_EQ(iter->path[1], 0);
ASSERT_EQ(iter->path[2], 2);
ASSERT_EQ(iter->path[3], 1);
/* Combination 7 of 12: path 0,1,0,0 */
string_tree_iterator_next(iter);
ASSERT_FALSE(string_tree_iterator_done(iter));
ASSERT_EQ(iter->path[0], 0);
ASSERT_EQ(iter->path[1], 1);
ASSERT_EQ(iter->path[2], 0);
ASSERT_EQ(iter->path[3], 0);
/* Combination 8 of 12: path 0,1,0,1 */
string_tree_iterator_next(iter);
ASSERT_FALSE(string_tree_iterator_done(iter));
ASSERT_EQ(iter->path[0], 0);
ASSERT_EQ(iter->path[1], 1);
ASSERT_EQ(iter->path[2], 0);
ASSERT_EQ(iter->path[3], 1);
/* Combination 9 of 12: path 0,1,1,0 */
string_tree_iterator_next(iter);
ASSERT_FALSE(string_tree_iterator_done(iter));
ASSERT_EQ(iter->path[0], 0);
ASSERT_EQ(iter->path[1], 1);
ASSERT_EQ(iter->path[2], 1);
ASSERT_EQ(iter->path[3], 0);
/* Combination 10 of 12: path 0,1,1,1 */
string_tree_iterator_next(iter);
ASSERT_FALSE(string_tree_iterator_done(iter));
ASSERT_EQ(iter->path[0], 0);
ASSERT_EQ(iter->path[1], 1);
ASSERT_EQ(iter->path[2], 1);
ASSERT_EQ(iter->path[3], 1);
/* Combination 11 of 12: path 0,1,2,0 */
string_tree_iterator_next(iter);
ASSERT_FALSE(string_tree_iterator_done(iter));
ASSERT_EQ(iter->path[0], 0);
ASSERT_EQ(iter->path[1], 1);
ASSERT_EQ(iter->path[2], 2);
ASSERT_EQ(iter->path[3], 0);
/* Combination 12 of 12: path 0,1,2,1 */
string_tree_iterator_next(iter);
ASSERT_FALSE(string_tree_iterator_done(iter));
ASSERT_EQ(iter->path[0], 0);
ASSERT_EQ(iter->path[1], 1);
ASSERT_EQ(iter->path[2], 2);
ASSERT_EQ(iter->path[3], 1);
/* The iterator is not advanced past the last combination in this test. */
string_tree_iterator_destroy(iter);
string_tree_destroy(tree);
PASS();
}
/*
 * Suite entry point for the string utility tests.
 * NOTE(review): test_utf8_equal_ignore_separators is defined in this file
 * but is not run here - confirm whether that omission is intentional.
 */
SUITE(libpostal_string_utils_tests) {
RUN_TEST(test_utf8_reverse);
RUN_TEST(test_utf8proc_iterate_reversed);
RUN_TEST(test_utf8_compare_ignore_separators);
RUN_TEST(test_feature_array_add);
RUN_TEST(test_char_array);
RUN_TEST(test_cstring_array);
RUN_TEST(test_string_tree);
}

46
test/test_transliterate.c Normal file
View File

@@ -0,0 +1,46 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdarg.h>
#include "greatest.h"
#include "../src/transliterate.h"
SUITE(libpostal_transliteration_tests);
/*
 * Runs the transliterator named `trans_name` over `input` and checks that
 * the result equals `output`.
 *
 * A NULL result is reported as an ordinary assertion failure instead of
 * being handed to the string comparison.
 */
static greatest_test_res test_transliteration(char *trans_name, char *input, char *output) {
    char *transliterated = transliterate(trans_name, input, strlen(input));
    ASSERT(transliterated != NULL);

    ASSERT_STR_EQ(output, transliterated);

    free(transliterated);
    PASS();
}
/*
 * One or more sample conversions per transliterator. Rows run in order and
 * the first failing row ends the test.
 */
TEST test_transliterators(void) {
    /* transliterator name, input, expected output */
    static const struct {
        char *name;
        char *input;
        char *expected;
    } cases[] = {
        {"greek-latin", "διαφορετικούς", "diaphoretikoús̱"},
        {"devanagari-latin", "ज़", "za"},
        {"arabic-latin", "شارع", "sẖạrʿ"},
        {"cyrillic-latin", "улица", "ulica"},
        {"russian-latin-bgn", "улица", "ulitsa"},
        {"hebrew-latin", "רחוב", "rẖwb"},
        {"latin-ascii", "foo &amp; bar", "foo & bar"},
        {"latin-ascii-simple", "eschenbräu bräurei triftstraße 67½ &amp; foo", "eschenbräu bräurei triftstraße 67½ & foo"},
        {"han-latin", "街𠀀abcdef", "jiēhēabcdef"},
        {"katakana-latin", "ドウ", "dou"},
        {"hiragana-latin", "どう", "dou"},
        {"latin-ascii-simple", "at&t", "at&t"},
        {"latin-ascii-simple", "at&amp;t", "at&t"},
    };
    const size_t num_cases = sizeof(cases) / sizeof(cases[0]);

    for (size_t idx = 0; idx < num_cases; idx++) {
        CHECK_CALL(test_transliteration(cases[idx].name, cases[idx].input, cases[idx].expected));
    }
    PASS();
}
/*
 * Suite entry point for the transliteration tests: loads the module from
 * DEFAULT_TRANSLITERATION_PATH (exiting the process on failure), runs the
 * tests and tears the module down.
 */
GREATEST_SUITE(libpostal_transliteration_tests) {
    bool loaded = transliteration_module_setup(DEFAULT_TRANSLITERATION_PATH);
    if (!loaded) {
        printf("Could not load transliterator module\n");
        exit(EXIT_FAILURE);
    }

    RUN_TEST(test_transliterators);

    transliteration_module_teardown();
}

61
test/test_trie.c Normal file
View File

@@ -0,0 +1,61 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdarg.h>
#include "greatest.h"
#include "../src/scanner.h"
#include "../src/trie.h"
#include "../src/trie_search.h"
SUITE(libpostal_trie_tests);
/*
 * Adds `key` with value `data` to `trie`, then reads the key back and
 * checks that the stored value matches.
 */
static greatest_test_res test_trie_add_get(trie_t *trie, char *key, uint32_t data) {
bool added = trie_add(trie, key, data);
ASSERT(added);
/* Only read after the lookup below has been asserted to succeed. */
uint32_t trie_data;
bool fetched = trie_get_data(trie, key, &trie_data);
ASSERT(fetched);
ASSERT_EQ(data, trie_data);
PASS();
}
/*
 * Populates `trie` with the fixture keys, verifying each insertion by
 * reading it back. Two keys ("st rt" and "st rd") share the value 3.
 */
static greatest_test_res test_trie_setup(trie_t *trie) {
    static const struct {
        char *key;
        uint32_t data;
    } entries[] = {
        {"st", 1},
        {"street", 2},
        {"st rt", 3},
        {"st rd", 3},
        {"state route", 4},
        {"maine", 5},
    };
    const size_t num_entries = sizeof(entries) / sizeof(entries[0]);

    for (size_t idx = 0; idx < num_entries; idx++) {
        CHECK_CALL(test_trie_add_get(trie, entries[idx].key, entries[idx].data));
    }
    PASS();
}
/*
 * Builds the fixture trie, tokenizes "main st r 20" (keeping whitespace
 * tokens) and searches the tokens for phrases stored in the trie.
 * Exactly one phrase is expected, starting at token index 2 and spanning
 * one token - presumably the "st" token, with the whitespace between
 * "main" and "st" counted as token 1; TODO confirm against the tokenizer.
 */
TEST test_trie(void) {
trie_t *trie = trie_new();
ASSERT(trie != NULL);
CHECK_CALL(test_trie_setup(trie));
char *input = "main st r 20";
token_array *tokens = tokenize_keep_whitespace(input);
phrase_array *phrases = trie_search_tokens(trie, input, tokens);
ASSERT(phrases != NULL);
ASSERT(phrases->n == 1);
phrase_t phrase = phrases->a[0];
ASSERT(phrase.start == 2);
ASSERT(phrase.len == 1);
/* Cleanup is only reached when every assertion above has passed. */
phrase_array_destroy(phrases);
token_array_destroy(tokens);
trie_destroy(trie);
PASS();
}
/* Suite entry point for the trie tests; runs test_trie. */
GREATEST_SUITE(libpostal_trie_tests) {
RUN_TEST(test_trie);
}