Initial fork commit
This commit is contained in:
BIN
test/.libs/test_libpostal
Executable file
BIN
test/.libs/test_libpostal
Executable file
Binary file not shown.
14
test/Makefile.am
Normal file
14
test/Makefile.am
Normal file
@@ -0,0 +1,14 @@
|
||||
CFLAGS_BASE = -Wfloat-equal -Wpointer-arith -std=gnu99 -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g
|
||||
CFLAGS_O0 = $(CFLAGS_BASE) -O0
|
||||
CFLAGS_O1 = $(CFLAGS_BASE) -O1
|
||||
CFLAGS_O2 = $(CFLAGS_BASE) -O2
|
||||
CFLAGS_O3 = $(CFLAGS_BASE) -O3
|
||||
DEFAULT_INCLUDES = -I.. -I/usr/local/include
|
||||
|
||||
CFLAGS = $(CFLAGS_BASE)
|
||||
|
||||
TESTS = test_libpostal
|
||||
noinst_PROGRAMS = test_libpostal
|
||||
test_libpostal_SOURCES = test.c test_expand.c test_parser.c test_transliterate.c test_numex.c test_trie.c test_string_utils.c test_crf_context.c ../src/strndup.c ../src/file_utils.c ../src/string_utils.c ../src/utf8proc/utf8proc.c ../src/trie.c ../src/trie_search.c ../src/transliterate.c ../src/numex.c ../src/features.c
|
||||
test_libpostal_LDADD = ../src/libpostal.la ../src/libscanner.la $(CBLAS_LIBS)
|
||||
test_libpostal_CFLAGS = $(CFLAGS_O3)
|
||||
1356
test/Makefile.in
Normal file
1356
test/Makefile.in
Normal file
File diff suppressed because it is too large
Load Diff
908
test/greatest.h
Normal file
908
test/greatest.h
Normal file
@@ -0,0 +1,908 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Scott Vokes <vokes.s@gmail.com>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef GREATEST_H
|
||||
#define GREATEST_H
|
||||
|
||||
/* 1.1.0 */
|
||||
#define GREATEST_VERSION_MAJOR 1
|
||||
#define GREATEST_VERSION_MINOR 1
|
||||
#define GREATEST_VERSION_PATCH 0
|
||||
|
||||
/* A unit testing system for C, contained in 1 file.
|
||||
* It doesn't use dynamic allocation or depend on anything
|
||||
* beyond ANSI C89.
|
||||
*
|
||||
* An up-to-date version can be found at:
|
||||
* https://github.com/silentbicycle/greatest/
|
||||
*/
|
||||
|
||||
|
||||
/*********************************************************************
|
||||
* Minimal test runner template
|
||||
*********************************************************************/
|
||||
#if 0
|
||||
#include "greatest.h"
|
||||
TEST foo_should_foo(void) {
|
||||
PASS();
|
||||
}
|
||||
static void setup_cb(void *data) {
|
||||
printf("setup callback for each test case\n");
|
||||
}
|
||||
static void teardown_cb(void *data) {
|
||||
printf("teardown callback for each test case\n");
|
||||
}
|
||||
SUITE(suite) {
|
||||
/* Optional setup/teardown callbacks which will be run before/after
|
||||
* every test case. If using a test suite, they will be cleared when
|
||||
* the suite finishes. */
|
||||
SET_SETUP(setup_cb, voidp_to_callback_data);
|
||||
SET_TEARDOWN(teardown_cb, voidp_to_callback_data);
|
||||
RUN_TEST(foo_should_foo);
|
||||
}
|
||||
/* Add definitions that need to be in the test runner's main file. */
|
||||
GREATEST_MAIN_DEFS();
|
||||
/* Set up, run suite(s) of tests, report pass/fail/skip stats. */
|
||||
int run_tests(void) {
|
||||
GREATEST_INIT(); /* init. greatest internals */
|
||||
/* List of suites to run (if any). */
|
||||
RUN_SUITE(suite);
|
||||
/* Tests can also be run directly, without using test suites. */
|
||||
RUN_TEST(foo_should_foo);
|
||||
GREATEST_PRINT_REPORT(); /* display results */
|
||||
return greatest_all_passed();
|
||||
}
|
||||
/* main(), for a standalone command-line test runner.
|
||||
* This replaces run_tests above, and adds command line option
|
||||
* handling and exiting with a pass/fail status. */
|
||||
int main(int argc, char **argv) {
|
||||
GREATEST_MAIN_BEGIN(); /* init & parse command-line args */
|
||||
RUN_SUITE(suite);
|
||||
GREATEST_MAIN_END(); /* display results */
|
||||
}
|
||||
#endif
|
||||
/*********************************************************************/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/***********
|
||||
* Options *
|
||||
***********/
|
||||
|
||||
/* Default column width for non-verbose output. */
|
||||
#ifndef GREATEST_DEFAULT_WIDTH
|
||||
#define GREATEST_DEFAULT_WIDTH 72
|
||||
#endif
|
||||
|
||||
/* FILE *, for test logging. */
|
||||
#ifndef GREATEST_STDOUT
|
||||
#define GREATEST_STDOUT stdout
|
||||
#endif
|
||||
|
||||
/* Remove GREATEST_ prefix from most commonly used symbols? */
|
||||
#ifndef GREATEST_USE_ABBREVS
|
||||
#define GREATEST_USE_ABBREVS 1
|
||||
#endif
|
||||
|
||||
/* Set to 0 to disable all use of setjmp/longjmp. */
|
||||
#ifndef GREATEST_USE_LONGJMP
|
||||
#define GREATEST_USE_LONGJMP 1
|
||||
#endif
|
||||
|
||||
#if GREATEST_USE_LONGJMP
|
||||
#include <setjmp.h>
|
||||
#endif
|
||||
|
||||
/* Set to 0 to disable all use of time.h / clock(). */
|
||||
#ifndef GREATEST_USE_TIME
|
||||
#define GREATEST_USE_TIME 1
|
||||
#endif
|
||||
|
||||
#if GREATEST_USE_TIME
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
/* Floating point type, for ASSERT_IN_RANGE. */
|
||||
#ifndef GREATEST_FLOAT
|
||||
#define GREATEST_FLOAT double
|
||||
#define GREATEST_FLOAT_FMT "%g"
|
||||
#endif
|
||||
|
||||
/*********
|
||||
* Types *
|
||||
*********/
|
||||
|
||||
/* Info for the current running suite. */
|
||||
typedef struct greatest_suite_info {
|
||||
unsigned int tests_run;
|
||||
unsigned int passed;
|
||||
unsigned int failed;
|
||||
unsigned int skipped;
|
||||
|
||||
#if GREATEST_USE_TIME
|
||||
/* timers, pre/post running suite and individual tests */
|
||||
clock_t pre_suite;
|
||||
clock_t post_suite;
|
||||
clock_t pre_test;
|
||||
clock_t post_test;
|
||||
#endif
|
||||
} greatest_suite_info;
|
||||
|
||||
/* Type for a suite function. */
|
||||
typedef void (greatest_suite_cb)(void);
|
||||
|
||||
/* Types for setup/teardown callbacks. If non-NULL, these will be run
|
||||
* and passed the pointer to their additional data. */
|
||||
typedef void (greatest_setup_cb)(void *udata);
|
||||
typedef void (greatest_teardown_cb)(void *udata);
|
||||
|
||||
/* Type for an equality comparison between two pointers of the same type.
|
||||
* Should return non-0 if equal, otherwise 0.
|
||||
* UDATA is a closure value, passed through from ASSERT_EQUAL_T[m]. */
|
||||
typedef int greatest_equal_cb(const void *exp, const void *got, void *udata);
|
||||
|
||||
/* Type for a callback that prints a value pointed to by T.
|
||||
* Return value has the same meaning as printf's.
|
||||
* UDATA is a closure value, passed through from ASSERT_EQUAL_T[m]. */
|
||||
typedef int greatest_printf_cb(const void *t, void *udata);
|
||||
|
||||
/* Callbacks for an arbitrary type; needed for type-specific
|
||||
* comparisons via GREATEST_ASSERT_EQUAL_T[m].*/
|
||||
typedef struct greatest_type_info {
|
||||
greatest_equal_cb *equal;
|
||||
greatest_printf_cb *print;
|
||||
} greatest_type_info;
|
||||
|
||||
/* Callbacks for string type. */
|
||||
extern greatest_type_info greatest_type_info_string;
|
||||
|
||||
typedef enum {
|
||||
GREATEST_FLAG_FIRST_FAIL = 0x01,
|
||||
GREATEST_FLAG_LIST_ONLY = 0x02
|
||||
} greatest_flag_t;
|
||||
|
||||
/* Struct containing all test runner state. */
|
||||
typedef struct greatest_run_info {
|
||||
unsigned char flags;
|
||||
unsigned char verbosity;
|
||||
unsigned int tests_run; /* total test count */
|
||||
|
||||
/* overall pass/fail/skip counts */
|
||||
unsigned int passed;
|
||||
unsigned int failed;
|
||||
unsigned int skipped;
|
||||
unsigned int assertions;
|
||||
|
||||
/* currently running test suite */
|
||||
greatest_suite_info suite;
|
||||
|
||||
/* info to print about the most recent failure */
|
||||
const char *fail_file;
|
||||
unsigned int fail_line;
|
||||
const char *msg;
|
||||
|
||||
/* current setup/teardown hooks and userdata */
|
||||
greatest_setup_cb *setup;
|
||||
void *setup_udata;
|
||||
greatest_teardown_cb *teardown;
|
||||
void *teardown_udata;
|
||||
|
||||
/* formatting info for ".....s...F"-style output */
|
||||
unsigned int col;
|
||||
unsigned int width;
|
||||
|
||||
/* only run a specific suite or test */
|
||||
const char *suite_filter;
|
||||
const char *test_filter;
|
||||
|
||||
#if GREATEST_USE_TIME
|
||||
/* overall timers */
|
||||
clock_t begin;
|
||||
clock_t end;
|
||||
#endif
|
||||
|
||||
#if GREATEST_USE_LONGJMP
|
||||
jmp_buf jump_dest;
|
||||
#endif
|
||||
} greatest_run_info;
|
||||
|
||||
struct greatest_report_t {
|
||||
/* overall pass/fail/skip counts */
|
||||
unsigned int passed;
|
||||
unsigned int failed;
|
||||
unsigned int skipped;
|
||||
unsigned int assertions;
|
||||
};
|
||||
|
||||
/* Global var for the current testing context.
|
||||
* Initialized by GREATEST_MAIN_DEFS(). */
|
||||
extern greatest_run_info greatest_info;
|
||||
|
||||
|
||||
/**********************
|
||||
* Exported functions *
|
||||
**********************/
|
||||
|
||||
/* These are used internally by greatest. */
|
||||
void greatest_do_pass(const char *name);
|
||||
void greatest_do_fail(const char *name);
|
||||
void greatest_do_skip(const char *name);
|
||||
int greatest_pre_test(const char *name);
|
||||
void greatest_post_test(const char *name, int res);
|
||||
void greatest_usage(const char *name);
|
||||
int greatest_do_assert_equal_t(const void *exp, const void *got,
|
||||
greatest_type_info *type_info, void *udata);
|
||||
|
||||
/* These are part of the public greatest API. */
|
||||
void GREATEST_SET_SETUP_CB(greatest_setup_cb *cb, void *udata);
|
||||
void GREATEST_SET_TEARDOWN_CB(greatest_teardown_cb *cb, void *udata);
|
||||
int greatest_all_passed(void);
|
||||
void greatest_set_test_filter(const char *name);
|
||||
void greatest_set_suite_filter(const char *name);
|
||||
void greatest_get_report(struct greatest_report_t *report);
|
||||
unsigned int greatest_get_verbosity(void);
|
||||
void greatest_set_verbosity(unsigned int verbosity);
|
||||
void greatest_set_flag(greatest_flag_t flag);
|
||||
|
||||
|
||||
/********************
|
||||
* Language Support *
|
||||
********************/
|
||||
|
||||
/* If __VA_ARGS__ (C99) is supported, allow parametric testing
|
||||
* without needing to manually manage the argument struct. */
|
||||
/* C99 reports __STDC_VERSION__ as 199901L.  Each macro is tested with
 * defined() first, so a compiler that does not provide one of them is
 * handled explicitly instead of relying on the implicit 0. */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
    (defined(_MSC_VER) && _MSC_VER >= 1800)
#define GREATEST_VA_ARGS
#endif
|
||||
|
||||
|
||||
/**********
|
||||
* Macros *
|
||||
**********/
|
||||
|
||||
/* Define a suite. */
|
||||
#define GREATEST_SUITE(NAME) void NAME(void); void NAME(void)
|
||||
|
||||
/* Declare a suite, provided by another compilation unit. */
|
||||
#define GREATEST_SUITE_EXTERN(NAME) void NAME(void)
|
||||
|
||||
/* Start defining a test function.
|
||||
* The arguments are not included, to allow parametric testing. */
|
||||
#define GREATEST_TEST static greatest_test_res
|
||||
|
||||
/* PASS/FAIL/SKIP result from a test. Used internally. */
|
||||
typedef enum {
|
||||
GREATEST_TEST_RES_PASS = 0,
|
||||
GREATEST_TEST_RES_FAIL = -1,
|
||||
GREATEST_TEST_RES_SKIP = 1
|
||||
} greatest_test_res;
|
||||
|
||||
/* Run a suite. */
|
||||
#define GREATEST_RUN_SUITE(S_NAME) greatest_run_suite(S_NAME, #S_NAME)
|
||||
|
||||
/* Run a test in the current suite. */
|
||||
#define GREATEST_RUN_TEST(TEST) \
|
||||
do { \
|
||||
if (greatest_pre_test(#TEST) == 1) { \
|
||||
greatest_test_res res = GREATEST_SAVE_CONTEXT(); \
|
||||
if (res == GREATEST_TEST_RES_PASS) { \
|
||||
res = TEST(); \
|
||||
} \
|
||||
greatest_post_test(#TEST, res); \
|
||||
} else if (GREATEST_LIST_ONLY()) { \
|
||||
fprintf(GREATEST_STDOUT, " %s\n", #TEST); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Run a test in the current suite with one void * argument,
|
||||
* which can be a pointer to a struct with multiple arguments. */
|
||||
/* ENV is forwarded verbatim as TEST's single argument.
 * The longjmp context is saved before the call, the same way
 * GREATEST_RUN_TEST does it, so a GREATEST_FAIL_WITH_LONGJMPm inside
 * TEST comes back here with a non-PASS result (and TEST is not re-run)
 * instead of jumping through a jmp_buf that was never set for this
 * test.  When the test is not run and the runner is in list-only
 * mode, the test name is printed instead. */
#define GREATEST_RUN_TEST1(TEST, ENV)                                   \
    do {                                                                \
        if (greatest_pre_test(#TEST) == 1) {                            \
            greatest_test_res res = GREATEST_SAVE_CONTEXT();            \
            if (res == GREATEST_TEST_RES_PASS) {                        \
                res = TEST(ENV);                                        \
            }                                                           \
            greatest_post_test(#TEST, res);                             \
        } else if (GREATEST_LIST_ONLY()) {                              \
            fprintf(GREATEST_STDOUT, " %s\n", #TEST);                   \
        }                                                               \
    } while (0)
|
||||
|
||||
#ifdef GREATEST_VA_ARGS
|
||||
/* Run a test in the current suite, forwarding every argument after
 * TEST to the test function.
 * The longjmp context is saved before the call, the same way
 * GREATEST_RUN_TEST does it, so a GREATEST_FAIL_WITH_LONGJMPm inside
 * TEST comes back here with a non-PASS result (and TEST is not re-run)
 * instead of jumping through a jmp_buf that was never set for this
 * test. */
#define GREATEST_RUN_TESTp(TEST, ...)                                   \
    do {                                                                \
        if (greatest_pre_test(#TEST) == 1) {                            \
            greatest_test_res res = GREATEST_SAVE_CONTEXT();            \
            if (res == GREATEST_TEST_RES_PASS) {                        \
                res = TEST(__VA_ARGS__);                                \
            }                                                           \
            greatest_post_test(#TEST, res);                             \
        } else if (GREATEST_LIST_ONLY()) {                              \
            fprintf(GREATEST_STDOUT, " %s\n", #TEST);                   \
        }                                                               \
    } while (0)
|
||||
#endif
|
||||
|
||||
|
||||
/* Check if the test runner is in verbose mode. */
|
||||
#define GREATEST_IS_VERBOSE() ((greatest_info.verbosity) > 0)
|
||||
#define GREATEST_LIST_ONLY() \
|
||||
(greatest_info.flags & GREATEST_FLAG_LIST_ONLY)
|
||||
#define GREATEST_FIRST_FAIL() \
|
||||
(greatest_info.flags & GREATEST_FLAG_FIRST_FAIL)
|
||||
#define GREATEST_FAILURE_ABORT() \
|
||||
(greatest_info.suite.failed > 0 && GREATEST_FIRST_FAIL())
|
||||
|
||||
/* Message-less forms of tests defined below. */
|
||||
#define GREATEST_PASS() GREATEST_PASSm(NULL)
|
||||
#define GREATEST_FAIL() GREATEST_FAILm(NULL)
|
||||
#define GREATEST_SKIP() GREATEST_SKIPm(NULL)
|
||||
#define GREATEST_ASSERT(COND) \
|
||||
GREATEST_ASSERTm(#COND, COND)
|
||||
#define GREATEST_ASSERT_OR_LONGJMP(COND) \
|
||||
GREATEST_ASSERT_OR_LONGJMPm(#COND, COND)
|
||||
#define GREATEST_ASSERT_FALSE(COND) \
|
||||
GREATEST_ASSERT_FALSEm(#COND, COND)
|
||||
#define GREATEST_ASSERT_EQ(EXP, GOT) \
|
||||
GREATEST_ASSERT_EQm(#EXP " != " #GOT, EXP, GOT)
|
||||
#define GREATEST_ASSERT_EQ_FMT(EXP, GOT, FMT) \
|
||||
GREATEST_ASSERT_EQ_FMTm(#EXP " != " #GOT, EXP, GOT, FMT)
|
||||
#define GREATEST_ASSERT_IN_RANGE(EXP, GOT, TOL) \
|
||||
GREATEST_ASSERT_IN_RANGEm(#EXP " != " #GOT " +/- " #TOL, EXP, GOT, TOL)
|
||||
#define GREATEST_ASSERT_EQUAL_T(EXP, GOT, TYPE_INFO, UDATA) \
|
||||
GREATEST_ASSERT_EQUAL_Tm(#EXP " != " #GOT, EXP, GOT, TYPE_INFO, UDATA)
|
||||
#define GREATEST_ASSERT_STR_EQ(EXP, GOT) \
|
||||
GREATEST_ASSERT_STR_EQm(#EXP " != " #GOT, EXP, GOT)
|
||||
|
||||
/* The following forms take an additional message argument first,
|
||||
* to be displayed by the test runner. */
|
||||
|
||||
/* Fail if a condition is not true, with message. */
|
||||
#define GREATEST_ASSERTm(MSG, COND) \
|
||||
do { \
|
||||
greatest_info.assertions++; \
|
||||
if (!(COND)) { GREATEST_FAILm(MSG); } \
|
||||
} while (0)
|
||||
|
||||
/* Fail if a condition is not true, longjmping out of test. */
|
||||
#define GREATEST_ASSERT_OR_LONGJMPm(MSG, COND) \
|
||||
do { \
|
||||
greatest_info.assertions++; \
|
||||
if (!(COND)) { GREATEST_FAIL_WITH_LONGJMPm(MSG); } \
|
||||
} while (0)
|
||||
|
||||
/* Fail if a condition is not false, with message. */
|
||||
#define GREATEST_ASSERT_FALSEm(MSG, COND) \
|
||||
do { \
|
||||
greatest_info.assertions++; \
|
||||
if ((COND)) { GREATEST_FAILm(MSG); } \
|
||||
} while (0)
|
||||
|
||||
/* Fail if EXP != GOT (equality comparison by ==). */
|
||||
#define GREATEST_ASSERT_EQm(MSG, EXP, GOT) \
|
||||
do { \
|
||||
greatest_info.assertions++; \
|
||||
if ((EXP) != (GOT)) { GREATEST_FAILm(MSG); } \
|
||||
} while (0)
|
||||
|
||||
/* Fail if EXP != GOT (equality comparison by ==). */
|
||||
/* Count one assertion, then fail with MSG if EXP != GOT (compared
 * with ==), first printing both values using the printf-style
 * conversion FMT.
 * EXP and GOT are each expanded more than once, so they should be free
 * of side effects.  The format is kept in a greatest_-prefixed local so
 * that a caller variable named `fmt` can be passed as FMT without the
 * local being initialized from itself. */
#define GREATEST_ASSERT_EQ_FMTm(MSG, EXP, GOT, FMT)                     \
    do {                                                                \
        const char *greatest_FMT = ( FMT );                             \
        greatest_info.assertions++;                                     \
        if ((EXP) != (GOT)) {                                           \
            fprintf(GREATEST_STDOUT, "\nExpected: ");                   \
            fprintf(GREATEST_STDOUT, greatest_FMT, EXP);                \
            fprintf(GREATEST_STDOUT, "\nGot: ");                        \
            fprintf(GREATEST_STDOUT, greatest_FMT, GOT);                \
            fprintf(GREATEST_STDOUT, "\n");                             \
            GREATEST_FAILm(MSG);                                        \
        }                                                               \
    } while (0)
|
||||
|
||||
/* Fail if GOT not in range of EXP +|- TOL. */
|
||||
/* Count one assertion, then fail with MSG if GOT differs from EXP by
 * more than TOL in either direction, first printing the expected
 * value, the tolerance and the value received.
 * Each argument is evaluated once and converted to GREATEST_FLOAT.
 * The temporaries carry a greatest_ prefix so that caller variables
 * named `exp`, `got` or `tol` can be passed without the temporaries
 * being initialized from themselves, and so that the exp() function
 * from <math.h> is not shadowed. */
#define GREATEST_ASSERT_IN_RANGEm(MSG, EXP, GOT, TOL)                   \
    do {                                                                \
        GREATEST_FLOAT greatest_EXP = (EXP);                            \
        GREATEST_FLOAT greatest_GOT = (GOT);                            \
        GREATEST_FLOAT greatest_TOL = (TOL);                            \
        greatest_info.assertions++;                                     \
        if ((greatest_EXP > greatest_GOT &&                             \
                greatest_EXP - greatest_GOT > greatest_TOL) ||          \
            (greatest_EXP < greatest_GOT &&                             \
                greatest_GOT - greatest_EXP > greatest_TOL)) {          \
            fprintf(GREATEST_STDOUT,                                    \
                "\nExpected: " GREATEST_FLOAT_FMT                       \
                " +/- " GREATEST_FLOAT_FMT "\n"                         \
                "Got: " GREATEST_FLOAT_FMT "\n",                        \
                greatest_EXP, greatest_TOL, greatest_GOT);              \
            GREATEST_FAILm(MSG);                                        \
        }                                                               \
    } while (0)
|
||||
|
||||
/* Fail if EXP is not equal to GOT, according to strcmp. */
|
||||
/* Compare two strings by delegating to GREATEST_ASSERT_EQUAL_Tm with
 * the string type-info callbacks and no user data.
 * The definition ends at `while (0)`: there is no trailing line
 * continuation, so the line that follows is never spliced into the
 * macro body. */
#define GREATEST_ASSERT_STR_EQm(MSG, EXP, GOT)                          \
    do {                                                                \
        GREATEST_ASSERT_EQUAL_Tm(MSG, EXP, GOT,                         \
            &greatest_type_info_string, NULL);                          \
    } while (0)
|
||||
|
||||
/* Fail if EXP is not equal to GOT, according to a comparison
|
||||
* callback in TYPE_INFO. If they are not equal, optionally use a
|
||||
* print callback in TYPE_INFO to print them. */
|
||||
/* Count one assertion, then compare EXP and GOT through
 * greatest_do_assert_equal_t.  On a zero result the test fails: with a
 * fixed "callback missing" message when TYPE_INFO or its equal
 * callback is NULL, otherwise with MSG.
 * TYPE_INFO is evaluated once into a greatest_-prefixed local, so a
 * caller variable named `type_info` can be passed without the local
 * being initialized from itself.  The definition ends at `while (0)`
 * with no trailing line continuation. */
#define GREATEST_ASSERT_EQUAL_Tm(MSG, EXP, GOT, TYPE_INFO, UDATA)       \
    do {                                                                \
        greatest_type_info *greatest_TYPE_INFO = (TYPE_INFO);           \
        greatest_info.assertions++;                                     \
        if (!greatest_do_assert_equal_t(EXP, GOT,                       \
                greatest_TYPE_INFO, UDATA)) {                           \
            if (greatest_TYPE_INFO == NULL ||                           \
                greatest_TYPE_INFO->equal == NULL) {                    \
                GREATEST_FAILm("type_info->equal callback missing!");   \
            } else {                                                    \
                GREATEST_FAILm(MSG);                                    \
            }                                                           \
        }                                                               \
    } while (0)
|
||||
|
||||
/* Pass. */
|
||||
#define GREATEST_PASSm(MSG) \
|
||||
do { \
|
||||
greatest_info.msg = MSG; \
|
||||
return GREATEST_TEST_RES_PASS; \
|
||||
} while (0)
|
||||
|
||||
/* Fail. */
|
||||
#define GREATEST_FAILm(MSG) \
|
||||
do { \
|
||||
greatest_info.fail_file = __FILE__; \
|
||||
greatest_info.fail_line = __LINE__; \
|
||||
greatest_info.msg = MSG; \
|
||||
return GREATEST_TEST_RES_FAIL; \
|
||||
} while (0)
|
||||
|
||||
/* Optional GREATEST_FAILm variant that longjmps. */
|
||||
#if GREATEST_USE_LONGJMP
|
||||
#define GREATEST_FAIL_WITH_LONGJMP() GREATEST_FAIL_WITH_LONGJMPm(NULL)
|
||||
#define GREATEST_FAIL_WITH_LONGJMPm(MSG) \
|
||||
do { \
|
||||
greatest_info.fail_file = __FILE__; \
|
||||
greatest_info.fail_line = __LINE__; \
|
||||
greatest_info.msg = MSG; \
|
||||
longjmp(greatest_info.jump_dest, GREATEST_TEST_RES_FAIL); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
/* Skip the current test. */
|
||||
#define GREATEST_SKIPm(MSG) \
|
||||
do { \
|
||||
greatest_info.msg = MSG; \
|
||||
return GREATEST_TEST_RES_SKIP; \
|
||||
} while (0)
|
||||
|
||||
/* Check the result of a subfunction using ASSERT, etc. */
|
||||
/* Evaluate RES once; if it is anything other than
 * GREATEST_TEST_RES_PASS, return that value from the enclosing
 * function, otherwise fall through.
 * RES is parenthesized before being stored, and the definition ends at
 * `while (0)` with no trailing line continuation, so the line that
 * follows is never spliced into the macro body. */
#define GREATEST_CHECK_CALL(RES)                                        \
    do {                                                                \
        int _check_call_res = (RES);                                    \
        if (_check_call_res != GREATEST_TEST_RES_PASS) {                \
            return _check_call_res;                                     \
        }                                                               \
    } while (0)
|
||||
|
||||
#if GREATEST_USE_TIME
|
||||
/* Store clock() into NAME.  If clock() reports failure ((clock_t) -1),
 * print the stringized NAME to GREATEST_STDOUT and exit with
 * EXIT_FAILURE.
 * The two statements are wrapped in do { } while (0) so the macro
 * behaves as a single statement, including under an unbraced if/else.
 * Callers terminate the invocation with a semicolon. */
#define GREATEST_SET_TIME(NAME)                                         \
    do {                                                                \
        NAME = clock();                                                 \
        if (NAME == (clock_t) -1) {                                     \
            fprintf(GREATEST_STDOUT,                                    \
                "clock error: %s\n", #NAME);                            \
            exit(EXIT_FAILURE);                                         \
        }                                                               \
    } while (0)
|
||||
|
||||
#define GREATEST_CLOCK_DIFF(C1, C2) \
|
||||
fprintf(GREATEST_STDOUT, " (%lu ticks, %.3f sec)", \
|
||||
(long unsigned int) (C2) - (long unsigned int)(C1), \
|
||||
(double)((C2) - (C1)) / (1.0 * (double)CLOCKS_PER_SEC))
|
||||
#else
|
||||
#define GREATEST_SET_TIME(UNUSED)
|
||||
#define GREATEST_CLOCK_DIFF(UNUSED1, UNUSED2)
|
||||
#endif
|
||||
|
||||
#if GREATEST_USE_LONGJMP
|
||||
#define GREATEST_SAVE_CONTEXT() \
|
||||
/* setjmp returns 0 (GREATEST_TEST_RES_PASS) on first call */ \
|
||||
/* so the test runs, then RES_FAIL from FAIL_WITH_LONGJMP. */ \
|
||||
((greatest_test_res)(setjmp(greatest_info.jump_dest)))
|
||||
#else
|
||||
#define GREATEST_SAVE_CONTEXT() \
|
||||
/*a no-op, since setjmp/longjmp aren't being used */ \
|
||||
GREATEST_TEST_RES_PASS
|
||||
#endif
|
||||
|
||||
/* Include several function definitions in the main test file. */
|
||||
#define GREATEST_MAIN_DEFS() \
|
||||
\
|
||||
/* Is FILTER a substring of NAME?  Returns 1 if so, otherwise 0. */     \
/* The scan is done by strstr().  An empty FILTER is a substring of */  \
/* every NAME and therefore matches; the earlier hand-written loop */   \
/* could never match an empty FILTER. */                                \
static int greatest_name_match(const char *name,                        \
        const char *filter) {                                           \
    return strstr(name, filter) != NULL;                                \
}                                                                       \
|
||||
\
|
||||
int greatest_pre_test(const char *name) { \
|
||||
if (!GREATEST_LIST_ONLY() \
|
||||
&& (!GREATEST_FIRST_FAIL() || greatest_info.suite.failed == 0) \
|
||||
&& (greatest_info.test_filter == NULL || \
|
||||
greatest_name_match(name, greatest_info.test_filter))) { \
|
||||
GREATEST_SET_TIME(greatest_info.suite.pre_test); \
|
||||
if (greatest_info.setup) { \
|
||||
greatest_info.setup(greatest_info.setup_udata); \
|
||||
} \
|
||||
return 1; /* test should be run */ \
|
||||
} else { \
|
||||
return 0; /* skipped */ \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
void greatest_post_test(const char *name, int res) { \
|
||||
GREATEST_SET_TIME(greatest_info.suite.post_test); \
|
||||
if (greatest_info.teardown) { \
|
||||
void *udata = greatest_info.teardown_udata; \
|
||||
greatest_info.teardown(udata); \
|
||||
} \
|
||||
\
|
||||
if (res <= GREATEST_TEST_RES_FAIL) { \
|
||||
greatest_do_fail(name); \
|
||||
} else if (res >= GREATEST_TEST_RES_SKIP) { \
|
||||
greatest_do_skip(name); \
|
||||
} else if (res == GREATEST_TEST_RES_PASS) { \
|
||||
greatest_do_pass(name); \
|
||||
} \
|
||||
greatest_info.suite.tests_run++; \
|
||||
greatest_info.col++; \
|
||||
if (GREATEST_IS_VERBOSE()) { \
|
||||
GREATEST_CLOCK_DIFF(greatest_info.suite.pre_test, \
|
||||
greatest_info.suite.post_test); \
|
||||
fprintf(GREATEST_STDOUT, "\n"); \
|
||||
} else if (greatest_info.col % greatest_info.width == 0) { \
|
||||
fprintf(GREATEST_STDOUT, "\n"); \
|
||||
greatest_info.col = 0; \
|
||||
} \
|
||||
if (GREATEST_STDOUT == stdout) fflush(stdout); \
|
||||
} \
|
||||
\
|
||||
static void report_suite(void) { \
|
||||
if (greatest_info.suite.tests_run > 0) { \
|
||||
fprintf(GREATEST_STDOUT, \
|
||||
"\n%u test%s - %u passed, %u failed, %u skipped", \
|
||||
greatest_info.suite.tests_run, \
|
||||
greatest_info.suite.tests_run == 1 ? "" : "s", \
|
||||
greatest_info.suite.passed, \
|
||||
greatest_info.suite.failed, \
|
||||
greatest_info.suite.skipped); \
|
||||
GREATEST_CLOCK_DIFF(greatest_info.suite.pre_suite, \
|
||||
greatest_info.suite.post_suite); \
|
||||
fprintf(GREATEST_STDOUT, "\n"); \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
static void update_counts_and_reset_suite(void) { \
|
||||
greatest_info.setup = NULL; \
|
||||
greatest_info.setup_udata = NULL; \
|
||||
greatest_info.teardown = NULL; \
|
||||
greatest_info.teardown_udata = NULL; \
|
||||
greatest_info.passed += greatest_info.suite.passed; \
|
||||
greatest_info.failed += greatest_info.suite.failed; \
|
||||
greatest_info.skipped += greatest_info.suite.skipped; \
|
||||
greatest_info.tests_run += greatest_info.suite.tests_run; \
|
||||
memset(&greatest_info.suite, 0, sizeof(greatest_info.suite)); \
|
||||
greatest_info.col = 0; \
|
||||
} \
|
||||
\
|
||||
static void greatest_run_suite(greatest_suite_cb *suite_cb, \
|
||||
const char *suite_name) { \
|
||||
if (greatest_info.suite_filter && \
|
||||
!greatest_name_match(suite_name, greatest_info.suite_filter)) { \
|
||||
return; \
|
||||
} \
|
||||
if (GREATEST_FIRST_FAIL() && greatest_info.failed > 0) { return; } \
|
||||
if (greatest_info.suite.tests_run > 0) { /* tests w/out suite */ \
|
||||
update_counts_and_reset_suite(); \
|
||||
} \
|
||||
fprintf(GREATEST_STDOUT, "\n* Suite %s:\n", suite_name); \
|
||||
GREATEST_SET_TIME(greatest_info.suite.pre_suite); \
|
||||
suite_cb(); \
|
||||
GREATEST_SET_TIME(greatest_info.suite.post_suite); \
|
||||
report_suite(); \
|
||||
} \
|
||||
\
|
||||
void greatest_do_pass(const char *name) { \
|
||||
if (GREATEST_IS_VERBOSE()) { \
|
||||
fprintf(GREATEST_STDOUT, "PASS %s: %s", \
|
||||
name, greatest_info.msg ? greatest_info.msg : ""); \
|
||||
} else { \
|
||||
fprintf(GREATEST_STDOUT, "."); \
|
||||
} \
|
||||
greatest_info.suite.passed++; \
|
||||
} \
|
||||
\
|
||||
void greatest_do_fail(const char *name) { \
|
||||
if (GREATEST_IS_VERBOSE()) { \
|
||||
fprintf(GREATEST_STDOUT, \
|
||||
"FAIL %s: %s (%s:%u)", \
|
||||
name, greatest_info.msg ? greatest_info.msg : "", \
|
||||
greatest_info.fail_file, greatest_info.fail_line); \
|
||||
} else { \
|
||||
fprintf(GREATEST_STDOUT, "F"); \
|
||||
greatest_info.col++; \
|
||||
/* add linebreak if in line of '.'s */ \
|
||||
if (greatest_info.col != 0) { \
|
||||
fprintf(GREATEST_STDOUT, "\n"); \
|
||||
greatest_info.col = 0; \
|
||||
} \
|
||||
fprintf(GREATEST_STDOUT, "FAIL %s: %s (%s:%u)\n", \
|
||||
name, \
|
||||
greatest_info.msg ? greatest_info.msg : "", \
|
||||
greatest_info.fail_file, greatest_info.fail_line); \
|
||||
} \
|
||||
greatest_info.suite.failed++; \
|
||||
} \
|
||||
\
|
||||
void greatest_do_skip(const char *name) { \
|
||||
if (GREATEST_IS_VERBOSE()) { \
|
||||
fprintf(GREATEST_STDOUT, "SKIP %s: %s", \
|
||||
name, \
|
||||
greatest_info.msg ? \
|
||||
greatest_info.msg : "" ); \
|
||||
} else { \
|
||||
fprintf(GREATEST_STDOUT, "s"); \
|
||||
} \
|
||||
greatest_info.suite.skipped++; \
|
||||
} \
|
||||
\
|
||||
int greatest_do_assert_equal_t(const void *exp, const void *got, \
|
||||
greatest_type_info *type_info, void *udata) { \
|
||||
int eq = 0; \
|
||||
if (type_info == NULL || type_info->equal == NULL) { \
|
||||
return 0; \
|
||||
} \
|
||||
eq = type_info->equal(exp, got, udata); \
|
||||
if (!eq) { \
|
||||
if (type_info->print != NULL) { \
|
||||
fprintf(GREATEST_STDOUT, "\nExpected: "); \
|
||||
(void)type_info->print(exp, udata); \
|
||||
fprintf(GREATEST_STDOUT, "\nGot: "); \
|
||||
(void)type_info->print(got, udata); \
|
||||
fprintf(GREATEST_STDOUT, "\n"); \
|
||||
} else { \
|
||||
fprintf(GREATEST_STDOUT, \
|
||||
"GREATEST_ASSERT_EQUAL_T failure at %s:%u\n", \
|
||||
greatest_info.fail_file, \
|
||||
greatest_info.fail_line); \
|
||||
} \
|
||||
} \
|
||||
return eq; \
|
||||
} \
|
||||
\
|
||||
void greatest_usage(const char *name) { \
|
||||
fprintf(GREATEST_STDOUT, \
|
||||
"Usage: %s [-hlfv] [-s SUITE] [-t TEST]\n" \
|
||||
" -h print this Help\n" \
|
||||
" -l List suites and their tests, then exit\n" \
|
||||
" -f Stop runner after first failure\n" \
|
||||
" -v Verbose output\n" \
|
||||
" -s SUITE only run suites containing string SUITE\n" \
|
||||
" -t TEST only run tests containing string TEST\n", \
|
||||
name); \
|
||||
} \
|
||||
\
|
||||
static void greatest_parse_args(int argc, char **argv) { \
|
||||
int i = 0; \
|
||||
for (i = 1; i < argc; i++) { \
|
||||
if (0 == strncmp("-t", argv[i], 2)) { \
|
||||
if (argc <= i + 1) { \
|
||||
greatest_usage(argv[0]); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} \
|
||||
greatest_info.test_filter = argv[i+1]; \
|
||||
i++; \
|
||||
} else if (0 == strncmp("-s", argv[i], 2)) { \
|
||||
if (argc <= i + 1) { \
|
||||
greatest_usage(argv[0]); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} \
|
||||
greatest_info.suite_filter = argv[i+1]; \
|
||||
i++; \
|
||||
} else if (0 == strncmp("-f", argv[i], 2)) { \
|
||||
greatest_info.flags |= GREATEST_FLAG_FIRST_FAIL; \
|
||||
} else if (0 == strncmp("-v", argv[i], 2)) { \
|
||||
greatest_info.verbosity++; \
|
||||
} else if (0 == strncmp("-l", argv[i], 2)) { \
|
||||
greatest_info.flags |= GREATEST_FLAG_LIST_ONLY; \
|
||||
} else if (0 == strncmp("-h", argv[i], 2)) { \
|
||||
greatest_usage(argv[0]); \
|
||||
exit(EXIT_SUCCESS); \
|
||||
} else if (0 == strncmp("--", argv[i], 2)) { \
|
||||
break; \
|
||||
} else { \
|
||||
fprintf(GREATEST_STDOUT, \
|
||||
"Unknown argument '%s'\n", argv[i]); \
|
||||
greatest_usage(argv[0]); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
int greatest_all_passed(void) { return (greatest_info.failed == 0); } \
|
||||
\
|
||||
void greatest_set_test_filter(const char *name) { \
|
||||
greatest_info.test_filter = name; \
|
||||
} \
|
||||
\
|
||||
void greatest_set_suite_filter(const char *name) { \
|
||||
greatest_info.suite_filter = name; \
|
||||
} \
|
||||
\
|
||||
void greatest_get_report(struct greatest_report_t *report) { \
|
||||
if (report) { \
|
||||
report->passed = greatest_info.passed; \
|
||||
report->failed = greatest_info.failed; \
|
||||
report->skipped = greatest_info.skipped; \
|
||||
report->assertions = greatest_info.assertions; \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
unsigned int greatest_get_verbosity(void) { \
|
||||
return greatest_info.verbosity; \
|
||||
} \
|
||||
\
|
||||
void greatest_set_verbosity(unsigned int verbosity) { \
|
||||
greatest_info.verbosity = (unsigned char)verbosity; \
|
||||
} \
|
||||
\
|
||||
void greatest_set_flag(greatest_flag_t flag) { \
|
||||
greatest_info.flags |= flag; \
|
||||
} \
|
||||
\
|
||||
void GREATEST_SET_SETUP_CB(greatest_setup_cb *cb, void *udata) { \
|
||||
greatest_info.setup = cb; \
|
||||
greatest_info.setup_udata = udata; \
|
||||
} \
|
||||
\
|
||||
void GREATEST_SET_TEARDOWN_CB(greatest_teardown_cb *cb, \
|
||||
void *udata) { \
|
||||
greatest_info.teardown = cb; \
|
||||
greatest_info.teardown_udata = udata; \
|
||||
} \
|
||||
\
|
||||
static int greatest_string_equal_cb(const void *exp, const void *got, \
|
||||
void *udata) { \
|
||||
(void)udata; \
|
||||
return (0 == strcmp((const char *)exp, (const char *)got)); \
|
||||
} \
|
||||
\
|
||||
static int greatest_string_printf_cb(const void *t, void *udata) { \
|
||||
(void)udata; \
|
||||
return fprintf(GREATEST_STDOUT, "%s", (const char *)t); \
|
||||
} \
|
||||
\
|
||||
greatest_type_info greatest_type_info_string = { \
|
||||
greatest_string_equal_cb, \
|
||||
greatest_string_printf_cb, \
|
||||
}; \
|
||||
\
|
||||
greatest_run_info greatest_info
|
||||
|
||||
/* Init internals. */
|
||||
/* Zero the global runner state, set the output width to
 * GREATEST_DEFAULT_WIDTH and record the start time.
 * The definition ends at `while (0)`: there is no trailing line
 * continuation, so the line that follows is never spliced into the
 * macro body. */
#define GREATEST_INIT()                                                 \
    do {                                                                \
        /* Suppress unused function warning if features aren't used */  \
        (void)greatest_run_suite;                                       \
        (void)greatest_parse_args;                                      \
                                                                        \
        memset(&greatest_info, 0, sizeof(greatest_info));               \
        greatest_info.width = GREATEST_DEFAULT_WIDTH;                   \
        GREATEST_SET_TIME(greatest_info.begin);                         \
    } while (0)
|
||||
|
||||
/* Handle command-line arguments, etc. */
|
||||
#define GREATEST_MAIN_BEGIN() \
|
||||
do { \
|
||||
GREATEST_INIT(); \
|
||||
greatest_parse_args(argc, argv); \
|
||||
} while (0)
|
||||
|
||||
/* Report passes, failures, skipped tests, the number of
|
||||
* assertions, and the overall run time. */
|
||||
#define GREATEST_PRINT_REPORT() \
|
||||
do { \
|
||||
if (!GREATEST_LIST_ONLY()) { \
|
||||
update_counts_and_reset_suite(); \
|
||||
GREATEST_SET_TIME(greatest_info.end); \
|
||||
fprintf(GREATEST_STDOUT, \
|
||||
"\nTotal: %u test%s", \
|
||||
greatest_info.tests_run, \
|
||||
greatest_info.tests_run == 1 ? "" : "s"); \
|
||||
GREATEST_CLOCK_DIFF(greatest_info.begin, \
|
||||
greatest_info.end); \
|
||||
fprintf(GREATEST_STDOUT, ", %u assertion%s\n", \
|
||||
greatest_info.assertions, \
|
||||
greatest_info.assertions == 1 ? "" : "s"); \
|
||||
fprintf(GREATEST_STDOUT, \
|
||||
"Pass: %u, fail: %u, skip: %u.\n", \
|
||||
greatest_info.passed, \
|
||||
greatest_info.failed, greatest_info.skipped); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Report results, exit with exit status based on results. */
|
||||
#define GREATEST_MAIN_END() \
|
||||
do { \
|
||||
GREATEST_PRINT_REPORT(); \
|
||||
return (greatest_all_passed() ? EXIT_SUCCESS : EXIT_FAILURE); \
|
||||
} while (0)
|
||||
|
||||
/* Short aliases (without the GREATEST_ prefix) for the most commonly used
 * symbols. They are only defined when GREATEST_USE_ABBREVS is nonzero. */
#if GREATEST_USE_ABBREVS

/* Test / suite definition and execution. */
#define TEST             GREATEST_TEST
#define SUITE            GREATEST_SUITE
#define SUITE_EXTERN     GREATEST_SUITE_EXTERN
#define RUN_TEST         GREATEST_RUN_TEST
#define RUN_TEST1        GREATEST_RUN_TEST1
#define RUN_SUITE        GREATEST_RUN_SUITE

/* Assertions; the *m variants take a custom message. */
#define ASSERT           GREATEST_ASSERT
#define ASSERTm          GREATEST_ASSERTm
#define ASSERT_FALSE     GREATEST_ASSERT_FALSE
#define ASSERT_EQ        GREATEST_ASSERT_EQ
#define ASSERT_EQ_FMT    GREATEST_ASSERT_EQ_FMT
#define ASSERT_IN_RANGE  GREATEST_ASSERT_IN_RANGE
#define ASSERT_EQUAL_T   GREATEST_ASSERT_EQUAL_T
#define ASSERT_STR_EQ    GREATEST_ASSERT_STR_EQ
#define ASSERT_FALSEm    GREATEST_ASSERT_FALSEm
#define ASSERT_EQm       GREATEST_ASSERT_EQm
#define ASSERT_EQ_FMTm   GREATEST_ASSERT_EQ_FMTm
#define ASSERT_IN_RANGEm GREATEST_ASSERT_IN_RANGEm
#define ASSERT_EQUAL_Tm  GREATEST_ASSERT_EQUAL_Tm
#define ASSERT_STR_EQm   GREATEST_ASSERT_STR_EQm

/* Test outcomes. */
#define PASS             GREATEST_PASS
#define FAIL             GREATEST_FAIL
#define SKIP             GREATEST_SKIP
#define PASSm            GREATEST_PASSm
#define FAILm            GREATEST_FAILm
#define SKIPm            GREATEST_SKIPm

/* Setup / teardown callbacks and nested-call checking. */
#define SET_SETUP        GREATEST_SET_SETUP_CB
#define SET_TEARDOWN     GREATEST_SET_TEARDOWN_CB
#define CHECK_CALL       GREATEST_CHECK_CALL

#ifdef GREATEST_VA_ARGS
#define RUN_TESTp        GREATEST_RUN_TESTp
#endif

#if GREATEST_USE_LONGJMP
#define ASSERT_OR_LONGJMP  GREATEST_ASSERT_OR_LONGJMP
#define ASSERT_OR_LONGJMPm GREATEST_ASSERT_OR_LONGJMPm
#define FAIL_WITH_LONGJMP  GREATEST_FAIL_WITH_LONGJMP
#define FAIL_WITH_LONGJMPm GREATEST_FAIL_WITH_LONGJMPm
#endif

#endif /* USE_ABBREVS */
|
||||
|
||||
#endif
|
||||
25
test/test.c
Normal file
25
test/test.c
Normal file
@@ -0,0 +1,25 @@
|
||||
#include "greatest.h"
|
||||
|
||||
/* Suites defined in the other test_*.c translation units. */
SUITE_EXTERN(libpostal_expansion_tests);
SUITE_EXTERN(libpostal_parser_tests);
SUITE_EXTERN(libpostal_transliteration_tests);
SUITE_EXTERN(libpostal_numex_tests);
SUITE_EXTERN(libpostal_string_utils_tests);
SUITE_EXTERN(libpostal_trie_tests);
SUITE_EXTERN(libpostal_crf_context_tests);

/* Emit the greatest runner's definitions in this translation unit. */
GREATEST_MAIN_DEFS();
|
||||
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
GREATEST_MAIN_BEGIN();
|
||||
|
||||
RUN_SUITE(libpostal_expansion_tests);
|
||||
RUN_SUITE(libpostal_parser_tests);
|
||||
RUN_SUITE(libpostal_transliteration_tests);
|
||||
RUN_SUITE(libpostal_numex_tests);
|
||||
RUN_SUITE(libpostal_string_utils_tests);
|
||||
RUN_SUITE(libpostal_trie_tests);
|
||||
RUN_SUITE(libpostal_crf_context_tests);
|
||||
GREATEST_MAIN_END();
|
||||
}
|
||||
268
test/test_crf_context.c
Normal file
268
test/test_crf_context.c
Normal file
@@ -0,0 +1,268 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "greatest.h"
|
||||
#include "../src/float_utils.c"
|
||||
#include "../src/crf_context.c"
|
||||
|
||||
SUITE(libpostal_crf_context_tests);
|
||||
|
||||
/* Assertion helper: passes when cv and tv differ by at most 1e-9.
 * Returns a greatest result, so call it through CHECK_CALL. */
static greatest_test_res check_values(double cv, double tv) {
    ASSERT_IN_RANGE(cv, tv, 1e-9);

    PASS();
}
|
||||
|
||||
/* Assertion helper: passes when x is non-NULL and its m and n fields
 * equal the expected values m and n.
 * Returns a greatest result, so call it through CHECK_CALL. */
static greatest_test_res check_matrix_size(double_matrix_t *x, size_t m, size_t n) {
    ASSERT(x);

    ASSERT_EQ(x->m, m);
    ASSERT_EQ(x->n, n);

    PASS();
}
|
||||
|
||||
TEST test_crf_context(void) {
|
||||
int y1, y2, y3;
|
||||
double norm = 0;
|
||||
|
||||
const size_t L = 3;
|
||||
const size_t T = 3;
|
||||
|
||||
crf_context_t *ctx = crf_context_new(CRF_CONTEXT_ALL, L, 1);
|
||||
ASSERT(ctx != NULL);
|
||||
|
||||
const size_t T_large = 100;
|
||||
|
||||
bool ret = crf_context_set_num_items(ctx, T_large);
|
||||
ASSERT(ret);
|
||||
|
||||
check_matrix_size(ctx->state, T_large, L);
|
||||
check_matrix_size(ctx->exp_state, T_large, L);
|
||||
check_matrix_size(ctx->state_trans, T_large, L * L);
|
||||
check_matrix_size(ctx->exp_state_trans, T_large, L * L);
|
||||
check_matrix_size(ctx->trans, L, L);
|
||||
check_matrix_size(ctx->exp_trans, L, L);
|
||||
|
||||
ret = crf_context_set_num_items(ctx, T);
|
||||
ASSERT(ret);
|
||||
|
||||
check_matrix_size(ctx->state, T, L);
|
||||
check_matrix_size(ctx->exp_state, T, L);
|
||||
check_matrix_size(ctx->state_trans, T, L * L);
|
||||
check_matrix_size(ctx->exp_state_trans, T, L * L);
|
||||
check_matrix_size(ctx->trans, L, L);
|
||||
check_matrix_size(ctx->exp_trans, L, L);
|
||||
|
||||
double *state_trans = NULL;
|
||||
double *state = NULL;
|
||||
double *trans = NULL;
|
||||
double scores[T][L][L];
|
||||
uint32_t labels[L];
|
||||
|
||||
/* Initialize the state scores. */
|
||||
state = state_score(ctx, 0);
|
||||
state[0] = .4; state[1] = .5; state[2] = .1;
|
||||
state = state_score(ctx, 1);
|
||||
state[0] = .4; state[1] = .1; state[2] = .5;
|
||||
state = state_score(ctx, 2);
|
||||
state[0] = .4; state[1] = .1; state[2] = .5;
|
||||
|
||||
printf("state\n");
|
||||
|
||||
/* Initialize the state scores. */
|
||||
state_trans = state_trans_score(ctx, 0, 0);
|
||||
state_trans[0] = .4; state_trans[1] = .2; state_trans[2] = .5;
|
||||
state_trans = state_trans_score(ctx, 0, 1);
|
||||
state_trans[0] = .4; state_trans[1] = .2; state_trans[2] = .5;
|
||||
state_trans = state_trans_score(ctx, 0, 2);
|
||||
state_trans[0] = .4; state_trans[1] = .2; state_trans[2] = .5;
|
||||
state_trans = state_trans_score(ctx, 1, 0);
|
||||
state_trans[0] = .3; state_trans[1] = .1; state_trans[2] = .6;
|
||||
state_trans = state_trans_score(ctx, 1, 1);
|
||||
state_trans[0] = .5; state_trans[1] = .1; state_trans[2] = .3;
|
||||
state_trans = state_trans_score(ctx, 1, 2);
|
||||
state_trans[0] = .4; state_trans[1] = .2; state_trans[2] = .4;
|
||||
state_trans = state_trans_score(ctx, 2, 0);
|
||||
state_trans[0] = .3; state_trans[1] = .1; state_trans[2] = .6;
|
||||
state_trans = state_trans_score(ctx, 2, 1);
|
||||
state_trans[0] = .5; state_trans[1] = .1; state_trans[2] = .3;
|
||||
state_trans = state_trans_score(ctx, 2, 2);
|
||||
state_trans[0] = .4; state_trans[1] = .2; state_trans[2] = .4;
|
||||
|
||||
printf("state_trans\n");
|
||||
|
||||
trans = trans_score(ctx, 0);
|
||||
trans[0] = .3; trans[1] = .1; trans[2] = .4;
|
||||
trans = trans_score(ctx, 1);
|
||||
trans[0] = .6; trans[1] = .2; trans[2] = .1;
|
||||
trans = trans_score(ctx, 2);
|
||||
trans[0] = .5; trans[1] = .2; trans[2] = .1;
|
||||
|
||||
printf("trans\n");
|
||||
|
||||
crf_context_exp_state(ctx);
|
||||
printf("exp state\n");
|
||||
crf_context_exp_state_trans(ctx);
|
||||
printf("exp state_trans\n");
|
||||
crf_context_exp_trans(ctx);
|
||||
printf("exp trans\n");
|
||||
|
||||
crf_context_alpha_score(ctx);
|
||||
printf("alpha\n");
|
||||
|
||||
crf_context_beta_score(ctx);
|
||||
printf("beta\n");
|
||||
|
||||
/* Compute the score of every label sequence. */
|
||||
for (y1 = 0; y1 < T; y1++) {
|
||||
double s1 = exp_state_score(ctx, 0)[y1];
|
||||
for (y2 = 0; y2 < L; y2++) {
|
||||
double s2 = s1;
|
||||
s2 *= exp_state_trans_score(ctx, 1, y1)[y2];
|
||||
s2 *= exp_trans_score(ctx, y1)[y2];
|
||||
s2 *= exp_state_score(ctx, 1)[y2];
|
||||
for (y3 = 0; y3 < L; y3++) {
|
||||
double s3 = s2;
|
||||
s3 *= exp_state_trans_score(ctx, 2, y2)[y3];
|
||||
s3 *= exp_trans_score(ctx, y2)[y3];
|
||||
s3 *= exp_state_score(ctx, 2)[y3];
|
||||
scores[y1][y2][y3] = s3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute the partition factor. */
|
||||
norm = 0.;
|
||||
for (y1 = 0; y1 < T; y1++) {
|
||||
for (y2 = 0; y2 < L; y2++) {
|
||||
for (y3 = 0; y3 < L; y3++) {
|
||||
norm += scores[y1][y2][y3];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Check the partition factor. */
|
||||
printf("Check for the partition factor...\n");
|
||||
CHECK_CALL(check_values(exp(ctx->log_norm), norm));
|
||||
|
||||
/* Compute the sequence probabilities. */
|
||||
for (y1 = 0; y1 < T; y1++) {
|
||||
for (y2 = 0; y2 < L; y2++) {
|
||||
for (y3 = 0; y3 < L; y3++) {
|
||||
double logp;
|
||||
|
||||
labels[0] = y1;
|
||||
labels[1] = y2;
|
||||
labels[2] = y3;
|
||||
logp = crf_context_score(ctx, labels) - crf_context_lognorm(ctx);
|
||||
printf("Check for the sequence %d-%d-%d...\n", y1, y2, y3);
|
||||
CHECK_CALL(check_values(exp(logp), scores[y1][y2][y3] / norm));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute the marginal probability at t=0 */
|
||||
for (y1 = 0; y1 < T; y1++) {
|
||||
double a, b, c, s = 0.;
|
||||
for (y2 = 0; y2 < L; y2++) {
|
||||
for (y3 = 0; y3 < L; y3++) {
|
||||
s += scores[y1][y2][y3];
|
||||
}
|
||||
}
|
||||
|
||||
a = alpha_score(ctx, 0)[y1];
|
||||
b = beta_score(ctx, 0)[y1];
|
||||
c = 1. / ctx->scale_factor->a[0];
|
||||
|
||||
printf("Check for the marginal probability (0,%d)...\n", y1);
|
||||
CHECK_CALL(check_values(a * b * c, s / norm));
|
||||
}
|
||||
|
||||
/* Compute the marginal probability at t=1 */
|
||||
for (y2 = 0; y2 < L; y2++) {
|
||||
double a, b, c, s = 0.;
|
||||
for (y1 = 0; y1 < T; y1++) {
|
||||
for (y3 = 0; y3 < L; y3++) {
|
||||
s += scores[y1][y2][y3];
|
||||
}
|
||||
}
|
||||
|
||||
a = alpha_score(ctx, 1)[y2];
|
||||
b = beta_score(ctx, 1)[y2];
|
||||
c = 1. / ctx->scale_factor->a[1];
|
||||
|
||||
printf("Check for the marginal probability (1,%d)...\n", y2);
|
||||
CHECK_CALL(check_values(a * b * c, s / norm));
|
||||
}
|
||||
|
||||
/* Compute the marginal probability at t=2 */
|
||||
for (y3 = 0; y3 < L; y3++) {
|
||||
double a, b, c, s = 0.;
|
||||
for (y1 = 0; y1 < T; y1++) {
|
||||
for (y2 = 0; y2 < L; y2++) {
|
||||
s += scores[y1][y2][y3];
|
||||
}
|
||||
}
|
||||
|
||||
a = alpha_score(ctx, 2)[y3];
|
||||
b = beta_score(ctx, 2)[y3];
|
||||
c = 1. / ctx->scale_factor->a[2];
|
||||
|
||||
printf("Check for the marginal probability (2,%d)...\n", y3);
|
||||
CHECK_CALL(check_values(a * b * c, s / norm));
|
||||
}
|
||||
|
||||
/* Compute the marginal probabilities of transitions. */
|
||||
for (y1 = 0; y1 < T; y1++) {
|
||||
for (y2 = 0; y2 < L; y2++) {
|
||||
double a, b, s, st, t, p = 0.;
|
||||
for (y3 = 0; y3 < L; y3++) {
|
||||
p += scores[y1][y2][y3];
|
||||
}
|
||||
|
||||
a = alpha_score(ctx, 0)[y1];
|
||||
b = beta_score(ctx, 1)[y2];
|
||||
s = exp_state_score(ctx, 1)[y2];
|
||||
st = exp_state_trans_score(ctx, 1, y1)[y2];
|
||||
t = exp_trans_score(ctx, y1)[y2];
|
||||
|
||||
printf("Check for the marginal probability (0,%d)-(1,%d)...\n", y1, y2);
|
||||
CHECK_CALL(check_values(a * t * st * s * b, p / norm));
|
||||
}
|
||||
}
|
||||
|
||||
for (y2 = 0; y2 < L; y2++) {
|
||||
for (y3 = 0; y3 < L; y3++) {
|
||||
double a, b, s, st, t, p = 0.;
|
||||
for (y1 = 0; y1 < T; y1++) {
|
||||
p += scores[y1][y2][y3];
|
||||
}
|
||||
|
||||
a = alpha_score(ctx, 1)[y2];
|
||||
b = beta_score(ctx, 2)[y3];
|
||||
s = exp_state_score(ctx, 2)[y3];
|
||||
st = exp_state_trans_score(ctx, 2, y2)[y3];
|
||||
t = exp_trans_score(ctx, y2)[y3];
|
||||
|
||||
printf("Check for the marginal probability (1,%d)-(2,%d)...\n", y2, y3);
|
||||
CHECK_CALL(check_values(a * t * st * s * b, p / norm));
|
||||
}
|
||||
}
|
||||
|
||||
double viterbi = crf_context_viterbi(ctx, labels);
|
||||
printf("viterbi score=%f\n", viterbi);
|
||||
for (int i = 0; i < L; i++) {
|
||||
printf("label[%d]=%d\n", i, labels[i]);
|
||||
}
|
||||
|
||||
crf_context_destroy(ctx);
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
|
||||
SUITE(libpostal_crf_context_tests) {
|
||||
|
||||
RUN_TEST(test_crf_context);
|
||||
|
||||
}
|
||||
339
test/test_expand.c
Normal file
339
test/test_expand.c
Normal file
@@ -0,0 +1,339 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "greatest.h"
|
||||
#include "../src/string_utils.h"
|
||||
#include "../src/libpostal.h"
|
||||
|
||||
SUITE(libpostal_expansion_tests);
|
||||
|
||||
/* Expand `input` with `options` and pass when `output` is one of the
 * resulting expansions.
 *
 * root == false uses libpostal_expand_address, root == true uses
 * libpostal_expand_address_root. Every expansion examined is printed; on a
 * miss the full expansion list is printed before failing.
 *
 * The expansion array is released on both the pass and the fail path
 * (previously it leaked whenever the expected string was missing). */
static greatest_test_res test_expansion_contains_phrase_option(char *input, char *output, libpostal_normalize_options_t options, bool root) {
    /* Start at zero so the loops below are safe even if the library does
     * not write the count. */
    size_t num_expansions = 0;

    char **expansions = NULL;
    if (!root) {
        expansions = libpostal_expand_address(input, options, &num_expansions);
    } else {
        expansions = libpostal_expand_address_root(input, options, &num_expansions);
    }

    bool contains_expansion = false;
    for (size_t i = 0; i < num_expansions; i++) {
        char *expansion = expansions[i];
        printf("expansion = %s\n", expansion);
        if (string_equals(output, expansion)) {
            contains_expansion = true;
            break;
        }
    }

    if (!contains_expansion) {
        printf("Expansions should contain %s, got {", output);
        for (size_t i = 0; i < num_expansions; i++) {
            printf("%s%s", expansions[i], i < num_expansions - 1 ? "," : "");
        }
        printf("}\n");
    }

    /* Free before reporting: FAIL() returns immediately. */
    if (expansions != NULL) {
        libpostal_expansion_array_destroy(expansions, num_expansions);
    }

    if (!contains_expansion) {
        FAIL();
    }

    PASS();
}
|
||||
|
||||
/* Pass when the regular (non-root) expansions of `input` contain `output`. */
static greatest_test_res test_expansion_contains(char *input, char *output, libpostal_normalize_options_t options) {
    CHECK_CALL(test_expansion_contains_phrase_option(input, output, options, false));

    PASS();
}
|
||||
|
||||
/* Pass when the root expansions of `input` contain `output`. */
static greatest_test_res test_root_expansion_contains(char *input, char *output, libpostal_normalize_options_t options) {
    CHECK_CALL(test_expansion_contains_phrase_option(input, output, options, true));

    PASS();
}
|
||||
|
||||
/* Free the first `count` duplicated language codes and the array itself.
 * A NULL array is accepted and ignored. */
static void free_test_languages(char **languages, size_t count) {
    if (languages == NULL) {
        return;
    }
    for (size_t i = 0; i < count; i++) {
        free(languages[i]);
    }
    free(languages);
}

/* Run test_expansion_contains_phrase_option with an explicit language list.
 *
 * `args` must yield `num_languages` values of type `char *`; each one is
 * checked against LIBPOSTAL_MAX_LANGUAGE_LEN and duplicated into a
 * temporary array that is installed in `options` (a by-value copy).
 * With num_languages == 0 the language fields of `options` are cleared.
 *
 * The temporary array is released on every exit path, including a failed
 * length check, a failed allocation and a failing expansion check. */
static greatest_test_res test_expansion_contains_phrase_option_with_languages(char *input, char *output, libpostal_normalize_options_t options, bool root, size_t num_languages, va_list args) {
    char **languages = NULL;

    if (num_languages > 0) {
        languages = malloc(num_languages * sizeof *languages);
        if (languages == NULL) {
            FAILm("could not allocate the language array");
        }

        for (size_t i = 0; i < num_languages; i++) {
            char *lang = va_arg(args, char *);

            /* Clean up first: a failing ASSERTm returns immediately. */
            bool lang_ok = strlen(lang) < LIBPOSTAL_MAX_LANGUAGE_LEN;
            if (!lang_ok) {
                free_test_languages(languages, i);
            }
            ASSERTm("strlen(lang) < LIBPOSTAL_MAX_LANGUAGE_LEN", lang_ok);

            languages[i] = strdup(lang);
            if (languages[i] == NULL) {
                free_test_languages(languages, i);
                FAILm("could not copy a language code");
            }
        }

        options.num_languages = num_languages;
        options.languages = (char **)languages;
    } else {
        options.languages = NULL;
        options.num_languages = 0;
    }

    /* Keep the result so the array can be freed before propagating it. */
    greatest_test_res res = test_expansion_contains_phrase_option(input, output, options, root);

    free_test_languages(languages, num_languages);

    CHECK_CALL(res);
    PASS();
}
|
||||
|
||||
|
||||
|
||||
/* Variadic convenience wrapper: pass when the regular (non-root)
 * expansions of `input` contain `output`, using the `num_languages`
 * language codes (each a `char *`) that follow `num_languages`.
 *
 * va_start / va_end are always paired and the result is propagated only
 * after va_end, so the va_list handed to the helper is never
 * uninitialized and is never left open by an early return. */
static greatest_test_res test_expansion_contains_with_languages(char *input, char *output, libpostal_normalize_options_t options, size_t num_languages, ...) {
    bool root = false;
    va_list args;

    va_start(args, num_languages);
    greatest_test_res res = test_expansion_contains_phrase_option_with_languages(input, output, options, root, num_languages, args);
    va_end(args);

    CHECK_CALL(res);
    PASS();
}
|
||||
|
||||
|
||||
/* Variadic convenience wrapper: pass when the root expansions of `input`
 * contain `output`, using the `num_languages` language codes (each a
 * `char *`) that follow `num_languages`.
 *
 * va_start / va_end are always paired and the result is propagated only
 * after va_end, so the va_list handed to the helper is never
 * uninitialized and is never left open by an early return. */
static greatest_test_res test_root_expansion_contains_with_languages(char *input, char *output, libpostal_normalize_options_t options, size_t num_languages, ...) {
    bool root = true;
    va_list args;

    va_start(args, num_languages);
    greatest_test_res res = test_expansion_contains_phrase_option_with_languages(input, output, options, root, num_languages, args);
    va_end(args);

    CHECK_CALL(res);
    PASS();
}
|
||||
|
||||
|
||||
|
||||
TEST test_expansions(void) {
|
||||
libpostal_normalize_options_t options = libpostal_get_default_options();
|
||||
|
||||
CHECK_CALL(test_expansion_contains_with_languages("123 Main St. #2f", "123 main street number 2f", options, 1, "en"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("120 E 96th St", "120 east 96 street", options, 1, "en"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("120 E Ninety-sixth St", "120 east 96 street", options, 1, "en"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("4998 Vanderbilt Dr, Columbus, OH 43213", "4998 vanderbilt drive columbus ohio 43213", options, 1, "en"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("Nineteen oh one W El Segundo Blvd", "1901 west el segundo boulevard", options, 1, "en"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("S St. NW", "s street northwest", options, 1, "en"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("Quatre vingt douze Ave des Champs-Élysées", "92 avenue des champs-elysees", options, 1, "fr"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("Quatre vingt douze Ave des Champs-Élysées", "92 avenue des champs elysees", options, 1, "fr"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("Quatre vingt douze Ave des Champs-Élysées", "92 avenue des champselysees", options, 1, "fr"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("Marktstrasse", "markt strasse", options, 1, "de"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("Hoofdstraat", "hoofdstraat", options, 1, "nl"));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("มงแตร", "มงแตร", options, 1, "th"));
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
TEST test_expansion_for_non_address_input(void) {
|
||||
size_t num_expansions;
|
||||
|
||||
// This is tested as the input caused a segfault in expand_alternative_phrase_option
|
||||
char **expansions = libpostal_expand_address("ida-b@wells.co", libpostal_get_default_options(), &num_expansions);
|
||||
libpostal_expansion_array_destroy(expansions, num_expansions);
|
||||
PASS();
|
||||
}
|
||||
|
||||
TEST test_street_root_expansions(void) {
|
||||
libpostal_normalize_options_t options = libpostal_get_default_options();
|
||||
options.address_components = LIBPOSTAL_ADDRESS_STREET | LIBPOSTAL_ADDRESS_ANY;
|
||||
|
||||
// English - normal cases
|
||||
CHECK_CALL(test_root_expansion_contains("Malcolm X Blvd", "malcolm x", options));
|
||||
CHECK_CALL(test_root_expansion_contains("E 106th St", "106", options));
|
||||
CHECK_CALL(test_root_expansion_contains("S Park Ave", "park", options));
|
||||
CHECK_CALL(test_root_expansion_contains("Park South", "park", options));
|
||||
CHECK_CALL(test_root_expansion_contains("Rev Dr. MLK Dr S", "martin luther king junior", options));
|
||||
CHECK_CALL(test_root_expansion_contains("Rev Dr. Martin Luther King Jr Dr S", "martin luther king junior", options));
|
||||
CHECK_CALL(test_root_expansion_contains("East 6th Street", "6th", options));
|
||||
|
||||
// English - edge cases
|
||||
CHECK_CALL(test_root_expansion_contains("Avenue B", "b", options));
|
||||
CHECK_CALL(test_root_expansion_contains("Avenue C", "c", options));
|
||||
CHECK_CALL(test_root_expansion_contains("Avenue D", "d", options));
|
||||
CHECK_CALL(test_root_expansion_contains("Avenue E", "e", options));
|
||||
CHECK_CALL(test_root_expansion_contains("Avenue N", "n", options));
|
||||
CHECK_CALL(test_root_expansion_contains("U St SE", "u", options));
|
||||
CHECK_CALL(test_root_expansion_contains("S Park", "park", options));
|
||||
CHECK_CALL(test_root_expansion_contains("Park S", "park", options));
|
||||
CHECK_CALL(test_root_expansion_contains("Avenue Rd", "avenue", options));
|
||||
CHECK_CALL(test_root_expansion_contains("Broadway", "broadway", options));
|
||||
CHECK_CALL(test_root_expansion_contains("E Broadway", "broadway", options));
|
||||
CHECK_CALL(test_root_expansion_contains("E Center St", "center", options));
|
||||
CHECK_CALL(test_root_expansion_contains("E Ctr St", "center", options));
|
||||
CHECK_CALL(test_root_expansion_contains("E Center Street", "center", options));
|
||||
CHECK_CALL(test_root_expansion_contains("E Ctr Street", "center", options));
|
||||
CHECK_CALL(test_root_expansion_contains("Center St E", "center", options));
|
||||
CHECK_CALL(test_root_expansion_contains("Ctr St E", "center", options));
|
||||
CHECK_CALL(test_root_expansion_contains("Center Street E", "center", options));
|
||||
CHECK_CALL(test_root_expansion_contains("Ctr Street E", "center", options));
|
||||
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("W. UNION STREET", "union", options, 2, "en", "es"));
|
||||
|
||||
|
||||
// Spanish
|
||||
CHECK_CALL(test_root_expansion_contains("C/ Ocho", "8", options));
|
||||
PASS();
|
||||
}
|
||||
|
||||
|
||||
TEST test_house_number_root_expansions(void) {
|
||||
libpostal_normalize_options_t options = libpostal_get_default_options();
|
||||
options.address_components = LIBPOSTAL_ADDRESS_HOUSE_NUMBER | LIBPOSTAL_ADDRESS_ANY;
|
||||
|
||||
// English - normal cases
|
||||
CHECK_CALL(test_root_expansion_contains("1A", "1 a", options));
|
||||
CHECK_CALL(test_root_expansion_contains("A1", "a 1", options));
|
||||
CHECK_CALL(test_root_expansion_contains("1", "1", options));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("# 1", "1", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("No. 1", "1", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("House No. 1", "1", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("House #1", "1", options, 1, "en"));
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
TEST test_level_root_expansions(void) {
|
||||
libpostal_normalize_options_t options = libpostal_get_default_options();
|
||||
options.address_components = LIBPOSTAL_ADDRESS_LEVEL | LIBPOSTAL_ADDRESS_ANY;
|
||||
|
||||
// English - normal cases
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("1st Fl", "1", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("1st Floor", "1", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("First Fl", "1", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("First Floor", "1", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("2nd Fl", "2", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("2nd Floor", "2", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Second Fl", "2", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Second Floor", "2", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Fl #1", "1", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Fl No. 1", "1", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Floor No. 1", "1", options, 1, "en"));
|
||||
|
||||
// Specifiers
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("SB 1", "sub basement 1", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Bsmt", "basement", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Bsmt 1", "basement 1", options, 1, "en"));
|
||||
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("1G", "1 ground", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("G", "ground", options, 1, "en"));
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
TEST test_unit_root_expansions(void) {
|
||||
libpostal_normalize_options_t options = libpostal_get_default_options();
|
||||
options.address_components = LIBPOSTAL_ADDRESS_UNIT | LIBPOSTAL_ADDRESS_ANY;
|
||||
|
||||
// English - normal cases
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("1A", "1 a", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("A1", "a 1", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Apt 101", "101", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Apt No 101", "101", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Apt #101", "101", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Apartment 101", "101", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Apartment #101", "101", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Ste 101", "101", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Ste No 101", "101", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Ste #101", "101", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Suite 101", "101", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Suite #101", "101", options, 1, "en"));
|
||||
|
||||
// Specifiers
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("PH 1", "penthouse 1", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("PH1", "penthouse 1", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("Penthouse 1", "penthouse 1", options, 1, "en"));
|
||||
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("1L", "1l", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("1L", "1 left", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("1F", "1f", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("1F", "1f", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("1R", "1r", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("1R", "1r", options, 1, "en"));
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
|
||||
TEST test_po_box_root_expansions(void) {
|
||||
libpostal_normalize_options_t options = libpostal_get_default_options();
|
||||
options.address_components = LIBPOSTAL_ADDRESS_PO_BOX | LIBPOSTAL_ADDRESS_ANY;
|
||||
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("PO Box 1234", "1234", options, 1, "en"));
|
||||
CHECK_CALL(test_root_expansion_contains_with_languages("PO Box #1234", "1234", options, 1, "en"));
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
TEST test_expansions_language_classifier(void) {
|
||||
libpostal_normalize_options_t options = libpostal_get_default_options();
|
||||
|
||||
CHECK_CALL(test_expansion_contains_with_languages("V XX Sett", "via 20 settembre", options, 0, NULL));
|
||||
CHECK_CALL(test_expansion_contains_with_languages("C/ Ocho", "calle 8", options, 0, NULL));
|
||||
PASS();
|
||||
}
|
||||
|
||||
TEST test_expansions_no_options(void) {
|
||||
libpostal_normalize_options_t options = libpostal_get_default_options();
|
||||
options.lowercase = false;
|
||||
options.latin_ascii = false;
|
||||
options.transliterate = false;
|
||||
options.strip_accents = false;
|
||||
options.decompose = false;
|
||||
options.trim_string = false;
|
||||
options.drop_parentheticals = false;
|
||||
options.replace_numeric_hyphens = false;
|
||||
options.delete_numeric_hyphens = false;
|
||||
options.split_alpha_from_numeric = false;
|
||||
options.replace_word_hyphens = false;
|
||||
options.delete_word_hyphens = false;
|
||||
options.delete_final_periods = false;
|
||||
options.delete_acronym_periods = false;
|
||||
options.drop_english_possessives = false;
|
||||
options.delete_apostrophes = false;
|
||||
options.expand_numex = false;
|
||||
options.roman_numerals = false;
|
||||
|
||||
CHECK_CALL(test_expansion_contains_with_languages("120 E 96th St New York", "120 E 96th St New York", options, 0, NULL));
|
||||
PASS();
|
||||
}
|
||||
|
||||
|
||||
SUITE(libpostal_expansion_tests) {
|
||||
if (!libpostal_setup() || !libpostal_setup_language_classifier()) {
|
||||
printf("Could not setup libpostal\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
RUN_TEST(test_expansions);
|
||||
RUN_TEST(test_street_root_expansions);
|
||||
RUN_TEST(test_house_number_root_expansions);
|
||||
RUN_TEST(test_level_root_expansions);
|
||||
RUN_TEST(test_unit_root_expansions);
|
||||
RUN_TEST(test_po_box_root_expansions);
|
||||
RUN_TEST(test_expansions_language_classifier);
|
||||
RUN_TEST(test_expansions_no_options);
|
||||
RUN_TEST(test_expansion_for_non_address_input);
|
||||
|
||||
libpostal_teardown();
|
||||
libpostal_teardown_language_classifier();
|
||||
|
||||
}
|
||||
|
||||
87
test/test_numex.c
Normal file
87
test/test_numex.c
Normal file
@@ -0,0 +1,87 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "greatest.h"
|
||||
#include "../src/numex.h"
|
||||
|
||||
SUITE(libpostal_numex_tests);
|
||||
|
||||
/* Pass when replacing the numeric expressions of `input` for language
 * `lang` yields `output`. A NULL result from replace_numeric_expressions
 * is compared as if `input` had been returned unchanged. */
static greatest_test_res test_numex(char *input, char *output, char *lang) {
    char *normalized = replace_numeric_expressions(input, lang);

    if (normalized == NULL) {
        /* Nothing came back: the input itself must match. */
        ASSERT_STR_EQ(output, input);
    } else {
        ASSERT_STR_EQ(output, normalized);
        free(normalized);
    }

    PASS();
}
|
||||
|
||||
/* Numeric-expression replacement across several languages. Cases run in
 * table order and the test stops at the first failure. */
TEST test_numeric_expressions(void) {
    static const struct {
        char *input;
        char *output;
        char *lang;
    } cases[] = {
        /* English numbers */
        {"five hundred ninety-three", "593", "en"},
        {"five hundred and ninety-three", "593", "en"},
        {"fourth and a", "4th and a", "en"},
        {"foo and bar", "foo and bar", "en"},
        {"thirty west twenty-sixth street", "30 west 26th street", "en"},
        {"five and sixth", "5 and 6th", "en"},
        {"three hundred thousand nineteenhundred and forty-fifth", "301945th", "en"},
        {"seventeen eighty", "1780", "en"},
        {"ten oh four", "1004", "en"},
        {"ten and four", "10 and 4", "en"},

        /* French (Celtic-style) numbers */
        {"quatre-vingt-douze", "92", "fr"},
        {"quatre vingt douze", "92", "fr"},
        {"quatre vingts", "80", "fr"},
        {"soixante-et-onze", "71", "fr"},
        {"soixante-cinq", "65", "fr"},

        /* French (Belgian/Swiss) numbers */
        {"nonante-deux", "92", "fr"},
        {"septante-cinq", "75", "fr"},

        /* German numbers */
        {"sechs-und-fünfzig", "56", "de"},
        {"eins", "1", "de"},
        {"dreiundzwanzigste strasse", "23. strasse", "de"},

        /* Italian numbers */
        {"millenovecentonovantadue", "1992", "it"},
        {"ventiquattro", "24", "it"},

        /* Spanish numbers */
        {"tricentesima primera", "301.ª", "es"},

        /* Roman numerals (la=Latin) */
        {"via xx settembre", "via 20 settembre", "la"},
        {"mcccxlix anno domini", "1349 anno domini", "la"},
        {"str. st. nazionale dei giovi, milano", "str. st. nazionale dei giovi, milano", "la"},

        /* Japanese numbers */
        {"百二十", "120", "ja"},

        /* Korean numbers */
        {"천구백구십이", "1992", "ko"},
    };

    for (size_t i = 0; i < sizeof cases / sizeof cases[0]; i++) {
        CHECK_CALL(test_numex(cases[i].input, cases[i].output, cases[i].lang));
    }

    PASS();
}
|
||||
|
||||
GREATEST_SUITE(libpostal_numex_tests) {
|
||||
if (!numex_module_setup(DEFAULT_NUMEX_PATH)) {
|
||||
printf("Could not load numex module\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
RUN_TEST(test_numeric_expressions);
|
||||
|
||||
numex_module_teardown();
|
||||
}
|
||||
1913
test/test_parser.c
Normal file
1913
test/test_parser.c
Normal file
File diff suppressed because it is too large
Load Diff
342
test/test_string_utils.c
Normal file
342
test/test_string_utils.c
Normal file
@@ -0,0 +1,342 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#include "greatest.h"
|
||||
|
||||
#include "../src/features.h"
|
||||
#include "../src/scanner.h"
|
||||
#include "../src/string_utils.h"
|
||||
|
||||
SUITE(libpostal_string_utils_tests);
|
||||
|
||||
/*
 * Checks that utf8_reversed_string() reverses a string containing
 * multi-byte UTF-8 characters (ü, ß) without splitting those characters:
 * the expected result still contains intact "ß" and "ü".
 */
TEST test_utf8_reverse(void) {
    char *s = "Bünderstraße";

    char *rev = utf8_reversed_string(s);
    if (rev == NULL) {
        FAIL();
    }

    /*
     * greatest's ASSERT_STR_EQ takes (expected, actual); keeping that order
     * makes the "Expected"/"Got" lines of a failure report read correctly.
     */
    ASSERT_STR_EQ("eßartsrednüB", rev);
    free(rev);

    PASS();
}
|
||||
|
||||
/*
 * Walks a UTF-8 string backwards with utf8proc_iterate_reversed() and
 * checks the byte length and code point reported at every step, then checks
 * that a call at offset 0 reports length 0 and code point -1.
 */
TEST test_utf8proc_iterate_reversed(void) {
    /* Three two-byte sequences: U+03A9 (937), U+0313 (787), U+0345 (837). */
    char *s = "\xce\xa9\xcc\x93\xcd\x85";

    /* Code points in the order a backwards scan is expected to yield them. */
    static const int32_t expected_reversed[] = {837, 787, 937};
    const size_t num_expected = sizeof(expected_reversed) / sizeof(expected_reversed[0]);

    int32_t ch;
    ssize_t char_len;
    size_t idx = strlen(s);

    for (size_t i = 0; i < num_expected; i++) {
        char_len = utf8proc_iterate_reversed((uint8_t *)s, idx, &ch);
        /* Arguments are (expected, actual), matching greatest's convention. */
        ASSERT_EQ(2, char_len);
        ASSERT_EQ(expected_reversed[i], ch);
        idx -= char_len;
    }

    /* All six bytes are consumed, so idx is 0 here: nothing left to read. */
    char_len = utf8proc_iterate_reversed((uint8_t *)s, idx, &ch);
    ASSERT_EQ(0, char_len);
    ASSERT_EQ(-1, ch);

    PASS();
}
|
||||
|
||||
/*
 * Checks the value utf8_common_prefix_ignore_separators() returns for two
 * spellings of the same word that differ only by an inserted hyphen.
 */
TEST test_utf8_compare_ignore_separators(void) {
    char *str1 = "Bünderstraße";
    char *str2 = "Bünder-straße";

    size_t prefix = utf8_common_prefix_ignore_separators(str1, str2);

    /*
     * 14 is the UTF-8 byte length of str1 (ü and ß take two bytes each).
     * Arguments are (expected, actual), matching greatest's convention.
     */
    ASSERT_EQ(14, prefix);

    PASS();
}
|
||||
|
||||
/*
 * Runs utf8_common_prefix_ignore_separators() on three string pairs and
 * treats the result as a boolean (non-zero means "true").
 *
 * NOTE(review): the test name suggests an equality check, but the function
 * called is the common-prefix one, and the last case expects its result to
 * be zero for two strings that start with the same letters. This test is
 * also not listed in the suite defined in this file. Confirm which function
 * was intended before enabling it.
 */
TEST test_utf8_equal_ignore_separators(void) {
    bool equal;

    /* One side has a trailing space, the other an inner hyphen. */
    equal = utf8_common_prefix_ignore_separators("Bünderstraße ", "Bünder-straße");
    ASSERT(equal);

    /* Leading/trailing spaces plus hyphen versus a single inner space. */
    equal = utf8_common_prefix_ignore_separators(" Bünder-straße ", "Bünder straße");
    ASSERT(equal);

    /* Different final word ("a" versus "aa"): expected to be falsy. */
    equal = utf8_common_prefix_ignore_separators("Bünder-straße-a", "Bünder straße aa");
    ASSERT_FALSE(equal);

    PASS();
}
|
||||
|
||||
TEST test_feature_array_add(void) {
|
||||
cstring_array *features = cstring_array_new();
|
||||
if (features == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
feature_array_add(features, 3, "a", "foo", "blee");
|
||||
feature_array_add(features, 1, "b");
|
||||
|
||||
ASSERT_EQ(cstring_array_num_strings(features), 2);
|
||||
|
||||
char *feature = cstring_array_get_string(features, 0);
|
||||
size_t len = cstring_array_token_length(features, 0);
|
||||
|
||||
if (feature == NULL) {
|
||||
cstring_array_destroy(features);
|
||||
FAIL();
|
||||
}
|
||||
|
||||
ASSERT_STR_EQ(feature, "a|foo|blee");
|
||||
ASSERT_EQ(len, strlen(feature));
|
||||
|
||||
feature = cstring_array_get_string(features, 1);
|
||||
len = cstring_array_token_length(features, 1);
|
||||
|
||||
if (feature == NULL) {
|
||||
cstring_array_destroy(features);
|
||||
FAIL();
|
||||
}
|
||||
|
||||
ASSERT_STR_EQ(feature, "b");
|
||||
ASSERT_EQ(len, strlen(feature));
|
||||
|
||||
char **strings = cstring_array_to_strings(features);
|
||||
if (strings == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
|
||||
ASSERT_STR_EQ(strings[0], "a|foo|blee");
|
||||
free(strings[0]);
|
||||
ASSERT_STR_EQ(strings[1], "b");
|
||||
free(strings[1]);
|
||||
|
||||
free(strings);
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
TEST test_char_array(void) {
|
||||
char_array *str = char_array_new();
|
||||
if (str == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
char_array_cat(str, "Bürgermeister");
|
||||
char_array_cat(str, "|");
|
||||
char_array_cat_reversed(str, "straße");
|
||||
|
||||
ASSERT_STR_EQ(str->a, "Bürgermeister|eßarts");
|
||||
|
||||
char_array_cat_printf(str, " %d %s %.2f \t ", 1234, "onetwothreefour", 12.34);
|
||||
|
||||
char *expected_output = "Bürgermeister|eßarts 1234 onetwothreefour 12.34 \t ";
|
||||
ASSERT_STR_EQ(str->a, expected_output);
|
||||
|
||||
char *a = char_array_to_string(str);
|
||||
ASSERT_STR_EQ(a, expected_output);
|
||||
|
||||
char *b = string_trim(a);
|
||||
ASSERT_STR_EQ(b, "Bürgermeister|eßarts 1234 onetwothreefour 12.34");
|
||||
|
||||
free(a);
|
||||
free(b);
|
||||
|
||||
str = char_array_new();
|
||||
#define SEPARATOR "|*|*|*|"
|
||||
|
||||
char_array_add_joined(str, SEPARATOR, true, 3, "dictionaries" SEPARATOR, "foo", "bar");
|
||||
|
||||
a = char_array_get_string(str);
|
||||
|
||||
ASSERT_STR_EQ(a, "dictionaries|*|*|*|foo|*|*|*|bar");
|
||||
|
||||
char_array_destroy(str);
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
TEST test_cstring_array(void) {
|
||||
size_t count = 0;
|
||||
cstring_array *array = cstring_array_split_no_copy(strdup("The|Low|End|Theory"), '|', &count);
|
||||
if (array == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
ASSERT_EQ(count, 4);
|
||||
|
||||
char *str = NULL;
|
||||
|
||||
str = cstring_array_get_string(array, 0);
|
||||
if (str == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
ASSERT_STR_EQ(str, "The");
|
||||
|
||||
str = cstring_array_get_string(array, 1);
|
||||
if (str == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
ASSERT_STR_EQ(str, "Low");
|
||||
|
||||
str = cstring_array_get_string(array, 2);
|
||||
if (str == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
ASSERT_STR_EQ(str, "End");
|
||||
|
||||
str = cstring_array_get_string(array, 3);
|
||||
if (str == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
ASSERT_STR_EQ(str, "Theory");
|
||||
|
||||
cstring_array_destroy(array);
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
TEST test_string_tree(void) {
|
||||
string_tree_t *tree = string_tree_new();
|
||||
if (tree == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
|
||||
string_tree_finalize_token(tree);
|
||||
string_tree_add_string(tree, "Twenty-fifth");
|
||||
string_tree_add_string(tree, "Twentyfifth");
|
||||
string_tree_finalize_token(tree);
|
||||
string_tree_add_string(tree, "Bürgermeister");
|
||||
string_tree_add_string(tree, "Buergermeister");
|
||||
string_tree_add_string(tree, "Burgermeister");
|
||||
string_tree_finalize_token(tree);
|
||||
string_tree_add_string(tree, "Straße");
|
||||
string_tree_add_string(tree, "Strasse");
|
||||
string_tree_finalize_token(tree);
|
||||
|
||||
ASSERT_EQ(tree->token_indices->n - 1, 4);
|
||||
|
||||
ASSERT_EQ(string_tree_num_alternatives(tree, 0), 1);
|
||||
ASSERT_EQ(string_tree_num_alternatives(tree, 1), 2);
|
||||
ASSERT_EQ(string_tree_num_alternatives(tree, 2), 3);
|
||||
ASSERT_EQ(string_tree_num_alternatives(tree, 3), 2);
|
||||
|
||||
string_tree_iterator_t *iter = string_tree_iterator_new(tree);
|
||||
|
||||
if (iter == NULL) {
|
||||
string_tree_destroy(tree);
|
||||
FAIL();
|
||||
}
|
||||
size_t expected_num_tokens = 4;
|
||||
ASSERT_EQ(iter->num_tokens, expected_num_tokens);
|
||||
ASSERT_EQ(iter->remaining, 12);
|
||||
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 0);
|
||||
ASSERT_EQ(iter->path[2], 0);
|
||||
ASSERT_EQ(iter->path[3], 0);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 0);
|
||||
ASSERT_EQ(iter->path[2], 0);
|
||||
ASSERT_EQ(iter->path[3], 1);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 0);
|
||||
ASSERT_EQ(iter->path[2], 1);
|
||||
ASSERT_EQ(iter->path[3], 0);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 0);
|
||||
ASSERT_EQ(iter->path[2], 1);
|
||||
ASSERT_EQ(iter->path[3], 1);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 0);
|
||||
ASSERT_EQ(iter->path[2], 2);
|
||||
ASSERT_EQ(iter->path[3], 0);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 0);
|
||||
ASSERT_EQ(iter->path[2], 2);
|
||||
ASSERT_EQ(iter->path[3], 1);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 1);
|
||||
ASSERT_EQ(iter->path[2], 0);
|
||||
ASSERT_EQ(iter->path[3], 0);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 1);
|
||||
ASSERT_EQ(iter->path[2], 0);
|
||||
ASSERT_EQ(iter->path[3], 1);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 1);
|
||||
ASSERT_EQ(iter->path[2], 1);
|
||||
ASSERT_EQ(iter->path[3], 0);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 1);
|
||||
ASSERT_EQ(iter->path[2], 1);
|
||||
ASSERT_EQ(iter->path[3], 1);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 1);
|
||||
ASSERT_EQ(iter->path[2], 2);
|
||||
ASSERT_EQ(iter->path[3], 0);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 1);
|
||||
ASSERT_EQ(iter->path[2], 2);
|
||||
ASSERT_EQ(iter->path[3], 1);
|
||||
|
||||
string_tree_iterator_destroy(iter);
|
||||
string_tree_destroy(tree);
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
SUITE(libpostal_string_utils_tests) {
|
||||
RUN_TEST(test_utf8_reverse);
|
||||
RUN_TEST(test_utf8proc_iterate_reversed);
|
||||
RUN_TEST(test_utf8_compare_ignore_separators);
|
||||
RUN_TEST(test_feature_array_add);
|
||||
RUN_TEST(test_char_array);
|
||||
RUN_TEST(test_cstring_array);
|
||||
RUN_TEST(test_string_tree);
|
||||
}
|
||||
|
||||
|
||||
46
test/test_transliterate.c
Normal file
46
test/test_transliterate.c
Normal file
@@ -0,0 +1,46 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "greatest.h"
|
||||
#include "../src/transliterate.h"
|
||||
|
||||
SUITE(libpostal_transliteration_tests);
|
||||
|
||||
/*
 * Helper: runs the transliterator named trans_name over input and checks
 * that the result equals output.
 *
 * Returns a greatest result so callers can wrap the call in CHECK_CALL().
 * The transliterated string is freed here after a successful comparison.
 */
static greatest_test_res test_transliteration(char *trans_name, char *input, char *output) {
    char *transliterated = transliterate(trans_name, input, strlen(input));

    /* Fail cleanly instead of handing a NULL pointer to the string compare. */
    ASSERT(transliterated != NULL);

    ASSERT_STR_EQ(output, transliterated);
    free(transliterated);

    PASS();
}
|
||||
|
||||
/*
 * Runs test_transliteration() over a fixed list of
 * (transliterator, input, expected output) cases, stopping at the first
 * case that does not pass.
 */
TEST test_transliterators(void) {
    static const struct {
        char *trans_name;
        char *input;
        char *output;
    } cases[] = {
        {"greek-latin", "διαφορετικούς", "diaphoretikoús̱"},
        {"devanagari-latin", "ज़", "za"},
        {"arabic-latin", "شارع", "sẖạrʿ"},
        {"cyrillic-latin", "улица", "ulica"},
        {"russian-latin-bgn", "улица", "ulitsa"},
        {"hebrew-latin", "רחוב", "rẖwb"},
        {"latin-ascii", "foo & bar", "foo & bar"},
        {"latin-ascii-simple", "eschenbräu bräurei triftstraße 67½ & foo", "eschenbräu bräurei triftstraße 67½ & foo"},
        {"han-latin", "街𠀀abcdef", "jiēhēabcdef"},
        {"katakana-latin", "ドウ", "dou"},
        {"hiragana-latin", "どう", "dou"},
        /*
         * NOTE(review): the next two cases are identical as written here;
         * confirm whether one was meant to use a different input.
         */
        {"latin-ascii-simple", "at&t", "at&t"},
        {"latin-ascii-simple", "at&t", "at&t"},
    };
    const size_t num_cases = sizeof(cases) / sizeof(cases[0]);

    for (size_t i = 0; i < num_cases; i++) {
        CHECK_CALL(test_transliteration(cases[i].trans_name, cases[i].input, cases[i].output));
    }

    PASS();
}
|
||||
|
||||
GREATEST_SUITE(libpostal_transliteration_tests) {
|
||||
if (!transliteration_module_setup(DEFAULT_TRANSLITERATION_PATH)) {
|
||||
printf("Could not load transliterator module\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
RUN_TEST(test_transliterators);
|
||||
|
||||
transliteration_module_teardown();
|
||||
}
|
||||
61
test/test_trie.c
Normal file
61
test/test_trie.c
Normal file
@@ -0,0 +1,61 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "greatest.h"
|
||||
#include "../src/scanner.h"
|
||||
#include "../src/trie.h"
|
||||
#include "../src/trie_search.h"
|
||||
|
||||
SUITE(libpostal_trie_tests);
|
||||
|
||||
/*
 * Helper: inserts key with the given data into trie, reads it back with
 * trie_get_data(), and checks that the stored value matches.
 *
 * Returns a greatest result so callers can wrap the call in CHECK_CALL().
 */
static greatest_test_res test_trie_add_get(trie_t *trie, char *key, uint32_t data) {
    /* Insertion must succeed before a lookup is meaningful. */
    const bool added = trie_add(trie, key, data);
    ASSERT(added);

    /* Only compared after the lookup is asserted to have succeeded. */
    uint32_t trie_data = 0;
    const bool fetched = trie_get_data(trie, key, &trie_data);
    ASSERT(fetched);

    ASSERT_EQ(data, trie_data);

    PASS();
}
|
||||
|
||||
/*
 * Helper: fills trie with a fixed set of keys via test_trie_add_get(),
 * stopping at the first insertion or lookup that does not pass.
 */
static greatest_test_res test_trie_setup(trie_t *trie) {
    /* Note that "st rt" and "st rd" deliberately share the value 3. */
    static const struct {
        char *key;
        uint32_t data;
    } entries[] = {
        {"st", 1},
        {"street", 2},
        {"st rt", 3},
        {"st rd", 3},
        {"state route", 4},
        {"maine", 5},
    };
    const size_t num_entries = sizeof(entries) / sizeof(entries[0]);

    for (size_t i = 0; i < num_entries; i++) {
        CHECK_CALL(test_trie_add_get(trie, entries[i].key, entries[i].data));
    }

    PASS();
}
|
||||
|
||||
|
||||
TEST test_trie(void) {
|
||||
trie_t *trie = trie_new();
|
||||
ASSERT(trie != NULL);
|
||||
CHECK_CALL(test_trie_setup(trie));
|
||||
|
||||
char *input = "main st r 20";
|
||||
token_array *tokens = tokenize_keep_whitespace(input);
|
||||
phrase_array *phrases = trie_search_tokens(trie, input, tokens);
|
||||
|
||||
ASSERT(phrases != NULL);
|
||||
ASSERT(phrases->n == 1);
|
||||
phrase_t phrase = phrases->a[0];
|
||||
ASSERT(phrase.start == 2);
|
||||
ASSERT(phrase.len == 1);
|
||||
|
||||
phrase_array_destroy(phrases);
|
||||
token_array_destroy(tokens);
|
||||
trie_destroy(trie);
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
GREATEST_SUITE(libpostal_trie_tests) {
|
||||
RUN_TEST(test_trie);
|
||||
}
|
||||
Reference in New Issue
Block a user