Initial fork commit
This commit is contained in:
342
test/test_string_utils.c
Normal file
342
test/test_string_utils.c
Normal file
@@ -0,0 +1,342 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#include "greatest.h"
|
||||
|
||||
#include "../src/features.h"
|
||||
#include "../src/scanner.h"
|
||||
#include "../src/string_utils.h"
|
||||
|
||||
SUITE(libpostal_string_utils_tests);
|
||||
|
||||
/*
 * Checks utf8_reversed_string on a string containing non-ASCII characters.
 * The expected value keeps "ü" and "ß" intact, i.e. the reversal is per
 * character rather than per byte.
 */
TEST test_utf8_reverse(void) {
    char *input = "Bünderstraße";
    char *reversed = utf8_reversed_string(input);

    if (!reversed) {
        FAIL();
    }

    ASSERT_STR_EQ(reversed, "eßartsrednüB");

    /* The result is released with free() by this test. */
    free(reversed);

    PASS();
}
|
||||
|
||||
/*
 * Walks a UTF-8 buffer backwards with utf8proc_iterate_reversed.
 *
 * The input is three two-byte sequences: U+03A9 (937), U+0313 (787) and
 * U+0345 (837). Iterating from the end must therefore yield them in the
 * opposite order, each with a length of 2, and then report length 0 with
 * a code point of -1 once the index has reached the start of the buffer.
 */
TEST test_utf8proc_iterate_reversed(void) {
    char *s = "\xce\xa9\xcc\x93\xcd\x85";

    /* Code points in the order a backwards walk should produce them. */
    const int32_t expected[] = {837, 787, 937};
    const size_t num_expected = sizeof(expected) / sizeof(expected[0]);

    int32_t ch;
    ssize_t char_len;
    size_t idx = strlen(s);

    for (size_t i = 0; i < num_expected; i++) {
        char_len = utf8proc_iterate_reversed((uint8_t *)s, idx, &ch);
        ASSERT_EQ(char_len, 2);
        ASSERT_EQ(ch, expected[i]);
        /* Step the index back over the character just decoded. */
        idx -= char_len;
    }

    /* idx is now 0: nothing is left to decode. */
    char_len = utf8proc_iterate_reversed((uint8_t *)s, idx, &ch);
    ASSERT_EQ(char_len, 0);
    ASSERT_EQ(ch, -1);

    PASS();
}
|
||||
|
||||
/*
 * Checks utf8_common_prefix_ignore_separators on two spellings that differ
 * only by a hyphen.
 *
 * The expected value 14 equals the UTF-8 byte length of the unhyphenated
 * string ("ü" and "ß" take two bytes each) -- presumably the prefix length
 * is counted in bytes of the first argument; confirm against string_utils.
 */
TEST test_utf8_compare_ignore_separators(void) {
    char *plain = "Bünderstraße";
    char *hyphenated = "Bünder-straße";

    size_t common = utf8_common_prefix_ignore_separators(plain, hyphenated);
    ASSERT_EQ(common, 14);

    PASS();
}
|
||||
|
||||
/*
 * Compares pairs of strings that differ only in separators (spaces and
 * hyphens) and one pair that differs in its final letters.
 *
 * NOTE(review): despite its name, this test calls
 * utf8_common_prefix_ignore_separators and converts the returned length to
 * bool, so each check only tests whether the common prefix is non-zero.
 * The last pair shares a long prefix, so ASSERT_FALSE looks like it cannot
 * hold as written. The test is also not listed in the SUITE body of this
 * file. Presumably an equality helper was intended -- confirm before
 * enabling it.
 */
TEST test_utf8_equal_ignore_separators(void) {
    char *lhs = "Bünderstraße ";
    char *rhs = "Bünder-straße";
    bool matches;

    /* Trailing space on one side, hyphen on the other. */
    matches = utf8_common_prefix_ignore_separators(lhs, rhs);
    ASSERT(matches);

    /* Leading and trailing spaces versus an inner space. */
    lhs = " Bünder-straße ";
    rhs = "Bünder straße";
    matches = utf8_common_prefix_ignore_separators(lhs, rhs);
    ASSERT(matches);

    /* Same words, but the final letters differ ("a" versus "aa"). */
    lhs = "Bünder-straße-a";
    rhs = "Bünder straße aa";
    matches = utf8_common_prefix_ignore_separators(lhs, rhs);
    ASSERT_FALSE(matches);

    PASS();
}
|
||||
|
||||
TEST test_feature_array_add(void) {
|
||||
cstring_array *features = cstring_array_new();
|
||||
if (features == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
feature_array_add(features, 3, "a", "foo", "blee");
|
||||
feature_array_add(features, 1, "b");
|
||||
|
||||
ASSERT_EQ(cstring_array_num_strings(features), 2);
|
||||
|
||||
char *feature = cstring_array_get_string(features, 0);
|
||||
size_t len = cstring_array_token_length(features, 0);
|
||||
|
||||
if (feature == NULL) {
|
||||
cstring_array_destroy(features);
|
||||
FAIL();
|
||||
}
|
||||
|
||||
ASSERT_STR_EQ(feature, "a|foo|blee");
|
||||
ASSERT_EQ(len, strlen(feature));
|
||||
|
||||
feature = cstring_array_get_string(features, 1);
|
||||
len = cstring_array_token_length(features, 1);
|
||||
|
||||
if (feature == NULL) {
|
||||
cstring_array_destroy(features);
|
||||
FAIL();
|
||||
}
|
||||
|
||||
ASSERT_STR_EQ(feature, "b");
|
||||
ASSERT_EQ(len, strlen(feature));
|
||||
|
||||
char **strings = cstring_array_to_strings(features);
|
||||
if (strings == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
|
||||
ASSERT_STR_EQ(strings[0], "a|foo|blee");
|
||||
free(strings[0]);
|
||||
ASSERT_STR_EQ(strings[1], "b");
|
||||
free(strings[1]);
|
||||
|
||||
free(strings);
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
TEST test_char_array(void) {
|
||||
char_array *str = char_array_new();
|
||||
if (str == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
char_array_cat(str, "Bürgermeister");
|
||||
char_array_cat(str, "|");
|
||||
char_array_cat_reversed(str, "straße");
|
||||
|
||||
ASSERT_STR_EQ(str->a, "Bürgermeister|eßarts");
|
||||
|
||||
char_array_cat_printf(str, " %d %s %.2f \t ", 1234, "onetwothreefour", 12.34);
|
||||
|
||||
char *expected_output = "Bürgermeister|eßarts 1234 onetwothreefour 12.34 \t ";
|
||||
ASSERT_STR_EQ(str->a, expected_output);
|
||||
|
||||
char *a = char_array_to_string(str);
|
||||
ASSERT_STR_EQ(a, expected_output);
|
||||
|
||||
char *b = string_trim(a);
|
||||
ASSERT_STR_EQ(b, "Bürgermeister|eßarts 1234 onetwothreefour 12.34");
|
||||
|
||||
free(a);
|
||||
free(b);
|
||||
|
||||
str = char_array_new();
|
||||
#define SEPARATOR "|*|*|*|"
|
||||
|
||||
char_array_add_joined(str, SEPARATOR, true, 3, "dictionaries" SEPARATOR, "foo", "bar");
|
||||
|
||||
a = char_array_get_string(str);
|
||||
|
||||
ASSERT_STR_EQ(a, "dictionaries|*|*|*|foo|*|*|*|bar");
|
||||
|
||||
char_array_destroy(str);
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
TEST test_cstring_array(void) {
|
||||
size_t count = 0;
|
||||
cstring_array *array = cstring_array_split_no_copy(strdup("The|Low|End|Theory"), '|', &count);
|
||||
if (array == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
ASSERT_EQ(count, 4);
|
||||
|
||||
char *str = NULL;
|
||||
|
||||
str = cstring_array_get_string(array, 0);
|
||||
if (str == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
ASSERT_STR_EQ(str, "The");
|
||||
|
||||
str = cstring_array_get_string(array, 1);
|
||||
if (str == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
ASSERT_STR_EQ(str, "Low");
|
||||
|
||||
str = cstring_array_get_string(array, 2);
|
||||
if (str == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
ASSERT_STR_EQ(str, "End");
|
||||
|
||||
str = cstring_array_get_string(array, 3);
|
||||
if (str == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
ASSERT_STR_EQ(str, "Theory");
|
||||
|
||||
cstring_array_destroy(array);
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
TEST test_string_tree(void) {
|
||||
string_tree_t *tree = string_tree_new();
|
||||
if (tree == NULL) {
|
||||
FAIL();
|
||||
}
|
||||
|
||||
string_tree_finalize_token(tree);
|
||||
string_tree_add_string(tree, "Twenty-fifth");
|
||||
string_tree_add_string(tree, "Twentyfifth");
|
||||
string_tree_finalize_token(tree);
|
||||
string_tree_add_string(tree, "Bürgermeister");
|
||||
string_tree_add_string(tree, "Buergermeister");
|
||||
string_tree_add_string(tree, "Burgermeister");
|
||||
string_tree_finalize_token(tree);
|
||||
string_tree_add_string(tree, "Straße");
|
||||
string_tree_add_string(tree, "Strasse");
|
||||
string_tree_finalize_token(tree);
|
||||
|
||||
ASSERT_EQ(tree->token_indices->n - 1, 4);
|
||||
|
||||
ASSERT_EQ(string_tree_num_alternatives(tree, 0), 1);
|
||||
ASSERT_EQ(string_tree_num_alternatives(tree, 1), 2);
|
||||
ASSERT_EQ(string_tree_num_alternatives(tree, 2), 3);
|
||||
ASSERT_EQ(string_tree_num_alternatives(tree, 3), 2);
|
||||
|
||||
string_tree_iterator_t *iter = string_tree_iterator_new(tree);
|
||||
|
||||
if (iter == NULL) {
|
||||
string_tree_destroy(tree);
|
||||
FAIL();
|
||||
}
|
||||
size_t expected_num_tokens = 4;
|
||||
ASSERT_EQ(iter->num_tokens, expected_num_tokens);
|
||||
ASSERT_EQ(iter->remaining, 12);
|
||||
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 0);
|
||||
ASSERT_EQ(iter->path[2], 0);
|
||||
ASSERT_EQ(iter->path[3], 0);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 0);
|
||||
ASSERT_EQ(iter->path[2], 0);
|
||||
ASSERT_EQ(iter->path[3], 1);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 0);
|
||||
ASSERT_EQ(iter->path[2], 1);
|
||||
ASSERT_EQ(iter->path[3], 0);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 0);
|
||||
ASSERT_EQ(iter->path[2], 1);
|
||||
ASSERT_EQ(iter->path[3], 1);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 0);
|
||||
ASSERT_EQ(iter->path[2], 2);
|
||||
ASSERT_EQ(iter->path[3], 0);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 0);
|
||||
ASSERT_EQ(iter->path[2], 2);
|
||||
ASSERT_EQ(iter->path[3], 1);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 1);
|
||||
ASSERT_EQ(iter->path[2], 0);
|
||||
ASSERT_EQ(iter->path[3], 0);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 1);
|
||||
ASSERT_EQ(iter->path[2], 0);
|
||||
ASSERT_EQ(iter->path[3], 1);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 1);
|
||||
ASSERT_EQ(iter->path[2], 1);
|
||||
ASSERT_EQ(iter->path[3], 0);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 1);
|
||||
ASSERT_EQ(iter->path[2], 1);
|
||||
ASSERT_EQ(iter->path[3], 1);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 1);
|
||||
ASSERT_EQ(iter->path[2], 2);
|
||||
ASSERT_EQ(iter->path[3], 0);
|
||||
|
||||
string_tree_iterator_next(iter);
|
||||
ASSERT_FALSE(string_tree_iterator_done(iter));
|
||||
ASSERT_EQ(iter->path[0], 0);
|
||||
ASSERT_EQ(iter->path[1], 1);
|
||||
ASSERT_EQ(iter->path[2], 2);
|
||||
ASSERT_EQ(iter->path[3], 1);
|
||||
|
||||
string_tree_iterator_destroy(iter);
|
||||
string_tree_destroy(tree);
|
||||
|
||||
PASS();
|
||||
}
|
||||
|
||||
SUITE(libpostal_string_utils_tests) {
|
||||
RUN_TEST(test_utf8_reverse);
|
||||
RUN_TEST(test_utf8proc_iterate_reversed);
|
||||
RUN_TEST(test_utf8_compare_ignore_separators);
|
||||
RUN_TEST(test_feature_array_add);
|
||||
RUN_TEST(test_char_array);
|
||||
RUN_TEST(test_cstring_array);
|
||||
RUN_TEST(test_string_tree);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user