Add the LIBPOSTAL_EXPORT marker to all functions used in tests.
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
#include "features.h"
|
||||
|
||||
|
||||
void feature_array_add(cstring_array *features, size_t count, ...) {
|
||||
LIBPOSTAL_EXPORT void feature_array_add(cstring_array *features, size_t count, ...) {
|
||||
if (count <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -5,12 +5,13 @@
|
||||
#include <stdarg.h>
|
||||
#include "collections.h"
|
||||
#include "string_utils.h"
|
||||
#include "export.h"
|
||||
|
||||
#define FEATURE_SEPARATOR_CHAR "|"
|
||||
|
||||
// Add feature to array
|
||||
|
||||
void feature_array_add(cstring_array *features, size_t count, ...);
|
||||
LIBPOSTAL_EXPORT void feature_array_add(cstring_array *features, size_t count, ...);
|
||||
|
||||
// Add feature using printf format
|
||||
void feature_array_add_printf(cstring_array *features, char *format, ...);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#include "file_utils.h"
|
||||
|
||||
char *file_getline(FILE * f)
|
||||
LIBPOSTAL_EXPORT char *file_getline(FILE * f)
|
||||
{
|
||||
char buf[BUFSIZ];
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <stdbool.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "export.h"
|
||||
#include "libpostal_config.h"
|
||||
#include "string_utils.h"
|
||||
|
||||
@@ -52,7 +53,7 @@
|
||||
#define COMMA_SEPARATOR ","
|
||||
#define COMMA_SEPARATOR_LEN strlen(COMMA_SEPARATOR)
|
||||
|
||||
char *file_getline(FILE * f);
|
||||
LIBPOSTAL_EXPORT char *file_getline(FILE * f);
|
||||
|
||||
bool file_exists(char *filename);
|
||||
|
||||
|
||||
@@ -599,7 +599,7 @@ bool numex_module_init(void) {
|
||||
Must be called only once before the module can be used
|
||||
*/
|
||||
|
||||
bool numex_module_setup(char *filename) {
|
||||
LIBPOSTAL_EXPORT bool numex_module_setup(char *filename) {
|
||||
if (numex_table == NULL) {
|
||||
return numex_table_load(filename == NULL ? DEFAULT_NUMEX_PATH : filename);
|
||||
}
|
||||
@@ -610,7 +610,7 @@ bool numex_module_setup(char *filename) {
|
||||
Called once when done with the module (usually at
|
||||
the end of a main method)
|
||||
*/
|
||||
void numex_module_teardown(void) {
|
||||
LIBPOSTAL_EXPORT void numex_module_teardown(void) {
|
||||
numex_table_destroy();
|
||||
numex_table = NULL;
|
||||
}
|
||||
@@ -1101,7 +1101,7 @@ size_t ordinal_suffix_len(char *str, size_t len, char *lang) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
char *replace_numeric_expressions(char *str, char *lang) {
|
||||
LIBPOSTAL_EXPORT char *replace_numeric_expressions(char *str, char *lang) {
|
||||
numex_result_array *results = convert_numeric_expressions(str, lang);
|
||||
if (results == NULL) return NULL;
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#include "tokens.h"
|
||||
#include "trie.h"
|
||||
#include "trie_search.h"
|
||||
#include "export.h"
|
||||
|
||||
#define NUMEX_DATA_FILE "numex.dat"
|
||||
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR NUMEX_DATA_FILE
|
||||
@@ -146,7 +147,7 @@ typedef struct numex_result {
|
||||
|
||||
VECTOR_INIT(numex_result_array, numex_result_t)
|
||||
|
||||
char *replace_numeric_expressions(char *str, char *lang);
|
||||
LIBPOSTAL_EXPORT char *replace_numeric_expressions(char *str, char *lang);
|
||||
numex_result_array *convert_numeric_expressions(char *str, char *lang);
|
||||
size_t ordinal_suffix_len(char *s, size_t len, char *lang);
|
||||
size_t possible_ordinal_digit_len(char *str, size_t len);
|
||||
@@ -155,9 +156,9 @@ bool numex_table_write(FILE *file);
|
||||
bool numex_table_save(char *filename);
|
||||
|
||||
bool numex_module_init(void);
|
||||
bool numex_module_setup(char *filename);
|
||||
void numex_module_teardown(void);
|
||||
LIBPOSTAL_EXPORT bool numex_module_setup(char *filename);
|
||||
LIBPOSTAL_EXPORT void numex_module_teardown(void);
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -310240,7 +310240,7 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, boo
|
||||
|
||||
}
|
||||
|
||||
token_array *tokenize_keep_whitespace(const char *input) {
|
||||
LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input) {
|
||||
token_array *tokens = token_array_new();
|
||||
tokenize_add_tokens(tokens, input, strlen(input), true);
|
||||
return tokens;
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
#include "token_types.h"
|
||||
#include "tokens.h"
|
||||
#include "export.h"
|
||||
|
||||
typedef struct scanner {
|
||||
unsigned char *src, *cursor, *start, *end;
|
||||
@@ -19,7 +20,7 @@ uint16_t scan_token(scanner_t *s);
|
||||
scanner_t scanner_from_string(const char *input, size_t len);
|
||||
|
||||
void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace);
|
||||
token_array *tokenize_keep_whitespace(const char *input);
|
||||
LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input);
|
||||
token_array *tokenize(const char *input);
|
||||
|
||||
|
||||
|
||||
@@ -255,7 +255,7 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, boo
|
||||
|
||||
}
|
||||
|
||||
token_array *tokenize_keep_whitespace(const char *input) {
|
||||
LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input) {
|
||||
token_array *tokens = token_array_new();
|
||||
tokenize_add_tokens(tokens, input, strlen(input), true);
|
||||
return tokens;
|
||||
|
||||
@@ -57,7 +57,7 @@ inline size_t string_common_suffix(const char *str1, const char *str2) {
|
||||
return common_suffix;
|
||||
}
|
||||
|
||||
inline bool string_starts_with(const char *str, const char *start) {
|
||||
LIBPOSTAL_EXPORT inline bool string_starts_with(const char *str, const char *start) {
|
||||
for (; *start; str++, start++)
|
||||
if (*str != *start)
|
||||
return false;
|
||||
@@ -71,7 +71,7 @@ inline bool string_ends_with(const char *str, const char *ending) {
|
||||
return str_len < end_len ? false : !strcmp(str + str_len - end_len, ending);
|
||||
}
|
||||
|
||||
inline bool string_equals(const char *s1, const char *s2) {
|
||||
LIBPOSTAL_EXPORT inline bool string_equals(const char *s1, const char *s2) {
|
||||
if (s1 == NULL || s2 == NULL) return false;
|
||||
return strcmp(s1, s2) == 0;
|
||||
}
|
||||
@@ -168,7 +168,7 @@ uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_re
|
||||
return num_replacements;
|
||||
}
|
||||
|
||||
ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst) {
|
||||
LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst) {
|
||||
ssize_t len = 0;
|
||||
|
||||
const uint8_t *ptr = str + start;
|
||||
@@ -187,7 +187,7 @@ ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *ds
|
||||
return ret_len;
|
||||
}
|
||||
|
||||
char *utf8_reversed_string(const char *s) {
|
||||
LIBPOSTAL_EXPORT char *utf8_reversed_string(const char *s) {
|
||||
int32_t unich;
|
||||
ssize_t len, remaining;
|
||||
|
||||
@@ -477,7 +477,7 @@ size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *st
|
||||
|
||||
}
|
||||
|
||||
inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2) {
|
||||
LIBPOSTAL_EXPORT inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2) {
|
||||
return utf8_common_prefix_len_ignore_separators(str1, str2, strlen(str2));
|
||||
}
|
||||
|
||||
@@ -605,7 +605,7 @@ size_t string_left_spaces_len(char *str, size_t len) {
|
||||
return spaces;
|
||||
}
|
||||
|
||||
char *string_trim(char *str) {
|
||||
LIBPOSTAL_EXPORT char *string_trim(char *str) {
|
||||
size_t len = strlen(str);
|
||||
size_t left_spaces = string_left_spaces_len(str, len);
|
||||
size_t right_spaces = string_right_spaces_len(str, len);
|
||||
@@ -629,14 +629,14 @@ char_array *char_array_from_string_no_copy(char *str, size_t n) {
|
||||
return array;
|
||||
}
|
||||
|
||||
inline char *char_array_get_string(char_array *array) {
|
||||
LIBPOSTAL_EXPORT inline char *char_array_get_string(char_array *array) {
|
||||
if (array->n == 0 || array->a[array->n - 1] != '\0') {
|
||||
char_array_terminate(array);
|
||||
}
|
||||
return array->a;
|
||||
}
|
||||
|
||||
inline char *char_array_to_string(char_array *array) {
|
||||
LIBPOSTAL_EXPORT inline char *char_array_to_string(char_array *array) {
|
||||
if (array->n == 0 || array->a[array->n - 1] != '\0') {
|
||||
char_array_terminate(array);
|
||||
}
|
||||
@@ -661,7 +661,7 @@ inline size_t char_array_len(char_array *array) {
|
||||
}
|
||||
}
|
||||
|
||||
inline void char_array_append(char_array *array, char *str) {
|
||||
LIBPOSTAL_EXPORT inline void char_array_append(char_array *array, char *str) {
|
||||
while(*str) {
|
||||
char_array_push(array, *str++);
|
||||
}
|
||||
@@ -695,11 +695,11 @@ inline void char_array_append_reversed(char_array *array, char *str) {
|
||||
char_array_append_reversed_len(array, str, len);
|
||||
}
|
||||
|
||||
inline void char_array_terminate(char_array *array) {
|
||||
LIBPOSTAL_EXPORT inline void char_array_terminate(char_array *array) {
|
||||
char_array_push(array, '\0');
|
||||
}
|
||||
|
||||
inline void char_array_cat(char_array *array, char *str) {
|
||||
LIBPOSTAL_EXPORT inline void char_array_cat(char_array *array, char *str) {
|
||||
char_array_strip_nul_byte(array);
|
||||
char_array_append(array, str);
|
||||
char_array_terminate(array);
|
||||
@@ -712,7 +712,7 @@ inline void char_array_cat_len(char_array *array, char *str, size_t len) {
|
||||
}
|
||||
|
||||
|
||||
inline void char_array_cat_reversed(char_array *array, char *str) {
|
||||
LIBPOSTAL_EXPORT inline void char_array_cat_reversed(char_array *array, char *str) {
|
||||
char_array_strip_nul_byte(array);
|
||||
char_array_append_reversed(array, str);
|
||||
char_array_terminate(array);
|
||||
@@ -763,7 +763,7 @@ void char_array_add_vjoined(char_array *array, char *separator, bool strip_separ
|
||||
|
||||
}
|
||||
|
||||
inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) {
|
||||
LIBPOSTAL_EXPORT inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) {
|
||||
va_list args;
|
||||
va_start(args, count);
|
||||
char_array_add_vjoined(array, separator, strip_separator, count, args);
|
||||
@@ -807,14 +807,14 @@ void char_array_cat_vprintf(char_array *array, char *format, va_list args) {
|
||||
}
|
||||
}
|
||||
|
||||
void char_array_cat_printf(char_array *array, char *format, ...) {
|
||||
LIBPOSTAL_EXPORT void char_array_cat_printf(char_array *array, char *format, ...) {
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
char_array_cat_vprintf(array, format, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
cstring_array *cstring_array_new(void) {
|
||||
LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void) {
|
||||
cstring_array *array = malloc(sizeof(cstring_array));
|
||||
if (array == NULL) return NULL;
|
||||
|
||||
@@ -833,7 +833,7 @@ cstring_array *cstring_array_new(void) {
|
||||
return array;
|
||||
}
|
||||
|
||||
void cstring_array_destroy(cstring_array *self) {
|
||||
LIBPOSTAL_EXPORT void cstring_array_destroy(cstring_array *self) {
|
||||
if (self == NULL) return;
|
||||
if (self->indices) {
|
||||
uint32_array_destroy(self->indices);
|
||||
@@ -888,7 +888,7 @@ inline size_t cstring_array_used(cstring_array *self) {
|
||||
return self->str->n;
|
||||
}
|
||||
|
||||
inline size_t cstring_array_num_strings(cstring_array *self) {
|
||||
LIBPOSTAL_EXPORT inline size_t cstring_array_num_strings(cstring_array *self) {
|
||||
if (self == NULL) return 0;
|
||||
return self->indices->n;
|
||||
}
|
||||
@@ -957,13 +957,13 @@ inline int32_t cstring_array_get_offset(cstring_array *self, uint32_t i) {
|
||||
return (int32_t)self->indices->a[i];
|
||||
}
|
||||
|
||||
inline char *cstring_array_get_string(cstring_array *self, uint32_t i) {
|
||||
LIBPOSTAL_EXPORT inline char *cstring_array_get_string(cstring_array *self, uint32_t i) {
|
||||
int32_t data_index = cstring_array_get_offset(self, i);
|
||||
if (data_index < 0) return NULL;
|
||||
return self->str->a + data_index;
|
||||
}
|
||||
|
||||
inline int64_t cstring_array_token_length(cstring_array *self, uint32_t i) {
|
||||
LIBPOSTAL_EXPORT inline int64_t cstring_array_token_length(cstring_array *self, uint32_t i) {
|
||||
if (INVALID_INDEX(i, self->indices->n)) {
|
||||
return -1;
|
||||
}
|
||||
@@ -1014,7 +1014,7 @@ cstring_array *cstring_array_split_ignore_consecutive(char *str, const char *sep
|
||||
}
|
||||
|
||||
|
||||
cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count) {
|
||||
LIBPOSTAL_EXPORT cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count) {
|
||||
*count = 0;
|
||||
char *ptr = str;
|
||||
size_t len = strlen(str);
|
||||
@@ -1033,7 +1033,7 @@ cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *co
|
||||
}
|
||||
|
||||
|
||||
char **cstring_array_to_strings(cstring_array *self) {
|
||||
LIBPOSTAL_EXPORT char **cstring_array_to_strings(cstring_array *self) {
|
||||
char **strings = malloc(self->indices->n * sizeof(char *));
|
||||
|
||||
for (int i = 0; i < cstring_array_num_strings(self); i++) {
|
||||
@@ -1072,7 +1072,7 @@ string_tree_t *string_tree_new_size(size_t size) {
|
||||
|
||||
#define DEFAULT_STRING_TREE_SIZE 8
|
||||
|
||||
string_tree_t *string_tree_new(void) {
|
||||
LIBPOSTAL_EXPORT string_tree_t *string_tree_new(void) {
|
||||
return string_tree_new_size((size_t)DEFAULT_STRING_TREE_SIZE);
|
||||
}
|
||||
|
||||
@@ -1084,12 +1084,12 @@ inline char *string_tree_get_alternative(string_tree_t *self, size_t token_index
|
||||
return cstring_array_get_string(self->strings, token_start + alternative);
|
||||
}
|
||||
|
||||
inline void string_tree_finalize_token(string_tree_t *self) {
|
||||
LIBPOSTAL_EXPORT inline void string_tree_finalize_token(string_tree_t *self) {
|
||||
uint32_array_push(self->token_indices, (uint32_t)cstring_array_num_strings(self->strings));
|
||||
}
|
||||
|
||||
// terminated
|
||||
inline void string_tree_add_string(string_tree_t *self, char *str) {
|
||||
LIBPOSTAL_EXPORT inline void string_tree_add_string(string_tree_t *self, char *str) {
|
||||
cstring_array_add_string(self->strings, str);
|
||||
}
|
||||
|
||||
@@ -1114,13 +1114,13 @@ inline uint32_t string_tree_num_strings(string_tree_t *self) {
|
||||
return (uint32_t)cstring_array_num_strings(self->strings);
|
||||
}
|
||||
|
||||
inline uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i) {
|
||||
LIBPOSTAL_EXPORT inline uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i) {
|
||||
if (i >= self->token_indices->n) return 0;
|
||||
uint32_t n = self->token_indices->a[i + 1] - self->token_indices->a[i];
|
||||
return n > 0 ? n : 1;
|
||||
}
|
||||
|
||||
void string_tree_destroy(string_tree_t *self) {
|
||||
LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self) {
|
||||
if (self == NULL) return;
|
||||
|
||||
if (self->token_indices != NULL) {
|
||||
@@ -1134,7 +1134,7 @@ void string_tree_destroy(string_tree_t *self) {
|
||||
free(self);
|
||||
}
|
||||
|
||||
string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) {
|
||||
LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) {
|
||||
string_tree_iterator_t *self = malloc(sizeof(string_tree_iterator_t));
|
||||
self->tree = tree;
|
||||
|
||||
@@ -1165,7 +1165,7 @@ string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) {
|
||||
return self;
|
||||
}
|
||||
|
||||
void string_tree_iterator_next(string_tree_iterator_t *self) {
|
||||
LIBPOSTAL_EXPORT void string_tree_iterator_next(string_tree_iterator_t *self) {
|
||||
if (self->remaining > 0) {
|
||||
int i;
|
||||
for (i = self->num_tokens - 1; i >= 0; i--) {
|
||||
@@ -1194,11 +1194,11 @@ char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i)
|
||||
return cstring_array_get_string(self->tree->strings, base_index + offset);
|
||||
}
|
||||
|
||||
bool string_tree_iterator_done(string_tree_iterator_t *self) {
|
||||
LIBPOSTAL_EXPORT bool string_tree_iterator_done(string_tree_iterator_t *self) {
|
||||
return self->remaining == 0;
|
||||
}
|
||||
|
||||
void string_tree_iterator_destroy(string_tree_iterator_t *self) {
|
||||
LIBPOSTAL_EXPORT void string_tree_iterator_destroy(string_tree_iterator_t *self) {
|
||||
if (self == NULL) return;
|
||||
|
||||
if (self->path) {
|
||||
|
||||
@@ -17,6 +17,7 @@ Utilities for manipulating strings in C.
|
||||
#include "utf8proc/utf8proc.h"
|
||||
#include "vector.h"
|
||||
#include "strndup.h"
|
||||
#include "export.h"
|
||||
|
||||
#define MAX_UTF8_CHAR_SIZE 4
|
||||
|
||||
@@ -60,16 +61,16 @@ char *string_replace_char(char *str, char c1, char c2);
|
||||
bool string_replace_with_array(char *str, char *replace, char *with, char_array *result);
|
||||
char *string_replace(char *str, char *replace, char *with);
|
||||
|
||||
bool string_starts_with(const char *str, const char *start);
|
||||
LIBPOSTAL_EXPORT bool string_starts_with(const char *str, const char *start);
|
||||
bool string_ends_with(const char *str, const char *ending);
|
||||
|
||||
bool string_equals(const char *s1, const char *s2);
|
||||
LIBPOSTAL_EXPORT bool string_equals(const char *s1, const char *s2);
|
||||
|
||||
uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_repls, size_t trans_len);
|
||||
|
||||
// UTF-8 string methods
|
||||
char *utf8_reversed_string(const char *s); // returns a copy, caller frees
|
||||
ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst);
|
||||
LIBPOSTAL_EXPORT char *utf8_reversed_string(const char *s); // returns a copy, caller frees
|
||||
LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst);
|
||||
|
||||
// Casing functions return a copy, caller frees
|
||||
char *utf8_lower_options(const char *s, utf8proc_option_t options);
|
||||
@@ -81,7 +82,7 @@ int utf8_compare(const char *str1, const char *str2);
|
||||
int utf8_compare_len(const char *str1, const char *str2, size_t len);
|
||||
size_t utf8_common_prefix(const char *str1, const char *str2);
|
||||
size_t utf8_common_prefix_len(const char *str1, const char *str2, size_t len);
|
||||
size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2);
|
||||
LIBPOSTAL_EXPORT size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2);
|
||||
size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len);
|
||||
|
||||
bool utf8_is_hyphen(int32_t ch);
|
||||
@@ -100,7 +101,7 @@ ssize_t string_next_hyphen_index(char *str, size_t len);
|
||||
bool string_contains_hyphen(char *str);
|
||||
bool string_contains_hyphen_len(char *str, size_t len);
|
||||
|
||||
char *string_trim(char *str);
|
||||
LIBPOSTAL_EXPORT char *string_trim(char *str);
|
||||
|
||||
/* char_array is a dynamic character array defined in collections.h
|
||||
but has a few additional methods related to string manipulation.
|
||||
@@ -113,40 +114,40 @@ char_array *char_array_from_string(char *str);
|
||||
char_array *char_array_from_string_no_copy(char *str, size_t n);
|
||||
|
||||
// Gets the underlying C string for a char_array
|
||||
char *char_array_get_string(char_array *array);
|
||||
LIBPOSTAL_EXPORT char *char_array_get_string(char_array *array);
|
||||
|
||||
// Frees the char_array and returns a standard NUL-terminated string
|
||||
char *char_array_to_string(char_array *array);
|
||||
LIBPOSTAL_EXPORT char *char_array_to_string(char_array *array);
|
||||
|
||||
// Can use strlen(array->a) but this is faster
|
||||
size_t char_array_len(char_array *array);
|
||||
|
||||
// append_* methods do not NUL-terminate
|
||||
void char_array_append(char_array *array, char *str);
|
||||
LIBPOSTAL_EXPORT void char_array_append(char_array *array, char *str);
|
||||
void char_array_append_len(char_array *array, char *str, size_t len);
|
||||
void char_array_append_reversed(char_array *array, char *str);
|
||||
void char_array_append_reversed_len(char_array *array, char *str, size_t len);
|
||||
// add NUL terminator to a char_array
|
||||
void char_array_strip_nul_byte(char_array *array);
|
||||
void char_array_terminate(char_array *array);
|
||||
LIBPOSTAL_EXPORT void char_array_terminate(char_array *array);
|
||||
|
||||
// add_* methods NUL-terminate without stripping NUL-byte
|
||||
void char_array_add(char_array *array, char *str);
|
||||
void char_array_add_len(char_array *array, char *str, size_t len);
|
||||
|
||||
// Similar to strcat but with dynamic resizing, guaranteed NUL-terminated
|
||||
void char_array_cat(char_array *array, char *str);
|
||||
LIBPOSTAL_EXPORT void char_array_cat(char_array *array, char *str);
|
||||
void char_array_cat_len(char_array *array, char *str, size_t len);
|
||||
void char_array_cat_reversed(char_array *array, char *str);
|
||||
LIBPOSTAL_EXPORT void char_array_cat_reversed(char_array *array, char *str);
|
||||
void char_array_cat_reversed_len(char_array *array, char *str, size_t len);
|
||||
|
||||
// Similar to cat methods but with printf args
|
||||
void char_array_cat_vprintf(char_array *array, char *format, va_list args);
|
||||
void char_array_cat_printf(char_array *array, char *format, ...);
|
||||
LIBPOSTAL_EXPORT void char_array_cat_printf(char_array *array, char *format, ...);
|
||||
|
||||
// Mainly for paths or delimited strings
|
||||
void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
|
||||
void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
||||
LIBPOSTAL_EXPORT void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
||||
void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
||||
|
||||
|
||||
@@ -171,13 +172,13 @@ typedef struct {
|
||||
char_array *str;
|
||||
} cstring_array;
|
||||
|
||||
cstring_array *cstring_array_new(void);
|
||||
LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void);
|
||||
|
||||
cstring_array *cstring_array_new_size(size_t size);
|
||||
|
||||
size_t cstring_array_capacity(cstring_array *self);
|
||||
size_t cstring_array_used(cstring_array *self);
|
||||
size_t cstring_array_num_strings(cstring_array *self);
|
||||
LIBPOSTAL_EXPORT size_t cstring_array_num_strings(cstring_array *self);
|
||||
void cstring_array_resize(cstring_array *self, size_t size);
|
||||
void cstring_array_clear(cstring_array *self);
|
||||
|
||||
@@ -185,7 +186,7 @@ cstring_array *cstring_array_from_char_array(char_array *str);
|
||||
cstring_array *cstring_array_from_strings(char **strings, size_t n);
|
||||
|
||||
// Convert cstring_array to an array of n C strings and destroy the cstring_array
|
||||
char **cstring_array_to_strings(cstring_array *self);
|
||||
LIBPOSTAL_EXPORT char **cstring_array_to_strings(cstring_array *self);
|
||||
|
||||
// Split on delimiter
|
||||
cstring_array *cstring_array_split(char *str, const char *separator, size_t separator_len, size_t *count);
|
||||
@@ -193,7 +194,7 @@ cstring_array *cstring_array_split(char *str, const char *separator, size_t sepa
|
||||
cstring_array *cstring_array_split_ignore_consecutive(char *str, const char *separator, size_t separator_len, size_t *count);
|
||||
|
||||
// Split on delimiter by replacing (single character) separator with the NUL byte in the original string
|
||||
cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count);
|
||||
LIBPOSTAL_EXPORT cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count);
|
||||
|
||||
uint32_t cstring_array_start_token(cstring_array *self);
|
||||
uint32_t cstring_array_add_string(cstring_array *self, char *str);
|
||||
@@ -207,10 +208,10 @@ void cstring_array_cat_string_len(cstring_array *self, char *str, size_t len);
|
||||
|
||||
void cstring_array_terminate(cstring_array *self);
|
||||
int32_t cstring_array_get_offset(cstring_array *self, uint32_t i);
|
||||
char *cstring_array_get_string(cstring_array *self, uint32_t i);
|
||||
int64_t cstring_array_token_length(cstring_array *self, uint32_t i);
|
||||
LIBPOSTAL_EXPORT char *cstring_array_get_string(cstring_array *self, uint32_t i);
|
||||
LIBPOSTAL_EXPORT int64_t cstring_array_token_length(cstring_array *self, uint32_t i);
|
||||
|
||||
void cstring_array_destroy(cstring_array *self);
|
||||
LIBPOSTAL_EXPORT void cstring_array_destroy(cstring_array *self);
|
||||
|
||||
#define cstring_array_foreach(array, i, s, code) { \
|
||||
for (int __si = 0; __si < array->indices->n; __si++) { \
|
||||
@@ -246,16 +247,16 @@ typedef struct string_tree {
|
||||
cstring_array *strings;
|
||||
} string_tree_t;
|
||||
|
||||
string_tree_t *string_tree_new(void);
|
||||
LIBPOSTAL_EXPORT string_tree_t *string_tree_new(void);
|
||||
string_tree_t *string_tree_new_size(size_t size);
|
||||
|
||||
// get
|
||||
char *string_tree_get_alternative(string_tree_t *self, size_t token_index, uint32_t alternative);
|
||||
|
||||
// finalize
|
||||
void string_tree_finalize_token(string_tree_t *self);
|
||||
LIBPOSTAL_EXPORT void string_tree_finalize_token(string_tree_t *self);
|
||||
// terminated
|
||||
void string_tree_add_string(string_tree_t *self, char *str);
|
||||
LIBPOSTAL_EXPORT void string_tree_add_string(string_tree_t *self, char *str);
|
||||
void string_tree_add_string_len(string_tree_t *self, char *str, size_t len);
|
||||
// unterminated
|
||||
void string_tree_append_string(string_tree_t *self, char *str);
|
||||
@@ -264,9 +265,9 @@ void string_tree_append_string_len(string_tree_t *self, char *str, size_t len);
|
||||
uint32_t string_tree_num_tokens(string_tree_t *self);
|
||||
uint32_t string_tree_num_strings(string_tree_t *self);
|
||||
|
||||
uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i);
|
||||
LIBPOSTAL_EXPORT uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i);
|
||||
|
||||
void string_tree_destroy(string_tree_t *self);
|
||||
LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self);
|
||||
|
||||
typedef struct string_tree_iterator {
|
||||
string_tree_t *tree;
|
||||
@@ -275,11 +276,11 @@ typedef struct string_tree_iterator {
|
||||
uint32_t remaining;
|
||||
} string_tree_iterator_t;
|
||||
|
||||
string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree);
|
||||
void string_tree_iterator_next(string_tree_iterator_t *self);
|
||||
LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree);
|
||||
LIBPOSTAL_EXPORT void string_tree_iterator_next(string_tree_iterator_t *self);
|
||||
char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i);
|
||||
bool string_tree_iterator_done(string_tree_iterator_t *self);
|
||||
void string_tree_iterator_destroy(string_tree_iterator_t *self);
|
||||
LIBPOSTAL_EXPORT bool string_tree_iterator_done(string_tree_iterator_t *self);
|
||||
LIBPOSTAL_EXPORT void string_tree_iterator_destroy(string_tree_iterator_t *self);
|
||||
|
||||
|
||||
#define string_tree_iterator_foreach_token(iter, s, code) { \
|
||||
|
||||
@@ -665,7 +665,7 @@ static char *replace_groups(trie_t *trie, char *str, char *replacement, group_ca
|
||||
return char_array_to_string(ret);
|
||||
}
|
||||
|
||||
char *transliterate(char *trans_name, char *str, size_t len) {
|
||||
LIBPOSTAL_EXPORT char *transliterate(char *trans_name, char *str, size_t len) {
|
||||
if (trans_name == NULL || str == NULL) return NULL;
|
||||
|
||||
transliteration_table_t *trans_table = get_transliteration_table();
|
||||
@@ -1977,7 +1977,7 @@ bool transliteration_module_init(void) {
|
||||
return trans_table != NULL;
|
||||
}
|
||||
|
||||
bool transliteration_module_setup(char *filename) {
|
||||
LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename) {
|
||||
if (trans_table == NULL) {
|
||||
return transliteration_table_load(filename == NULL ? DEFAULT_TRANSLITERATION_PATH : filename);
|
||||
}
|
||||
@@ -1986,7 +1986,7 @@ bool transliteration_module_setup(char *filename) {
|
||||
}
|
||||
|
||||
|
||||
void transliteration_module_teardown(void) {
|
||||
LIBPOSTAL_EXPORT void transliteration_module_teardown(void) {
|
||||
transliteration_table_destroy();
|
||||
trans_table = NULL;
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "trie_search.h"
|
||||
#include "unicode_scripts.h"
|
||||
#include "strndup.h"
|
||||
#include "export.h"
|
||||
|
||||
#define LATIN_ASCII "latin-ascii"
|
||||
#define LATIN_ASCII_SIMPLE "latin-ascii-simple"
|
||||
@@ -152,7 +153,7 @@ void transliterator_destroy(transliterator_t *self);
|
||||
bool transliteration_table_add_transliterator(transliterator_t *trans);
|
||||
|
||||
transliterator_t *get_transliterator(char *name);
|
||||
char *transliterate(char *trans_name, char *str, size_t len);
|
||||
LIBPOSTAL_EXPORT char *transliterate(char *trans_name, char *str, size_t len);
|
||||
|
||||
bool transliteration_table_add_script_language(script_language_t script_language, transliterator_index_t index);
|
||||
transliterator_index_t get_transliterator_index_for_script_language(script_t script, char *language);
|
||||
@@ -172,7 +173,7 @@ bool transliteration_table_save(char *filename);
|
||||
|
||||
// Module setup/teardown
|
||||
bool transliteration_module_init(void);
|
||||
bool transliteration_module_setup(char *filename);
|
||||
void transliteration_module_teardown(void);
|
||||
LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename);
|
||||
LIBPOSTAL_EXPORT void transliteration_module_teardown(void);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -96,7 +96,7 @@ trie_t *trie_new_alphabet(uint8_t *alphabet, uint32_t alphabet_size) {
|
||||
return self;
|
||||
}
|
||||
|
||||
trie_t *trie_new(void) {
|
||||
LIBPOSTAL_EXPORT trie_t *trie_new(void) {
|
||||
return trie_new_alphabet(DEFAULT_ALPHABET, sizeof(DEFAULT_ALPHABET));
|
||||
}
|
||||
|
||||
@@ -661,7 +661,7 @@ bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, size_t len, ui
|
||||
}
|
||||
|
||||
|
||||
inline bool trie_add(trie_t *self, char *key, uint32_t data) {
|
||||
LIBPOSTAL_EXPORT inline bool trie_add(trie_t *self, char *key, uint32_t data) {
|
||||
size_t len = strlen(key);
|
||||
if (len == 0) return false;
|
||||
return trie_add_at_index(self, ROOT_NODE_ID, key, len + 1, data);
|
||||
@@ -754,7 +754,7 @@ inline bool trie_get_data_at_index(trie_t *self, uint32_t index, uint32_t *data
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool trie_get_data(trie_t *self, char *key, uint32_t *data) {
|
||||
LIBPOSTAL_EXPORT inline bool trie_get_data(trie_t *self, char *key, uint32_t *data) {
|
||||
uint32_t node_id = trie_get(self, key);
|
||||
return trie_get_data_at_index(self, node_id, data);
|
||||
}
|
||||
@@ -899,7 +899,7 @@ inline uint32_t trie_num_keys(trie_t *self) {
|
||||
/*
|
||||
Destructor
|
||||
*/
|
||||
void trie_destroy(trie_t *self) {
|
||||
LIBPOSTAL_EXPORT void trie_destroy(trie_t *self) {
|
||||
if (!self)
|
||||
return;
|
||||
|
||||
|
||||
@@ -33,6 +33,7 @@
|
||||
#include "klib/kvec.h"
|
||||
#include "log/log.h"
|
||||
#include "string_utils.h"
|
||||
#include "export.h"
|
||||
|
||||
#define TRIE_SIGNATURE 0xABABABAB
|
||||
#define NULL_NODE_ID 0
|
||||
@@ -79,7 +80,7 @@ typedef struct trie {
|
||||
} trie_t;
|
||||
|
||||
trie_t *trie_new_alphabet(uint8_t *alphabet, uint32_t alphabet_size);
|
||||
trie_t *trie_new(void);
|
||||
LIBPOSTAL_EXPORT trie_t *trie_new(void);
|
||||
|
||||
uint32_t trie_get_char_index(trie_t *self, unsigned char c);
|
||||
uint32_t trie_get_transition_index(trie_t *self, trie_node_t node, unsigned char c);
|
||||
@@ -97,7 +98,7 @@ trie_data_node_t trie_get_data_node(trie_t *self, trie_node_t node);
|
||||
bool trie_set_data_node(trie_t *self, uint32_t index, trie_data_node_t data_node);
|
||||
|
||||
bool trie_get_data_at_index(trie_t *self, uint32_t index, uint32_t *data);
|
||||
bool trie_get_data(trie_t *self, char *key, uint32_t *data);
|
||||
LIBPOSTAL_EXPORT bool trie_get_data(trie_t *self, char *key, uint32_t *data);
|
||||
bool trie_set_data_at_index(trie_t *self, uint32_t index, uint32_t data);
|
||||
bool trie_set_data(trie_t *self, char *key, uint32_t data);
|
||||
|
||||
@@ -113,7 +114,7 @@ int32_t trie_separate_tail(trie_t *self, uint32_t from_index, unsigned char *tai
|
||||
void trie_tail_merge(trie_t *self, uint32_t old_node_id, unsigned char *suffix, uint32_t data);
|
||||
|
||||
bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, size_t len, uint32_t data);
|
||||
bool trie_add(trie_t *self, char *key, uint32_t data);
|
||||
LIBPOSTAL_EXPORT bool trie_add(trie_t *self, char *key, uint32_t data);
|
||||
bool trie_add_len(trie_t *self, char *key, size_t len, uint32_t data);
|
||||
bool trie_add_suffix(trie_t *self, char *key, uint32_t data);
|
||||
bool trie_add_suffix_at_index(trie_t *self, char *key, uint32_t start_node_id, uint32_t data);
|
||||
@@ -146,7 +147,7 @@ bool trie_save(trie_t *self, char *path);
|
||||
trie_t *trie_read(FILE *file);
|
||||
trie_t *trie_load(char *path);
|
||||
|
||||
void trie_destroy(trie_t *self);
|
||||
LIBPOSTAL_EXPORT void trie_destroy(trie_t *self);
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -465,7 +465,7 @@ inline bool trie_search_tokens_with_phrases(trie_t *self, char *str, token_array
|
||||
return trie_search_tokens_from_index(self, str, tokens, ROOT_NODE_ID, phrases);
|
||||
}
|
||||
|
||||
inline phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
|
||||
LIBPOSTAL_EXPORT inline phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
|
||||
phrase_array *phrases = NULL;
|
||||
if (!trie_search_tokens_with_phrases(self, str, tokens, &phrases)) {
|
||||
return NULL;
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "tokens.h"
|
||||
#include "vector.h"
|
||||
#include "utf8proc/utf8proc.h"
|
||||
#include "export.h"
|
||||
|
||||
typedef struct phrase {
|
||||
uint32_t start;
|
||||
@@ -31,7 +32,7 @@ VECTOR_INIT(phrase_array, phrase_t)
|
||||
phrase_array *trie_search(trie_t *self, char *text);
|
||||
bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, phrase_array **phrases);
|
||||
bool trie_search_with_phrases(trie_t *self, char *text, phrase_array **phrases);
|
||||
phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens);
|
||||
LIBPOSTAL_EXPORT phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens);
|
||||
bool trie_search_tokens_from_index(trie_t *self, char *str, token_array *tokens, uint32_t start_node_id, phrase_array **phrases);
|
||||
bool trie_search_tokens_with_phrases(trie_t *self, char *text, token_array *tokens, phrase_array **phrases);
|
||||
phrase_t trie_search_suffixes_from_index(trie_t *self, char *word, size_t len, uint32_t start_node_id);
|
||||
|
||||
Reference in New Issue
Block a user