Adding the export marker to all functions used in tests.
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
#include "features.h"
|
#include "features.h"
|
||||||
|
|
||||||
|
|
||||||
void feature_array_add(cstring_array *features, size_t count, ...) {
|
LIBPOSTAL_EXPORT void feature_array_add(cstring_array *features, size_t count, ...) {
|
||||||
if (count <= 0) {
|
if (count <= 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,12 +5,13 @@
|
|||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#include "collections.h"
|
#include "collections.h"
|
||||||
#include "string_utils.h"
|
#include "string_utils.h"
|
||||||
|
#include "export.h"
|
||||||
|
|
||||||
#define FEATURE_SEPARATOR_CHAR "|"
|
#define FEATURE_SEPARATOR_CHAR "|"
|
||||||
|
|
||||||
// Add feature to array
|
// Add feature to array
|
||||||
|
|
||||||
void feature_array_add(cstring_array *features, size_t count, ...);
|
LIBPOSTAL_EXPORT void feature_array_add(cstring_array *features, size_t count, ...);
|
||||||
|
|
||||||
// Add feature using printf format
|
// Add feature using printf format
|
||||||
void feature_array_add_printf(cstring_array *features, char *format, ...);
|
void feature_array_add_printf(cstring_array *features, char *format, ...);
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#include "file_utils.h"
|
#include "file_utils.h"
|
||||||
|
|
||||||
char *file_getline(FILE * f)
|
LIBPOSTAL_EXPORT char *file_getline(FILE * f)
|
||||||
{
|
{
|
||||||
char buf[BUFSIZ];
|
char buf[BUFSIZ];
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
#include "export.h"
|
||||||
#include "libpostal_config.h"
|
#include "libpostal_config.h"
|
||||||
#include "string_utils.h"
|
#include "string_utils.h"
|
||||||
|
|
||||||
@@ -52,7 +53,7 @@
|
|||||||
#define COMMA_SEPARATOR ","
|
#define COMMA_SEPARATOR ","
|
||||||
#define COMMA_SEPARATOR_LEN strlen(COMMA_SEPARATOR)
|
#define COMMA_SEPARATOR_LEN strlen(COMMA_SEPARATOR)
|
||||||
|
|
||||||
char *file_getline(FILE * f);
|
LIBPOSTAL_EXPORT char *file_getline(FILE * f);
|
||||||
|
|
||||||
bool file_exists(char *filename);
|
bool file_exists(char *filename);
|
||||||
|
|
||||||
|
|||||||
@@ -599,7 +599,7 @@ bool numex_module_init(void) {
|
|||||||
Must be called only once before the module can be used
|
Must be called only once before the module can be used
|
||||||
*/
|
*/
|
||||||
|
|
||||||
bool numex_module_setup(char *filename) {
|
LIBPOSTAL_EXPORT bool numex_module_setup(char *filename) {
|
||||||
if (numex_table == NULL) {
|
if (numex_table == NULL) {
|
||||||
return numex_table_load(filename == NULL ? DEFAULT_NUMEX_PATH : filename);
|
return numex_table_load(filename == NULL ? DEFAULT_NUMEX_PATH : filename);
|
||||||
}
|
}
|
||||||
@@ -610,7 +610,7 @@ bool numex_module_setup(char *filename) {
|
|||||||
Called once when done with the module (usually at
|
Called once when done with the module (usually at
|
||||||
the end of a main method)
|
the end of a main method)
|
||||||
*/
|
*/
|
||||||
void numex_module_teardown(void) {
|
LIBPOSTAL_EXPORT void numex_module_teardown(void) {
|
||||||
numex_table_destroy();
|
numex_table_destroy();
|
||||||
numex_table = NULL;
|
numex_table = NULL;
|
||||||
}
|
}
|
||||||
@@ -1101,7 +1101,7 @@ size_t ordinal_suffix_len(char *str, size_t len, char *lang) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *replace_numeric_expressions(char *str, char *lang) {
|
LIBPOSTAL_EXPORT char *replace_numeric_expressions(char *str, char *lang) {
|
||||||
numex_result_array *results = convert_numeric_expressions(str, lang);
|
numex_result_array *results = convert_numeric_expressions(str, lang);
|
||||||
if (results == NULL) return NULL;
|
if (results == NULL) return NULL;
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,7 @@
|
|||||||
#include "tokens.h"
|
#include "tokens.h"
|
||||||
#include "trie.h"
|
#include "trie.h"
|
||||||
#include "trie_search.h"
|
#include "trie_search.h"
|
||||||
|
#include "export.h"
|
||||||
|
|
||||||
#define NUMEX_DATA_FILE "numex.dat"
|
#define NUMEX_DATA_FILE "numex.dat"
|
||||||
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR NUMEX_DATA_FILE
|
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR NUMEX_DATA_FILE
|
||||||
@@ -146,7 +147,7 @@ typedef struct numex_result {
|
|||||||
|
|
||||||
VECTOR_INIT(numex_result_array, numex_result_t)
|
VECTOR_INIT(numex_result_array, numex_result_t)
|
||||||
|
|
||||||
char *replace_numeric_expressions(char *str, char *lang);
|
LIBPOSTAL_EXPORT char *replace_numeric_expressions(char *str, char *lang);
|
||||||
numex_result_array *convert_numeric_expressions(char *str, char *lang);
|
numex_result_array *convert_numeric_expressions(char *str, char *lang);
|
||||||
size_t ordinal_suffix_len(char *s, size_t len, char *lang);
|
size_t ordinal_suffix_len(char *s, size_t len, char *lang);
|
||||||
size_t possible_ordinal_digit_len(char *str, size_t len);
|
size_t possible_ordinal_digit_len(char *str, size_t len);
|
||||||
@@ -155,9 +156,9 @@ bool numex_table_write(FILE *file);
|
|||||||
bool numex_table_save(char *filename);
|
bool numex_table_save(char *filename);
|
||||||
|
|
||||||
bool numex_module_init(void);
|
bool numex_module_init(void);
|
||||||
bool numex_module_setup(char *filename);
|
LIBPOSTAL_EXPORT bool numex_module_setup(char *filename);
|
||||||
void numex_module_teardown(void);
|
LIBPOSTAL_EXPORT void numex_module_teardown(void);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -310240,7 +310240,7 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, boo
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
token_array *tokenize_keep_whitespace(const char *input) {
|
LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input) {
|
||||||
token_array *tokens = token_array_new();
|
token_array *tokens = token_array_new();
|
||||||
tokenize_add_tokens(tokens, input, strlen(input), true);
|
tokenize_add_tokens(tokens, input, strlen(input), true);
|
||||||
return tokens;
|
return tokens;
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
|
|
||||||
#include "token_types.h"
|
#include "token_types.h"
|
||||||
#include "tokens.h"
|
#include "tokens.h"
|
||||||
|
#include "export.h"
|
||||||
|
|
||||||
typedef struct scanner {
|
typedef struct scanner {
|
||||||
unsigned char *src, *cursor, *start, *end;
|
unsigned char *src, *cursor, *start, *end;
|
||||||
@@ -19,7 +20,7 @@ uint16_t scan_token(scanner_t *s);
|
|||||||
scanner_t scanner_from_string(const char *input, size_t len);
|
scanner_t scanner_from_string(const char *input, size_t len);
|
||||||
|
|
||||||
void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace);
|
void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace);
|
||||||
token_array *tokenize_keep_whitespace(const char *input);
|
LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input);
|
||||||
token_array *tokenize(const char *input);
|
token_array *tokenize(const char *input);
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -255,7 +255,7 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, boo
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
token_array *tokenize_keep_whitespace(const char *input) {
|
LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input) {
|
||||||
token_array *tokens = token_array_new();
|
token_array *tokens = token_array_new();
|
||||||
tokenize_add_tokens(tokens, input, strlen(input), true);
|
tokenize_add_tokens(tokens, input, strlen(input), true);
|
||||||
return tokens;
|
return tokens;
|
||||||
|
|||||||
@@ -57,7 +57,7 @@ inline size_t string_common_suffix(const char *str1, const char *str2) {
|
|||||||
return common_suffix;
|
return common_suffix;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool string_starts_with(const char *str, const char *start) {
|
LIBPOSTAL_EXPORT inline bool string_starts_with(const char *str, const char *start) {
|
||||||
for (; *start; str++, start++)
|
for (; *start; str++, start++)
|
||||||
if (*str != *start)
|
if (*str != *start)
|
||||||
return false;
|
return false;
|
||||||
@@ -71,7 +71,7 @@ inline bool string_ends_with(const char *str, const char *ending) {
|
|||||||
return str_len < end_len ? false : !strcmp(str + str_len - end_len, ending);
|
return str_len < end_len ? false : !strcmp(str + str_len - end_len, ending);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool string_equals(const char *s1, const char *s2) {
|
LIBPOSTAL_EXPORT inline bool string_equals(const char *s1, const char *s2) {
|
||||||
if (s1 == NULL || s2 == NULL) return false;
|
if (s1 == NULL || s2 == NULL) return false;
|
||||||
return strcmp(s1, s2) == 0;
|
return strcmp(s1, s2) == 0;
|
||||||
}
|
}
|
||||||
@@ -168,7 +168,7 @@ uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_re
|
|||||||
return num_replacements;
|
return num_replacements;
|
||||||
}
|
}
|
||||||
|
|
||||||
ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst) {
|
LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst) {
|
||||||
ssize_t len = 0;
|
ssize_t len = 0;
|
||||||
|
|
||||||
const uint8_t *ptr = str + start;
|
const uint8_t *ptr = str + start;
|
||||||
@@ -187,7 +187,7 @@ ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *ds
|
|||||||
return ret_len;
|
return ret_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *utf8_reversed_string(const char *s) {
|
LIBPOSTAL_EXPORT char *utf8_reversed_string(const char *s) {
|
||||||
int32_t unich;
|
int32_t unich;
|
||||||
ssize_t len, remaining;
|
ssize_t len, remaining;
|
||||||
|
|
||||||
@@ -477,7 +477,7 @@ size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *st
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2) {
|
LIBPOSTAL_EXPORT inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2) {
|
||||||
return utf8_common_prefix_len_ignore_separators(str1, str2, strlen(str2));
|
return utf8_common_prefix_len_ignore_separators(str1, str2, strlen(str2));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -605,7 +605,7 @@ size_t string_left_spaces_len(char *str, size_t len) {
|
|||||||
return spaces;
|
return spaces;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *string_trim(char *str) {
|
LIBPOSTAL_EXPORT char *string_trim(char *str) {
|
||||||
size_t len = strlen(str);
|
size_t len = strlen(str);
|
||||||
size_t left_spaces = string_left_spaces_len(str, len);
|
size_t left_spaces = string_left_spaces_len(str, len);
|
||||||
size_t right_spaces = string_right_spaces_len(str, len);
|
size_t right_spaces = string_right_spaces_len(str, len);
|
||||||
@@ -629,14 +629,14 @@ char_array *char_array_from_string_no_copy(char *str, size_t n) {
|
|||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline char *char_array_get_string(char_array *array) {
|
LIBPOSTAL_EXPORT inline char *char_array_get_string(char_array *array) {
|
||||||
if (array->n == 0 || array->a[array->n - 1] != '\0') {
|
if (array->n == 0 || array->a[array->n - 1] != '\0') {
|
||||||
char_array_terminate(array);
|
char_array_terminate(array);
|
||||||
}
|
}
|
||||||
return array->a;
|
return array->a;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline char *char_array_to_string(char_array *array) {
|
LIBPOSTAL_EXPORT inline char *char_array_to_string(char_array *array) {
|
||||||
if (array->n == 0 || array->a[array->n - 1] != '\0') {
|
if (array->n == 0 || array->a[array->n - 1] != '\0') {
|
||||||
char_array_terminate(array);
|
char_array_terminate(array);
|
||||||
}
|
}
|
||||||
@@ -661,7 +661,7 @@ inline size_t char_array_len(char_array *array) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void char_array_append(char_array *array, char *str) {
|
LIBPOSTAL_EXPORT inline void char_array_append(char_array *array, char *str) {
|
||||||
while(*str) {
|
while(*str) {
|
||||||
char_array_push(array, *str++);
|
char_array_push(array, *str++);
|
||||||
}
|
}
|
||||||
@@ -695,11 +695,11 @@ inline void char_array_append_reversed(char_array *array, char *str) {
|
|||||||
char_array_append_reversed_len(array, str, len);
|
char_array_append_reversed_len(array, str, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void char_array_terminate(char_array *array) {
|
LIBPOSTAL_EXPORT inline void char_array_terminate(char_array *array) {
|
||||||
char_array_push(array, '\0');
|
char_array_push(array, '\0');
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void char_array_cat(char_array *array, char *str) {
|
LIBPOSTAL_EXPORT inline void char_array_cat(char_array *array, char *str) {
|
||||||
char_array_strip_nul_byte(array);
|
char_array_strip_nul_byte(array);
|
||||||
char_array_append(array, str);
|
char_array_append(array, str);
|
||||||
char_array_terminate(array);
|
char_array_terminate(array);
|
||||||
@@ -712,7 +712,7 @@ inline void char_array_cat_len(char_array *array, char *str, size_t len) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline void char_array_cat_reversed(char_array *array, char *str) {
|
LIBPOSTAL_EXPORT inline void char_array_cat_reversed(char_array *array, char *str) {
|
||||||
char_array_strip_nul_byte(array);
|
char_array_strip_nul_byte(array);
|
||||||
char_array_append_reversed(array, str);
|
char_array_append_reversed(array, str);
|
||||||
char_array_terminate(array);
|
char_array_terminate(array);
|
||||||
@@ -763,7 +763,7 @@ void char_array_add_vjoined(char_array *array, char *separator, bool strip_separ
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) {
|
LIBPOSTAL_EXPORT inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) {
|
||||||
va_list args;
|
va_list args;
|
||||||
va_start(args, count);
|
va_start(args, count);
|
||||||
char_array_add_vjoined(array, separator, strip_separator, count, args);
|
char_array_add_vjoined(array, separator, strip_separator, count, args);
|
||||||
@@ -807,14 +807,14 @@ void char_array_cat_vprintf(char_array *array, char *format, va_list args) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void char_array_cat_printf(char_array *array, char *format, ...) {
|
LIBPOSTAL_EXPORT void char_array_cat_printf(char_array *array, char *format, ...) {
|
||||||
va_list args;
|
va_list args;
|
||||||
va_start(args, format);
|
va_start(args, format);
|
||||||
char_array_cat_vprintf(array, format, args);
|
char_array_cat_vprintf(array, format, args);
|
||||||
va_end(args);
|
va_end(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
cstring_array *cstring_array_new(void) {
|
LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void) {
|
||||||
cstring_array *array = malloc(sizeof(cstring_array));
|
cstring_array *array = malloc(sizeof(cstring_array));
|
||||||
if (array == NULL) return NULL;
|
if (array == NULL) return NULL;
|
||||||
|
|
||||||
@@ -833,7 +833,7 @@ cstring_array *cstring_array_new(void) {
|
|||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
void cstring_array_destroy(cstring_array *self) {
|
LIBPOSTAL_EXPORT void cstring_array_destroy(cstring_array *self) {
|
||||||
if (self == NULL) return;
|
if (self == NULL) return;
|
||||||
if (self->indices) {
|
if (self->indices) {
|
||||||
uint32_array_destroy(self->indices);
|
uint32_array_destroy(self->indices);
|
||||||
@@ -888,7 +888,7 @@ inline size_t cstring_array_used(cstring_array *self) {
|
|||||||
return self->str->n;
|
return self->str->n;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline size_t cstring_array_num_strings(cstring_array *self) {
|
LIBPOSTAL_EXPORT inline size_t cstring_array_num_strings(cstring_array *self) {
|
||||||
if (self == NULL) return 0;
|
if (self == NULL) return 0;
|
||||||
return self->indices->n;
|
return self->indices->n;
|
||||||
}
|
}
|
||||||
@@ -957,13 +957,13 @@ inline int32_t cstring_array_get_offset(cstring_array *self, uint32_t i) {
|
|||||||
return (int32_t)self->indices->a[i];
|
return (int32_t)self->indices->a[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
inline char *cstring_array_get_string(cstring_array *self, uint32_t i) {
|
LIBPOSTAL_EXPORT inline char *cstring_array_get_string(cstring_array *self, uint32_t i) {
|
||||||
int32_t data_index = cstring_array_get_offset(self, i);
|
int32_t data_index = cstring_array_get_offset(self, i);
|
||||||
if (data_index < 0) return NULL;
|
if (data_index < 0) return NULL;
|
||||||
return self->str->a + data_index;
|
return self->str->a + data_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int64_t cstring_array_token_length(cstring_array *self, uint32_t i) {
|
LIBPOSTAL_EXPORT inline int64_t cstring_array_token_length(cstring_array *self, uint32_t i) {
|
||||||
if (INVALID_INDEX(i, self->indices->n)) {
|
if (INVALID_INDEX(i, self->indices->n)) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@@ -1014,7 +1014,7 @@ cstring_array *cstring_array_split_ignore_consecutive(char *str, const char *sep
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count) {
|
LIBPOSTAL_EXPORT cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count) {
|
||||||
*count = 0;
|
*count = 0;
|
||||||
char *ptr = str;
|
char *ptr = str;
|
||||||
size_t len = strlen(str);
|
size_t len = strlen(str);
|
||||||
@@ -1033,7 +1033,7 @@ cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *co
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
char **cstring_array_to_strings(cstring_array *self) {
|
LIBPOSTAL_EXPORT char **cstring_array_to_strings(cstring_array *self) {
|
||||||
char **strings = malloc(self->indices->n * sizeof(char *));
|
char **strings = malloc(self->indices->n * sizeof(char *));
|
||||||
|
|
||||||
for (int i = 0; i < cstring_array_num_strings(self); i++) {
|
for (int i = 0; i < cstring_array_num_strings(self); i++) {
|
||||||
@@ -1072,7 +1072,7 @@ string_tree_t *string_tree_new_size(size_t size) {
|
|||||||
|
|
||||||
#define DEFAULT_STRING_TREE_SIZE 8
|
#define DEFAULT_STRING_TREE_SIZE 8
|
||||||
|
|
||||||
string_tree_t *string_tree_new(void) {
|
LIBPOSTAL_EXPORT string_tree_t *string_tree_new(void) {
|
||||||
return string_tree_new_size((size_t)DEFAULT_STRING_TREE_SIZE);
|
return string_tree_new_size((size_t)DEFAULT_STRING_TREE_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1084,12 +1084,12 @@ inline char *string_tree_get_alternative(string_tree_t *self, size_t token_index
|
|||||||
return cstring_array_get_string(self->strings, token_start + alternative);
|
return cstring_array_get_string(self->strings, token_start + alternative);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void string_tree_finalize_token(string_tree_t *self) {
|
LIBPOSTAL_EXPORT inline void string_tree_finalize_token(string_tree_t *self) {
|
||||||
uint32_array_push(self->token_indices, (uint32_t)cstring_array_num_strings(self->strings));
|
uint32_array_push(self->token_indices, (uint32_t)cstring_array_num_strings(self->strings));
|
||||||
}
|
}
|
||||||
|
|
||||||
// terminated
|
// terminated
|
||||||
inline void string_tree_add_string(string_tree_t *self, char *str) {
|
LIBPOSTAL_EXPORT inline void string_tree_add_string(string_tree_t *self, char *str) {
|
||||||
cstring_array_add_string(self->strings, str);
|
cstring_array_add_string(self->strings, str);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1114,13 +1114,13 @@ inline uint32_t string_tree_num_strings(string_tree_t *self) {
|
|||||||
return (uint32_t)cstring_array_num_strings(self->strings);
|
return (uint32_t)cstring_array_num_strings(self->strings);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i) {
|
LIBPOSTAL_EXPORT inline uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i) {
|
||||||
if (i >= self->token_indices->n) return 0;
|
if (i >= self->token_indices->n) return 0;
|
||||||
uint32_t n = self->token_indices->a[i + 1] - self->token_indices->a[i];
|
uint32_t n = self->token_indices->a[i + 1] - self->token_indices->a[i];
|
||||||
return n > 0 ? n : 1;
|
return n > 0 ? n : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void string_tree_destroy(string_tree_t *self) {
|
LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self) {
|
||||||
if (self == NULL) return;
|
if (self == NULL) return;
|
||||||
|
|
||||||
if (self->token_indices != NULL) {
|
if (self->token_indices != NULL) {
|
||||||
@@ -1134,7 +1134,7 @@ void string_tree_destroy(string_tree_t *self) {
|
|||||||
free(self);
|
free(self);
|
||||||
}
|
}
|
||||||
|
|
||||||
string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) {
|
LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) {
|
||||||
string_tree_iterator_t *self = malloc(sizeof(string_tree_iterator_t));
|
string_tree_iterator_t *self = malloc(sizeof(string_tree_iterator_t));
|
||||||
self->tree = tree;
|
self->tree = tree;
|
||||||
|
|
||||||
@@ -1165,7 +1165,7 @@ string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) {
|
|||||||
return self;
|
return self;
|
||||||
}
|
}
|
||||||
|
|
||||||
void string_tree_iterator_next(string_tree_iterator_t *self) {
|
LIBPOSTAL_EXPORT void string_tree_iterator_next(string_tree_iterator_t *self) {
|
||||||
if (self->remaining > 0) {
|
if (self->remaining > 0) {
|
||||||
int i;
|
int i;
|
||||||
for (i = self->num_tokens - 1; i >= 0; i--) {
|
for (i = self->num_tokens - 1; i >= 0; i--) {
|
||||||
@@ -1194,11 +1194,11 @@ char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i)
|
|||||||
return cstring_array_get_string(self->tree->strings, base_index + offset);
|
return cstring_array_get_string(self->tree->strings, base_index + offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool string_tree_iterator_done(string_tree_iterator_t *self) {
|
LIBPOSTAL_EXPORT bool string_tree_iterator_done(string_tree_iterator_t *self) {
|
||||||
return self->remaining == 0;
|
return self->remaining == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void string_tree_iterator_destroy(string_tree_iterator_t *self) {
|
LIBPOSTAL_EXPORT void string_tree_iterator_destroy(string_tree_iterator_t *self) {
|
||||||
if (self == NULL) return;
|
if (self == NULL) return;
|
||||||
|
|
||||||
if (self->path) {
|
if (self->path) {
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ Utilities for manipulating strings in C.
|
|||||||
#include "utf8proc/utf8proc.h"
|
#include "utf8proc/utf8proc.h"
|
||||||
#include "vector.h"
|
#include "vector.h"
|
||||||
#include "strndup.h"
|
#include "strndup.h"
|
||||||
|
#include "export.h"
|
||||||
|
|
||||||
#define MAX_UTF8_CHAR_SIZE 4
|
#define MAX_UTF8_CHAR_SIZE 4
|
||||||
|
|
||||||
@@ -60,16 +61,16 @@ char *string_replace_char(char *str, char c1, char c2);
|
|||||||
bool string_replace_with_array(char *str, char *replace, char *with, char_array *result);
|
bool string_replace_with_array(char *str, char *replace, char *with, char_array *result);
|
||||||
char *string_replace(char *str, char *replace, char *with);
|
char *string_replace(char *str, char *replace, char *with);
|
||||||
|
|
||||||
bool string_starts_with(const char *str, const char *start);
|
LIBPOSTAL_EXPORT bool string_starts_with(const char *str, const char *start);
|
||||||
bool string_ends_with(const char *str, const char *ending);
|
bool string_ends_with(const char *str, const char *ending);
|
||||||
|
|
||||||
bool string_equals(const char *s1, const char *s2);
|
LIBPOSTAL_EXPORT bool string_equals(const char *s1, const char *s2);
|
||||||
|
|
||||||
uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_repls, size_t trans_len);
|
uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_repls, size_t trans_len);
|
||||||
|
|
||||||
// UTF-8 string methods
|
// UTF-8 string methods
|
||||||
char *utf8_reversed_string(const char *s); // returns a copy, caller frees
|
LIBPOSTAL_EXPORT char *utf8_reversed_string(const char *s); // returns a copy, caller frees
|
||||||
ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst);
|
LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst);
|
||||||
|
|
||||||
// Casing functions return a copy, caller frees
|
// Casing functions return a copy, caller frees
|
||||||
char *utf8_lower_options(const char *s, utf8proc_option_t options);
|
char *utf8_lower_options(const char *s, utf8proc_option_t options);
|
||||||
@@ -81,7 +82,7 @@ int utf8_compare(const char *str1, const char *str2);
|
|||||||
int utf8_compare_len(const char *str1, const char *str2, size_t len);
|
int utf8_compare_len(const char *str1, const char *str2, size_t len);
|
||||||
size_t utf8_common_prefix(const char *str1, const char *str2);
|
size_t utf8_common_prefix(const char *str1, const char *str2);
|
||||||
size_t utf8_common_prefix_len(const char *str1, const char *str2, size_t len);
|
size_t utf8_common_prefix_len(const char *str1, const char *str2, size_t len);
|
||||||
size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2);
|
LIBPOSTAL_EXPORT size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2);
|
||||||
size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len);
|
size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len);
|
||||||
|
|
||||||
bool utf8_is_hyphen(int32_t ch);
|
bool utf8_is_hyphen(int32_t ch);
|
||||||
@@ -100,7 +101,7 @@ ssize_t string_next_hyphen_index(char *str, size_t len);
|
|||||||
bool string_contains_hyphen(char *str);
|
bool string_contains_hyphen(char *str);
|
||||||
bool string_contains_hyphen_len(char *str, size_t len);
|
bool string_contains_hyphen_len(char *str, size_t len);
|
||||||
|
|
||||||
char *string_trim(char *str);
|
LIBPOSTAL_EXPORT char *string_trim(char *str);
|
||||||
|
|
||||||
/* char_array is a dynamic character array defined in collections.h
|
/* char_array is a dynamic character array defined in collections.h
|
||||||
but has a few additional methods related to string manipulation.
|
but has a few additional methods related to string manipulation.
|
||||||
@@ -113,40 +114,40 @@ char_array *char_array_from_string(char *str);
|
|||||||
char_array *char_array_from_string_no_copy(char *str, size_t n);
|
char_array *char_array_from_string_no_copy(char *str, size_t n);
|
||||||
|
|
||||||
// Gets the underlying C string for a char_array
|
// Gets the underlying C string for a char_array
|
||||||
char *char_array_get_string(char_array *array);
|
LIBPOSTAL_EXPORT char *char_array_get_string(char_array *array);
|
||||||
|
|
||||||
// Frees the char_array and returns a standard NUL-terminated string
|
// Frees the char_array and returns a standard NUL-terminated string
|
||||||
char *char_array_to_string(char_array *array);
|
LIBPOSTAL_EXPORT char *char_array_to_string(char_array *array);
|
||||||
|
|
||||||
// Can use strlen(array->a) but this is faster
|
// Can use strlen(array->a) but this is faster
|
||||||
size_t char_array_len(char_array *array);
|
size_t char_array_len(char_array *array);
|
||||||
|
|
||||||
// append_* methods do not NUL-terminate
|
// append_* methods do not NUL-terminate
|
||||||
void char_array_append(char_array *array, char *str);
|
LIBPOSTAL_EXPORT void char_array_append(char_array *array, char *str);
|
||||||
void char_array_append_len(char_array *array, char *str, size_t len);
|
void char_array_append_len(char_array *array, char *str, size_t len);
|
||||||
void char_array_append_reversed(char_array *array, char *str);
|
void char_array_append_reversed(char_array *array, char *str);
|
||||||
void char_array_append_reversed_len(char_array *array, char *str, size_t len);
|
void char_array_append_reversed_len(char_array *array, char *str, size_t len);
|
||||||
// add NUL terminator to a char_array
|
// add NUL terminator to a char_array
|
||||||
void char_array_strip_nul_byte(char_array *array);
|
void char_array_strip_nul_byte(char_array *array);
|
||||||
void char_array_terminate(char_array *array);
|
LIBPOSTAL_EXPORT void char_array_terminate(char_array *array);
|
||||||
|
|
||||||
// add_* methods NUL-terminate without stripping NUL-byte
|
// add_* methods NUL-terminate without stripping NUL-byte
|
||||||
void char_array_add(char_array *array, char *str);
|
void char_array_add(char_array *array, char *str);
|
||||||
void char_array_add_len(char_array *array, char *str, size_t len);
|
void char_array_add_len(char_array *array, char *str, size_t len);
|
||||||
|
|
||||||
// Similar to strcat but with dynamic resizing, guaranteed NUL-terminated
|
// Similar to strcat but with dynamic resizing, guaranteed NUL-terminated
|
||||||
void char_array_cat(char_array *array, char *str);
|
LIBPOSTAL_EXPORT void char_array_cat(char_array *array, char *str);
|
||||||
void char_array_cat_len(char_array *array, char *str, size_t len);
|
void char_array_cat_len(char_array *array, char *str, size_t len);
|
||||||
void char_array_cat_reversed(char_array *array, char *str);
|
LIBPOSTAL_EXPORT void char_array_cat_reversed(char_array *array, char *str);
|
||||||
void char_array_cat_reversed_len(char_array *array, char *str, size_t len);
|
void char_array_cat_reversed_len(char_array *array, char *str, size_t len);
|
||||||
|
|
||||||
// Similar to cat methods but with printf args
|
// Similar to cat methods but with printf args
|
||||||
void char_array_cat_vprintf(char_array *array, char *format, va_list args);
|
void char_array_cat_vprintf(char_array *array, char *format, va_list args);
|
||||||
void char_array_cat_printf(char_array *array, char *format, ...);
|
LIBPOSTAL_EXPORT void char_array_cat_printf(char_array *array, char *format, ...);
|
||||||
|
|
||||||
// Mainly for paths or delimited strings
|
// Mainly for paths or delimited strings
|
||||||
void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
|
void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
|
||||||
void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
LIBPOSTAL_EXPORT void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
||||||
void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
||||||
|
|
||||||
|
|
||||||
@@ -171,13 +172,13 @@ typedef struct {
|
|||||||
char_array *str;
|
char_array *str;
|
||||||
} cstring_array;
|
} cstring_array;
|
||||||
|
|
||||||
cstring_array *cstring_array_new(void);
|
LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void);
|
||||||
|
|
||||||
cstring_array *cstring_array_new_size(size_t size);
|
cstring_array *cstring_array_new_size(size_t size);
|
||||||
|
|
||||||
size_t cstring_array_capacity(cstring_array *self);
|
size_t cstring_array_capacity(cstring_array *self);
|
||||||
size_t cstring_array_used(cstring_array *self);
|
size_t cstring_array_used(cstring_array *self);
|
||||||
size_t cstring_array_num_strings(cstring_array *self);
|
LIBPOSTAL_EXPORT size_t cstring_array_num_strings(cstring_array *self);
|
||||||
void cstring_array_resize(cstring_array *self, size_t size);
|
void cstring_array_resize(cstring_array *self, size_t size);
|
||||||
void cstring_array_clear(cstring_array *self);
|
void cstring_array_clear(cstring_array *self);
|
||||||
|
|
||||||
@@ -185,7 +186,7 @@ cstring_array *cstring_array_from_char_array(char_array *str);
|
|||||||
cstring_array *cstring_array_from_strings(char **strings, size_t n);
|
cstring_array *cstring_array_from_strings(char **strings, size_t n);
|
||||||
|
|
||||||
// Convert cstring_array to an array of n C strings and destroy the cstring_array
|
// Convert cstring_array to an array of n C strings and destroy the cstring_array
|
||||||
char **cstring_array_to_strings(cstring_array *self);
|
LIBPOSTAL_EXPORT char **cstring_array_to_strings(cstring_array *self);
|
||||||
|
|
||||||
// Split on delimiter
|
// Split on delimiter
|
||||||
cstring_array *cstring_array_split(char *str, const char *separator, size_t separator_len, size_t *count);
|
cstring_array *cstring_array_split(char *str, const char *separator, size_t separator_len, size_t *count);
|
||||||
@@ -193,7 +194,7 @@ cstring_array *cstring_array_split(char *str, const char *separator, size_t sepa
|
|||||||
cstring_array *cstring_array_split_ignore_consecutive(char *str, const char *separator, size_t separator_len, size_t *count);
|
cstring_array *cstring_array_split_ignore_consecutive(char *str, const char *separator, size_t separator_len, size_t *count);
|
||||||
|
|
||||||
// Split on delimiter by replacing (single character) separator with the NUL byte in the original string
|
// Split on delimiter by replacing (single character) separator with the NUL byte in the original string
|
||||||
cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count);
|
LIBPOSTAL_EXPORT cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count);
|
||||||
|
|
||||||
uint32_t cstring_array_start_token(cstring_array *self);
|
uint32_t cstring_array_start_token(cstring_array *self);
|
||||||
uint32_t cstring_array_add_string(cstring_array *self, char *str);
|
uint32_t cstring_array_add_string(cstring_array *self, char *str);
|
||||||
@@ -207,10 +208,10 @@ void cstring_array_cat_string_len(cstring_array *self, char *str, size_t len);
|
|||||||
|
|
||||||
void cstring_array_terminate(cstring_array *self);
|
void cstring_array_terminate(cstring_array *self);
|
||||||
int32_t cstring_array_get_offset(cstring_array *self, uint32_t i);
|
int32_t cstring_array_get_offset(cstring_array *self, uint32_t i);
|
||||||
char *cstring_array_get_string(cstring_array *self, uint32_t i);
|
LIBPOSTAL_EXPORT char *cstring_array_get_string(cstring_array *self, uint32_t i);
|
||||||
int64_t cstring_array_token_length(cstring_array *self, uint32_t i);
|
LIBPOSTAL_EXPORT int64_t cstring_array_token_length(cstring_array *self, uint32_t i);
|
||||||
|
|
||||||
void cstring_array_destroy(cstring_array *self);
|
LIBPOSTAL_EXPORT void cstring_array_destroy(cstring_array *self);
|
||||||
|
|
||||||
#define cstring_array_foreach(array, i, s, code) { \
|
#define cstring_array_foreach(array, i, s, code) { \
|
||||||
for (int __si = 0; __si < array->indices->n; __si++) { \
|
for (int __si = 0; __si < array->indices->n; __si++) { \
|
||||||
@@ -246,16 +247,16 @@ typedef struct string_tree {
|
|||||||
cstring_array *strings;
|
cstring_array *strings;
|
||||||
} string_tree_t;
|
} string_tree_t;
|
||||||
|
|
||||||
string_tree_t *string_tree_new(void);
|
LIBPOSTAL_EXPORT string_tree_t *string_tree_new(void);
|
||||||
string_tree_t *string_tree_new_size(size_t size);
|
string_tree_t *string_tree_new_size(size_t size);
|
||||||
|
|
||||||
// get
|
// get
|
||||||
char *string_tree_get_alternative(string_tree_t *self, size_t token_index, uint32_t alternative);
|
char *string_tree_get_alternative(string_tree_t *self, size_t token_index, uint32_t alternative);
|
||||||
|
|
||||||
// finalize
|
// finalize
|
||||||
void string_tree_finalize_token(string_tree_t *self);
|
LIBPOSTAL_EXPORT void string_tree_finalize_token(string_tree_t *self);
|
||||||
// terminated
|
// terminated
|
||||||
void string_tree_add_string(string_tree_t *self, char *str);
|
LIBPOSTAL_EXPORT void string_tree_add_string(string_tree_t *self, char *str);
|
||||||
void string_tree_add_string_len(string_tree_t *self, char *str, size_t len);
|
void string_tree_add_string_len(string_tree_t *self, char *str, size_t len);
|
||||||
// unterminated
|
// unterminated
|
||||||
void string_tree_append_string(string_tree_t *self, char *str);
|
void string_tree_append_string(string_tree_t *self, char *str);
|
||||||
@@ -264,9 +265,9 @@ void string_tree_append_string_len(string_tree_t *self, char *str, size_t len);
|
|||||||
uint32_t string_tree_num_tokens(string_tree_t *self);
|
uint32_t string_tree_num_tokens(string_tree_t *self);
|
||||||
uint32_t string_tree_num_strings(string_tree_t *self);
|
uint32_t string_tree_num_strings(string_tree_t *self);
|
||||||
|
|
||||||
uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i);
|
LIBPOSTAL_EXPORT uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i);
|
||||||
|
|
||||||
void string_tree_destroy(string_tree_t *self);
|
LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self);
|
||||||
|
|
||||||
typedef struct string_tree_iterator {
|
typedef struct string_tree_iterator {
|
||||||
string_tree_t *tree;
|
string_tree_t *tree;
|
||||||
@@ -275,11 +276,11 @@ typedef struct string_tree_iterator {
|
|||||||
uint32_t remaining;
|
uint32_t remaining;
|
||||||
} string_tree_iterator_t;
|
} string_tree_iterator_t;
|
||||||
|
|
||||||
string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree);
|
LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree);
|
||||||
void string_tree_iterator_next(string_tree_iterator_t *self);
|
LIBPOSTAL_EXPORT void string_tree_iterator_next(string_tree_iterator_t *self);
|
||||||
char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i);
|
char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i);
|
||||||
bool string_tree_iterator_done(string_tree_iterator_t *self);
|
LIBPOSTAL_EXPORT bool string_tree_iterator_done(string_tree_iterator_t *self);
|
||||||
void string_tree_iterator_destroy(string_tree_iterator_t *self);
|
LIBPOSTAL_EXPORT void string_tree_iterator_destroy(string_tree_iterator_t *self);
|
||||||
|
|
||||||
|
|
||||||
#define string_tree_iterator_foreach_token(iter, s, code) { \
|
#define string_tree_iterator_foreach_token(iter, s, code) { \
|
||||||
|
|||||||
@@ -665,7 +665,7 @@ static char *replace_groups(trie_t *trie, char *str, char *replacement, group_ca
|
|||||||
return char_array_to_string(ret);
|
return char_array_to_string(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
char *transliterate(char *trans_name, char *str, size_t len) {
|
LIBPOSTAL_EXPORT char *transliterate(char *trans_name, char *str, size_t len) {
|
||||||
if (trans_name == NULL || str == NULL) return NULL;
|
if (trans_name == NULL || str == NULL) return NULL;
|
||||||
|
|
||||||
transliteration_table_t *trans_table = get_transliteration_table();
|
transliteration_table_t *trans_table = get_transliteration_table();
|
||||||
@@ -1977,7 +1977,7 @@ bool transliteration_module_init(void) {
|
|||||||
return trans_table != NULL;
|
return trans_table != NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool transliteration_module_setup(char *filename) {
|
LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename) {
|
||||||
if (trans_table == NULL) {
|
if (trans_table == NULL) {
|
||||||
return transliteration_table_load(filename == NULL ? DEFAULT_TRANSLITERATION_PATH : filename);
|
return transliteration_table_load(filename == NULL ? DEFAULT_TRANSLITERATION_PATH : filename);
|
||||||
}
|
}
|
||||||
@@ -1986,7 +1986,7 @@ bool transliteration_module_setup(char *filename) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void transliteration_module_teardown(void) {
|
LIBPOSTAL_EXPORT void transliteration_module_teardown(void) {
|
||||||
transliteration_table_destroy();
|
transliteration_table_destroy();
|
||||||
trans_table = NULL;
|
trans_table = NULL;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
#include "trie_search.h"
|
#include "trie_search.h"
|
||||||
#include "unicode_scripts.h"
|
#include "unicode_scripts.h"
|
||||||
#include "strndup.h"
|
#include "strndup.h"
|
||||||
|
#include "export.h"
|
||||||
|
|
||||||
#define LATIN_ASCII "latin-ascii"
|
#define LATIN_ASCII "latin-ascii"
|
||||||
#define LATIN_ASCII_SIMPLE "latin-ascii-simple"
|
#define LATIN_ASCII_SIMPLE "latin-ascii-simple"
|
||||||
@@ -152,7 +153,7 @@ void transliterator_destroy(transliterator_t *self);
|
|||||||
bool transliteration_table_add_transliterator(transliterator_t *trans);
|
bool transliteration_table_add_transliterator(transliterator_t *trans);
|
||||||
|
|
||||||
transliterator_t *get_transliterator(char *name);
|
transliterator_t *get_transliterator(char *name);
|
||||||
char *transliterate(char *trans_name, char *str, size_t len);
|
LIBPOSTAL_EXPORT char *transliterate(char *trans_name, char *str, size_t len);
|
||||||
|
|
||||||
bool transliteration_table_add_script_language(script_language_t script_language, transliterator_index_t index);
|
bool transliteration_table_add_script_language(script_language_t script_language, transliterator_index_t index);
|
||||||
transliterator_index_t get_transliterator_index_for_script_language(script_t script, char *language);
|
transliterator_index_t get_transliterator_index_for_script_language(script_t script, char *language);
|
||||||
@@ -172,7 +173,7 @@ bool transliteration_table_save(char *filename);
|
|||||||
|
|
||||||
// Module setup/teardown
|
// Module setup/teardown
|
||||||
bool transliteration_module_init(void);
|
bool transliteration_module_init(void);
|
||||||
bool transliteration_module_setup(char *filename);
|
LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename);
|
||||||
void transliteration_module_teardown(void);
|
LIBPOSTAL_EXPORT void transliteration_module_teardown(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -96,7 +96,7 @@ trie_t *trie_new_alphabet(uint8_t *alphabet, uint32_t alphabet_size) {
|
|||||||
return self;
|
return self;
|
||||||
}
|
}
|
||||||
|
|
||||||
trie_t *trie_new(void) {
|
LIBPOSTAL_EXPORT trie_t *trie_new(void) {
|
||||||
return trie_new_alphabet(DEFAULT_ALPHABET, sizeof(DEFAULT_ALPHABET));
|
return trie_new_alphabet(DEFAULT_ALPHABET, sizeof(DEFAULT_ALPHABET));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -661,7 +661,7 @@ bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, size_t len, ui
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline bool trie_add(trie_t *self, char *key, uint32_t data) {
|
LIBPOSTAL_EXPORT inline bool trie_add(trie_t *self, char *key, uint32_t data) {
|
||||||
size_t len = strlen(key);
|
size_t len = strlen(key);
|
||||||
if (len == 0) return false;
|
if (len == 0) return false;
|
||||||
return trie_add_at_index(self, ROOT_NODE_ID, key, len + 1, data);
|
return trie_add_at_index(self, ROOT_NODE_ID, key, len + 1, data);
|
||||||
@@ -754,7 +754,7 @@ inline bool trie_get_data_at_index(trie_t *self, uint32_t index, uint32_t *data
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool trie_get_data(trie_t *self, char *key, uint32_t *data) {
|
LIBPOSTAL_EXPORT inline bool trie_get_data(trie_t *self, char *key, uint32_t *data) {
|
||||||
uint32_t node_id = trie_get(self, key);
|
uint32_t node_id = trie_get(self, key);
|
||||||
return trie_get_data_at_index(self, node_id, data);
|
return trie_get_data_at_index(self, node_id, data);
|
||||||
}
|
}
|
||||||
@@ -899,7 +899,7 @@ inline uint32_t trie_num_keys(trie_t *self) {
|
|||||||
/*
|
/*
|
||||||
Destructor
|
Destructor
|
||||||
*/
|
*/
|
||||||
void trie_destroy(trie_t *self) {
|
LIBPOSTAL_EXPORT void trie_destroy(trie_t *self) {
|
||||||
if (!self)
|
if (!self)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|||||||
@@ -33,6 +33,7 @@
|
|||||||
#include "klib/kvec.h"
|
#include "klib/kvec.h"
|
||||||
#include "log/log.h"
|
#include "log/log.h"
|
||||||
#include "string_utils.h"
|
#include "string_utils.h"
|
||||||
|
#include "export.h"
|
||||||
|
|
||||||
#define TRIE_SIGNATURE 0xABABABAB
|
#define TRIE_SIGNATURE 0xABABABAB
|
||||||
#define NULL_NODE_ID 0
|
#define NULL_NODE_ID 0
|
||||||
@@ -79,7 +80,7 @@ typedef struct trie {
|
|||||||
} trie_t;
|
} trie_t;
|
||||||
|
|
||||||
trie_t *trie_new_alphabet(uint8_t *alphabet, uint32_t alphabet_size);
|
trie_t *trie_new_alphabet(uint8_t *alphabet, uint32_t alphabet_size);
|
||||||
trie_t *trie_new(void);
|
LIBPOSTAL_EXPORT trie_t *trie_new(void);
|
||||||
|
|
||||||
uint32_t trie_get_char_index(trie_t *self, unsigned char c);
|
uint32_t trie_get_char_index(trie_t *self, unsigned char c);
|
||||||
uint32_t trie_get_transition_index(trie_t *self, trie_node_t node, unsigned char c);
|
uint32_t trie_get_transition_index(trie_t *self, trie_node_t node, unsigned char c);
|
||||||
@@ -97,7 +98,7 @@ trie_data_node_t trie_get_data_node(trie_t *self, trie_node_t node);
|
|||||||
bool trie_set_data_node(trie_t *self, uint32_t index, trie_data_node_t data_node);
|
bool trie_set_data_node(trie_t *self, uint32_t index, trie_data_node_t data_node);
|
||||||
|
|
||||||
bool trie_get_data_at_index(trie_t *self, uint32_t index, uint32_t *data);
|
bool trie_get_data_at_index(trie_t *self, uint32_t index, uint32_t *data);
|
||||||
bool trie_get_data(trie_t *self, char *key, uint32_t *data);
|
LIBPOSTAL_EXPORT bool trie_get_data(trie_t *self, char *key, uint32_t *data);
|
||||||
bool trie_set_data_at_index(trie_t *self, uint32_t index, uint32_t data);
|
bool trie_set_data_at_index(trie_t *self, uint32_t index, uint32_t data);
|
||||||
bool trie_set_data(trie_t *self, char *key, uint32_t data);
|
bool trie_set_data(trie_t *self, char *key, uint32_t data);
|
||||||
|
|
||||||
@@ -113,7 +114,7 @@ int32_t trie_separate_tail(trie_t *self, uint32_t from_index, unsigned char *tai
|
|||||||
void trie_tail_merge(trie_t *self, uint32_t old_node_id, unsigned char *suffix, uint32_t data);
|
void trie_tail_merge(trie_t *self, uint32_t old_node_id, unsigned char *suffix, uint32_t data);
|
||||||
|
|
||||||
bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, size_t len, uint32_t data);
|
bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, size_t len, uint32_t data);
|
||||||
bool trie_add(trie_t *self, char *key, uint32_t data);
|
LIBPOSTAL_EXPORT bool trie_add(trie_t *self, char *key, uint32_t data);
|
||||||
bool trie_add_len(trie_t *self, char *key, size_t len, uint32_t data);
|
bool trie_add_len(trie_t *self, char *key, size_t len, uint32_t data);
|
||||||
bool trie_add_suffix(trie_t *self, char *key, uint32_t data);
|
bool trie_add_suffix(trie_t *self, char *key, uint32_t data);
|
||||||
bool trie_add_suffix_at_index(trie_t *self, char *key, uint32_t start_node_id, uint32_t data);
|
bool trie_add_suffix_at_index(trie_t *self, char *key, uint32_t start_node_id, uint32_t data);
|
||||||
@@ -146,7 +147,7 @@ bool trie_save(trie_t *self, char *path);
|
|||||||
trie_t *trie_read(FILE *file);
|
trie_t *trie_read(FILE *file);
|
||||||
trie_t *trie_load(char *path);
|
trie_t *trie_load(char *path);
|
||||||
|
|
||||||
void trie_destroy(trie_t *self);
|
LIBPOSTAL_EXPORT void trie_destroy(trie_t *self);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -465,7 +465,7 @@ inline bool trie_search_tokens_with_phrases(trie_t *self, char *str, token_array
|
|||||||
return trie_search_tokens_from_index(self, str, tokens, ROOT_NODE_ID, phrases);
|
return trie_search_tokens_from_index(self, str, tokens, ROOT_NODE_ID, phrases);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
|
LIBPOSTAL_EXPORT inline phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
|
||||||
phrase_array *phrases = NULL;
|
phrase_array *phrases = NULL;
|
||||||
if (!trie_search_tokens_with_phrases(self, str, tokens, &phrases)) {
|
if (!trie_search_tokens_with_phrases(self, str, tokens, &phrases)) {
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
#include "tokens.h"
|
#include "tokens.h"
|
||||||
#include "vector.h"
|
#include "vector.h"
|
||||||
#include "utf8proc/utf8proc.h"
|
#include "utf8proc/utf8proc.h"
|
||||||
|
#include "export.h"
|
||||||
|
|
||||||
typedef struct phrase {
|
typedef struct phrase {
|
||||||
uint32_t start;
|
uint32_t start;
|
||||||
@@ -31,7 +32,7 @@ VECTOR_INIT(phrase_array, phrase_t)
|
|||||||
phrase_array *trie_search(trie_t *self, char *text);
|
phrase_array *trie_search(trie_t *self, char *text);
|
||||||
bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, phrase_array **phrases);
|
bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, phrase_array **phrases);
|
||||||
bool trie_search_with_phrases(trie_t *self, char *text, phrase_array **phrases);
|
bool trie_search_with_phrases(trie_t *self, char *text, phrase_array **phrases);
|
||||||
phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens);
|
LIBPOSTAL_EXPORT phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens);
|
||||||
bool trie_search_tokens_from_index(trie_t *self, char *str, token_array *tokens, uint32_t start_node_id, phrase_array **phrases);
|
bool trie_search_tokens_from_index(trie_t *self, char *str, token_array *tokens, uint32_t start_node_id, phrase_array **phrases);
|
||||||
bool trie_search_tokens_with_phrases(trie_t *self, char *text, token_array *tokens, phrase_array **phrases);
|
bool trie_search_tokens_with_phrases(trie_t *self, char *text, token_array *tokens, phrase_array **phrases);
|
||||||
phrase_t trie_search_suffixes_from_index(trie_t *self, char *word, size_t len, uint32_t start_node_id);
|
phrase_t trie_search_suffixes_from_index(trie_t *self, char *word, size_t len, uint32_t start_node_id);
|
||||||
|
|||||||
Reference in New Issue
Block a user