Remove the LIBPOSTAL_EXPORT annotations from all source files and most header files, leaving exports only on the main API declared in libpostal.h. Modify the Makefiles so that all the test apps still build without extra functions being exported from libpostal.
This commit is contained in:
@@ -9,12 +9,12 @@ DEFAULT_INCLUDES = -I.. -I/usr/local/include
|
||||
|
||||
# Wonky but have to be able to override the user's optimization level to compile the scanner
|
||||
# as it takes an unreasonably long time to compile with the optimizer on.
|
||||
CFLAGS = -D UTF8PROC_EXPORTS -D LIBPOSTAL_EXPORTS
|
||||
CFLAGS =
|
||||
|
||||
lib_LTLIBRARIES = libpostal.la
|
||||
libpostal_la_SOURCES = libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c
|
||||
libpostal_la_SOURCES = strndup.c libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c
|
||||
libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS)
|
||||
libpostal_la_CFLAGS = $(CFLAGS_O2)
|
||||
libpostal_la_CFLAGS = $(CFLAGS_O2) -D LIBPOSTAL_EXPORTS
|
||||
libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ -no-undefined
|
||||
|
||||
dist_bin_SCRIPTS = libpostal_data
|
||||
@@ -24,41 +24,41 @@ dist_bin_SCRIPTS = libpostal_data
|
||||
# -marm option. For that, CFLAGS_SCANNER_EXTRA is provided that can be filled during configuration stage (see ./configure --help).
|
||||
noinst_LTLIBRARIES = libscanner.la
|
||||
libscanner_la_SOURCES = klib/drand48.c scanner.c
|
||||
libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA)
|
||||
libscanner_la_CFLAGS = $(CFLAGS_O0) -D LIBPOSTAL_EXPORTS $(CFLAGS_SCANNER_EXTRA)
|
||||
|
||||
noinst_PROGRAMS = libpostal bench address_parser address_parser_train address_parser_test build_address_dictionary build_numex_table build_trans_table address_parser_train address_parser_test language_classifier_train language_classifier language_classifier_test
|
||||
|
||||
libpostal_SOURCES = main.c json_encode.c
|
||||
libpostal_SOURCES = strndup.c main.c json_encode.c file_utils.c string_utils.c utf8proc/utf8proc.c
|
||||
libpostal_LDADD = libpostal.la
|
||||
libpostal_CFLAGS = $(CFLAGS_O3)
|
||||
bench_SOURCES = bench.c
|
||||
bench_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS)
|
||||
bench_CFLAGS = $(CFLAGS_O3)
|
||||
address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c libpostal.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c
|
||||
address_parser_LDADD = libscanner.la $(CBLAS_LIBS)
|
||||
address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c
|
||||
address_parser_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS)
|
||||
address_parser_CFLAGS = $(CFLAGS_O3)
|
||||
|
||||
build_address_dictionary_SOURCES = address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c
|
||||
build_address_dictionary_SOURCES = strndup.c address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c
|
||||
build_address_dictionary_CFLAGS = $(CFLAGS_O3)
|
||||
build_numex_table_SOURCES = numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c
|
||||
build_numex_table_SOURCES = strndup.c numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c
|
||||
build_numex_table_CFLAGS = $(CFLAGS_O3)
|
||||
build_trans_table_SOURCES = transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c
|
||||
build_trans_table_SOURCES = strndup.c transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c
|
||||
build_trans_table_CFLAGS = $(CFLAGS_O3)
|
||||
address_parser_train_SOURCES = address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c shuffle.c utf8proc/utf8proc.c ngrams.c
|
||||
address_parser_train_SOURCES = strndup.c address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c shuffle.c utf8proc/utf8proc.c ngrams.c
|
||||
address_parser_train_LDADD = libscanner.la $(CBLAS_LIBS)
|
||||
address_parser_train_CFLAGS = $(CFLAGS_O3)
|
||||
|
||||
address_parser_test_SOURCES = address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c
|
||||
address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS)
|
||||
address_parser_test_SOURCES = strndup.c address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c
|
||||
address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS)
|
||||
address_parser_test_CFLAGS = $(CFLAGS_O3)
|
||||
|
||||
language_classifier_train_SOURCES = language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c
|
||||
language_classifier_train_SOURCES = strndup.c language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c
|
||||
language_classifier_train_LDADD = libscanner.la $(CBLAS_LIBS)
|
||||
language_classifier_train_CFLAGS = $(CFLAGS_O3)
|
||||
language_classifier_SOURCES = language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c
|
||||
language_classifier_SOURCES = strndup.c language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c
|
||||
language_classifier_LDADD = libscanner.la $(CBLAS_LIBS)
|
||||
language_classifier_CFLAGS = $(CFLAGS_O3)
|
||||
language_classifier_test_SOURCES = language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c
|
||||
language_classifier_test_SOURCES = strndup.c language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c
|
||||
language_classifier_test_LDADD = libscanner.la $(CBLAS_LIBS)
|
||||
language_classifier_test_CFLAGS = $(CFLAGS_O3)
|
||||
|
||||
|
||||
16
src/export.h
16
src/export.h
@@ -1,16 +0,0 @@
|
||||
#ifndef EXPORT_H
|
||||
#define EXPORT_H
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifdef LIBPOSTAL_EXPORTS
|
||||
#define LIBPOSTAL_EXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define LIBPOSTAL_EXPORT __declspec(dllimport)
|
||||
#endif
|
||||
#elif __GNUC__ >= 4
|
||||
#define LIBPOSTAL_EXPORT __attribute__ ((visibility("default")))
|
||||
#else
|
||||
#define LIBPOSTAL_EXPORT
|
||||
#endif
|
||||
|
||||
#endif //EXPORT_H
|
||||
@@ -1,7 +1,7 @@
|
||||
#include "features.h"
|
||||
|
||||
|
||||
LIBPOSTAL_EXPORT void feature_array_add(cstring_array *features, size_t count, ...) {
|
||||
void feature_array_add(cstring_array *features, size_t count, ...) {
|
||||
if (count <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -5,13 +5,12 @@
|
||||
#include <stdarg.h>
|
||||
#include "collections.h"
|
||||
#include "string_utils.h"
|
||||
#include "export.h"
|
||||
|
||||
#define FEATURE_SEPARATOR_CHAR "|"
|
||||
|
||||
// Add feature to array
|
||||
|
||||
LIBPOSTAL_EXPORT void feature_array_add(cstring_array *features, size_t count, ...);
|
||||
void feature_array_add(cstring_array *features, size_t count, ...);
|
||||
|
||||
// Add feature using printf format
|
||||
void feature_array_add_printf(cstring_array *features, char *format, ...);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#include "file_utils.h"
|
||||
|
||||
LIBPOSTAL_EXPORT char *file_getline(FILE * f)
|
||||
char *file_getline(FILE * f)
|
||||
{
|
||||
char buf[BUFSIZ];
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
#include <stdbool.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "export.h"
|
||||
#include "libpostal_config.h"
|
||||
#include "string_utils.h"
|
||||
|
||||
@@ -53,7 +52,7 @@
|
||||
#define COMMA_SEPARATOR ","
|
||||
#define COMMA_SEPARATOR_LEN strlen(COMMA_SEPARATOR)
|
||||
|
||||
LIBPOSTAL_EXPORT char *file_getline(FILE * f);
|
||||
char *file_getline(FILE * f);
|
||||
|
||||
bool file_exists(char *filename);
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ static libpostal_normalize_options_t LIBPOSTAL_DEFAULT_OPTIONS = {
|
||||
.roman_numerals = true
|
||||
};
|
||||
|
||||
LIBPOSTAL_EXPORT libpostal_normalize_options_t libpostal_get_default_options(void) {
|
||||
libpostal_normalize_options_t libpostal_get_default_options(void) {
|
||||
return LIBPOSTAL_DEFAULT_OPTIONS;
|
||||
}
|
||||
|
||||
@@ -942,7 +942,7 @@ static void expand_alternative(cstring_array *strings, khash_t(str_set) *unique_
|
||||
char_array_destroy(temp_string);
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n) {
|
||||
char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n) {
|
||||
options.address_components |= LIBPOSTAL_ADDRESS_ANY;
|
||||
|
||||
uint64_t normalize_string_options = get_normalize_string_options(options);
|
||||
@@ -1021,14 +1021,14 @@ LIBPOSTAL_EXPORT char **libpostal_expand_address(char *input, libpostal_normaliz
|
||||
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT void libpostal_expansion_array_destroy(char **expansions, size_t n) {
|
||||
void libpostal_expansion_array_destroy(char **expansions, size_t n) {
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
free(expansions[i]);
|
||||
}
|
||||
free(expansions);
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) {
|
||||
void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) {
|
||||
if (self == NULL) return;
|
||||
|
||||
for (size_t i = 0; i < self->num_components; i++) {
|
||||
@@ -1057,11 +1057,11 @@ static libpostal_address_parser_options_t LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIO
|
||||
.country = NULL
|
||||
};
|
||||
|
||||
LIBPOSTAL_EXPORT inline libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void) {
|
||||
inline libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void) {
|
||||
return LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options) {
|
||||
libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options) {
|
||||
libpostal_address_parser_response_t *parsed = address_parser_parse(address, options.language, options.country);
|
||||
|
||||
if (parsed == NULL) {
|
||||
@@ -1073,7 +1073,7 @@ LIBPOSTAL_EXPORT libpostal_address_parser_response_t *libpostal_parse_address(ch
|
||||
return parsed;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_datadir(char *datadir) {
|
||||
bool libpostal_setup_datadir(char *datadir) {
|
||||
char *transliteration_path = NULL;
|
||||
char *numex_path = NULL;
|
||||
char *address_dictionary_path = NULL;
|
||||
@@ -1114,11 +1114,11 @@ LIBPOSTAL_EXPORT bool libpostal_setup_datadir(char *datadir) {
|
||||
return true;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup(void) {
|
||||
bool libpostal_setup(void) {
|
||||
return libpostal_setup_datadir(NULL);
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier_datadir(char *datadir) {
|
||||
bool libpostal_setup_language_classifier_datadir(char *datadir) {
|
||||
char *language_classifier_dir = NULL;
|
||||
|
||||
if (datadir != NULL) {
|
||||
@@ -1137,11 +1137,11 @@ LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier_datadir(char *datadir)
|
||||
return true;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier(void) {
|
||||
bool libpostal_setup_language_classifier(void) {
|
||||
return libpostal_setup_language_classifier_datadir(NULL);
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_parser_datadir(char *datadir) {
|
||||
bool libpostal_setup_parser_datadir(char *datadir) {
|
||||
char *parser_dir = NULL;
|
||||
|
||||
if (datadir != NULL) {
|
||||
@@ -1160,11 +1160,11 @@ LIBPOSTAL_EXPORT bool libpostal_setup_parser_datadir(char *datadir) {
|
||||
return true;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_parser(void) {
|
||||
bool libpostal_setup_parser(void) {
|
||||
return libpostal_setup_parser_datadir(NULL);
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT void libpostal_teardown(void) {
|
||||
void libpostal_teardown(void) {
|
||||
transliteration_module_teardown();
|
||||
|
||||
numex_module_teardown();
|
||||
@@ -1172,10 +1172,10 @@ LIBPOSTAL_EXPORT void libpostal_teardown(void) {
|
||||
address_dictionary_module_teardown();
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT void libpostal_teardown_language_classifier(void) {
|
||||
void libpostal_teardown_language_classifier(void) {
|
||||
language_classifier_module_teardown();
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT void libpostal_teardown_parser(void) {
|
||||
void libpostal_teardown_parser(void) {
|
||||
address_parser_module_teardown();
|
||||
}
|
||||
|
||||
@@ -9,7 +9,18 @@ extern "C" {
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include "export.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifdef LIBPOSTAL_EXPORTS
|
||||
#define LIBPOSTAL_EXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define LIBPOSTAL_EXPORT __declspec(dllimport)
|
||||
#endif
|
||||
#elif __GNUC__ >= 4
|
||||
#define LIBPOSTAL_EXPORT __attribute__ ((visibility("default")))
|
||||
#else
|
||||
#define LIBPOSTAL_EXPORT
|
||||
#endif
|
||||
|
||||
#define LIBPOSTAL_MAX_LANGUAGE_LEN 4
|
||||
|
||||
|
||||
@@ -599,7 +599,7 @@ bool numex_module_init(void) {
|
||||
Must be called only once before the module can be used
|
||||
*/
|
||||
|
||||
LIBPOSTAL_EXPORT bool numex_module_setup(char *filename) {
|
||||
bool numex_module_setup(char *filename) {
|
||||
if (numex_table == NULL) {
|
||||
return numex_table_load(filename == NULL ? DEFAULT_NUMEX_PATH : filename);
|
||||
}
|
||||
@@ -610,7 +610,7 @@ LIBPOSTAL_EXPORT bool numex_module_setup(char *filename) {
|
||||
Called once when done with the module (usually at
|
||||
the end of a main method)
|
||||
*/
|
||||
LIBPOSTAL_EXPORT void numex_module_teardown(void) {
|
||||
void numex_module_teardown(void) {
|
||||
numex_table_destroy();
|
||||
numex_table = NULL;
|
||||
}
|
||||
@@ -1101,7 +1101,7 @@ size_t ordinal_suffix_len(char *str, size_t len, char *lang) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT char *replace_numeric_expressions(char *str, char *lang) {
|
||||
char *replace_numeric_expressions(char *str, char *lang) {
|
||||
numex_result_array *results = convert_numeric_expressions(str, lang);
|
||||
if (results == NULL) return NULL;
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#include "tokens.h"
|
||||
#include "trie.h"
|
||||
#include "trie_search.h"
|
||||
#include "export.h"
|
||||
|
||||
#define NUMEX_DATA_FILE "numex.dat"
|
||||
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR NUMEX_DATA_FILE
|
||||
@@ -147,7 +146,7 @@ typedef struct numex_result {
|
||||
|
||||
VECTOR_INIT(numex_result_array, numex_result_t)
|
||||
|
||||
LIBPOSTAL_EXPORT char *replace_numeric_expressions(char *str, char *lang);
|
||||
char *replace_numeric_expressions(char *str, char *lang);
|
||||
numex_result_array *convert_numeric_expressions(char *str, char *lang);
|
||||
size_t ordinal_suffix_len(char *s, size_t len, char *lang);
|
||||
size_t possible_ordinal_digit_len(char *str, size_t len);
|
||||
@@ -156,9 +155,9 @@ bool numex_table_write(FILE *file);
|
||||
bool numex_table_save(char *filename);
|
||||
|
||||
bool numex_module_init(void);
|
||||
LIBPOSTAL_EXPORT bool numex_module_setup(char *filename);
|
||||
LIBPOSTAL_EXPORT void numex_module_teardown(void);
|
||||
|
||||
bool numex_module_setup(char *filename);
|
||||
void numex_module_teardown(void);
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -310240,7 +310240,7 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, boo
|
||||
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input) {
|
||||
token_array *tokenize_keep_whitespace(const char *input) {
|
||||
token_array *tokens = token_array_new();
|
||||
tokenize_add_tokens(tokens, input, strlen(input), true);
|
||||
return tokens;
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
|
||||
#include "token_types.h"
|
||||
#include "tokens.h"
|
||||
#include "export.h"
|
||||
|
||||
typedef struct scanner {
|
||||
unsigned char *src, *cursor, *start, *end;
|
||||
@@ -20,7 +19,7 @@ uint16_t scan_token(scanner_t *s);
|
||||
scanner_t scanner_from_string(const char *input, size_t len);
|
||||
|
||||
void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace);
|
||||
LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input);
|
||||
token_array *tokenize_keep_whitespace(const char *input);
|
||||
token_array *tokenize(const char *input);
|
||||
|
||||
|
||||
|
||||
@@ -255,7 +255,7 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, boo
|
||||
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input) {
|
||||
token_array *tokenize_keep_whitespace(const char *input) {
|
||||
token_array *tokens = token_array_new();
|
||||
tokenize_add_tokens(tokens, input, strlen(input), true);
|
||||
return tokens;
|
||||
|
||||
@@ -58,7 +58,7 @@ inline size_t string_common_suffix(const char *str1, const char *str2) {
|
||||
return common_suffix;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline bool string_starts_with(const char *str, const char *start) {
|
||||
inline bool string_starts_with(const char *str, const char *start) {
|
||||
for (; *start; str++, start++)
|
||||
if (*str != *start)
|
||||
return false;
|
||||
@@ -72,7 +72,7 @@ inline bool string_ends_with(const char *str, const char *ending) {
|
||||
return str_len < end_len ? false : !strcmp(str + str_len - end_len, ending);
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline bool string_equals(const char *s1, const char *s2) {
|
||||
inline bool string_equals(const char *s1, const char *s2) {
|
||||
if (s1 == NULL || s2 == NULL) return false;
|
||||
return strcmp(s1, s2) == 0;
|
||||
}
|
||||
@@ -169,7 +169,7 @@ uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_re
|
||||
return num_replacements;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst) {
|
||||
ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst) {
|
||||
ssize_t len = 0;
|
||||
|
||||
const uint8_t *ptr = str + start;
|
||||
@@ -188,7 +188,7 @@ LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t s
|
||||
return ret_len;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT char *utf8_reversed_string(const char *s) {
|
||||
char *utf8_reversed_string(const char *s) {
|
||||
int32_t unich;
|
||||
ssize_t len, remaining;
|
||||
|
||||
@@ -478,7 +478,7 @@ size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *st
|
||||
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2) {
|
||||
inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2) {
|
||||
return utf8_common_prefix_len_ignore_separators(str1, str2, strlen(str2));
|
||||
}
|
||||
|
||||
@@ -606,7 +606,7 @@ size_t string_left_spaces_len(char *str, size_t len) {
|
||||
return spaces;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT char *string_trim(char *str) {
|
||||
char *string_trim(char *str) {
|
||||
size_t len = strlen(str);
|
||||
size_t left_spaces = string_left_spaces_len(str, len);
|
||||
size_t right_spaces = string_right_spaces_len(str, len);
|
||||
@@ -630,14 +630,14 @@ char_array *char_array_from_string_no_copy(char *str, size_t n) {
|
||||
return array;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline char *char_array_get_string(char_array *array) {
|
||||
inline char *char_array_get_string(char_array *array) {
|
||||
if (array->n == 0 || array->a[array->n - 1] != '\0') {
|
||||
char_array_terminate(array);
|
||||
}
|
||||
return array->a;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline char *char_array_to_string(char_array *array) {
|
||||
inline char *char_array_to_string(char_array *array) {
|
||||
if (array->n == 0 || array->a[array->n - 1] != '\0') {
|
||||
char_array_terminate(array);
|
||||
}
|
||||
@@ -662,7 +662,7 @@ inline size_t char_array_len(char_array *array) {
|
||||
}
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline void char_array_append(char_array *array, char *str) {
|
||||
inline void char_array_append(char_array *array, char *str) {
|
||||
while(*str) {
|
||||
char_array_push(array, *str++);
|
||||
}
|
||||
@@ -696,11 +696,11 @@ inline void char_array_append_reversed(char_array *array, char *str) {
|
||||
char_array_append_reversed_len(array, str, len);
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline void char_array_terminate(char_array *array) {
|
||||
inline void char_array_terminate(char_array *array) {
|
||||
char_array_push(array, '\0');
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline void char_array_cat(char_array *array, char *str) {
|
||||
inline void char_array_cat(char_array *array, char *str) {
|
||||
char_array_strip_nul_byte(array);
|
||||
char_array_append(array, str);
|
||||
char_array_terminate(array);
|
||||
@@ -713,7 +713,7 @@ inline void char_array_cat_len(char_array *array, char *str, size_t len) {
|
||||
}
|
||||
|
||||
|
||||
LIBPOSTAL_EXPORT inline void char_array_cat_reversed(char_array *array, char *str) {
|
||||
inline void char_array_cat_reversed(char_array *array, char *str) {
|
||||
char_array_strip_nul_byte(array);
|
||||
char_array_append_reversed(array, str);
|
||||
char_array_terminate(array);
|
||||
@@ -764,7 +764,7 @@ void char_array_add_vjoined(char_array *array, char *separator, bool strip_separ
|
||||
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) {
|
||||
inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) {
|
||||
va_list args;
|
||||
va_start(args, count);
|
||||
char_array_add_vjoined(array, separator, strip_separator, count, args);
|
||||
@@ -808,14 +808,14 @@ void char_array_cat_vprintf(char_array *array, char *format, va_list args) {
|
||||
}
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT void char_array_cat_printf(char_array *array, char *format, ...) {
|
||||
void char_array_cat_printf(char_array *array, char *format, ...) {
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
char_array_cat_vprintf(array, format, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void) {
|
||||
cstring_array *cstring_array_new(void) {
|
||||
cstring_array *array = malloc(sizeof(cstring_array));
|
||||
if (array == NULL) return NULL;
|
||||
|
||||
@@ -834,7 +834,7 @@ LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void) {
|
||||
return array;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT void cstring_array_destroy(cstring_array *self) {
|
||||
void cstring_array_destroy(cstring_array *self) {
|
||||
if (self == NULL) return;
|
||||
if (self->indices) {
|
||||
uint32_array_destroy(self->indices);
|
||||
@@ -889,7 +889,7 @@ inline size_t cstring_array_used(cstring_array *self) {
|
||||
return self->str->n;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline size_t cstring_array_num_strings(cstring_array *self) {
|
||||
inline size_t cstring_array_num_strings(cstring_array *self) {
|
||||
if (self == NULL) return 0;
|
||||
return self->indices->n;
|
||||
}
|
||||
@@ -958,13 +958,13 @@ inline int32_t cstring_array_get_offset(cstring_array *self, uint32_t i) {
|
||||
return (int32_t)self->indices->a[i];
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline char *cstring_array_get_string(cstring_array *self, uint32_t i) {
|
||||
inline char *cstring_array_get_string(cstring_array *self, uint32_t i) {
|
||||
int32_t data_index = cstring_array_get_offset(self, i);
|
||||
if (data_index < 0) return NULL;
|
||||
return self->str->a + data_index;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline int64_t cstring_array_token_length(cstring_array *self, uint32_t i) {
|
||||
inline int64_t cstring_array_token_length(cstring_array *self, uint32_t i) {
|
||||
if (INVALID_INDEX(i, self->indices->n)) {
|
||||
return -1;
|
||||
}
|
||||
@@ -1015,7 +1015,7 @@ cstring_array *cstring_array_split_ignore_consecutive(char *str, const char *sep
|
||||
}
|
||||
|
||||
|
||||
LIBPOSTAL_EXPORT cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count) {
|
||||
cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count) {
|
||||
*count = 0;
|
||||
char *ptr = str;
|
||||
size_t len = strlen(str);
|
||||
@@ -1034,7 +1034,7 @@ LIBPOSTAL_EXPORT cstring_array *cstring_array_split_no_copy(char *str, char sepa
|
||||
}
|
||||
|
||||
|
||||
LIBPOSTAL_EXPORT char **cstring_array_to_strings(cstring_array *self) {
|
||||
char **cstring_array_to_strings(cstring_array *self) {
|
||||
char **strings = malloc(self->indices->n * sizeof(char *));
|
||||
|
||||
for (int i = 0; i < cstring_array_num_strings(self); i++) {
|
||||
@@ -1073,7 +1073,7 @@ string_tree_t *string_tree_new_size(size_t size) {
|
||||
|
||||
#define DEFAULT_STRING_TREE_SIZE 8
|
||||
|
||||
LIBPOSTAL_EXPORT string_tree_t *string_tree_new(void) {
|
||||
string_tree_t *string_tree_new(void) {
|
||||
return string_tree_new_size((size_t)DEFAULT_STRING_TREE_SIZE);
|
||||
}
|
||||
|
||||
@@ -1085,12 +1085,12 @@ inline char *string_tree_get_alternative(string_tree_t *self, size_t token_index
|
||||
return cstring_array_get_string(self->strings, token_start + alternative);
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline void string_tree_finalize_token(string_tree_t *self) {
|
||||
inline void string_tree_finalize_token(string_tree_t *self) {
|
||||
uint32_array_push(self->token_indices, (uint32_t)cstring_array_num_strings(self->strings));
|
||||
}
|
||||
|
||||
// terminated
|
||||
LIBPOSTAL_EXPORT inline void string_tree_add_string(string_tree_t *self, char *str) {
|
||||
inline void string_tree_add_string(string_tree_t *self, char *str) {
|
||||
cstring_array_add_string(self->strings, str);
|
||||
}
|
||||
|
||||
@@ -1115,13 +1115,13 @@ inline uint32_t string_tree_num_strings(string_tree_t *self) {
|
||||
return (uint32_t)cstring_array_num_strings(self->strings);
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i) {
|
||||
inline uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i) {
|
||||
if (i >= self->token_indices->n) return 0;
|
||||
uint32_t n = self->token_indices->a[i + 1] - self->token_indices->a[i];
|
||||
return n > 0 ? n : 1;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self) {
|
||||
void string_tree_destroy(string_tree_t *self) {
|
||||
if (self == NULL) return;
|
||||
|
||||
if (self->token_indices != NULL) {
|
||||
@@ -1135,7 +1135,7 @@ LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self) {
|
||||
free(self);
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) {
|
||||
string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) {
|
||||
string_tree_iterator_t *self = malloc(sizeof(string_tree_iterator_t));
|
||||
self->tree = tree;
|
||||
|
||||
@@ -1166,7 +1166,7 @@ LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t
|
||||
return self;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT void string_tree_iterator_next(string_tree_iterator_t *self) {
|
||||
void string_tree_iterator_next(string_tree_iterator_t *self) {
|
||||
if (self->remaining > 0) {
|
||||
int i;
|
||||
for (i = self->num_tokens - 1; i >= 0; i--) {
|
||||
@@ -1195,11 +1195,11 @@ char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i)
|
||||
return cstring_array_get_string(self->tree->strings, base_index + offset);
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT bool string_tree_iterator_done(string_tree_iterator_t *self) {
|
||||
bool string_tree_iterator_done(string_tree_iterator_t *self) {
|
||||
return self->remaining == 0;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT void string_tree_iterator_destroy(string_tree_iterator_t *self) {
|
||||
void string_tree_iterator_destroy(string_tree_iterator_t *self) {
|
||||
if (self == NULL) return;
|
||||
|
||||
if (self->path) {
|
||||
|
||||
@@ -16,7 +16,6 @@ Utilities for manipulating strings in C.
|
||||
#include "collections.h"
|
||||
#include "utf8proc/utf8proc.h"
|
||||
#include "vector.h"
|
||||
#include "export.h"
|
||||
|
||||
#define MAX_UTF8_CHAR_SIZE 4
|
||||
|
||||
@@ -60,16 +59,16 @@ char *string_replace_char(char *str, char c1, char c2);
|
||||
bool string_replace_with_array(char *str, char *replace, char *with, char_array *result);
|
||||
char *string_replace(char *str, char *replace, char *with);
|
||||
|
||||
LIBPOSTAL_EXPORT bool string_starts_with(const char *str, const char *start);
|
||||
bool string_starts_with(const char *str, const char *start);
|
||||
bool string_ends_with(const char *str, const char *ending);
|
||||
|
||||
LIBPOSTAL_EXPORT bool string_equals(const char *s1, const char *s2);
|
||||
bool string_equals(const char *s1, const char *s2);
|
||||
|
||||
uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_repls, size_t trans_len);
|
||||
|
||||
// UTF-8 string methods
|
||||
LIBPOSTAL_EXPORT char *utf8_reversed_string(const char *s); // returns a copy, caller frees
|
||||
LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst);
|
||||
char *utf8_reversed_string(const char *s); // returns a copy, caller frees
|
||||
ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst);
|
||||
|
||||
// Casing functions return a copy, caller frees
|
||||
char *utf8_lower_options(const char *s, utf8proc_option_t options);
|
||||
@@ -81,7 +80,7 @@ int utf8_compare(const char *str1, const char *str2);
|
||||
int utf8_compare_len(const char *str1, const char *str2, size_t len);
|
||||
size_t utf8_common_prefix(const char *str1, const char *str2);
|
||||
size_t utf8_common_prefix_len(const char *str1, const char *str2, size_t len);
|
||||
LIBPOSTAL_EXPORT size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2);
|
||||
size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2);
|
||||
size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len);
|
||||
|
||||
bool utf8_is_hyphen(int32_t ch);
|
||||
@@ -100,7 +99,7 @@ ssize_t string_next_hyphen_index(char *str, size_t len);
|
||||
bool string_contains_hyphen(char *str);
|
||||
bool string_contains_hyphen_len(char *str, size_t len);
|
||||
|
||||
LIBPOSTAL_EXPORT char *string_trim(char *str);
|
||||
char *string_trim(char *str);
|
||||
|
||||
/* char_array is a dynamic character array defined in collections.h
|
||||
but has a few additional methods related to string manipulation.
|
||||
@@ -113,40 +112,40 @@ char_array *char_array_from_string(char *str);
|
||||
char_array *char_array_from_string_no_copy(char *str, size_t n);
|
||||
|
||||
// Gets the underlying C string for a char_array
|
||||
LIBPOSTAL_EXPORT char *char_array_get_string(char_array *array);
|
||||
char *char_array_get_string(char_array *array);
|
||||
|
||||
// Frees the char_array and returns a standard NUL-terminated string
|
||||
LIBPOSTAL_EXPORT char *char_array_to_string(char_array *array);
|
||||
char *char_array_to_string(char_array *array);
|
||||
|
||||
// Can use strlen(array->a) but this is faster
|
||||
size_t char_array_len(char_array *array);
|
||||
|
||||
// append_* methods do not NUL-terminate
|
||||
LIBPOSTAL_EXPORT void char_array_append(char_array *array, char *str);
|
||||
void char_array_append(char_array *array, char *str);
|
||||
void char_array_append_len(char_array *array, char *str, size_t len);
|
||||
void char_array_append_reversed(char_array *array, char *str);
|
||||
void char_array_append_reversed_len(char_array *array, char *str, size_t len);
|
||||
// add NUL terminator to a char_array
|
||||
void char_array_strip_nul_byte(char_array *array);
|
||||
LIBPOSTAL_EXPORT void char_array_terminate(char_array *array);
|
||||
void char_array_terminate(char_array *array);
|
||||
|
||||
// add_* methods NUL-terminate without stripping NUL-byte
|
||||
void char_array_add(char_array *array, char *str);
|
||||
void char_array_add_len(char_array *array, char *str, size_t len);
|
||||
|
||||
// Similar to strcat but with dynamic resizing, guaranteed NUL-terminated
|
||||
LIBPOSTAL_EXPORT void char_array_cat(char_array *array, char *str);
|
||||
void char_array_cat(char_array *array, char *str);
|
||||
void char_array_cat_len(char_array *array, char *str, size_t len);
|
||||
LIBPOSTAL_EXPORT void char_array_cat_reversed(char_array *array, char *str);
|
||||
void char_array_cat_reversed(char_array *array, char *str);
|
||||
void char_array_cat_reversed_len(char_array *array, char *str, size_t len);
|
||||
|
||||
// Similar to cat methods but with printf args
|
||||
void char_array_cat_vprintf(char_array *array, char *format, va_list args);
|
||||
LIBPOSTAL_EXPORT void char_array_cat_printf(char_array *array, char *format, ...);
|
||||
void char_array_cat_printf(char_array *array, char *format, ...);
|
||||
|
||||
// Mainly for paths or delimited strings
|
||||
void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
|
||||
LIBPOSTAL_EXPORT void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
||||
void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
||||
void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
||||
|
||||
|
||||
@@ -171,13 +170,13 @@ typedef struct {
|
||||
char_array *str;
|
||||
} cstring_array;
|
||||
|
||||
LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void);
|
||||
cstring_array *cstring_array_new(void);
|
||||
|
||||
cstring_array *cstring_array_new_size(size_t size);
|
||||
|
||||
size_t cstring_array_capacity(cstring_array *self);
|
||||
size_t cstring_array_used(cstring_array *self);
|
||||
LIBPOSTAL_EXPORT size_t cstring_array_num_strings(cstring_array *self);
|
||||
size_t cstring_array_num_strings(cstring_array *self);
|
||||
void cstring_array_resize(cstring_array *self, size_t size);
|
||||
void cstring_array_clear(cstring_array *self);
|
||||
|
||||
@@ -185,7 +184,7 @@ cstring_array *cstring_array_from_char_array(char_array *str);
|
||||
cstring_array *cstring_array_from_strings(char **strings, size_t n);
|
||||
|
||||
// Convert cstring_array to an array of n C strings and destroy the cstring_array
|
||||
LIBPOSTAL_EXPORT char **cstring_array_to_strings(cstring_array *self);
|
||||
char **cstring_array_to_strings(cstring_array *self);
|
||||
|
||||
// Split on delimiter
|
||||
cstring_array *cstring_array_split(char *str, const char *separator, size_t separator_len, size_t *count);
|
||||
@@ -193,7 +192,7 @@ cstring_array *cstring_array_split(char *str, const char *separator, size_t sepa
|
||||
cstring_array *cstring_array_split_ignore_consecutive(char *str, const char *separator, size_t separator_len, size_t *count);
|
||||
|
||||
// Split on delimiter by replacing (single character) separator with the NUL byte in the original string
|
||||
LIBPOSTAL_EXPORT cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count);
|
||||
cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count);
|
||||
|
||||
uint32_t cstring_array_start_token(cstring_array *self);
|
||||
uint32_t cstring_array_add_string(cstring_array *self, char *str);
|
||||
@@ -207,10 +206,10 @@ void cstring_array_cat_string_len(cstring_array *self, char *str, size_t len);
|
||||
|
||||
void cstring_array_terminate(cstring_array *self);
|
||||
int32_t cstring_array_get_offset(cstring_array *self, uint32_t i);
|
||||
LIBPOSTAL_EXPORT char *cstring_array_get_string(cstring_array *self, uint32_t i);
|
||||
LIBPOSTAL_EXPORT int64_t cstring_array_token_length(cstring_array *self, uint32_t i);
|
||||
char *cstring_array_get_string(cstring_array *self, uint32_t i);
|
||||
int64_t cstring_array_token_length(cstring_array *self, uint32_t i);
|
||||
|
||||
LIBPOSTAL_EXPORT void cstring_array_destroy(cstring_array *self);
|
||||
void cstring_array_destroy(cstring_array *self);
|
||||
|
||||
#define cstring_array_foreach(array, i, s, code) { \
|
||||
for (int __si = 0; __si < array->indices->n; __si++) { \
|
||||
@@ -246,16 +245,16 @@ typedef struct string_tree {
|
||||
cstring_array *strings;
|
||||
} string_tree_t;
|
||||
|
||||
LIBPOSTAL_EXPORT string_tree_t *string_tree_new(void);
|
||||
string_tree_t *string_tree_new(void);
|
||||
string_tree_t *string_tree_new_size(size_t size);
|
||||
|
||||
// get
|
||||
char *string_tree_get_alternative(string_tree_t *self, size_t token_index, uint32_t alternative);
|
||||
|
||||
// finalize
|
||||
LIBPOSTAL_EXPORT void string_tree_finalize_token(string_tree_t *self);
|
||||
void string_tree_finalize_token(string_tree_t *self);
|
||||
// terminated
|
||||
LIBPOSTAL_EXPORT void string_tree_add_string(string_tree_t *self, char *str);
|
||||
void string_tree_add_string(string_tree_t *self, char *str);
|
||||
void string_tree_add_string_len(string_tree_t *self, char *str, size_t len);
|
||||
// unterminated
|
||||
void string_tree_append_string(string_tree_t *self, char *str);
|
||||
@@ -264,9 +263,9 @@ void string_tree_append_string_len(string_tree_t *self, char *str, size_t len);
|
||||
uint32_t string_tree_num_tokens(string_tree_t *self);
|
||||
uint32_t string_tree_num_strings(string_tree_t *self);
|
||||
|
||||
LIBPOSTAL_EXPORT uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i);
|
||||
uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i);
|
||||
|
||||
LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self);
|
||||
void string_tree_destroy(string_tree_t *self);
|
||||
|
||||
typedef struct string_tree_iterator {
|
||||
string_tree_t *tree;
|
||||
@@ -275,11 +274,11 @@ typedef struct string_tree_iterator {
|
||||
uint32_t remaining;
|
||||
} string_tree_iterator_t;
|
||||
|
||||
LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree);
|
||||
LIBPOSTAL_EXPORT void string_tree_iterator_next(string_tree_iterator_t *self);
|
||||
string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree);
|
||||
void string_tree_iterator_next(string_tree_iterator_t *self);
|
||||
char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i);
|
||||
LIBPOSTAL_EXPORT bool string_tree_iterator_done(string_tree_iterator_t *self);
|
||||
LIBPOSTAL_EXPORT void string_tree_iterator_destroy(string_tree_iterator_t *self);
|
||||
bool string_tree_iterator_done(string_tree_iterator_t *self);
|
||||
void string_tree_iterator_destroy(string_tree_iterator_t *self);
|
||||
|
||||
|
||||
#define string_tree_iterator_foreach_token(iter, s, code) { \
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef TOKENS_H
|
||||
#define TOKENS_H
|
||||
|
||||
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
@@ -35,6 +35,6 @@ char *tokenized_string_get_token(tokenized_string_t *self, uint32_t index);
|
||||
void tokenized_string_destroy(tokenized_string_t *self);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -666,7 +666,7 @@ static char *replace_groups(trie_t *trie, char *str, char *replacement, group_ca
|
||||
return char_array_to_string(ret);
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT char *transliterate(char *trans_name, char *str, size_t len) {
|
||||
char *transliterate(char *trans_name, char *str, size_t len) {
|
||||
if (trans_name == NULL || str == NULL) return NULL;
|
||||
|
||||
transliteration_table_t *trans_table = get_transliteration_table();
|
||||
@@ -1978,7 +1978,7 @@ bool transliteration_module_init(void) {
|
||||
return trans_table != NULL;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename) {
|
||||
bool transliteration_module_setup(char *filename) {
|
||||
if (trans_table == NULL) {
|
||||
return transliteration_table_load(filename == NULL ? DEFAULT_TRANSLITERATION_PATH : filename);
|
||||
}
|
||||
@@ -1987,7 +1987,7 @@ LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename) {
|
||||
}
|
||||
|
||||
|
||||
LIBPOSTAL_EXPORT void transliteration_module_teardown(void) {
|
||||
void transliteration_module_teardown(void) {
|
||||
transliteration_table_destroy();
|
||||
trans_table = NULL;
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
#include "trie.h"
|
||||
#include "trie_search.h"
|
||||
#include "unicode_scripts.h"
|
||||
#include "export.h"
|
||||
|
||||
#define LATIN_ASCII "latin-ascii"
|
||||
#define LATIN_ASCII_SIMPLE "latin-ascii-simple"
|
||||
@@ -152,7 +151,7 @@ void transliterator_destroy(transliterator_t *self);
|
||||
bool transliteration_table_add_transliterator(transliterator_t *trans);
|
||||
|
||||
transliterator_t *get_transliterator(char *name);
|
||||
LIBPOSTAL_EXPORT char *transliterate(char *trans_name, char *str, size_t len);
|
||||
char *transliterate(char *trans_name, char *str, size_t len);
|
||||
|
||||
bool transliteration_table_add_script_language(script_language_t script_language, transliterator_index_t index);
|
||||
transliterator_index_t get_transliterator_index_for_script_language(script_t script, char *language);
|
||||
@@ -172,7 +171,7 @@ bool transliteration_table_save(char *filename);
|
||||
|
||||
// Module setup/teardown
|
||||
bool transliteration_module_init(void);
|
||||
LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename);
|
||||
LIBPOSTAL_EXPORT void transliteration_module_teardown(void);
|
||||
bool transliteration_module_setup(char *filename);
|
||||
void transliteration_module_teardown(void);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -96,7 +96,7 @@ trie_t *trie_new_alphabet(uint8_t *alphabet, uint32_t alphabet_size) {
|
||||
return self;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT trie_t *trie_new(void) {
|
||||
trie_t *trie_new(void) {
|
||||
return trie_new_alphabet(DEFAULT_ALPHABET, sizeof(DEFAULT_ALPHABET));
|
||||
}
|
||||
|
||||
@@ -661,7 +661,7 @@ bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, size_t len, ui
|
||||
}
|
||||
|
||||
|
||||
LIBPOSTAL_EXPORT inline bool trie_add(trie_t *self, char *key, uint32_t data) {
|
||||
inline bool trie_add(trie_t *self, char *key, uint32_t data) {
|
||||
size_t len = strlen(key);
|
||||
if (len == 0) return false;
|
||||
return trie_add_at_index(self, ROOT_NODE_ID, key, len + 1, data);
|
||||
@@ -754,7 +754,7 @@ inline bool trie_get_data_at_index(trie_t *self, uint32_t index, uint32_t *data
|
||||
return true;
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline bool trie_get_data(trie_t *self, char *key, uint32_t *data) {
|
||||
inline bool trie_get_data(trie_t *self, char *key, uint32_t *data) {
|
||||
uint32_t node_id = trie_get(self, key);
|
||||
return trie_get_data_at_index(self, node_id, data);
|
||||
}
|
||||
@@ -899,7 +899,7 @@ inline uint32_t trie_num_keys(trie_t *self) {
|
||||
/*
|
||||
Destructor
|
||||
*/
|
||||
LIBPOSTAL_EXPORT void trie_destroy(trie_t *self) {
|
||||
void trie_destroy(trie_t *self) {
|
||||
if (!self)
|
||||
return;
|
||||
|
||||
|
||||
@@ -33,7 +33,6 @@
|
||||
#include "klib/kvec.h"
|
||||
#include "log/log.h"
|
||||
#include "string_utils.h"
|
||||
#include "export.h"
|
||||
|
||||
#define TRIE_SIGNATURE 0xABABABAB
|
||||
#define NULL_NODE_ID 0
|
||||
@@ -80,7 +79,7 @@ typedef struct trie {
|
||||
} trie_t;
|
||||
|
||||
trie_t *trie_new_alphabet(uint8_t *alphabet, uint32_t alphabet_size);
|
||||
LIBPOSTAL_EXPORT trie_t *trie_new(void);
|
||||
trie_t *trie_new(void);
|
||||
|
||||
uint32_t trie_get_char_index(trie_t *self, unsigned char c);
|
||||
uint32_t trie_get_transition_index(trie_t *self, trie_node_t node, unsigned char c);
|
||||
@@ -98,7 +97,7 @@ trie_data_node_t trie_get_data_node(trie_t *self, trie_node_t node);
|
||||
bool trie_set_data_node(trie_t *self, uint32_t index, trie_data_node_t data_node);
|
||||
|
||||
bool trie_get_data_at_index(trie_t *self, uint32_t index, uint32_t *data);
|
||||
LIBPOSTAL_EXPORT bool trie_get_data(trie_t *self, char *key, uint32_t *data);
|
||||
bool trie_get_data(trie_t *self, char *key, uint32_t *data);
|
||||
bool trie_set_data_at_index(trie_t *self, uint32_t index, uint32_t data);
|
||||
bool trie_set_data(trie_t *self, char *key, uint32_t data);
|
||||
|
||||
@@ -114,7 +113,7 @@ int32_t trie_separate_tail(trie_t *self, uint32_t from_index, unsigned char *tai
|
||||
void trie_tail_merge(trie_t *self, uint32_t old_node_id, unsigned char *suffix, uint32_t data);
|
||||
|
||||
bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, size_t len, uint32_t data);
|
||||
LIBPOSTAL_EXPORT bool trie_add(trie_t *self, char *key, uint32_t data);
|
||||
bool trie_add(trie_t *self, char *key, uint32_t data);
|
||||
bool trie_add_len(trie_t *self, char *key, size_t len, uint32_t data);
|
||||
bool trie_add_suffix(trie_t *self, char *key, uint32_t data);
|
||||
bool trie_add_suffix_at_index(trie_t *self, char *key, uint32_t start_node_id, uint32_t data);
|
||||
@@ -147,7 +146,7 @@ bool trie_save(trie_t *self, char *path);
|
||||
trie_t *trie_read(FILE *file);
|
||||
trie_t *trie_load(char *path);
|
||||
|
||||
LIBPOSTAL_EXPORT void trie_destroy(trie_t *self);
|
||||
void trie_destroy(trie_t *self);
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -465,7 +465,7 @@ inline bool trie_search_tokens_with_phrases(trie_t *self, char *str, token_array
|
||||
return trie_search_tokens_from_index(self, str, tokens, ROOT_NODE_ID, phrases);
|
||||
}
|
||||
|
||||
LIBPOSTAL_EXPORT inline phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
|
||||
inline phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
|
||||
phrase_array *phrases = NULL;
|
||||
if (!trie_search_tokens_with_phrases(self, str, tokens, &phrases)) {
|
||||
return NULL;
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
#include "tokens.h"
|
||||
#include "vector.h"
|
||||
#include "utf8proc/utf8proc.h"
|
||||
#include "export.h"
|
||||
|
||||
typedef struct phrase {
|
||||
uint32_t start;
|
||||
@@ -32,7 +31,7 @@ VECTOR_INIT(phrase_array, phrase_t)
|
||||
phrase_array *trie_search(trie_t *self, char *text);
|
||||
bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, phrase_array **phrases);
|
||||
bool trie_search_with_phrases(trie_t *self, char *text, phrase_array **phrases);
|
||||
LIBPOSTAL_EXPORT phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens);
|
||||
phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens);
|
||||
bool trie_search_tokens_from_index(trie_t *self, char *str, token_array *tokens, uint32_t start_node_id, phrase_array **phrases);
|
||||
bool trie_search_tokens_with_phrases(trie_t *self, char *text, token_array *tokens, phrase_array **phrases);
|
||||
phrase_t trie_search_suffixes_from_index(trie_t *self, char *word, size_t len, uint32_t start_node_id);
|
||||
|
||||
@@ -44,7 +44,7 @@
|
||||
#include "utf8proc_data.c"
|
||||
|
||||
|
||||
UTF8PROC_DLLEXPORT const utf8proc_int8_t utf8proc_utf8class[256] = {
|
||||
const utf8proc_int8_t utf8proc_utf8class[256] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
@@ -87,11 +87,11 @@ UTF8PROC_DLLEXPORT const utf8proc_int8_t utf8proc_utf8class[256] = {
|
||||
be different, being based on ABI compatibility.): */
|
||||
#define STRINGIZEx(x) #x
|
||||
#define STRINGIZE(x) STRINGIZEx(x)
|
||||
UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
|
||||
const char *utf8proc_version(void) {
|
||||
return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
|
||||
const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
|
||||
switch (errcode) {
|
||||
case UTF8PROC_ERROR_NOMEM:
|
||||
return "Memory for processing UTF-8 data could not be allocated.";
|
||||
@@ -109,7 +109,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
|
||||
}
|
||||
|
||||
#define utf_cont(ch) (((ch) & 0xc0) == 0x80)
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
|
||||
utf8proc_ssize_t utf8proc_iterate(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
|
||||
) {
|
||||
utf8proc_uint32_t uc;
|
||||
@@ -157,11 +157,11 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
|
||||
return 4;
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t uc) {
|
||||
utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t uc) {
|
||||
return (((utf8proc_uint32_t)uc)-0xd800 > 0x07ff) && ((utf8proc_uint32_t)uc < 0x110000);
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
|
||||
utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
|
||||
if (uc < 0x00) {
|
||||
return 0;
|
||||
} else if (uc < 0x80) {
|
||||
@@ -228,7 +228,7 @@ static const utf8proc_property_t *unsafe_get_property(utf8proc_int32_t uc) {
|
||||
);
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t uc) {
|
||||
const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t uc) {
|
||||
return uc < 0 || uc >= 0x110000 ? utf8proc_properties : unsafe_get_property(uc);
|
||||
}
|
||||
|
||||
@@ -259,18 +259,18 @@ static utf8proc_bool grapheme_break(int lbc, int tbc) {
|
||||
}
|
||||
|
||||
/* return whether there is a grapheme break between codepoints c1 and c2 */
|
||||
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t c1, utf8proc_int32_t c2) {
|
||||
utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t c1, utf8proc_int32_t c2) {
|
||||
return grapheme_break(utf8proc_get_property(c1)->boundclass,
|
||||
utf8proc_get_property(c2)->boundclass);
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
|
||||
utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
|
||||
{
|
||||
utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_mapping;
|
||||
return cl >= 0 ? cl : c;
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
|
||||
utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
|
||||
{
|
||||
utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_mapping;
|
||||
return cu >= 0 ? cu : c;
|
||||
@@ -278,15 +278,15 @@ UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
|
||||
|
||||
/* return a character width analogous to wcwidth (except portable and
|
||||
hopefully less buggy than most system wcwidth functions). */
|
||||
UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) {
|
||||
int utf8proc_charwidth(utf8proc_int32_t c) {
|
||||
return utf8proc_get_property(c)->charwidth;
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t c) {
|
||||
utf8proc_category_t utf8proc_category(utf8proc_int32_t c) {
|
||||
return utf8proc_get_property(c)->category;
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
|
||||
const char *utf8proc_category_string(utf8proc_int32_t c) {
|
||||
static const char s[][3] = {"Cn","Lu","Ll","Lt","Lm","Lo","Mn","Mc","Me","Nd","Nl","No","Pc","Pd","Ps","Pe","Pi","Pf","Po","Sm","Sc","Sk","So","Zs","Zl","Zp","Cc","Cf","Cs","Co"};
|
||||
return s[utf8proc_category(c)];
|
||||
}
|
||||
@@ -295,7 +295,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
|
||||
return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
|
||||
options & ~UTF8PROC_LUMP, last_boundclass)
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
|
||||
utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
|
||||
const utf8proc_property_t *property;
|
||||
utf8proc_propval_t category;
|
||||
utf8proc_int32_t hangul_sindex;
|
||||
@@ -399,7 +399,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
|
||||
return 1;
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
|
||||
utf8proc_ssize_t utf8proc_decompose(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
|
||||
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
|
||||
) {
|
||||
@@ -461,7 +461,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
|
||||
return wpos;
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
|
||||
utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
|
||||
/* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
|
||||
ASSERT: 'buffer' has one spare byte of free space at the end! */
|
||||
if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
|
||||
@@ -583,7 +583,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
|
||||
}
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
|
||||
utf8proc_ssize_t utf8proc_map(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
|
||||
) {
|
||||
utf8proc_int32_t *buffer;
|
||||
@@ -612,28 +612,28 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
|
||||
return result;
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str) {
|
||||
utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str) {
|
||||
utf8proc_uint8_t *retval;
|
||||
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
|
||||
UTF8PROC_DECOMPOSE);
|
||||
return retval;
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str) {
|
||||
utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str) {
|
||||
utf8proc_uint8_t *retval;
|
||||
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
|
||||
UTF8PROC_COMPOSE);
|
||||
return retval;
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str) {
|
||||
utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str) {
|
||||
utf8proc_uint8_t *retval;
|
||||
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
|
||||
UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
|
||||
return retval;
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str) {
|
||||
utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str) {
|
||||
utf8proc_uint8_t *retval;
|
||||
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
|
||||
UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
|
||||
|
||||
@@ -111,18 +111,6 @@ typedef bool utf8proc_bool;
|
||||
#endif
|
||||
#include <limits.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
# ifdef UTF8PROC_EXPORTS
|
||||
# define UTF8PROC_DLLEXPORT __declspec(dllexport)
|
||||
# else
|
||||
# define UTF8PROC_DLLEXPORT __declspec(dllimport)
|
||||
# endif
|
||||
#elif __GNUC__ >= 4
|
||||
# define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
|
||||
#else
|
||||
# define UTF8PROC_DLLEXPORT
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@@ -365,20 +353,20 @@ typedef enum {
|
||||
* Array containing the byte lengths of a UTF-8 encoded codepoint based
|
||||
* on the first byte.
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT extern const utf8proc_int8_t utf8proc_utf8class[256];
|
||||
extern const utf8proc_int8_t utf8proc_utf8class[256];
|
||||
|
||||
/**
|
||||
* Returns the utf8proc API version as a string MAJOR.MINOR.PATCH
|
||||
* (http://semver.org format), possibly with a "-dev" suffix for
|
||||
* development versions.
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT const char *utf8proc_version(void);
|
||||
const char *utf8proc_version(void);
|
||||
|
||||
/**
|
||||
* Returns an informative error string for the given utf8proc error code
|
||||
* (e.g. the error codes returned by @ref utf8proc_map).
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode);
|
||||
const char *utf8proc_errmsg(utf8proc_ssize_t errcode);
|
||||
|
||||
/**
|
||||
* Reads a single codepoint from the UTF-8 sequence being pointed to by `str`.
|
||||
@@ -390,7 +378,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode);
|
||||
* In case of success, the number of bytes read is returned; otherwise, a
|
||||
* negative error code is returned.
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref);
|
||||
utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref);
|
||||
|
||||
/**
|
||||
* Check if a codepoint is valid (regardless of whether it has been
|
||||
@@ -398,7 +386,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str
|
||||
*
|
||||
* @return 1 if the given `codepoint` is valid and otherwise return 0.
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint);
|
||||
utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint);
|
||||
|
||||
/**
|
||||
* Encodes the codepoint as an UTF-8 string in the byte array pointed
|
||||
@@ -409,7 +397,7 @@ UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codep
|
||||
*
|
||||
* This function does not check whether `codepoint` is valid Unicode.
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst);
|
||||
utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst);
|
||||
|
||||
/**
|
||||
* Look up the properties for a given codepoint.
|
||||
@@ -423,7 +411,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepo
|
||||
* If the codepoint is unassigned or invalid, a pointer to a special struct is
|
||||
* returned in which `category` is 0 (@ref UTF8PROC_CATEGORY_CN).
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t codepoint);
|
||||
const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t codepoint);
|
||||
|
||||
/** Decompose a codepoint into an array of codepoints.
|
||||
*
|
||||
@@ -452,7 +440,7 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
|
||||
* required buffer size is returned, while the buffer will be overwritten with
|
||||
* undefined data.
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(
|
||||
utf8proc_ssize_t utf8proc_decompose_char(
|
||||
utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize,
|
||||
utf8proc_option_t options, int *last_boundclass
|
||||
);
|
||||
@@ -473,7 +461,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(
|
||||
* required buffer size is returned, while the buffer will be overwritten with
|
||||
* undefined data.
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
|
||||
utf8proc_ssize_t utf8proc_decompose(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
|
||||
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
|
||||
);
|
||||
@@ -503,13 +491,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
|
||||
* entries of the array pointed to by `str` have to be in the
|
||||
* range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options);
|
||||
utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options);
|
||||
|
||||
/**
|
||||
* Given a pair of consecutive codepoints, return whether a grapheme break is
|
||||
* permitted between them (as defined by the extended grapheme clusters in UAX#29).
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2);
|
||||
utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2);
|
||||
|
||||
|
||||
/**
|
||||
@@ -517,14 +505,14 @@ UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepo
|
||||
* lower-case character, if any; otherwise (if there is no lower-case
|
||||
* variant, or if `c` is not a valid codepoint) return `c`.
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c);
|
||||
utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c);
|
||||
|
||||
/**
|
||||
* Given a codepoint `c`, return the codepoint of the corresponding
|
||||
* upper-case character, if any; otherwise (if there is no upper-case
|
||||
* variant, or if `c` is not a valid codepoint) return `c`.
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c);
|
||||
utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c);
|
||||
|
||||
/**
|
||||
* Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
|
||||
@@ -534,19 +522,19 @@ UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c);
|
||||
* @note
|
||||
* If you want to check for particular types of non-printable characters,
|
||||
* (analogous to `isprint` or `iscntrl`), use @ref utf8proc_category. */
|
||||
UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t codepoint);
|
||||
int utf8proc_charwidth(utf8proc_int32_t codepoint);
|
||||
|
||||
/**
|
||||
* Return the Unicode category for the codepoint (one of the
|
||||
* @ref utf8proc_category_t constants.)
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint);
|
||||
utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint);
|
||||
|
||||
/**
|
||||
* Return the two-letter (nul-terminated) Unicode category string for
|
||||
* the codepoint (e.g. `"Lu"` or `"Co"`).
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoint);
|
||||
const char *utf8proc_category_string(utf8proc_int32_t codepoint);
|
||||
|
||||
/**
|
||||
* Maps the given UTF-8 string pointed to by `str` to a new UTF-8
|
||||
@@ -566,7 +554,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi
|
||||
* @note The memory of the new UTF-8 string will have been allocated
|
||||
* with `malloc`, and should therefore be deallocated with `free`.
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
|
||||
utf8proc_ssize_t utf8proc_map(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
|
||||
);
|
||||
|
||||
@@ -579,13 +567,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
|
||||
*/
|
||||
/** @{ */
|
||||
/** NFD normalization (@ref UTF8PROC_DECOMPOSE). */
|
||||
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str);
|
||||
utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str);
|
||||
/** NFC normalization (@ref UTF8PROC_COMPOSE). */
|
||||
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str);
|
||||
utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str);
|
||||
/** NFKD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */
|
||||
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str);
|
||||
utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str);
|
||||
/** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
|
||||
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
|
||||
utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
Reference in New Issue
Block a user