Removed EXPORT statements from all source files and most header files, leaving only the exports for the main API in libpostal.h. Modified the Makefiles so that all the test apps build without extra functions being exported from libpostal.

This commit is contained in:
AeroXuk
2017-11-25 04:35:28 +00:00
parent f0246e7333
commit 26ac9ab5c2
26 changed files with 186 additions and 211 deletions

View File

@@ -9,12 +9,12 @@ DEFAULT_INCLUDES = -I.. -I/usr/local/include
# Wonky but have to be able to override the user's optimization level to compile the scanner # Wonky but have to be able to override the user's optimization level to compile the scanner
# as it takes an unreasonably long time to compile with the optimizer on. # as it takes an unreasonably long time to compile with the optimizer on.
CFLAGS = -D UTF8PROC_EXPORTS -D LIBPOSTAL_EXPORTS CFLAGS =
lib_LTLIBRARIES = libpostal.la lib_LTLIBRARIES = libpostal.la
libpostal_la_SOURCES = libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c libpostal_la_SOURCES = strndup.c libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c
libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS) libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS)
libpostal_la_CFLAGS = $(CFLAGS_O2) libpostal_la_CFLAGS = $(CFLAGS_O2) -D LIBPOSTAL_EXPORTS
libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ -no-undefined libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ -no-undefined
dist_bin_SCRIPTS = libpostal_data dist_bin_SCRIPTS = libpostal_data
@@ -24,41 +24,41 @@ dist_bin_SCRIPTS = libpostal_data
# -marm option. For that, CFLAGS_SCANNER_EXTRA is provided that can be filled during configuration stage (see ./configure --help). # -marm option. For that, CFLAGS_SCANNER_EXTRA is provided that can be filled during configuration stage (see ./configure --help).
noinst_LTLIBRARIES = libscanner.la noinst_LTLIBRARIES = libscanner.la
libscanner_la_SOURCES = klib/drand48.c scanner.c libscanner_la_SOURCES = klib/drand48.c scanner.c
libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA) libscanner_la_CFLAGS = $(CFLAGS_O0) -D LIBPOSTAL_EXPORTS $(CFLAGS_SCANNER_EXTRA)
noinst_PROGRAMS = libpostal bench address_parser address_parser_train address_parser_test build_address_dictionary build_numex_table build_trans_table address_parser_train address_parser_test language_classifier_train language_classifier language_classifier_test noinst_PROGRAMS = libpostal bench address_parser address_parser_train address_parser_test build_address_dictionary build_numex_table build_trans_table address_parser_train address_parser_test language_classifier_train language_classifier language_classifier_test
libpostal_SOURCES = main.c json_encode.c libpostal_SOURCES = strndup.c main.c json_encode.c file_utils.c string_utils.c utf8proc/utf8proc.c
libpostal_LDADD = libpostal.la libpostal_LDADD = libpostal.la
libpostal_CFLAGS = $(CFLAGS_O3) libpostal_CFLAGS = $(CFLAGS_O3)
bench_SOURCES = bench.c bench_SOURCES = bench.c
bench_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS) bench_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS)
bench_CFLAGS = $(CFLAGS_O3) bench_CFLAGS = $(CFLAGS_O3)
address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c libpostal.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c
address_parser_LDADD = libscanner.la $(CBLAS_LIBS) address_parser_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS)
address_parser_CFLAGS = $(CFLAGS_O3) address_parser_CFLAGS = $(CFLAGS_O3)
build_address_dictionary_SOURCES = address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c build_address_dictionary_SOURCES = strndup.c address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c
build_address_dictionary_CFLAGS = $(CFLAGS_O3) build_address_dictionary_CFLAGS = $(CFLAGS_O3)
build_numex_table_SOURCES = numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c build_numex_table_SOURCES = strndup.c numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c
build_numex_table_CFLAGS = $(CFLAGS_O3) build_numex_table_CFLAGS = $(CFLAGS_O3)
build_trans_table_SOURCES = transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c build_trans_table_SOURCES = strndup.c transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c
build_trans_table_CFLAGS = $(CFLAGS_O3) build_trans_table_CFLAGS = $(CFLAGS_O3)
address_parser_train_SOURCES = address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c shuffle.c utf8proc/utf8proc.c ngrams.c address_parser_train_SOURCES = strndup.c address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c shuffle.c utf8proc/utf8proc.c ngrams.c
address_parser_train_LDADD = libscanner.la $(CBLAS_LIBS) address_parser_train_LDADD = libscanner.la $(CBLAS_LIBS)
address_parser_train_CFLAGS = $(CFLAGS_O3) address_parser_train_CFLAGS = $(CFLAGS_O3)
address_parser_test_SOURCES = address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c address_parser_test_SOURCES = strndup.c address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c
address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS) address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS)
address_parser_test_CFLAGS = $(CFLAGS_O3) address_parser_test_CFLAGS = $(CFLAGS_O3)
language_classifier_train_SOURCES = language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c language_classifier_train_SOURCES = strndup.c language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c
language_classifier_train_LDADD = libscanner.la $(CBLAS_LIBS) language_classifier_train_LDADD = libscanner.la $(CBLAS_LIBS)
language_classifier_train_CFLAGS = $(CFLAGS_O3) language_classifier_train_CFLAGS = $(CFLAGS_O3)
language_classifier_SOURCES = language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c language_classifier_SOURCES = strndup.c language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c
language_classifier_LDADD = libscanner.la $(CBLAS_LIBS) language_classifier_LDADD = libscanner.la $(CBLAS_LIBS)
language_classifier_CFLAGS = $(CFLAGS_O3) language_classifier_CFLAGS = $(CFLAGS_O3)
language_classifier_test_SOURCES = language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c language_classifier_test_SOURCES = strndup.c language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c
language_classifier_test_LDADD = libscanner.la $(CBLAS_LIBS) language_classifier_test_LDADD = libscanner.la $(CBLAS_LIBS)
language_classifier_test_CFLAGS = $(CFLAGS_O3) language_classifier_test_CFLAGS = $(CFLAGS_O3)

View File

@@ -1,16 +0,0 @@
#ifndef EXPORT_H
#define EXPORT_H
#ifdef _WIN32
#ifdef LIBPOSTAL_EXPORTS
#define LIBPOSTAL_EXPORT __declspec(dllexport)
#else
#define LIBPOSTAL_EXPORT __declspec(dllimport)
#endif
#elif __GNUC__ >= 4
#define LIBPOSTAL_EXPORT __attribute__ ((visibility("default")))
#else
#define LIBPOSTAL_EXPORT
#endif
#endif //EXPORT_H

View File

@@ -1,7 +1,7 @@
#include "features.h" #include "features.h"
LIBPOSTAL_EXPORT void feature_array_add(cstring_array *features, size_t count, ...) { void feature_array_add(cstring_array *features, size_t count, ...) {
if (count <= 0) { if (count <= 0) {
return; return;
} }

View File

@@ -5,13 +5,12 @@
#include <stdarg.h> #include <stdarg.h>
#include "collections.h" #include "collections.h"
#include "string_utils.h" #include "string_utils.h"
#include "export.h"
#define FEATURE_SEPARATOR_CHAR "|" #define FEATURE_SEPARATOR_CHAR "|"
// Add feature to array // Add feature to array
LIBPOSTAL_EXPORT void feature_array_add(cstring_array *features, size_t count, ...); void feature_array_add(cstring_array *features, size_t count, ...);
// Add feature using printf format // Add feature using printf format
void feature_array_add_printf(cstring_array *features, char *format, ...); void feature_array_add_printf(cstring_array *features, char *format, ...);

View File

@@ -1,6 +1,6 @@
#include "file_utils.h" #include "file_utils.h"
LIBPOSTAL_EXPORT char *file_getline(FILE * f) char *file_getline(FILE * f)
{ {
char buf[BUFSIZ]; char buf[BUFSIZ];

View File

@@ -8,7 +8,6 @@
#include <stdbool.h> #include <stdbool.h>
#include <sys/types.h> #include <sys/types.h>
#include "export.h"
#include "libpostal_config.h" #include "libpostal_config.h"
#include "string_utils.h" #include "string_utils.h"
@@ -53,7 +52,7 @@
#define COMMA_SEPARATOR "," #define COMMA_SEPARATOR ","
#define COMMA_SEPARATOR_LEN strlen(COMMA_SEPARATOR) #define COMMA_SEPARATOR_LEN strlen(COMMA_SEPARATOR)
LIBPOSTAL_EXPORT char *file_getline(FILE * f); char *file_getline(FILE * f);
bool file_exists(char *filename); bool file_exists(char *filename);

View File

@@ -57,7 +57,7 @@ static libpostal_normalize_options_t LIBPOSTAL_DEFAULT_OPTIONS = {
.roman_numerals = true .roman_numerals = true
}; };
LIBPOSTAL_EXPORT libpostal_normalize_options_t libpostal_get_default_options(void) { libpostal_normalize_options_t libpostal_get_default_options(void) {
return LIBPOSTAL_DEFAULT_OPTIONS; return LIBPOSTAL_DEFAULT_OPTIONS;
} }
@@ -942,7 +942,7 @@ static void expand_alternative(cstring_array *strings, khash_t(str_set) *unique_
char_array_destroy(temp_string); char_array_destroy(temp_string);
} }
LIBPOSTAL_EXPORT char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n) { char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n) {
options.address_components |= LIBPOSTAL_ADDRESS_ANY; options.address_components |= LIBPOSTAL_ADDRESS_ANY;
uint64_t normalize_string_options = get_normalize_string_options(options); uint64_t normalize_string_options = get_normalize_string_options(options);
@@ -1021,14 +1021,14 @@ LIBPOSTAL_EXPORT char **libpostal_expand_address(char *input, libpostal_normaliz
} }
LIBPOSTAL_EXPORT void libpostal_expansion_array_destroy(char **expansions, size_t n) { void libpostal_expansion_array_destroy(char **expansions, size_t n) {
for (size_t i = 0; i < n; i++) { for (size_t i = 0; i < n; i++) {
free(expansions[i]); free(expansions[i]);
} }
free(expansions); free(expansions);
} }
LIBPOSTAL_EXPORT void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) { void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) {
if (self == NULL) return; if (self == NULL) return;
for (size_t i = 0; i < self->num_components; i++) { for (size_t i = 0; i < self->num_components; i++) {
@@ -1057,11 +1057,11 @@ static libpostal_address_parser_options_t LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIO
.country = NULL .country = NULL
}; };
LIBPOSTAL_EXPORT inline libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void) { inline libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void) {
return LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS; return LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS;
} }
LIBPOSTAL_EXPORT libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options) { libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options) {
libpostal_address_parser_response_t *parsed = address_parser_parse(address, options.language, options.country); libpostal_address_parser_response_t *parsed = address_parser_parse(address, options.language, options.country);
if (parsed == NULL) { if (parsed == NULL) {
@@ -1073,7 +1073,7 @@ LIBPOSTAL_EXPORT libpostal_address_parser_response_t *libpostal_parse_address(ch
return parsed; return parsed;
} }
LIBPOSTAL_EXPORT bool libpostal_setup_datadir(char *datadir) { bool libpostal_setup_datadir(char *datadir) {
char *transliteration_path = NULL; char *transliteration_path = NULL;
char *numex_path = NULL; char *numex_path = NULL;
char *address_dictionary_path = NULL; char *address_dictionary_path = NULL;
@@ -1114,11 +1114,11 @@ LIBPOSTAL_EXPORT bool libpostal_setup_datadir(char *datadir) {
return true; return true;
} }
LIBPOSTAL_EXPORT bool libpostal_setup(void) { bool libpostal_setup(void) {
return libpostal_setup_datadir(NULL); return libpostal_setup_datadir(NULL);
} }
LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier_datadir(char *datadir) { bool libpostal_setup_language_classifier_datadir(char *datadir) {
char *language_classifier_dir = NULL; char *language_classifier_dir = NULL;
if (datadir != NULL) { if (datadir != NULL) {
@@ -1137,11 +1137,11 @@ LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier_datadir(char *datadir)
return true; return true;
} }
LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier(void) { bool libpostal_setup_language_classifier(void) {
return libpostal_setup_language_classifier_datadir(NULL); return libpostal_setup_language_classifier_datadir(NULL);
} }
LIBPOSTAL_EXPORT bool libpostal_setup_parser_datadir(char *datadir) { bool libpostal_setup_parser_datadir(char *datadir) {
char *parser_dir = NULL; char *parser_dir = NULL;
if (datadir != NULL) { if (datadir != NULL) {
@@ -1160,11 +1160,11 @@ LIBPOSTAL_EXPORT bool libpostal_setup_parser_datadir(char *datadir) {
return true; return true;
} }
LIBPOSTAL_EXPORT bool libpostal_setup_parser(void) { bool libpostal_setup_parser(void) {
return libpostal_setup_parser_datadir(NULL); return libpostal_setup_parser_datadir(NULL);
} }
LIBPOSTAL_EXPORT void libpostal_teardown(void) { void libpostal_teardown(void) {
transliteration_module_teardown(); transliteration_module_teardown();
numex_module_teardown(); numex_module_teardown();
@@ -1172,10 +1172,10 @@ LIBPOSTAL_EXPORT void libpostal_teardown(void) {
address_dictionary_module_teardown(); address_dictionary_module_teardown();
} }
LIBPOSTAL_EXPORT void libpostal_teardown_language_classifier(void) { void libpostal_teardown_language_classifier(void) {
language_classifier_module_teardown(); language_classifier_module_teardown();
} }
LIBPOSTAL_EXPORT void libpostal_teardown_parser(void) { void libpostal_teardown_parser(void) {
address_parser_module_teardown(); address_parser_module_teardown();
} }

View File

@@ -9,7 +9,18 @@ extern "C" {
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h> #include <stdint.h>
#include <stdbool.h> #include <stdbool.h>
#include "export.h"
#ifdef _WIN32
#ifdef LIBPOSTAL_EXPORTS
#define LIBPOSTAL_EXPORT __declspec(dllexport)
#else
#define LIBPOSTAL_EXPORT __declspec(dllimport)
#endif
#elif __GNUC__ >= 4
#define LIBPOSTAL_EXPORT __attribute__ ((visibility("default")))
#else
#define LIBPOSTAL_EXPORT
#endif
#define LIBPOSTAL_MAX_LANGUAGE_LEN 4 #define LIBPOSTAL_MAX_LANGUAGE_LEN 4

View File

@@ -599,7 +599,7 @@ bool numex_module_init(void) {
Must be called only once before the module can be used Must be called only once before the module can be used
*/ */
LIBPOSTAL_EXPORT bool numex_module_setup(char *filename) { bool numex_module_setup(char *filename) {
if (numex_table == NULL) { if (numex_table == NULL) {
return numex_table_load(filename == NULL ? DEFAULT_NUMEX_PATH : filename); return numex_table_load(filename == NULL ? DEFAULT_NUMEX_PATH : filename);
} }
@@ -610,7 +610,7 @@ LIBPOSTAL_EXPORT bool numex_module_setup(char *filename) {
Called once when done with the module (usually at Called once when done with the module (usually at
the end of a main method) the end of a main method)
*/ */
LIBPOSTAL_EXPORT void numex_module_teardown(void) { void numex_module_teardown(void) {
numex_table_destroy(); numex_table_destroy();
numex_table = NULL; numex_table = NULL;
} }
@@ -1101,7 +1101,7 @@ size_t ordinal_suffix_len(char *str, size_t len, char *lang) {
return 0; return 0;
} }
LIBPOSTAL_EXPORT char *replace_numeric_expressions(char *str, char *lang) { char *replace_numeric_expressions(char *str, char *lang) {
numex_result_array *results = convert_numeric_expressions(str, lang); numex_result_array *results = convert_numeric_expressions(str, lang);
if (results == NULL) return NULL; if (results == NULL) return NULL;

View File

@@ -19,7 +19,6 @@
#include "tokens.h" #include "tokens.h"
#include "trie.h" #include "trie.h"
#include "trie_search.h" #include "trie_search.h"
#include "export.h"
#define NUMEX_DATA_FILE "numex.dat" #define NUMEX_DATA_FILE "numex.dat"
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR NUMEX_DATA_FILE #define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR NUMEX_DATA_FILE
@@ -147,7 +146,7 @@ typedef struct numex_result {
VECTOR_INIT(numex_result_array, numex_result_t) VECTOR_INIT(numex_result_array, numex_result_t)
LIBPOSTAL_EXPORT char *replace_numeric_expressions(char *str, char *lang); char *replace_numeric_expressions(char *str, char *lang);
numex_result_array *convert_numeric_expressions(char *str, char *lang); numex_result_array *convert_numeric_expressions(char *str, char *lang);
size_t ordinal_suffix_len(char *s, size_t len, char *lang); size_t ordinal_suffix_len(char *s, size_t len, char *lang);
size_t possible_ordinal_digit_len(char *str, size_t len); size_t possible_ordinal_digit_len(char *str, size_t len);
@@ -156,9 +155,9 @@ bool numex_table_write(FILE *file);
bool numex_table_save(char *filename); bool numex_table_save(char *filename);
bool numex_module_init(void); bool numex_module_init(void);
LIBPOSTAL_EXPORT bool numex_module_setup(char *filename); bool numex_module_setup(char *filename);
LIBPOSTAL_EXPORT void numex_module_teardown(void); void numex_module_teardown(void);
#endif #endif

View File

@@ -310240,7 +310240,7 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, boo
} }
LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input) { token_array *tokenize_keep_whitespace(const char *input) {
token_array *tokens = token_array_new(); token_array *tokens = token_array_new();
tokenize_add_tokens(tokens, input, strlen(input), true); tokenize_add_tokens(tokens, input, strlen(input), true);
return tokens; return tokens;

View File

@@ -9,7 +9,6 @@
#include "token_types.h" #include "token_types.h"
#include "tokens.h" #include "tokens.h"
#include "export.h"
typedef struct scanner { typedef struct scanner {
unsigned char *src, *cursor, *start, *end; unsigned char *src, *cursor, *start, *end;
@@ -20,7 +19,7 @@ uint16_t scan_token(scanner_t *s);
scanner_t scanner_from_string(const char *input, size_t len); scanner_t scanner_from_string(const char *input, size_t len);
void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace); void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace);
LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input); token_array *tokenize_keep_whitespace(const char *input);
token_array *tokenize(const char *input); token_array *tokenize(const char *input);

View File

@@ -255,7 +255,7 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, boo
} }
LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input) { token_array *tokenize_keep_whitespace(const char *input) {
token_array *tokens = token_array_new(); token_array *tokens = token_array_new();
tokenize_add_tokens(tokens, input, strlen(input), true); tokenize_add_tokens(tokens, input, strlen(input), true);
return tokens; return tokens;

View File

@@ -58,7 +58,7 @@ inline size_t string_common_suffix(const char *str1, const char *str2) {
return common_suffix; return common_suffix;
} }
LIBPOSTAL_EXPORT inline bool string_starts_with(const char *str, const char *start) { inline bool string_starts_with(const char *str, const char *start) {
for (; *start; str++, start++) for (; *start; str++, start++)
if (*str != *start) if (*str != *start)
return false; return false;
@@ -72,7 +72,7 @@ inline bool string_ends_with(const char *str, const char *ending) {
return str_len < end_len ? false : !strcmp(str + str_len - end_len, ending); return str_len < end_len ? false : !strcmp(str + str_len - end_len, ending);
} }
LIBPOSTAL_EXPORT inline bool string_equals(const char *s1, const char *s2) { inline bool string_equals(const char *s1, const char *s2) {
if (s1 == NULL || s2 == NULL) return false; if (s1 == NULL || s2 == NULL) return false;
return strcmp(s1, s2) == 0; return strcmp(s1, s2) == 0;
} }
@@ -169,7 +169,7 @@ uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_re
return num_replacements; return num_replacements;
} }
LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst) { ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst) {
ssize_t len = 0; ssize_t len = 0;
const uint8_t *ptr = str + start; const uint8_t *ptr = str + start;
@@ -188,7 +188,7 @@ LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t s
return ret_len; return ret_len;
} }
LIBPOSTAL_EXPORT char *utf8_reversed_string(const char *s) { char *utf8_reversed_string(const char *s) {
int32_t unich; int32_t unich;
ssize_t len, remaining; ssize_t len, remaining;
@@ -478,7 +478,7 @@ size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *st
} }
LIBPOSTAL_EXPORT inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2) { inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2) {
return utf8_common_prefix_len_ignore_separators(str1, str2, strlen(str2)); return utf8_common_prefix_len_ignore_separators(str1, str2, strlen(str2));
} }
@@ -606,7 +606,7 @@ size_t string_left_spaces_len(char *str, size_t len) {
return spaces; return spaces;
} }
LIBPOSTAL_EXPORT char *string_trim(char *str) { char *string_trim(char *str) {
size_t len = strlen(str); size_t len = strlen(str);
size_t left_spaces = string_left_spaces_len(str, len); size_t left_spaces = string_left_spaces_len(str, len);
size_t right_spaces = string_right_spaces_len(str, len); size_t right_spaces = string_right_spaces_len(str, len);
@@ -630,14 +630,14 @@ char_array *char_array_from_string_no_copy(char *str, size_t n) {
return array; return array;
} }
LIBPOSTAL_EXPORT inline char *char_array_get_string(char_array *array) { inline char *char_array_get_string(char_array *array) {
if (array->n == 0 || array->a[array->n - 1] != '\0') { if (array->n == 0 || array->a[array->n - 1] != '\0') {
char_array_terminate(array); char_array_terminate(array);
} }
return array->a; return array->a;
} }
LIBPOSTAL_EXPORT inline char *char_array_to_string(char_array *array) { inline char *char_array_to_string(char_array *array) {
if (array->n == 0 || array->a[array->n - 1] != '\0') { if (array->n == 0 || array->a[array->n - 1] != '\0') {
char_array_terminate(array); char_array_terminate(array);
} }
@@ -662,7 +662,7 @@ inline size_t char_array_len(char_array *array) {
} }
} }
LIBPOSTAL_EXPORT inline void char_array_append(char_array *array, char *str) { inline void char_array_append(char_array *array, char *str) {
while(*str) { while(*str) {
char_array_push(array, *str++); char_array_push(array, *str++);
} }
@@ -696,11 +696,11 @@ inline void char_array_append_reversed(char_array *array, char *str) {
char_array_append_reversed_len(array, str, len); char_array_append_reversed_len(array, str, len);
} }
LIBPOSTAL_EXPORT inline void char_array_terminate(char_array *array) { inline void char_array_terminate(char_array *array) {
char_array_push(array, '\0'); char_array_push(array, '\0');
} }
LIBPOSTAL_EXPORT inline void char_array_cat(char_array *array, char *str) { inline void char_array_cat(char_array *array, char *str) {
char_array_strip_nul_byte(array); char_array_strip_nul_byte(array);
char_array_append(array, str); char_array_append(array, str);
char_array_terminate(array); char_array_terminate(array);
@@ -713,7 +713,7 @@ inline void char_array_cat_len(char_array *array, char *str, size_t len) {
} }
LIBPOSTAL_EXPORT inline void char_array_cat_reversed(char_array *array, char *str) { inline void char_array_cat_reversed(char_array *array, char *str) {
char_array_strip_nul_byte(array); char_array_strip_nul_byte(array);
char_array_append_reversed(array, str); char_array_append_reversed(array, str);
char_array_terminate(array); char_array_terminate(array);
@@ -764,7 +764,7 @@ void char_array_add_vjoined(char_array *array, char *separator, bool strip_separ
} }
LIBPOSTAL_EXPORT inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) { inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) {
va_list args; va_list args;
va_start(args, count); va_start(args, count);
char_array_add_vjoined(array, separator, strip_separator, count, args); char_array_add_vjoined(array, separator, strip_separator, count, args);
@@ -808,14 +808,14 @@ void char_array_cat_vprintf(char_array *array, char *format, va_list args) {
} }
} }
LIBPOSTAL_EXPORT void char_array_cat_printf(char_array *array, char *format, ...) { void char_array_cat_printf(char_array *array, char *format, ...) {
va_list args; va_list args;
va_start(args, format); va_start(args, format);
char_array_cat_vprintf(array, format, args); char_array_cat_vprintf(array, format, args);
va_end(args); va_end(args);
} }
LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void) { cstring_array *cstring_array_new(void) {
cstring_array *array = malloc(sizeof(cstring_array)); cstring_array *array = malloc(sizeof(cstring_array));
if (array == NULL) return NULL; if (array == NULL) return NULL;
@@ -834,7 +834,7 @@ LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void) {
return array; return array;
} }
LIBPOSTAL_EXPORT void cstring_array_destroy(cstring_array *self) { void cstring_array_destroy(cstring_array *self) {
if (self == NULL) return; if (self == NULL) return;
if (self->indices) { if (self->indices) {
uint32_array_destroy(self->indices); uint32_array_destroy(self->indices);
@@ -889,7 +889,7 @@ inline size_t cstring_array_used(cstring_array *self) {
return self->str->n; return self->str->n;
} }
LIBPOSTAL_EXPORT inline size_t cstring_array_num_strings(cstring_array *self) { inline size_t cstring_array_num_strings(cstring_array *self) {
if (self == NULL) return 0; if (self == NULL) return 0;
return self->indices->n; return self->indices->n;
} }
@@ -958,13 +958,13 @@ inline int32_t cstring_array_get_offset(cstring_array *self, uint32_t i) {
return (int32_t)self->indices->a[i]; return (int32_t)self->indices->a[i];
} }
LIBPOSTAL_EXPORT inline char *cstring_array_get_string(cstring_array *self, uint32_t i) { inline char *cstring_array_get_string(cstring_array *self, uint32_t i) {
int32_t data_index = cstring_array_get_offset(self, i); int32_t data_index = cstring_array_get_offset(self, i);
if (data_index < 0) return NULL; if (data_index < 0) return NULL;
return self->str->a + data_index; return self->str->a + data_index;
} }
LIBPOSTAL_EXPORT inline int64_t cstring_array_token_length(cstring_array *self, uint32_t i) { inline int64_t cstring_array_token_length(cstring_array *self, uint32_t i) {
if (INVALID_INDEX(i, self->indices->n)) { if (INVALID_INDEX(i, self->indices->n)) {
return -1; return -1;
} }
@@ -1015,7 +1015,7 @@ cstring_array *cstring_array_split_ignore_consecutive(char *str, const char *sep
} }
LIBPOSTAL_EXPORT cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count) { cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count) {
*count = 0; *count = 0;
char *ptr = str; char *ptr = str;
size_t len = strlen(str); size_t len = strlen(str);
@@ -1034,7 +1034,7 @@ LIBPOSTAL_EXPORT cstring_array *cstring_array_split_no_copy(char *str, char sepa
} }
LIBPOSTAL_EXPORT char **cstring_array_to_strings(cstring_array *self) { char **cstring_array_to_strings(cstring_array *self) {
char **strings = malloc(self->indices->n * sizeof(char *)); char **strings = malloc(self->indices->n * sizeof(char *));
for (int i = 0; i < cstring_array_num_strings(self); i++) { for (int i = 0; i < cstring_array_num_strings(self); i++) {
@@ -1073,7 +1073,7 @@ string_tree_t *string_tree_new_size(size_t size) {
#define DEFAULT_STRING_TREE_SIZE 8 #define DEFAULT_STRING_TREE_SIZE 8
LIBPOSTAL_EXPORT string_tree_t *string_tree_new(void) { string_tree_t *string_tree_new(void) {
return string_tree_new_size((size_t)DEFAULT_STRING_TREE_SIZE); return string_tree_new_size((size_t)DEFAULT_STRING_TREE_SIZE);
} }
@@ -1085,12 +1085,12 @@ inline char *string_tree_get_alternative(string_tree_t *self, size_t token_index
return cstring_array_get_string(self->strings, token_start + alternative); return cstring_array_get_string(self->strings, token_start + alternative);
} }
LIBPOSTAL_EXPORT inline void string_tree_finalize_token(string_tree_t *self) { inline void string_tree_finalize_token(string_tree_t *self) {
uint32_array_push(self->token_indices, (uint32_t)cstring_array_num_strings(self->strings)); uint32_array_push(self->token_indices, (uint32_t)cstring_array_num_strings(self->strings));
} }
// terminated // terminated
LIBPOSTAL_EXPORT inline void string_tree_add_string(string_tree_t *self, char *str) { inline void string_tree_add_string(string_tree_t *self, char *str) {
cstring_array_add_string(self->strings, str); cstring_array_add_string(self->strings, str);
} }
@@ -1115,13 +1115,13 @@ inline uint32_t string_tree_num_strings(string_tree_t *self) {
return (uint32_t)cstring_array_num_strings(self->strings); return (uint32_t)cstring_array_num_strings(self->strings);
} }
LIBPOSTAL_EXPORT inline uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i) { inline uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i) {
if (i >= self->token_indices->n) return 0; if (i >= self->token_indices->n) return 0;
uint32_t n = self->token_indices->a[i + 1] - self->token_indices->a[i]; uint32_t n = self->token_indices->a[i + 1] - self->token_indices->a[i];
return n > 0 ? n : 1; return n > 0 ? n : 1;
} }
LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self) { void string_tree_destroy(string_tree_t *self) {
if (self == NULL) return; if (self == NULL) return;
if (self->token_indices != NULL) { if (self->token_indices != NULL) {
@@ -1135,7 +1135,7 @@ LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self) {
free(self); free(self);
} }
LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) { string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) {
string_tree_iterator_t *self = malloc(sizeof(string_tree_iterator_t)); string_tree_iterator_t *self = malloc(sizeof(string_tree_iterator_t));
self->tree = tree; self->tree = tree;
@@ -1166,7 +1166,7 @@ LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t
return self; return self;
} }
LIBPOSTAL_EXPORT void string_tree_iterator_next(string_tree_iterator_t *self) { void string_tree_iterator_next(string_tree_iterator_t *self) {
if (self->remaining > 0) { if (self->remaining > 0) {
int i; int i;
for (i = self->num_tokens - 1; i >= 0; i--) { for (i = self->num_tokens - 1; i >= 0; i--) {
@@ -1195,11 +1195,11 @@ char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i)
return cstring_array_get_string(self->tree->strings, base_index + offset); return cstring_array_get_string(self->tree->strings, base_index + offset);
} }
LIBPOSTAL_EXPORT bool string_tree_iterator_done(string_tree_iterator_t *self) { bool string_tree_iterator_done(string_tree_iterator_t *self) {
return self->remaining == 0; return self->remaining == 0;
} }
LIBPOSTAL_EXPORT void string_tree_iterator_destroy(string_tree_iterator_t *self) { void string_tree_iterator_destroy(string_tree_iterator_t *self) {
if (self == NULL) return; if (self == NULL) return;
if (self->path) { if (self->path) {

View File

@@ -16,7 +16,6 @@ Utilities for manipulating strings in C.
#include "collections.h" #include "collections.h"
#include "utf8proc/utf8proc.h" #include "utf8proc/utf8proc.h"
#include "vector.h" #include "vector.h"
#include "export.h"
#define MAX_UTF8_CHAR_SIZE 4 #define MAX_UTF8_CHAR_SIZE 4
@@ -60,16 +59,16 @@ char *string_replace_char(char *str, char c1, char c2);
bool string_replace_with_array(char *str, char *replace, char *with, char_array *result); bool string_replace_with_array(char *str, char *replace, char *with, char_array *result);
char *string_replace(char *str, char *replace, char *with); char *string_replace(char *str, char *replace, char *with);
LIBPOSTAL_EXPORT bool string_starts_with(const char *str, const char *start); bool string_starts_with(const char *str, const char *start);
bool string_ends_with(const char *str, const char *ending); bool string_ends_with(const char *str, const char *ending);
LIBPOSTAL_EXPORT bool string_equals(const char *s1, const char *s2); bool string_equals(const char *s1, const char *s2);
uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_repls, size_t trans_len); uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_repls, size_t trans_len);
// UTF-8 string methods // UTF-8 string methods
LIBPOSTAL_EXPORT char *utf8_reversed_string(const char *s); // returns a copy, caller frees char *utf8_reversed_string(const char *s); // returns a copy, caller frees
LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst); ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst);
// Casing functions return a copy, caller frees // Casing functions return a copy, caller frees
char *utf8_lower_options(const char *s, utf8proc_option_t options); char *utf8_lower_options(const char *s, utf8proc_option_t options);
@@ -81,7 +80,7 @@ int utf8_compare(const char *str1, const char *str2);
int utf8_compare_len(const char *str1, const char *str2, size_t len); int utf8_compare_len(const char *str1, const char *str2, size_t len);
size_t utf8_common_prefix(const char *str1, const char *str2); size_t utf8_common_prefix(const char *str1, const char *str2);
size_t utf8_common_prefix_len(const char *str1, const char *str2, size_t len); size_t utf8_common_prefix_len(const char *str1, const char *str2, size_t len);
LIBPOSTAL_EXPORT size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2); size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2);
size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len); size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len);
bool utf8_is_hyphen(int32_t ch); bool utf8_is_hyphen(int32_t ch);
@@ -100,7 +99,7 @@ ssize_t string_next_hyphen_index(char *str, size_t len);
bool string_contains_hyphen(char *str); bool string_contains_hyphen(char *str);
bool string_contains_hyphen_len(char *str, size_t len); bool string_contains_hyphen_len(char *str, size_t len);
LIBPOSTAL_EXPORT char *string_trim(char *str); char *string_trim(char *str);
/* char_array is a dynamic character array defined in collections.h /* char_array is a dynamic character array defined in collections.h
but has a few additional methods related to string manipulation. but has a few additional methods related to string manipulation.
@@ -113,40 +112,40 @@ char_array *char_array_from_string(char *str);
char_array *char_array_from_string_no_copy(char *str, size_t n); char_array *char_array_from_string_no_copy(char *str, size_t n);
// Gets the underlying C string for a char_array // Gets the underlying C string for a char_array
LIBPOSTAL_EXPORT char *char_array_get_string(char_array *array); char *char_array_get_string(char_array *array);
// Frees the char_array and returns a standard NUL-terminated string // Frees the char_array and returns a standard NUL-terminated string
LIBPOSTAL_EXPORT char *char_array_to_string(char_array *array); char *char_array_to_string(char_array *array);
// Can use strlen(array->a) but this is faster // Can use strlen(array->a) but this is faster
size_t char_array_len(char_array *array); size_t char_array_len(char_array *array);
// append_* methods do not NUL-terminate // append_* methods do not NUL-terminate
LIBPOSTAL_EXPORT void char_array_append(char_array *array, char *str); void char_array_append(char_array *array, char *str);
void char_array_append_len(char_array *array, char *str, size_t len); void char_array_append_len(char_array *array, char *str, size_t len);
void char_array_append_reversed(char_array *array, char *str); void char_array_append_reversed(char_array *array, char *str);
void char_array_append_reversed_len(char_array *array, char *str, size_t len); void char_array_append_reversed_len(char_array *array, char *str, size_t len);
// add NUL terminator to a char_array // add NUL terminator to a char_array
void char_array_strip_nul_byte(char_array *array); void char_array_strip_nul_byte(char_array *array);
LIBPOSTAL_EXPORT void char_array_terminate(char_array *array); void char_array_terminate(char_array *array);
// add_* methods NUL-terminate without stripping NUL-byte // add_* methods NUL-terminate without stripping NUL-byte
void char_array_add(char_array *array, char *str); void char_array_add(char_array *array, char *str);
void char_array_add_len(char_array *array, char *str, size_t len); void char_array_add_len(char_array *array, char *str, size_t len);
// Similar to strcat but with dynamic resizing, guaranteed NUL-terminated // Similar to strcat but with dynamic resizing, guaranteed NUL-terminated
LIBPOSTAL_EXPORT void char_array_cat(char_array *array, char *str); void char_array_cat(char_array *array, char *str);
void char_array_cat_len(char_array *array, char *str, size_t len); void char_array_cat_len(char_array *array, char *str, size_t len);
LIBPOSTAL_EXPORT void char_array_cat_reversed(char_array *array, char *str); void char_array_cat_reversed(char_array *array, char *str);
void char_array_cat_reversed_len(char_array *array, char *str, size_t len); void char_array_cat_reversed_len(char_array *array, char *str, size_t len);
// Similar to cat methods but with printf args // Similar to cat methods but with printf args
void char_array_cat_vprintf(char_array *array, char *format, va_list args); void char_array_cat_vprintf(char_array *array, char *format, va_list args);
LIBPOSTAL_EXPORT void char_array_cat_printf(char_array *array, char *format, ...); void char_array_cat_printf(char_array *array, char *format, ...);
// Mainly for paths or delimited strings // Mainly for paths or delimited strings
void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args); void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
LIBPOSTAL_EXPORT void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...); void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...); void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
@@ -171,13 +170,13 @@ typedef struct {
char_array *str; char_array *str;
} cstring_array; } cstring_array;
LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void); cstring_array *cstring_array_new(void);
cstring_array *cstring_array_new_size(size_t size); cstring_array *cstring_array_new_size(size_t size);
size_t cstring_array_capacity(cstring_array *self); size_t cstring_array_capacity(cstring_array *self);
size_t cstring_array_used(cstring_array *self); size_t cstring_array_used(cstring_array *self);
LIBPOSTAL_EXPORT size_t cstring_array_num_strings(cstring_array *self); size_t cstring_array_num_strings(cstring_array *self);
void cstring_array_resize(cstring_array *self, size_t size); void cstring_array_resize(cstring_array *self, size_t size);
void cstring_array_clear(cstring_array *self); void cstring_array_clear(cstring_array *self);
@@ -185,7 +184,7 @@ cstring_array *cstring_array_from_char_array(char_array *str);
cstring_array *cstring_array_from_strings(char **strings, size_t n); cstring_array *cstring_array_from_strings(char **strings, size_t n);
// Convert cstring_array to an array of n C strings and destroy the cstring_array // Convert cstring_array to an array of n C strings and destroy the cstring_array
LIBPOSTAL_EXPORT char **cstring_array_to_strings(cstring_array *self); char **cstring_array_to_strings(cstring_array *self);
// Split on delimiter // Split on delimiter
cstring_array *cstring_array_split(char *str, const char *separator, size_t separator_len, size_t *count); cstring_array *cstring_array_split(char *str, const char *separator, size_t separator_len, size_t *count);
@@ -193,7 +192,7 @@ cstring_array *cstring_array_split(char *str, const char *separator, size_t sepa
cstring_array *cstring_array_split_ignore_consecutive(char *str, const char *separator, size_t separator_len, size_t *count); cstring_array *cstring_array_split_ignore_consecutive(char *str, const char *separator, size_t separator_len, size_t *count);
// Split on delimiter by replacing (single character) separator with the NUL byte in the original string // Split on delimiter by replacing (single character) separator with the NUL byte in the original string
LIBPOSTAL_EXPORT cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count); cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count);
uint32_t cstring_array_start_token(cstring_array *self); uint32_t cstring_array_start_token(cstring_array *self);
uint32_t cstring_array_add_string(cstring_array *self, char *str); uint32_t cstring_array_add_string(cstring_array *self, char *str);
@@ -207,10 +206,10 @@ void cstring_array_cat_string_len(cstring_array *self, char *str, size_t len);
void cstring_array_terminate(cstring_array *self); void cstring_array_terminate(cstring_array *self);
int32_t cstring_array_get_offset(cstring_array *self, uint32_t i); int32_t cstring_array_get_offset(cstring_array *self, uint32_t i);
LIBPOSTAL_EXPORT char *cstring_array_get_string(cstring_array *self, uint32_t i); char *cstring_array_get_string(cstring_array *self, uint32_t i);
LIBPOSTAL_EXPORT int64_t cstring_array_token_length(cstring_array *self, uint32_t i); int64_t cstring_array_token_length(cstring_array *self, uint32_t i);
LIBPOSTAL_EXPORT void cstring_array_destroy(cstring_array *self); void cstring_array_destroy(cstring_array *self);
#define cstring_array_foreach(array, i, s, code) { \ #define cstring_array_foreach(array, i, s, code) { \
for (int __si = 0; __si < array->indices->n; __si++) { \ for (int __si = 0; __si < array->indices->n; __si++) { \
@@ -246,16 +245,16 @@ typedef struct string_tree {
cstring_array *strings; cstring_array *strings;
} string_tree_t; } string_tree_t;
LIBPOSTAL_EXPORT string_tree_t *string_tree_new(void); string_tree_t *string_tree_new(void);
string_tree_t *string_tree_new_size(size_t size); string_tree_t *string_tree_new_size(size_t size);
// get // get
char *string_tree_get_alternative(string_tree_t *self, size_t token_index, uint32_t alternative); char *string_tree_get_alternative(string_tree_t *self, size_t token_index, uint32_t alternative);
// finalize // finalize
LIBPOSTAL_EXPORT void string_tree_finalize_token(string_tree_t *self); void string_tree_finalize_token(string_tree_t *self);
// terminated // terminated
LIBPOSTAL_EXPORT void string_tree_add_string(string_tree_t *self, char *str); void string_tree_add_string(string_tree_t *self, char *str);
void string_tree_add_string_len(string_tree_t *self, char *str, size_t len); void string_tree_add_string_len(string_tree_t *self, char *str, size_t len);
// unterminated // unterminated
void string_tree_append_string(string_tree_t *self, char *str); void string_tree_append_string(string_tree_t *self, char *str);
@@ -264,9 +263,9 @@ void string_tree_append_string_len(string_tree_t *self, char *str, size_t len);
uint32_t string_tree_num_tokens(string_tree_t *self); uint32_t string_tree_num_tokens(string_tree_t *self);
uint32_t string_tree_num_strings(string_tree_t *self); uint32_t string_tree_num_strings(string_tree_t *self);
LIBPOSTAL_EXPORT uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i); uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i);
LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self); void string_tree_destroy(string_tree_t *self);
typedef struct string_tree_iterator { typedef struct string_tree_iterator {
string_tree_t *tree; string_tree_t *tree;
@@ -275,11 +274,11 @@ typedef struct string_tree_iterator {
uint32_t remaining; uint32_t remaining;
} string_tree_iterator_t; } string_tree_iterator_t;
LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree); string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree);
LIBPOSTAL_EXPORT void string_tree_iterator_next(string_tree_iterator_t *self); void string_tree_iterator_next(string_tree_iterator_t *self);
char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i); char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i);
LIBPOSTAL_EXPORT bool string_tree_iterator_done(string_tree_iterator_t *self); bool string_tree_iterator_done(string_tree_iterator_t *self);
LIBPOSTAL_EXPORT void string_tree_iterator_destroy(string_tree_iterator_t *self); void string_tree_iterator_destroy(string_tree_iterator_t *self);
#define string_tree_iterator_foreach_token(iter, s, code) { \ #define string_tree_iterator_foreach_token(iter, s, code) { \

View File

@@ -1,7 +1,7 @@
#ifndef TOKENS_H #ifndef TOKENS_H
#define TOKENS_H #define TOKENS_H
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h> #include <stdint.h>
@@ -35,6 +35,6 @@ char *tokenized_string_get_token(tokenized_string_t *self, uint32_t index);
void tokenized_string_destroy(tokenized_string_t *self); void tokenized_string_destroy(tokenized_string_t *self);
#endif #endif

View File

@@ -666,7 +666,7 @@ static char *replace_groups(trie_t *trie, char *str, char *replacement, group_ca
return char_array_to_string(ret); return char_array_to_string(ret);
} }
LIBPOSTAL_EXPORT char *transliterate(char *trans_name, char *str, size_t len) { char *transliterate(char *trans_name, char *str, size_t len) {
if (trans_name == NULL || str == NULL) return NULL; if (trans_name == NULL || str == NULL) return NULL;
transliteration_table_t *trans_table = get_transliteration_table(); transliteration_table_t *trans_table = get_transliteration_table();
@@ -1978,7 +1978,7 @@ bool transliteration_module_init(void) {
return trans_table != NULL; return trans_table != NULL;
} }
LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename) { bool transliteration_module_setup(char *filename) {
if (trans_table == NULL) { if (trans_table == NULL) {
return transliteration_table_load(filename == NULL ? DEFAULT_TRANSLITERATION_PATH : filename); return transliteration_table_load(filename == NULL ? DEFAULT_TRANSLITERATION_PATH : filename);
} }
@@ -1987,7 +1987,7 @@ LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename) {
} }
LIBPOSTAL_EXPORT void transliteration_module_teardown(void) { void transliteration_module_teardown(void) {
transliteration_table_destroy(); transliteration_table_destroy();
trans_table = NULL; trans_table = NULL;
} }

View File

@@ -12,7 +12,6 @@
#include "trie.h" #include "trie.h"
#include "trie_search.h" #include "trie_search.h"
#include "unicode_scripts.h" #include "unicode_scripts.h"
#include "export.h"
#define LATIN_ASCII "latin-ascii" #define LATIN_ASCII "latin-ascii"
#define LATIN_ASCII_SIMPLE "latin-ascii-simple" #define LATIN_ASCII_SIMPLE "latin-ascii-simple"
@@ -152,7 +151,7 @@ void transliterator_destroy(transliterator_t *self);
bool transliteration_table_add_transliterator(transliterator_t *trans); bool transliteration_table_add_transliterator(transliterator_t *trans);
transliterator_t *get_transliterator(char *name); transliterator_t *get_transliterator(char *name);
LIBPOSTAL_EXPORT char *transliterate(char *trans_name, char *str, size_t len); char *transliterate(char *trans_name, char *str, size_t len);
bool transliteration_table_add_script_language(script_language_t script_language, transliterator_index_t index); bool transliteration_table_add_script_language(script_language_t script_language, transliterator_index_t index);
transliterator_index_t get_transliterator_index_for_script_language(script_t script, char *language); transliterator_index_t get_transliterator_index_for_script_language(script_t script, char *language);
@@ -172,7 +171,7 @@ bool transliteration_table_save(char *filename);
// Module setup/teardown // Module setup/teardown
bool transliteration_module_init(void); bool transliteration_module_init(void);
LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename); bool transliteration_module_setup(char *filename);
LIBPOSTAL_EXPORT void transliteration_module_teardown(void); void transliteration_module_teardown(void);
#endif #endif

View File

@@ -96,7 +96,7 @@ trie_t *trie_new_alphabet(uint8_t *alphabet, uint32_t alphabet_size) {
return self; return self;
} }
LIBPOSTAL_EXPORT trie_t *trie_new(void) { trie_t *trie_new(void) {
return trie_new_alphabet(DEFAULT_ALPHABET, sizeof(DEFAULT_ALPHABET)); return trie_new_alphabet(DEFAULT_ALPHABET, sizeof(DEFAULT_ALPHABET));
} }
@@ -661,7 +661,7 @@ bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, size_t len, ui
} }
LIBPOSTAL_EXPORT inline bool trie_add(trie_t *self, char *key, uint32_t data) { inline bool trie_add(trie_t *self, char *key, uint32_t data) {
size_t len = strlen(key); size_t len = strlen(key);
if (len == 0) return false; if (len == 0) return false;
return trie_add_at_index(self, ROOT_NODE_ID, key, len + 1, data); return trie_add_at_index(self, ROOT_NODE_ID, key, len + 1, data);
@@ -754,7 +754,7 @@ inline bool trie_get_data_at_index(trie_t *self, uint32_t index, uint32_t *data
return true; return true;
} }
LIBPOSTAL_EXPORT inline bool trie_get_data(trie_t *self, char *key, uint32_t *data) { inline bool trie_get_data(trie_t *self, char *key, uint32_t *data) {
uint32_t node_id = trie_get(self, key); uint32_t node_id = trie_get(self, key);
return trie_get_data_at_index(self, node_id, data); return trie_get_data_at_index(self, node_id, data);
} }
@@ -899,7 +899,7 @@ inline uint32_t trie_num_keys(trie_t *self) {
/* /*
Destructor Destructor
*/ */
LIBPOSTAL_EXPORT void trie_destroy(trie_t *self) { void trie_destroy(trie_t *self) {
if (!self) if (!self)
return; return;

View File

@@ -33,7 +33,6 @@
#include "klib/kvec.h" #include "klib/kvec.h"
#include "log/log.h" #include "log/log.h"
#include "string_utils.h" #include "string_utils.h"
#include "export.h"
#define TRIE_SIGNATURE 0xABABABAB #define TRIE_SIGNATURE 0xABABABAB
#define NULL_NODE_ID 0 #define NULL_NODE_ID 0
@@ -80,7 +79,7 @@ typedef struct trie {
} trie_t; } trie_t;
trie_t *trie_new_alphabet(uint8_t *alphabet, uint32_t alphabet_size); trie_t *trie_new_alphabet(uint8_t *alphabet, uint32_t alphabet_size);
LIBPOSTAL_EXPORT trie_t *trie_new(void); trie_t *trie_new(void);
uint32_t trie_get_char_index(trie_t *self, unsigned char c); uint32_t trie_get_char_index(trie_t *self, unsigned char c);
uint32_t trie_get_transition_index(trie_t *self, trie_node_t node, unsigned char c); uint32_t trie_get_transition_index(trie_t *self, trie_node_t node, unsigned char c);
@@ -98,7 +97,7 @@ trie_data_node_t trie_get_data_node(trie_t *self, trie_node_t node);
bool trie_set_data_node(trie_t *self, uint32_t index, trie_data_node_t data_node); bool trie_set_data_node(trie_t *self, uint32_t index, trie_data_node_t data_node);
bool trie_get_data_at_index(trie_t *self, uint32_t index, uint32_t *data); bool trie_get_data_at_index(trie_t *self, uint32_t index, uint32_t *data);
LIBPOSTAL_EXPORT bool trie_get_data(trie_t *self, char *key, uint32_t *data); bool trie_get_data(trie_t *self, char *key, uint32_t *data);
bool trie_set_data_at_index(trie_t *self, uint32_t index, uint32_t data); bool trie_set_data_at_index(trie_t *self, uint32_t index, uint32_t data);
bool trie_set_data(trie_t *self, char *key, uint32_t data); bool trie_set_data(trie_t *self, char *key, uint32_t data);
@@ -114,7 +113,7 @@ int32_t trie_separate_tail(trie_t *self, uint32_t from_index, unsigned char *tai
void trie_tail_merge(trie_t *self, uint32_t old_node_id, unsigned char *suffix, uint32_t data); void trie_tail_merge(trie_t *self, uint32_t old_node_id, unsigned char *suffix, uint32_t data);
bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, size_t len, uint32_t data); bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, size_t len, uint32_t data);
LIBPOSTAL_EXPORT bool trie_add(trie_t *self, char *key, uint32_t data); bool trie_add(trie_t *self, char *key, uint32_t data);
bool trie_add_len(trie_t *self, char *key, size_t len, uint32_t data); bool trie_add_len(trie_t *self, char *key, size_t len, uint32_t data);
bool trie_add_suffix(trie_t *self, char *key, uint32_t data); bool trie_add_suffix(trie_t *self, char *key, uint32_t data);
bool trie_add_suffix_at_index(trie_t *self, char *key, uint32_t start_node_id, uint32_t data); bool trie_add_suffix_at_index(trie_t *self, char *key, uint32_t start_node_id, uint32_t data);
@@ -147,7 +146,7 @@ bool trie_save(trie_t *self, char *path);
trie_t *trie_read(FILE *file); trie_t *trie_read(FILE *file);
trie_t *trie_load(char *path); trie_t *trie_load(char *path);
LIBPOSTAL_EXPORT void trie_destroy(trie_t *self); void trie_destroy(trie_t *self);

View File

@@ -465,7 +465,7 @@ inline bool trie_search_tokens_with_phrases(trie_t *self, char *str, token_array
return trie_search_tokens_from_index(self, str, tokens, ROOT_NODE_ID, phrases); return trie_search_tokens_from_index(self, str, tokens, ROOT_NODE_ID, phrases);
} }
LIBPOSTAL_EXPORT inline phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) { inline phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
phrase_array *phrases = NULL; phrase_array *phrases = NULL;
if (!trie_search_tokens_with_phrases(self, str, tokens, &phrases)) { if (!trie_search_tokens_with_phrases(self, str, tokens, &phrases)) {
return NULL; return NULL;

View File

@@ -16,7 +16,6 @@
#include "tokens.h" #include "tokens.h"
#include "vector.h" #include "vector.h"
#include "utf8proc/utf8proc.h" #include "utf8proc/utf8proc.h"
#include "export.h"
typedef struct phrase { typedef struct phrase {
uint32_t start; uint32_t start;
@@ -32,7 +31,7 @@ VECTOR_INIT(phrase_array, phrase_t)
phrase_array *trie_search(trie_t *self, char *text); phrase_array *trie_search(trie_t *self, char *text);
bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, phrase_array **phrases); bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, phrase_array **phrases);
bool trie_search_with_phrases(trie_t *self, char *text, phrase_array **phrases); bool trie_search_with_phrases(trie_t *self, char *text, phrase_array **phrases);
LIBPOSTAL_EXPORT phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens); phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens);
bool trie_search_tokens_from_index(trie_t *self, char *str, token_array *tokens, uint32_t start_node_id, phrase_array **phrases); bool trie_search_tokens_from_index(trie_t *self, char *str, token_array *tokens, uint32_t start_node_id, phrase_array **phrases);
bool trie_search_tokens_with_phrases(trie_t *self, char *text, token_array *tokens, phrase_array **phrases); bool trie_search_tokens_with_phrases(trie_t *self, char *text, token_array *tokens, phrase_array **phrases);
phrase_t trie_search_suffixes_from_index(trie_t *self, char *word, size_t len, uint32_t start_node_id); phrase_t trie_search_suffixes_from_index(trie_t *self, char *word, size_t len, uint32_t start_node_id);

View File

@@ -44,7 +44,7 @@
#include "utf8proc_data.c" #include "utf8proc_data.c"
UTF8PROC_DLLEXPORT const utf8proc_int8_t utf8proc_utf8class[256] = { const utf8proc_int8_t utf8proc_utf8class[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -87,11 +87,11 @@ UTF8PROC_DLLEXPORT const utf8proc_int8_t utf8proc_utf8class[256] = {
be different, being based on ABI compatibility.): */ be different, being based on ABI compatibility.): */
#define STRINGIZEx(x) #x #define STRINGIZEx(x) #x
#define STRINGIZE(x) STRINGIZEx(x) #define STRINGIZE(x) STRINGIZEx(x)
UTF8PROC_DLLEXPORT const char *utf8proc_version(void) { const char *utf8proc_version(void) {
return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) ""; return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
} }
UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) { const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
switch (errcode) { switch (errcode) {
case UTF8PROC_ERROR_NOMEM: case UTF8PROC_ERROR_NOMEM:
return "Memory for processing UTF-8 data could not be allocated."; return "Memory for processing UTF-8 data could not be allocated.";
@@ -109,7 +109,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
} }
#define utf_cont(ch) (((ch) & 0xc0) == 0x80) #define utf_cont(ch) (((ch) & 0xc0) == 0x80)
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate( utf8proc_ssize_t utf8proc_iterate(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
) { ) {
utf8proc_uint32_t uc; utf8proc_uint32_t uc;
@@ -157,11 +157,11 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
return 4; return 4;
} }
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t uc) { utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t uc) {
return (((utf8proc_uint32_t)uc)-0xd800 > 0x07ff) && ((utf8proc_uint32_t)uc < 0x110000); return (((utf8proc_uint32_t)uc)-0xd800 > 0x07ff) && ((utf8proc_uint32_t)uc < 0x110000);
} }
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) { utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
if (uc < 0x00) { if (uc < 0x00) {
return 0; return 0;
} else if (uc < 0x80) { } else if (uc < 0x80) {
@@ -228,7 +228,7 @@ static const utf8proc_property_t *unsafe_get_property(utf8proc_int32_t uc) {
); );
} }
UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t uc) { const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t uc) {
return uc < 0 || uc >= 0x110000 ? utf8proc_properties : unsafe_get_property(uc); return uc < 0 || uc >= 0x110000 ? utf8proc_properties : unsafe_get_property(uc);
} }
@@ -259,18 +259,18 @@ static utf8proc_bool grapheme_break(int lbc, int tbc) {
} }
/* return whether there is a grapheme break between codepoints c1 and c2 */ /* return whether there is a grapheme break between codepoints c1 and c2 */
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t c1, utf8proc_int32_t c2) { utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t c1, utf8proc_int32_t c2) {
return grapheme_break(utf8proc_get_property(c1)->boundclass, return grapheme_break(utf8proc_get_property(c1)->boundclass,
utf8proc_get_property(c2)->boundclass); utf8proc_get_property(c2)->boundclass);
} }
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c) utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
{ {
utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_mapping; utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_mapping;
return cl >= 0 ? cl : c; return cl >= 0 ? cl : c;
} }
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c) utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
{ {
utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_mapping; utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_mapping;
return cu >= 0 ? cu : c; return cu >= 0 ? cu : c;
@@ -278,15 +278,15 @@ UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
/* return a character width analogous to wcwidth (except portable and /* return a character width analogous to wcwidth (except portable and
hopefully less buggy than most system wcwidth functions). */ hopefully less buggy than most system wcwidth functions). */
UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) { int utf8proc_charwidth(utf8proc_int32_t c) {
return utf8proc_get_property(c)->charwidth; return utf8proc_get_property(c)->charwidth;
} }
UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t c) { utf8proc_category_t utf8proc_category(utf8proc_int32_t c) {
return utf8proc_get_property(c)->category; return utf8proc_get_property(c)->category;
} }
UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) { const char *utf8proc_category_string(utf8proc_int32_t c) {
static const char s[][3] = {"Cn","Lu","Ll","Lt","Lm","Lo","Mn","Mc","Me","Nd","Nl","No","Pc","Pd","Ps","Pe","Pi","Pf","Po","Sm","Sc","Sk","So","Zs","Zl","Zp","Cc","Cf","Cs","Co"}; static const char s[][3] = {"Cn","Lu","Ll","Lt","Lm","Lo","Mn","Mc","Me","Nd","Nl","No","Pc","Pd","Ps","Pe","Pi","Pf","Po","Sm","Sc","Sk","So","Zs","Zl","Zp","Cc","Cf","Cs","Co"};
return s[utf8proc_category(c)]; return s[utf8proc_category(c)];
} }
@@ -295,7 +295,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
return utf8proc_decompose_char((replacement_uc), dst, bufsize, \ return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
options & ~UTF8PROC_LUMP, last_boundclass) options & ~UTF8PROC_LUMP, last_boundclass)
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) { utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
const utf8proc_property_t *property; const utf8proc_property_t *property;
utf8proc_propval_t category; utf8proc_propval_t category;
utf8proc_int32_t hangul_sindex; utf8proc_int32_t hangul_sindex;
@@ -399,7 +399,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
return 1; return 1;
} }
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( utf8proc_ssize_t utf8proc_decompose(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
) { ) {
@@ -461,7 +461,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
return wpos; return wpos;
} }
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) { utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
/* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
ASSERT: 'buffer' has one spare byte of free space at the end! */ ASSERT: 'buffer' has one spare byte of free space at the end! */
if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) { if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
@@ -583,7 +583,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
} }
} }
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( utf8proc_ssize_t utf8proc_map(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
) { ) {
utf8proc_int32_t *buffer; utf8proc_int32_t *buffer;
@@ -612,28 +612,28 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
return result; return result;
} }
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str) { utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str) {
utf8proc_uint8_t *retval; utf8proc_uint8_t *retval;
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_DECOMPOSE); UTF8PROC_DECOMPOSE);
return retval; return retval;
} }
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str) { utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str) {
utf8proc_uint8_t *retval; utf8proc_uint8_t *retval;
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_COMPOSE); UTF8PROC_COMPOSE);
return retval; return retval;
} }
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str) { utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str) {
utf8proc_uint8_t *retval; utf8proc_uint8_t *retval;
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT); UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
return retval; return retval;
} }
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str) { utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str) {
utf8proc_uint8_t *retval; utf8proc_uint8_t *retval;
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_COMPOSE | UTF8PROC_COMPAT); UTF8PROC_COMPOSE | UTF8PROC_COMPAT);

View File

@@ -111,18 +111,6 @@ typedef bool utf8proc_bool;
#endif #endif
#include <limits.h> #include <limits.h>
#ifdef _WIN32
# ifdef UTF8PROC_EXPORTS
# define UTF8PROC_DLLEXPORT __declspec(dllexport)
# else
# define UTF8PROC_DLLEXPORT __declspec(dllimport)
# endif
#elif __GNUC__ >= 4
# define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
#else
# define UTF8PROC_DLLEXPORT
#endif
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
@@ -365,20 +353,20 @@ typedef enum {
* Array containing the byte lengths of a UTF-8 encoded codepoint based * Array containing the byte lengths of a UTF-8 encoded codepoint based
* on the first byte. * on the first byte.
*/ */
UTF8PROC_DLLEXPORT extern const utf8proc_int8_t utf8proc_utf8class[256]; extern const utf8proc_int8_t utf8proc_utf8class[256];
/** /**
* Returns the utf8proc API version as a string MAJOR.MINOR.PATCH * Returns the utf8proc API version as a string MAJOR.MINOR.PATCH
* (http://semver.org format), possibly with a "-dev" suffix for * (http://semver.org format), possibly with a "-dev" suffix for
* development versions. * development versions.
*/ */
UTF8PROC_DLLEXPORT const char *utf8proc_version(void); const char *utf8proc_version(void);
/** /**
* Returns an informative error string for the given utf8proc error code * Returns an informative error string for the given utf8proc error code
* (e.g. the error codes returned by @ref utf8proc_map). * (e.g. the error codes returned by @ref utf8proc_map).
*/ */
UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode); const char *utf8proc_errmsg(utf8proc_ssize_t errcode);
/** /**
* Reads a single codepoint from the UTF-8 sequence being pointed to by `str`. * Reads a single codepoint from the UTF-8 sequence being pointed to by `str`.
@@ -390,7 +378,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode);
* In case of success, the number of bytes read is returned; otherwise, a * In case of success, the number of bytes read is returned; otherwise, a
* negative error code is returned. * negative error code is returned.
*/ */
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref); utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref);
/** /**
* Check if a codepoint is valid (regardless of whether it has been * Check if a codepoint is valid (regardless of whether it has been
@@ -398,7 +386,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str
* *
* @return 1 if the given `codepoint` is valid and otherwise return 0. * @return 1 if the given `codepoint` is valid and otherwise return 0.
*/ */
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint); utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint);
/** /**
* Encodes the codepoint as an UTF-8 string in the byte array pointed * Encodes the codepoint as an UTF-8 string in the byte array pointed
@@ -409,7 +397,7 @@ UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codep
* *
* This function does not check whether `codepoint` is valid Unicode. * This function does not check whether `codepoint` is valid Unicode.
*/ */
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst); utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst);
/** /**
* Look up the properties for a given codepoint. * Look up the properties for a given codepoint.
@@ -423,7 +411,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepo
* If the codepoint is unassigned or invalid, a pointer to a special struct is * If the codepoint is unassigned or invalid, a pointer to a special struct is
* returned in which `category` is 0 (@ref UTF8PROC_CATEGORY_CN). * returned in which `category` is 0 (@ref UTF8PROC_CATEGORY_CN).
*/ */
UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t codepoint); const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t codepoint);
/** Decompose a codepoint into an array of codepoints. /** Decompose a codepoint into an array of codepoints.
* *
@@ -452,7 +440,7 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
* required buffer size is returned, while the buffer will be overwritten with * required buffer size is returned, while the buffer will be overwritten with
* undefined data. * undefined data.
*/ */
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char( utf8proc_ssize_t utf8proc_decompose_char(
utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize,
utf8proc_option_t options, int *last_boundclass utf8proc_option_t options, int *last_boundclass
); );
@@ -473,7 +461,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(
* required buffer size is returned, while the buffer will be overwritten with * required buffer size is returned, while the buffer will be overwritten with
* undefined data. * undefined data.
*/ */
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( utf8proc_ssize_t utf8proc_decompose(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
); );
@@ -503,13 +491,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
* entries of the array pointed to by `str` have to be in the * entries of the array pointed to by `str` have to be in the
* range `0x0000` to `0x10FFFF`. Otherwise, the program might crash! * range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
*/ */
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options); utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options);
/** /**
* Given a pair of consecutive codepoints, return whether a grapheme break is * Given a pair of consecutive codepoints, return whether a grapheme break is
* permitted between them (as defined by the extended grapheme clusters in UAX#29). * permitted between them (as defined by the extended grapheme clusters in UAX#29).
*/ */
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2); utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2);
/** /**
@@ -517,14 +505,14 @@ UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepo
* lower-case character, if any; otherwise (if there is no lower-case * lower-case character, if any; otherwise (if there is no lower-case
* variant, or if `c` is not a valid codepoint) return `c`. * variant, or if `c` is not a valid codepoint) return `c`.
*/ */
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c); utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c);
/** /**
* Given a codepoint `c`, return the codepoint of the corresponding * Given a codepoint `c`, return the codepoint of the corresponding
* upper-case character, if any; otherwise (if there is no upper-case * upper-case character, if any; otherwise (if there is no upper-case
* variant, or if `c` is not a valid codepoint) return `c`. * variant, or if `c` is not a valid codepoint) return `c`.
*/ */
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c); utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c);
/** /**
* Given a codepoint, return a character width analogous to `wcwidth(codepoint)`, * Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
@@ -534,19 +522,19 @@ UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c);
* @note * @note
* If you want to check for particular types of non-printable characters, * If you want to check for particular types of non-printable characters,
* (analogous to `isprint` or `iscntrl`), use @ref utf8proc_category. */ * (analogous to `isprint` or `iscntrl`), use @ref utf8proc_category. */
UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t codepoint); int utf8proc_charwidth(utf8proc_int32_t codepoint);
/** /**
* Return the Unicode category for the codepoint (one of the * Return the Unicode category for the codepoint (one of the
* @ref utf8proc_category_t constants.) * @ref utf8proc_category_t constants.)
*/ */
UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint); utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint);
/** /**
* Return the two-letter (nul-terminated) Unicode category string for * Return the two-letter (nul-terminated) Unicode category string for
* the codepoint (e.g. `"Lu"` or `"Co"`). * the codepoint (e.g. `"Lu"` or `"Co"`).
*/ */
UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoint); const char *utf8proc_category_string(utf8proc_int32_t codepoint);
/** /**
* Maps the given UTF-8 string pointed to by `str` to a new UTF-8 * Maps the given UTF-8 string pointed to by `str` to a new UTF-8
@@ -566,7 +554,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi
* @note The memory of the new UTF-8 string will have been allocated * @note The memory of the new UTF-8 string will have been allocated
* with `malloc`, and should therefore be deallocated with `free`. * with `malloc`, and should therefore be deallocated with `free`.
*/ */
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( utf8proc_ssize_t utf8proc_map(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
); );
@@ -579,13 +567,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
*/ */
/** @{ */ /** @{ */
/** NFD normalization (@ref UTF8PROC_DECOMPOSE). */ /** NFD normalization (@ref UTF8PROC_DECOMPOSE). */
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str); utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str);
/** NFC normalization (@ref UTF8PROC_COMPOSE). */ /** NFC normalization (@ref UTF8PROC_COMPOSE). */
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str); utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str);
/** NFKD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */ /** NFKD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str); utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str);
/** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */ /** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str); utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
/** @} */ /** @} */
#ifdef __cplusplus #ifdef __cplusplus

View File

@@ -9,6 +9,6 @@ CFLAGS = $(CFLAGS_BASE)
TESTS = test_libpostal TESTS = test_libpostal
noinst_PROGRAMS = test_libpostal noinst_PROGRAMS = test_libpostal
test_libpostal_SOURCES = test.c test_expand.c test_parser.c test_transliterate.c test_numex.c test_trie.c test_string_utils.c test_crf_context.c test_libpostal_SOURCES = test.c test_expand.c test_parser.c test_transliterate.c test_numex.c test_trie.c test_string_utils.c test_crf_context.c ../src/strndup.c ../src/file_utils.c ../src/string_utils.c ../src/utf8proc/utf8proc.c ../src/trie.c ../src/trie_search.c ../src/transliterate.c ../src/numex.c ../src/features.c
test_libpostal_LDADD = ../src/libpostal.la $(CBLAS_LIBS) test_libpostal_LDADD = ../src/libpostal.la ../src/libscanner.la $(CBLAS_LIBS)
test_libpostal_CFLAGS = $(CFLAGS_O3) test_libpostal_CFLAGS = $(CFLAGS_O3)

View File

@@ -9,12 +9,12 @@ DEFAULT_INCLUDES = -I.. -I/usr/local/include
# Wonky but have to be able to override the user's optimization level to compile the scanner # Wonky but have to be able to override the user's optimization level to compile the scanner
# as it takes an unreasonably long time to compile with the optimizer on. # as it takes an unreasonably long time to compile with the optimizer on.
CFLAGS = -D UTF8PROC_EXPORTS -D LIBPOSTAL_EXPORTS CFLAGS =
lib_LTLIBRARIES = libpostal.la lib_LTLIBRARIES = libpostal.la
libpostal_la_SOURCES = strndup.c libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c libpostal_la_SOURCES = strndup.c libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c
libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS) libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS)
libpostal_la_CFLAGS = $(CFLAGS_O2) libpostal_la_CFLAGS = $(CFLAGS_O2) -D LIBPOSTAL_EXPORTS
libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ -no-undefined libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ -no-undefined
dist_bin_SCRIPTS = libpostal_data dist_bin_SCRIPTS = libpostal_data
@@ -24,18 +24,18 @@ dist_bin_SCRIPTS = libpostal_data
# -marm option. For that, CFLAGS_SCANNER_EXTRA is provided that can be filled during configuration stage (see ./configure --help). # -marm option. For that, CFLAGS_SCANNER_EXTRA is provided that can be filled during configuration stage (see ./configure --help).
noinst_LTLIBRARIES = libscanner.la noinst_LTLIBRARIES = libscanner.la
libscanner_la_SOURCES = klib/drand48.c scanner.c libscanner_la_SOURCES = klib/drand48.c scanner.c
libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA) libscanner_la_CFLAGS = $(CFLAGS_O0) -D LIBPOSTAL_EXPORTS $(CFLAGS_SCANNER_EXTRA)
noinst_PROGRAMS = libpostal bench address_parser_train address_parser_test build_address_dictionary build_numex_table build_trans_table address_parser_train address_parser_test language_classifier_train language_classifier language_classifier_test noinst_PROGRAMS = libpostal bench address_parser_train address_parser_test build_address_dictionary build_numex_table build_trans_table address_parser_train address_parser_test language_classifier_train language_classifier language_classifier_test
libpostal_SOURCES = main.c json_encode.c libpostal_SOURCES = strndup.c main.c json_encode.c file_utils.c string_utils.c utf8proc/utf8proc.c
libpostal_LDADD = libpostal.la libpostal_LDADD = libpostal.la
libpostal_CFLAGS = $(CFLAGS_O3) libpostal_CFLAGS = $(CFLAGS_O3)
bench_SOURCES = bench.c bench_SOURCES = bench.c
bench_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS) bench_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS)
bench_CFLAGS = $(CFLAGS_O3) bench_CFLAGS = $(CFLAGS_O3)
#address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c libpostal.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c #address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c
#address_parser_LDADD = libscanner.la $(CBLAS_LIBS) #address_parser_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS)
#address_parser_CFLAGS = $(CFLAGS_O3) #address_parser_CFLAGS = $(CFLAGS_O3)
build_address_dictionary_SOURCES = strndup.c address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c build_address_dictionary_SOURCES = strndup.c address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c
@@ -49,7 +49,7 @@ address_parser_train_LDADD = libscanner.la $(CBLAS_LIBS)
address_parser_train_CFLAGS = $(CFLAGS_O3) address_parser_train_CFLAGS = $(CFLAGS_O3)
address_parser_test_SOURCES = strndup.c address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c address_parser_test_SOURCES = strndup.c address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c
address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS) address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS)
address_parser_test_CFLAGS = $(CFLAGS_O3) address_parser_test_CFLAGS = $(CFLAGS_O3)
language_classifier_train_SOURCES = strndup.c language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c language_classifier_train_SOURCES = strndup.c language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c