From 2d3b420d352e32a09f044160d444428c3b4fa6f0 Mon Sep 17 00:00:00 2001 From: AeroXuk Date: Sun, 19 Nov 2017 12:44:38 +0000 Subject: [PATCH 01/11] Merging changes from AeroXuk/libpostal_windows. --- .appveyor.yml | 26 ++++++++++ libpostal.def | 16 ++++++ src/export.h | 16 ++++++ src/klib/drand48.c | 70 +++++++++++++++++++++++++++ src/klib/drand48.h | 41 ++++++++++++++++ src/klib/ksort.h | 1 + src/libpostal.c | 30 ++++++------ src/libpostal.h | 31 ++++++------ src/normalize.h | 1 + src/string_utils.h | 1 + src/strndup.c | 16 ++++++ src/strndup.h | 6 +++ src/tokens.h | 1 + src/transliterate.h | 1 + win_build.bat | 21 ++++++++ windows/configure.ac | 105 ++++++++++++++++++++++++++++++++++++++++ windows/src/Makefile.am | 45 +++++++++++++++++ 17 files changed, 398 insertions(+), 30 deletions(-) create mode 100644 .appveyor.yml create mode 100644 libpostal.def create mode 100644 src/export.h create mode 100644 src/klib/drand48.c create mode 100644 src/klib/drand48.h create mode 100644 src/strndup.c create mode 100644 src/strndup.h create mode 100644 win_build.bat create mode 100644 windows/configure.ac create mode 100644 windows/src/Makefile.am diff --git a/.appveyor.yml b/.appveyor.yml new file mode 100644 index 00000000..dca32bb7 --- /dev/null +++ b/.appveyor.yml @@ -0,0 +1,26 @@ +version: 1.0.{build} + +branches: + only: + - master + +image: Visual Studio 2015 +platform: x64 + +environment: + matrix: + - COMPILER: msys2 + PLATFORM: x64 + MSYS2_ARCH: x86_64 + MSYS2_DIR: msys64 + MSYSTEM: MINGW64 + BIT: 64 + +install: + -'%APPVEYOR_BUILD_FOLDER%\win_install.bat' + +build_script: + - '%APPVEYOR_BUILD_FOLDER%\win_build.bat' + +test_script: + - 'echo No tests yet' diff --git a/libpostal.def b/libpostal.def new file mode 100644 index 00000000..5db887c2 --- /dev/null +++ b/libpostal.def @@ -0,0 +1,16 @@ +EXPORTS +libpostal_get_default_options +libpostal_expand_address +libpostal_expansion_array_destroy +libpostal_address_parser_response_destroy 
+libpostal_get_address_parser_default_options +libpostal_parse_address +libpostal_setup +libpostal_setup_datadir +libpostal_teardown +libpostal_setup_parser +libpostal_setup_parser_datadir +libpostal_teardown_parser +libpostal_setup_language_classifier +libpostal_setup_language_classifier_datadir +libpostal_teardown_language_classifier diff --git a/src/export.h b/src/export.h new file mode 100644 index 00000000..2a5a490b --- /dev/null +++ b/src/export.h @@ -0,0 +1,16 @@ +#ifndef EXPORT_H +#define EXPORT_H + +#ifdef _WIN32 +#ifdef LIBPOSTAL_EXPORTS +#define LIBPOSTAL_EXPORT __declspec(dllexport) +#else +#define LIBPOSTAL_EXPORT __declspec(dllimport) +#endif +#elif __GNUC__ >= 4 +#define LIBPOSTAL_EXPORT __attribute__ ((visibility("default"))) +#else +#define LIBPOSTAL_EXPORT +#endif + +#endif //EXPORT_H diff --git a/src/klib/drand48.c b/src/klib/drand48.c new file mode 100644 index 00000000..ab9230e3 --- /dev/null +++ b/src/klib/drand48.c @@ -0,0 +1,70 @@ + +/* + * Copyright (c) 1993 Martin Birgmeier + * All rights reserved. + + * You may redistribute unmodified or modified versions of this source + * code provided that the above copyright notice and this and the + * following conditions are retained. + + * This software is provided ``as is'', and comes with no warranties + * of any kind. I shall in no event be liable for anything that happens + * to anyone/anything when using this software. 
+*/
+
+//I've rearranged the source into a header-only implementation for drand48() -Benjamin Kusin
+
+#include <math.h>
+#include "drand48.h"
+
+unsigned short _rand48_seed[3] = {
+    RAND48_SEED_0,
+    RAND48_SEED_1,
+    RAND48_SEED_2
+};
+
+unsigned short _rand48_mult[3] = {
+    RAND48_MULT_0,
+    RAND48_MULT_1,
+    RAND48_MULT_2
+};
+
+unsigned short _rand48_add = RAND48_ADD;
+
+/* Advance xseed one step: with xseed[0..2] as the low, middle and high words
+ * of X, store the low three words of X * _rand48_mult + _rand48_add back.
+ * Operands are widened to unsigned long before multiplying; as unsigned short
+ * they would promote to int, whose product can overflow. */
+void _dorand48(unsigned short xseed[3])
+{
+    unsigned long accu;
+    unsigned short temp[2];
+
+    accu = (unsigned long) _rand48_mult[0] * (unsigned long) xseed[0] + (unsigned long) _rand48_add;
+    temp[0] = (unsigned short) accu; /* lower 16 bits */
+    accu >>= sizeof(unsigned short) * 8;
+    accu += (unsigned long) _rand48_mult[0] * (unsigned long) xseed[1] + (unsigned long) _rand48_mult[1] * (unsigned long) xseed[0];
+    temp[1] = (unsigned short) accu; /* middle 16 bits */
+    accu >>= sizeof(unsigned short) * 8;
+    accu += (unsigned long) _rand48_mult[0] * (unsigned long) xseed[2] + (unsigned long) _rand48_mult[1] * (unsigned long) xseed[1] + (unsigned long) _rand48_mult[2] * (unsigned long) xseed[0];
+    xseed[0] = temp[0];
+    xseed[1] = temp[1];
+    xseed[2] = (unsigned short) accu;
+}
+
+/* Step xseed, then return it scaled into [0.0, 1.0), xseed[2] being the most
+ * significant word. The scaling assumes unsigned short is 16 bits wide. */
+double erand48(unsigned short xseed[3])
+{
+    _dorand48(xseed);
+    return ldexp((double) xseed[0], -48) +
+        ldexp((double) xseed[1], -32) +
+        ldexp((double) xseed[2], -16);
+}
+
+/* Same as erand48(), but reads and updates the file-level _rand48_seed state,
+ * which is shared by every caller. */
+double drand48(void)
+{
+    return erand48(_rand48_seed);
+}
\ No newline at end of file
diff --git a/src/klib/drand48.h b/src/klib/drand48.h
new file mode 100644
index 00000000..d8fd0f7d
--- /dev/null
+++ b/src/klib/drand48.h
@@ -0,0 +1,41 @@
+
+/*
+ * Copyright (c) 1993 Martin Birgmeier
+ * All rights reserved.
+
+ * You may redistribute unmodified or modified versions of this source
+ * code provided that the above copyright notice and this and the
+ * following conditions are retained.
+
+ * This software is provided ``as is'', and comes with no warranties
+ * of any kind. I shall in no event be liable for anything that happens
+ * to anyone/anything when using this software.
+*/
+
+//I've rearranged the source into a header-only implementation for drand48() -Benjamin Kusin
+
+#ifndef _DRAND48_H
+#define _DRAND48_H
+
+#define RAND48_SEED_0 (0x330e)
+#define RAND48_SEED_1 (0xabcd)
+#define RAND48_SEED_2 (0x1234)
+#define RAND48_MULT_0 (0xe66d)
+#define RAND48_MULT_1 (0xdeec)
+#define RAND48_MULT_2 (0x0005)
+#define RAND48_ADD (0x000b)
+
+/* Generator state, defined in drand48.c. Declared extern so that including
+ * this header from several source files does not define it more than once. */
+extern unsigned short _rand48_seed[3];
+extern unsigned short _rand48_mult[3];
+extern unsigned short _rand48_add;
+
+/* _dorand48() steps xseed in place; erand48() steps xseed and returns it as
+ * a double; drand48() does the same with _rand48_seed. */
+void _dorand48(unsigned short xseed[3]);
+double erand48(unsigned short xseed[3]);
+double drand48(void);
+
+
+#endif // _DRAND48_H
\ No newline at end of file
diff --git a/src/klib/ksort.h b/src/klib/ksort.h
index d2fb3532..1c8342fd 100644
--- a/src/klib/ksort.h
+++ b/src/klib/ksort.h
@@ -45,6 +45,7 @@
 
 #include <stdlib.h>
 #include <string.h>
+#include "drand48.h"
 
 typedef struct {
 	void *left, *right;
diff --git a/src/libpostal.c b/src/libpostal.c
index d226413e..68dd01da 100644
--- a/src/libpostal.c
+++ b/src/libpostal.c
@@ -57,7 +57,7 @@ static libpostal_normalize_options_t LIBPOSTAL_DEFAULT_OPTIONS = {
     .roman_numerals = true
 };
 
-libpostal_normalize_options_t libpostal_get_default_options(void) {
+LIBPOSTAL_EXPORT libpostal_normalize_options_t libpostal_get_default_options(void) {
     return LIBPOSTAL_DEFAULT_OPTIONS;
 }
 
@@ -942,7 +942,7 @@ static void expand_alternative(cstring_array *strings, khash_t(str_set) *unique_
     char_array_destroy(temp_string);
 }
 
-char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n) {
+LIBPOSTAL_EXPORT char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n) {
     options.address_components |= LIBPOSTAL_ADDRESS_ANY;
 
     uint64_t normalize_string_options = get_normalize_string_options(options);
@@ -1021,14 +1021,14 @@ char
**libpostal_expand_address(char *input, libpostal_normalize_options_t optio } -void libpostal_expansion_array_destroy(char **expansions, size_t n) { +LIBPOSTAL_EXPORT void libpostal_expansion_array_destroy(char **expansions, size_t n) { for (size_t i = 0; i < n; i++) { free(expansions[i]); } free(expansions); } -void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) { +LIBPOSTAL_EXPORT void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) { if (self == NULL) return; for (size_t i = 0; i < self->num_components; i++) { @@ -1057,11 +1057,11 @@ static libpostal_address_parser_options_t LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIO .country = NULL }; -inline libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void) { +LIBPOSTAL_EXPORT inline libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void) { return LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS; } -libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options) { +LIBPOSTAL_EXPORT libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options) { libpostal_address_parser_response_t *parsed = address_parser_parse(address, options.language, options.country); if (parsed == NULL) { @@ -1073,7 +1073,7 @@ libpostal_address_parser_response_t *libpostal_parse_address(char *address, libp return parsed; } -bool libpostal_setup_datadir(char *datadir) { +LIBPOSTAL_EXPORT bool libpostal_setup_datadir(char *datadir) { char *transliteration_path = NULL; char *numex_path = NULL; char *address_dictionary_path = NULL; @@ -1114,11 +1114,11 @@ bool libpostal_setup_datadir(char *datadir) { return true; } -bool libpostal_setup(void) { +LIBPOSTAL_EXPORT bool libpostal_setup(void) { return libpostal_setup_datadir(NULL); } -bool libpostal_setup_language_classifier_datadir(char *datadir) { +LIBPOSTAL_EXPORT bool 
libpostal_setup_language_classifier_datadir(char *datadir) { char *language_classifier_dir = NULL; if (datadir != NULL) { @@ -1137,11 +1137,11 @@ bool libpostal_setup_language_classifier_datadir(char *datadir) { return true; } -bool libpostal_setup_language_classifier(void) { +LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier(void) { return libpostal_setup_language_classifier_datadir(NULL); } -bool libpostal_setup_parser_datadir(char *datadir) { +LIBPOSTAL_EXPORT bool libpostal_setup_parser_datadir(char *datadir) { char *parser_dir = NULL; if (datadir != NULL) { @@ -1160,11 +1160,11 @@ bool libpostal_setup_parser_datadir(char *datadir) { return true; } -bool libpostal_setup_parser(void) { +LIBPOSTAL_EXPORT bool libpostal_setup_parser(void) { return libpostal_setup_parser_datadir(NULL); } -void libpostal_teardown(void) { +LIBPOSTAL_EXPORT void libpostal_teardown(void) { transliteration_module_teardown(); numex_module_teardown(); @@ -1172,10 +1172,10 @@ void libpostal_teardown(void) { address_dictionary_module_teardown(); } -void libpostal_teardown_language_classifier(void) { +LIBPOSTAL_EXPORT void libpostal_teardown_language_classifier(void) { language_classifier_module_teardown(); } -void libpostal_teardown_parser(void) { +LIBPOSTAL_EXPORT void libpostal_teardown_parser(void) { address_parser_module_teardown(); } diff --git a/src/libpostal.h b/src/libpostal.h index 3b86dea3..c844cfa2 100644 --- a/src/libpostal.h +++ b/src/libpostal.h @@ -9,6 +9,7 @@ extern "C" { #include #include #include +#include "export.h" #define LIBPOSTAL_MAX_LANGUAGE_LEN 4 @@ -62,11 +63,11 @@ typedef struct libpostal_normalize_options { } libpostal_normalize_options_t; -libpostal_normalize_options_t libpostal_get_default_options(void); +LIBPOSTAL_EXPORT libpostal_normalize_options_t libpostal_get_default_options(void); -char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n); +LIBPOSTAL_EXPORT char **libpostal_expand_address(char *input, 
libpostal_normalize_options_t options, size_t *n); -void libpostal_expansion_array_destroy(char **expansions, size_t n); +LIBPOSTAL_EXPORT void libpostal_expansion_array_destroy(char **expansions, size_t n); /* Address parser @@ -83,25 +84,25 @@ typedef struct libpostal_address_parser_options { char *country; } libpostal_address_parser_options_t; -void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self); +LIBPOSTAL_EXPORT void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self); -libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void); +LIBPOSTAL_EXPORT libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void); -libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options); +LIBPOSTAL_EXPORT libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options); // Setup/teardown methods -bool libpostal_setup(void); -bool libpostal_setup_datadir(char *datadir); -void libpostal_teardown(void); +LIBPOSTAL_EXPORT bool libpostal_setup(void); +LIBPOSTAL_EXPORT bool libpostal_setup_datadir(char *datadir); +LIBPOSTAL_EXPORT void libpostal_teardown(void); -bool libpostal_setup_parser(void); -bool libpostal_setup_parser_datadir(char *datadir); -void libpostal_teardown_parser(void); +LIBPOSTAL_EXPORT bool libpostal_setup_parser(void); +LIBPOSTAL_EXPORT bool libpostal_setup_parser_datadir(char *datadir); +LIBPOSTAL_EXPORT void libpostal_teardown_parser(void); -bool libpostal_setup_language_classifier(void); -bool libpostal_setup_language_classifier_datadir(char *datadir); -void libpostal_teardown_language_classifier(void); +LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier(void); +LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier_datadir(char *datadir); +LIBPOSTAL_EXPORT void libpostal_teardown_language_classifier(void); #ifdef 
__cplusplus
 }
diff --git a/src/normalize.h b/src/normalize.h
index d485f67f..ea5cf864 100644
--- a/src/normalize.h
+++ b/src/normalize.h
@@ -38,6 +38,7 @@ As well as normalizations for individual string tokens:
 #include "trie.h"
 #include "tokens.h"
 #include "vector.h"
+#include "strndup.h"
 
 #define NORMALIZE_STRING_LATIN_ASCII 1 << 0
 #define NORMALIZE_STRING_TRANSLITERATE 1 << 1
diff --git a/src/string_utils.h b/src/string_utils.h
index 0e7dd235..91a56b0b 100644
--- a/src/string_utils.h
+++ b/src/string_utils.h
@@ -16,6 +16,7 @@ Utilities for manipulating strings in C.
 #include "collections.h"
 #include "utf8proc/utf8proc.h"
 #include "vector.h"
+#include "strndup.h"
 
 #define MAX_UTF8_CHAR_SIZE 4
 
diff --git a/src/strndup.c b/src/strndup.c
new file mode 100644
index 00000000..90feafe6
--- /dev/null
+++ b/src/strndup.c
@@ -0,0 +1,16 @@
+#ifndef HAVE_STRNDUP
+#include <stdlib.h>
+#include <string.h>
+/* Fallback strndup(): malloc'd, NUL-terminated copy of at most n bytes of s; NULL if allocation fails. */
+char *strndup(const char *s, size_t n)
+{
+    const char *nul = memchr(s, '\0', n);
+    size_t len = nul ? (size_t)(nul - s) : n; /* never size the copy past the terminator */
+    char *copy = malloc(len + 1);
+    if (copy) {
+        memcpy(copy, s, len);
+        copy[len] = '\0';
+    }
+    return copy;
+}
+#endif /* HAVE_STRNDUP */
\ No newline at end of file
diff --git a/src/strndup.h b/src/strndup.h
new file mode 100644
index 00000000..893fbcbd
--- /dev/null
+++ b/src/strndup.h
@@ -0,0 +1,6 @@
+#ifndef HAVE_STRNDUP
+#define HAVE_STRNDUP
+
+char *strndup(const char *s, size_t n);
+
+#endif /* HAVE_STRNDUP */
\ No newline at end of file
diff --git a/src/tokens.h b/src/tokens.h
index 6b314417..5b7739c5 100644
--- a/src/tokens.h
+++ b/src/tokens.h
@@ -11,6 +11,7 @@
 #include "string_utils.h"
 #include "token_types.h"
 #include "vector.h"
+#include "strndup.h"
 
 typedef struct token {
     size_t offset;
diff --git a/src/transliterate.h b/src/transliterate.h
index ab559393..ffbf2e79 100644
--- a/src/transliterate.h
+++ b/src/transliterate.h
@@ -12,6 +12,7 @@
 #include "trie.h"
 #include "trie_search.h"
 #include "unicode_scripts.h"
+#include "strndup.h"
 
 #define LATIN_ASCII "latin-ascii"
 #define LATIN_ASCII_SIMPLE "latin-ascii-simple"
diff --git a/win_build.bat
b/win_build.bat new file mode 100644 index 00000000..743015ba --- /dev/null +++ b/win_build.bat @@ -0,0 +1,21 @@ +@echo off + +cd %APPVEYOR_BUILD_FOLDER% + +echo Compiler: %COMPILER% +echo Architecture: %MSYS2_ARCH% +echo Platform: %PLATFORM% +echo MSYS2 directory: %MSYS2_DIR% +echo MSYS2 system: %MSYSTEM% +echo Configuration: %CONFIGURATION% +echo Bits: %BIT% + +IF %COMPILER%==msys2 ( + @echo on + SET "PATH=C:\%MSYS2_DIR%\%MSYSTEM%\bin;C:\%MSYS2_DIR%\usr\bin;%PATH%" + + bash -lc "cd $APPVEYOR_BUILD_FOLDER && . bootstrap.sh" + bash -lc "cd $APPVEYOR_BUILD_FOLDER && . configure --datadir=$APPVEYOR_BUILD_FOLDER/data" + bash -lc "cd $APPVEYOR_BUILD_FOLDER && make" + bash -lc "cd $APPVEYOR_BUILD_FOLDER && make install" +) diff --git a/windows/configure.ac b/windows/configure.ac new file mode 100644 index 00000000..b41080f6 --- /dev/null +++ b/windows/configure.ac @@ -0,0 +1,105 @@ +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +m4_define(LIBPOSTAL_MAJOR_VERSION, [1]) +m4_define(LIBPOSTAL_MINOR_VERSION, [0]) +m4_define(LIBPOSTAL_PATCH_VERSION, [0]) + +AC_INIT([libpostal], LIBPOSTAL_MAJOR_VERSION.LIBPOSTAL_MINOR_VERSION.LIBPOSTAL_PATCH_VERSION) + +AC_CONFIG_MACRO_DIR([m4]) + +AM_INIT_AUTOMAKE([foreign subdir-objects]) +AC_CONFIG_SRCDIR([src]) +LT_INIT([win32-dll]) + +AC_CONFIG_HEADERS([config.h]) + +# Checks for programs. +AC_PROG_CC_C99 +AC_PROG_INSTALL + +LDFLAGS="$LDFLAGS -L/usr/local/lib" + +# Checks for libraries. +AC_SEARCH_LIBS([log], + [m],,[AC_MSG_ERROR([Could not find math library])]) + +# Checks for header files. +AC_HEADER_STDC +AC_HEADER_TIME +AC_HEADER_DIRENT +AC_HEADER_STDBOOL +AC_CHECK_HEADERS([fcntl.h float.h inttypes.h limits.h locale.h malloc.h memory.h stddef.h stdint.h stdlib.h string.h unistd.h]) + +# Checks for typedefs, structures, and compiler characteristics. 
+AC_C_INLINE +AC_TYPE_INT16_T +AC_TYPE_INT32_T +AC_TYPE_INT64_T +AC_TYPE_INT8_T +AC_TYPE_OFF_T +AC_TYPE_SIZE_T +AC_TYPE_SSIZE_T +AC_TYPE_UINT16_T +AC_TYPE_UINT32_T +AC_TYPE_UINT64_T +AC_TYPE_UINT8_T +AC_CHECK_TYPES([ptrdiff_t]) + +# Checks for library functions. +AC_CHECK_FUNCS([malloc realloc getcwd gettimeofday memmove memset regcomp setlocale sqrt strdup strndup]) + +AC_CONFIG_FILES([Makefile + libpostal.pc + src/Makefile + test/Makefile]) + +AC_CHECK_PROG([FOUND_SHUF], [shuf], [yes]) +AC_CHECK_PROG([FOUND_GSHUF], [gshuf], [yes]) + +AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf available])]) +AS_IF([test "x$FOUND_GSHUF" = xyes], [AC_DEFINE([HAVE_GSHUF], [1], [gshuf available])]) + +# ------------------------------------------------------------------ +# Checks for SSE2 build +# ------------------------------------------------------------------ +AC_ARG_ENABLE([sse2], + AS_HELP_STRING( + [--disable-sse2], + [disable SSE2 optimization routines] + ) + ) + +AS_IF([test "x$enable_sse2" != "xno"], [ + CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}" +]) + +AC_CHECK_HEADER(cblas.h, [AX_CBLAS]) + +AC_ARG_ENABLE([data-download], + [ --disable-data-download Disable downloading data], + [case "${enableval}" in + yes) DOWNLOAD_DATA=true ;; + no) DOWNLOAD_DATA=false ;; + *) AC_MSG_ERROR([bad value ${enableval} for --disable-data-download]) ;; + esac], [DOWNLOAD_DATA=true]) + +AM_CONDITIONAL([DOWNLOAD_DATA], [test "x$DOWNLOAD_DATA" = "xtrue"]) + +AC_ARG_WITH(cflags-scanner-extra, [AS_HELP_STRING([--with-cflags-scanner-extra@<:@=VALUE@:>@], [Extra compilation options for scanner.c])], +[ + if test "x$withval" = "xno"; then + CFLAGS_SCANNER_EXTRA="" + else + CFLAGS_SCANNER_EXTRA="$withval" + fi +], +[ CFLAGS_SCANNER_EXTRA="" ] +) + +AC_MSG_NOTICE([extra cflags for scanner.c: $CFLAGS_SCANNER_EXTRA]) +AC_SUBST(CFLAGS_SCANNER_EXTRA) +AC_SUBST(LIBPOSTAL_SO_VERSION, LIBPOSTAL_MAJOR_VERSION:LIBPOSTAL_MINOR_VERSION:LIBPOSTAL_PATCH_VERSION) + +AC_OUTPUT diff 
--git a/windows/src/Makefile.am b/windows/src/Makefile.am new file mode 100644 index 00000000..7ee29385 --- /dev/null +++ b/windows/src/Makefile.am @@ -0,0 +1,45 @@ +# this version of the makefile skips building the programs. It only builds the libraries and downloads data so you can use the API. + +# Inherited from autoconf / user-specified +CFLAGS_CONF = @CFLAGS@ +CFLAGS_BASE = -Wall -Wextra -Wno-unused-function -Wformat -Werror=format-security -Winit-self -Wno-sign-compare -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g $(CFLAGS_CONF) +CFLAGS_O0 = $(CFLAGS_BASE) -O0 +CFLAGS_O1 = $(CFLAGS_BASE) -O1 +CFLAGS_O2 = $(CFLAGS_BASE) -O2 +CFLAGS_O3 = $(CFLAGS_BASE) -O3 +DEFAULT_INCLUDES = -I.. -I/usr/local/include + +# Wonky but have to be able to override the user's optimization level to compile the scanner +# as it takes an unreasonably long time to compile with the optimizer on. +#EDIT - add UTF8PROC_EXPORTS so builds on windows +CFLAGS = -D UTF8PROC_EXPORTS -D LIBPOSTAL_EXPORTS + +lib_LTLIBRARIES = libpostal.la +libpostal_la_SOURCES = strndup.c libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c +libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS) +libpostal_la_CFLAGS = $(CFLAGS_O2) +libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ -no-undefined + +dist_bin_SCRIPTS = libpostal_data + +# Scanner can take a very long time to compile with higher optimization levels, so always use -O0, scanner is fast enough +# On cross-compilation for ARM using gcc-4.7, there are "out of range" errors during compilation that can be fixed by adding +# -marm option. 
For that, CFLAGS_SCANNER_EXTRA is provided that can be filled during configuration stage (see ./configure --help). +noinst_LTLIBRARIES = libscanner.la +libscanner_la_SOURCES = klib/drand48.c scanner.c +libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA) + + +# program building skipped here + +pkginclude_HEADERS = libpostal.h + +if DOWNLOAD_DATA +all-local: + ${srcdir}/libpostal_data download all $(datadir)/libpostal +endif + +lexer: scanner.re + re2c -F -s -b -8 -o scanner.c scanner.re + +.PHONY: lexer From dbf232b8f890e49b466146441f052b78c1a8a7bc Mon Sep 17 00:00:00 2001 From: AeroXuk Date: Sun, 19 Nov 2017 13:35:08 +0000 Subject: [PATCH 02/11] Fix bugs in AppVeyor config and build script. Added call to test script. --- .appveyor.yml | 5 +---- win_build.bat | 5 +++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index dca32bb7..59702dd3 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -16,11 +16,8 @@ environment: MSYSTEM: MINGW64 BIT: 64 -install: - -'%APPVEYOR_BUILD_FOLDER%\win_install.bat' - build_script: - '%APPVEYOR_BUILD_FOLDER%\win_build.bat' test_script: - - 'echo No tests yet' + - '%APPVEYOR_BUILD_FOLDER%\test\test_libpostal.exe' diff --git a/win_build.bat b/win_build.bat index 743015ba..f8f1b7b5 100644 --- a/win_build.bat +++ b/win_build.bat @@ -14,8 +14,9 @@ IF %COMPILER%==msys2 ( @echo on SET "PATH=C:\%MSYS2_DIR%\%MSYSTEM%\bin;C:\%MSYS2_DIR%\usr\bin;%PATH%" - bash -lc "cd $APPVEYOR_BUILD_FOLDER && . bootstrap.sh" - bash -lc "cd $APPVEYOR_BUILD_FOLDER && . 
configure --datadir=$APPVEYOR_BUILD_FOLDER/data" + bash -lc "cd $APPVEYOR_BUILD_FOLDER && cp -rf windows/* ./" + bash -lc "cd $APPVEYOR_BUILD_FOLDER && ./bootstrap.sh" + bash -lc "cd $APPVEYOR_BUILD_FOLDER && ./configure --datadir=/c" bash -lc "cd $APPVEYOR_BUILD_FOLDER && make" bash -lc "cd $APPVEYOR_BUILD_FOLDER && make install" ) From ad682b75925f201abaab30d5a4c1a4c204094732 Mon Sep 17 00:00:00 2001 From: AeroXuk Date: Mon, 20 Nov 2017 20:24:11 +0000 Subject: [PATCH 03/11] Altered Makefile to include strndup.c on the other programs which require it. For the windows version of the Makefile, commented out address_parser lines as it has dependencies on includes we don't have. --- src/Makefile.am | 24 ++++++++++++------------ windows/src/Makefile.am | 41 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 6707d5aa..15d90d79 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -9,13 +9,13 @@ DEFAULT_INCLUDES = -I.. -I/usr/local/include # Wonky but have to be able to override the user's optimization level to compile the scanner # as it takes an unreasonably long time to compile with the optimizer on. 
-CFLAGS = +CFLAGS = -D UTF8PROC_EXPORTS -D LIBPOSTAL_EXPORTS lib_LTLIBRARIES = libpostal.la -libpostal_la_SOURCES = libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c +libpostal_la_SOURCES = strndup.c libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS) libpostal_la_CFLAGS = $(CFLAGS_O2) -libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ +libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ -no-undefined dist_bin_SCRIPTS = libpostal_data @@ -23,7 +23,7 @@ dist_bin_SCRIPTS = libpostal_data # On cross-compilation for ARM using gcc-4.7, there are "out of range" errors during compilation that can be fixed by adding # -marm option. For that, CFLAGS_SCANNER_EXTRA is provided that can be filled during configuration stage (see ./configure --help). 
noinst_LTLIBRARIES = libscanner.la -libscanner_la_SOURCES = scanner.c +libscanner_la_SOURCES = klib/drand48.c scanner.c libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA) noinst_PROGRAMS = libpostal bench address_parser address_parser_train address_parser_test build_address_dictionary build_numex_table build_trans_table address_parser_train address_parser_test language_classifier_train language_classifier language_classifier_test @@ -38,27 +38,27 @@ address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise. address_parser_LDADD = libscanner.la $(CBLAS_LIBS) address_parser_CFLAGS = $(CFLAGS_O3) -build_address_dictionary_SOURCES = address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c +build_address_dictionary_SOURCES = strndup.c address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c build_address_dictionary_CFLAGS = $(CFLAGS_O3) -build_numex_table_SOURCES = numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c +build_numex_table_SOURCES = strndup.c numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c build_numex_table_CFLAGS = $(CFLAGS_O3) -build_trans_table_SOURCES = transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c +build_trans_table_SOURCES = strndup.c transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c build_trans_table_CFLAGS = $(CFLAGS_O3) -address_parser_train_SOURCES = address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c 
numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c shuffle.c utf8proc/utf8proc.c ngrams.c +address_parser_train_SOURCES = strndup.c address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c shuffle.c utf8proc/utf8proc.c ngrams.c address_parser_train_LDADD = libscanner.la $(CBLAS_LIBS) address_parser_train_CFLAGS = $(CFLAGS_O3) -address_parser_test_SOURCES = address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c +address_parser_test_SOURCES = strndup.c address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS) address_parser_test_CFLAGS = $(CFLAGS_O3) -language_classifier_train_SOURCES = language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c 
cartesian_product.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c +language_classifier_train_SOURCES = strndup.c language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c language_classifier_train_LDADD = libscanner.la $(CBLAS_LIBS) language_classifier_train_CFLAGS = $(CFLAGS_O3) -language_classifier_SOURCES = language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c +language_classifier_SOURCES = strndup.c language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c language_classifier_LDADD = libscanner.la $(CBLAS_LIBS) language_classifier_CFLAGS = $(CFLAGS_O3) -language_classifier_test_SOURCES = language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c 
+language_classifier_test_SOURCES = strndup.c language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c language_classifier_test_LDADD = libscanner.la $(CBLAS_LIBS) language_classifier_test_CFLAGS = $(CFLAGS_O3) diff --git a/windows/src/Makefile.am b/windows/src/Makefile.am index 7ee29385..906211fa 100644 --- a/windows/src/Makefile.am +++ b/windows/src/Makefile.am @@ -1,6 +1,4 @@ -# this version of the makefile skips building the programs. It only builds the libraries and downloads data so you can use the API. - -# Inherited from autoconf / user-specified +# Inherited from autoconf / user-specified CFLAGS_CONF = @CFLAGS@ CFLAGS_BASE = -Wall -Wextra -Wno-unused-function -Wformat -Werror=format-security -Winit-self -Wno-sign-compare -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g $(CFLAGS_CONF) CFLAGS_O0 = $(CFLAGS_BASE) -O0 @@ -11,7 +9,6 @@ DEFAULT_INCLUDES = -I.. -I/usr/local/include # Wonky but have to be able to override the user's optimization level to compile the scanner # as it takes an unreasonably long time to compile with the optimizer on. 
-#EDIT - add UTF8PROC_EXPORTS so builds on windows CFLAGS = -D UTF8PROC_EXPORTS -D LIBPOSTAL_EXPORTS lib_LTLIBRARIES = libpostal.la @@ -29,8 +26,42 @@ noinst_LTLIBRARIES = libscanner.la libscanner_la_SOURCES = klib/drand48.c scanner.c libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA) +noinst_PROGRAMS = libpostal bench address_parser_train address_parser_test build_address_dictionary build_numex_table build_trans_table address_parser_train address_parser_test language_classifier_train language_classifier language_classifier_test + +libpostal_SOURCES = main.c json_encode.c +libpostal_LDADD = libpostal.la +libpostal_CFLAGS = $(CFLAGS_O3) +bench_SOURCES = bench.c +bench_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS) +bench_CFLAGS = $(CFLAGS_O3) +#address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c libpostal.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c +#address_parser_LDADD = libscanner.la $(CBLAS_LIBS) +#address_parser_CFLAGS = $(CFLAGS_O3) + +build_address_dictionary_SOURCES = strndup.c address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c +build_address_dictionary_CFLAGS = $(CFLAGS_O3) +build_numex_table_SOURCES = strndup.c numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c +build_numex_table_CFLAGS = $(CFLAGS_O3) +build_trans_table_SOURCES = strndup.c transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c +build_trans_table_CFLAGS = $(CFLAGS_O3) 
+address_parser_train_SOURCES = strndup.c address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c shuffle.c utf8proc/utf8proc.c ngrams.c +address_parser_train_LDADD = libscanner.la $(CBLAS_LIBS) +address_parser_train_CFLAGS = $(CFLAGS_O3) + +address_parser_test_SOURCES = strndup.c address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c +address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS) +address_parser_test_CFLAGS = $(CFLAGS_O3) + +language_classifier_train_SOURCES = strndup.c language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c +language_classifier_train_LDADD = libscanner.la $(CBLAS_LIBS) +language_classifier_train_CFLAGS = $(CFLAGS_O3) +language_classifier_SOURCES = strndup.c language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c 
trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c +language_classifier_LDADD = libscanner.la $(CBLAS_LIBS) +language_classifier_CFLAGS = $(CFLAGS_O3) +language_classifier_test_SOURCES = strndup.c language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c +language_classifier_test_LDADD = libscanner.la $(CBLAS_LIBS) +language_classifier_test_CFLAGS = $(CFLAGS_O3) -# program building skipped here pkginclude_HEADERS = libpostal.h From f07ab765cbc345c959a18436749af9ca1e6ce9d2 Mon Sep 17 00:00:00 2001 From: AeroXuk Date: Mon, 20 Nov 2017 20:58:37 +0000 Subject: [PATCH 04/11] Adding the export marker to all functions used in tests. --- src/features.c | 2 +- src/features.h | 3 ++- src/file_utils.c | 2 +- src/file_utils.h | 3 ++- src/numex.c | 6 ++--- src/numex.h | 9 ++++--- src/scanner.c | 2 +- src/scanner.h | 3 ++- src/scanner.re | 2 +- src/string_utils.c | 60 ++++++++++++++++++++++---------------------- src/string_utils.h | 61 +++++++++++++++++++++++---------------------- src/transliterate.c | 6 ++--- src/transliterate.h | 7 +++--- src/trie.c | 8 +++--- src/trie.h | 9 ++++--- src/trie_search.c | 2 +- src/trie_search.h | 3 ++- 17 files changed, 98 insertions(+), 90 deletions(-) diff --git a/src/features.c b/src/features.c index ada3586b..066852bc 100644 --- a/src/features.c +++ b/src/features.c @@ -1,7 +1,7 @@ #include "features.h" -void feature_array_add(cstring_array *features, size_t count, ...) { +LIBPOSTAL_EXPORT void feature_array_add(cstring_array *features, size_t count, ...) 
{ if (count <= 0) { return; } diff --git a/src/features.h b/src/features.h index 6f99ae3e..fb551c83 100644 --- a/src/features.h +++ b/src/features.h @@ -5,12 +5,13 @@ #include #include "collections.h" #include "string_utils.h" +#include "export.h" #define FEATURE_SEPARATOR_CHAR "|" // Add feature to array -void feature_array_add(cstring_array *features, size_t count, ...); +LIBPOSTAL_EXPORT void feature_array_add(cstring_array *features, size_t count, ...); // Add feature using printf format void feature_array_add_printf(cstring_array *features, char *format, ...); diff --git a/src/file_utils.c b/src/file_utils.c index f25e5ee6..497128f6 100644 --- a/src/file_utils.c +++ b/src/file_utils.c @@ -1,6 +1,6 @@ #include "file_utils.h" -char *file_getline(FILE * f) +LIBPOSTAL_EXPORT char *file_getline(FILE * f) { char buf[BUFSIZ]; diff --git a/src/file_utils.h b/src/file_utils.h index ba9b9511..85d7cdcd 100644 --- a/src/file_utils.h +++ b/src/file_utils.h @@ -8,6 +8,7 @@ #include #include +#include "export.h" #include "libpostal_config.h" #include "string_utils.h" @@ -52,7 +53,7 @@ #define COMMA_SEPARATOR "," #define COMMA_SEPARATOR_LEN strlen(COMMA_SEPARATOR) -char *file_getline(FILE * f); +LIBPOSTAL_EXPORT char *file_getline(FILE * f); bool file_exists(char *filename); diff --git a/src/numex.c b/src/numex.c index 107768fa..9c7e60e1 100644 --- a/src/numex.c +++ b/src/numex.c @@ -599,7 +599,7 @@ bool numex_module_init(void) { Must be called only once before the module can be used */ -bool numex_module_setup(char *filename) { +LIBPOSTAL_EXPORT bool numex_module_setup(char *filename) { if (numex_table == NULL) { return numex_table_load(filename == NULL ? 
DEFAULT_NUMEX_PATH : filename); } @@ -610,7 +610,7 @@ bool numex_module_setup(char *filename) { Called once when done with the module (usually at the end of a main method) */ -void numex_module_teardown(void) { +LIBPOSTAL_EXPORT void numex_module_teardown(void) { numex_table_destroy(); numex_table = NULL; } @@ -1101,7 +1101,7 @@ size_t ordinal_suffix_len(char *str, size_t len, char *lang) { return 0; } -char *replace_numeric_expressions(char *str, char *lang) { +LIBPOSTAL_EXPORT char *replace_numeric_expressions(char *str, char *lang) { numex_result_array *results = convert_numeric_expressions(str, lang); if (results == NULL) return NULL; diff --git a/src/numex.h b/src/numex.h index d80f96e1..7a97b237 100644 --- a/src/numex.h +++ b/src/numex.h @@ -19,6 +19,7 @@ #include "tokens.h" #include "trie.h" #include "trie_search.h" +#include "export.h" #define NUMEX_DATA_FILE "numex.dat" #define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR NUMEX_DATA_FILE @@ -146,7 +147,7 @@ typedef struct numex_result { VECTOR_INIT(numex_result_array, numex_result_t) -char *replace_numeric_expressions(char *str, char *lang); +LIBPOSTAL_EXPORT char *replace_numeric_expressions(char *str, char *lang); numex_result_array *convert_numeric_expressions(char *str, char *lang); size_t ordinal_suffix_len(char *s, size_t len, char *lang); size_t possible_ordinal_digit_len(char *str, size_t len); @@ -155,9 +156,9 @@ bool numex_table_write(FILE *file); bool numex_table_save(char *filename); bool numex_module_init(void); -bool numex_module_setup(char *filename); -void numex_module_teardown(void); +LIBPOSTAL_EXPORT bool numex_module_setup(char *filename); +LIBPOSTAL_EXPORT void numex_module_teardown(void); + - #endif diff --git a/src/scanner.c b/src/scanner.c index 736fb07a..382c701c 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -310240,7 +310240,7 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, boo } -token_array 
*tokenize_keep_whitespace(const char *input) { +LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input) { token_array *tokens = token_array_new(); tokenize_add_tokens(tokens, input, strlen(input), true); return tokens; diff --git a/src/scanner.h b/src/scanner.h index ac113682..e528a7d0 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -9,6 +9,7 @@ #include "token_types.h" #include "tokens.h" +#include "export.h" typedef struct scanner { unsigned char *src, *cursor, *start, *end; @@ -19,7 +20,7 @@ uint16_t scan_token(scanner_t *s); scanner_t scanner_from_string(const char *input, size_t len); void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace); -token_array *tokenize_keep_whitespace(const char *input); +LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input); token_array *tokenize(const char *input); diff --git a/src/scanner.re b/src/scanner.re index eae1286d..fd6d18c0 100644 --- a/src/scanner.re +++ b/src/scanner.re @@ -255,7 +255,7 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, boo } -token_array *tokenize_keep_whitespace(const char *input) { +LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input) { token_array *tokens = token_array_new(); tokenize_add_tokens(tokens, input, strlen(input), true); return tokens; diff --git a/src/string_utils.c b/src/string_utils.c index b337de47..8bc81354 100644 --- a/src/string_utils.c +++ b/src/string_utils.c @@ -57,7 +57,7 @@ inline size_t string_common_suffix(const char *str1, const char *str2) { return common_suffix; } -inline bool string_starts_with(const char *str, const char *start) { +LIBPOSTAL_EXPORT inline bool string_starts_with(const char *str, const char *start) { for (; *start; str++, start++) if (*str != *start) return false; @@ -71,7 +71,7 @@ inline bool string_ends_with(const char *str, const char *ending) { return str_len < end_len ? 
false : !strcmp(str + str_len - end_len, ending); } -inline bool string_equals(const char *s1, const char *s2) { +LIBPOSTAL_EXPORT inline bool string_equals(const char *s1, const char *s2) { if (s1 == NULL || s2 == NULL) return false; return strcmp(s1, s2) == 0; } @@ -168,7 +168,7 @@ uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_re return num_replacements; } -ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst) { +LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst) { ssize_t len = 0; const uint8_t *ptr = str + start; @@ -187,7 +187,7 @@ ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *ds return ret_len; } -char *utf8_reversed_string(const char *s) { +LIBPOSTAL_EXPORT char *utf8_reversed_string(const char *s) { int32_t unich; ssize_t len, remaining; @@ -477,7 +477,7 @@ size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *st } -inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2) { +LIBPOSTAL_EXPORT inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2) { return utf8_common_prefix_len_ignore_separators(str1, str2, strlen(str2)); } @@ -605,7 +605,7 @@ size_t string_left_spaces_len(char *str, size_t len) { return spaces; } -char *string_trim(char *str) { +LIBPOSTAL_EXPORT char *string_trim(char *str) { size_t len = strlen(str); size_t left_spaces = string_left_spaces_len(str, len); size_t right_spaces = string_right_spaces_len(str, len); @@ -629,14 +629,14 @@ char_array *char_array_from_string_no_copy(char *str, size_t n) { return array; } -inline char *char_array_get_string(char_array *array) { +LIBPOSTAL_EXPORT inline char *char_array_get_string(char_array *array) { if (array->n == 0 || array->a[array->n - 1] != '\0') { char_array_terminate(array); } return array->a; } -inline char *char_array_to_string(char_array *array) { +LIBPOSTAL_EXPORT 
inline char *char_array_to_string(char_array *array) { if (array->n == 0 || array->a[array->n - 1] != '\0') { char_array_terminate(array); } @@ -661,7 +661,7 @@ inline size_t char_array_len(char_array *array) { } } -inline void char_array_append(char_array *array, char *str) { +LIBPOSTAL_EXPORT inline void char_array_append(char_array *array, char *str) { while(*str) { char_array_push(array, *str++); } @@ -695,11 +695,11 @@ inline void char_array_append_reversed(char_array *array, char *str) { char_array_append_reversed_len(array, str, len); } -inline void char_array_terminate(char_array *array) { +LIBPOSTAL_EXPORT inline void char_array_terminate(char_array *array) { char_array_push(array, '\0'); } -inline void char_array_cat(char_array *array, char *str) { +LIBPOSTAL_EXPORT inline void char_array_cat(char_array *array, char *str) { char_array_strip_nul_byte(array); char_array_append(array, str); char_array_terminate(array); @@ -712,7 +712,7 @@ inline void char_array_cat_len(char_array *array, char *str, size_t len) { } -inline void char_array_cat_reversed(char_array *array, char *str) { +LIBPOSTAL_EXPORT inline void char_array_cat_reversed(char_array *array, char *str) { char_array_strip_nul_byte(array); char_array_append_reversed(array, str); char_array_terminate(array); @@ -763,7 +763,7 @@ void char_array_add_vjoined(char_array *array, char *separator, bool strip_separ } -inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) { +LIBPOSTAL_EXPORT inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) { va_list args; va_start(args, count); char_array_add_vjoined(array, separator, strip_separator, count, args); @@ -807,14 +807,14 @@ void char_array_cat_vprintf(char_array *array, char *format, va_list args) { } } -void char_array_cat_printf(char_array *array, char *format, ...) { +LIBPOSTAL_EXPORT void char_array_cat_printf(char_array *array, char *format, ...) 
{ va_list args; va_start(args, format); char_array_cat_vprintf(array, format, args); va_end(args); } -cstring_array *cstring_array_new(void) { +LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void) { cstring_array *array = malloc(sizeof(cstring_array)); if (array == NULL) return NULL; @@ -833,7 +833,7 @@ cstring_array *cstring_array_new(void) { return array; } -void cstring_array_destroy(cstring_array *self) { +LIBPOSTAL_EXPORT void cstring_array_destroy(cstring_array *self) { if (self == NULL) return; if (self->indices) { uint32_array_destroy(self->indices); @@ -888,7 +888,7 @@ inline size_t cstring_array_used(cstring_array *self) { return self->str->n; } -inline size_t cstring_array_num_strings(cstring_array *self) { +LIBPOSTAL_EXPORT inline size_t cstring_array_num_strings(cstring_array *self) { if (self == NULL) return 0; return self->indices->n; } @@ -957,13 +957,13 @@ inline int32_t cstring_array_get_offset(cstring_array *self, uint32_t i) { return (int32_t)self->indices->a[i]; } -inline char *cstring_array_get_string(cstring_array *self, uint32_t i) { +LIBPOSTAL_EXPORT inline char *cstring_array_get_string(cstring_array *self, uint32_t i) { int32_t data_index = cstring_array_get_offset(self, i); if (data_index < 0) return NULL; return self->str->a + data_index; } -inline int64_t cstring_array_token_length(cstring_array *self, uint32_t i) { +LIBPOSTAL_EXPORT inline int64_t cstring_array_token_length(cstring_array *self, uint32_t i) { if (INVALID_INDEX(i, self->indices->n)) { return -1; } @@ -1014,7 +1014,7 @@ cstring_array *cstring_array_split_ignore_consecutive(char *str, const char *sep } -cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count) { +LIBPOSTAL_EXPORT cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count) { *count = 0; char *ptr = str; size_t len = strlen(str); @@ -1033,7 +1033,7 @@ cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *co } -char 
**cstring_array_to_strings(cstring_array *self) { +LIBPOSTAL_EXPORT char **cstring_array_to_strings(cstring_array *self) { char **strings = malloc(self->indices->n * sizeof(char *)); for (int i = 0; i < cstring_array_num_strings(self); i++) { @@ -1072,7 +1072,7 @@ string_tree_t *string_tree_new_size(size_t size) { #define DEFAULT_STRING_TREE_SIZE 8 -string_tree_t *string_tree_new(void) { +LIBPOSTAL_EXPORT string_tree_t *string_tree_new(void) { return string_tree_new_size((size_t)DEFAULT_STRING_TREE_SIZE); } @@ -1084,12 +1084,12 @@ inline char *string_tree_get_alternative(string_tree_t *self, size_t token_index return cstring_array_get_string(self->strings, token_start + alternative); } -inline void string_tree_finalize_token(string_tree_t *self) { +LIBPOSTAL_EXPORT inline void string_tree_finalize_token(string_tree_t *self) { uint32_array_push(self->token_indices, (uint32_t)cstring_array_num_strings(self->strings)); } // terminated -inline void string_tree_add_string(string_tree_t *self, char *str) { +LIBPOSTAL_EXPORT inline void string_tree_add_string(string_tree_t *self, char *str) { cstring_array_add_string(self->strings, str); } @@ -1114,13 +1114,13 @@ inline uint32_t string_tree_num_strings(string_tree_t *self) { return (uint32_t)cstring_array_num_strings(self->strings); } -inline uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i) { +LIBPOSTAL_EXPORT inline uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i) { if (i >= self->token_indices->n) return 0; uint32_t n = self->token_indices->a[i + 1] - self->token_indices->a[i]; return n > 0 ? 
n : 1; } -void string_tree_destroy(string_tree_t *self) { +LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self) { if (self == NULL) return; if (self->token_indices != NULL) { @@ -1134,7 +1134,7 @@ void string_tree_destroy(string_tree_t *self) { free(self); } -string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) { +LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) { string_tree_iterator_t *self = malloc(sizeof(string_tree_iterator_t)); self->tree = tree; @@ -1165,7 +1165,7 @@ string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) { return self; } -void string_tree_iterator_next(string_tree_iterator_t *self) { +LIBPOSTAL_EXPORT void string_tree_iterator_next(string_tree_iterator_t *self) { if (self->remaining > 0) { int i; for (i = self->num_tokens - 1; i >= 0; i--) { @@ -1194,11 +1194,11 @@ char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i) return cstring_array_get_string(self->tree->strings, base_index + offset); } -bool string_tree_iterator_done(string_tree_iterator_t *self) { +LIBPOSTAL_EXPORT bool string_tree_iterator_done(string_tree_iterator_t *self) { return self->remaining == 0; } -void string_tree_iterator_destroy(string_tree_iterator_t *self) { +LIBPOSTAL_EXPORT void string_tree_iterator_destroy(string_tree_iterator_t *self) { if (self == NULL) return; if (self->path) { diff --git a/src/string_utils.h b/src/string_utils.h index 91a56b0b..cf073922 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -17,6 +17,7 @@ Utilities for manipulating strings in C. 
#include "utf8proc/utf8proc.h" #include "vector.h" #include "strndup.h" +#include "export.h" #define MAX_UTF8_CHAR_SIZE 4 @@ -60,16 +61,16 @@ char *string_replace_char(char *str, char c1, char c2); bool string_replace_with_array(char *str, char *replace, char *with, char_array *result); char *string_replace(char *str, char *replace, char *with); -bool string_starts_with(const char *str, const char *start); +LIBPOSTAL_EXPORT bool string_starts_with(const char *str, const char *start); bool string_ends_with(const char *str, const char *ending); -bool string_equals(const char *s1, const char *s2); +LIBPOSTAL_EXPORT bool string_equals(const char *s1, const char *s2); uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_repls, size_t trans_len); // UTF-8 string methods -char *utf8_reversed_string(const char *s); // returns a copy, caller frees -ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst); +LIBPOSTAL_EXPORT char *utf8_reversed_string(const char *s); // returns a copy, caller frees +LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst); // Casing functions return a copy, caller frees char *utf8_lower_options(const char *s, utf8proc_option_t options); @@ -81,7 +82,7 @@ int utf8_compare(const char *str1, const char *str2); int utf8_compare_len(const char *str1, const char *str2, size_t len); size_t utf8_common_prefix(const char *str1, const char *str2); size_t utf8_common_prefix_len(const char *str1, const char *str2, size_t len); -size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2); +LIBPOSTAL_EXPORT size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2); size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len); bool utf8_is_hyphen(int32_t ch); @@ -100,7 +101,7 @@ ssize_t string_next_hyphen_index(char *str, size_t len); bool string_contains_hyphen(char *str); bool 
string_contains_hyphen_len(char *str, size_t len); -char *string_trim(char *str); +LIBPOSTAL_EXPORT char *string_trim(char *str); /* char_array is a dynamic character array defined in collections.h but has a few additional methods related to string manipulation. @@ -113,40 +114,40 @@ char_array *char_array_from_string(char *str); char_array *char_array_from_string_no_copy(char *str, size_t n); // Gets the underlying C string for a char_array -char *char_array_get_string(char_array *array); +LIBPOSTAL_EXPORT char *char_array_get_string(char_array *array); // Frees the char_array and returns a standard NUL-terminated string -char *char_array_to_string(char_array *array); +LIBPOSTAL_EXPORT char *char_array_to_string(char_array *array); // Can use strlen(array->a) but this is faster size_t char_array_len(char_array *array); // append_* methods do not NUL-terminate -void char_array_append(char_array *array, char *str); +LIBPOSTAL_EXPORT void char_array_append(char_array *array, char *str); void char_array_append_len(char_array *array, char *str, size_t len); void char_array_append_reversed(char_array *array, char *str); void char_array_append_reversed_len(char_array *array, char *str, size_t len); // add NUL terminator to a char_array void char_array_strip_nul_byte(char_array *array); -void char_array_terminate(char_array *array); +LIBPOSTAL_EXPORT void char_array_terminate(char_array *array); // add_* methods NUL-terminate without stripping NUL-byte void char_array_add(char_array *array, char *str); void char_array_add_len(char_array *array, char *str, size_t len); // Similar to strcat but with dynamic resizing, guaranteed NUL-terminated -void char_array_cat(char_array *array, char *str); +LIBPOSTAL_EXPORT void char_array_cat(char_array *array, char *str); void char_array_cat_len(char_array *array, char *str, size_t len); -void char_array_cat_reversed(char_array *array, char *str); +LIBPOSTAL_EXPORT void char_array_cat_reversed(char_array *array, char *str); void 
char_array_cat_reversed_len(char_array *array, char *str, size_t len); // Similar to cat methods but with printf args void char_array_cat_vprintf(char_array *array, char *format, va_list args); -void char_array_cat_printf(char_array *array, char *format, ...); +LIBPOSTAL_EXPORT void char_array_cat_printf(char_array *array, char *format, ...); // Mainly for paths or delimited strings void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args); -void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...); +LIBPOSTAL_EXPORT void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...); void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...); @@ -171,13 +172,13 @@ typedef struct { char_array *str; } cstring_array; -cstring_array *cstring_array_new(void); +LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void); cstring_array *cstring_array_new_size(size_t size); size_t cstring_array_capacity(cstring_array *self); size_t cstring_array_used(cstring_array *self); -size_t cstring_array_num_strings(cstring_array *self); +LIBPOSTAL_EXPORT size_t cstring_array_num_strings(cstring_array *self); void cstring_array_resize(cstring_array *self, size_t size); void cstring_array_clear(cstring_array *self); @@ -185,7 +186,7 @@ cstring_array *cstring_array_from_char_array(char_array *str); cstring_array *cstring_array_from_strings(char **strings, size_t n); // Convert cstring_array to an array of n C strings and destroy the cstring_array -char **cstring_array_to_strings(cstring_array *self); +LIBPOSTAL_EXPORT char **cstring_array_to_strings(cstring_array *self); // Split on delimiter cstring_array *cstring_array_split(char *str, const char *separator, size_t separator_len, size_t *count); @@ -193,7 +194,7 @@ cstring_array *cstring_array_split(char *str, const char *separator, size_t sepa cstring_array 
*cstring_array_split_ignore_consecutive(char *str, const char *separator, size_t separator_len, size_t *count); // Split on delimiter by replacing (single character) separator with the NUL byte in the original string -cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count); +LIBPOSTAL_EXPORT cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count); uint32_t cstring_array_start_token(cstring_array *self); uint32_t cstring_array_add_string(cstring_array *self, char *str); @@ -207,10 +208,10 @@ void cstring_array_cat_string_len(cstring_array *self, char *str, size_t len); void cstring_array_terminate(cstring_array *self); int32_t cstring_array_get_offset(cstring_array *self, uint32_t i); -char *cstring_array_get_string(cstring_array *self, uint32_t i); -int64_t cstring_array_token_length(cstring_array *self, uint32_t i); +LIBPOSTAL_EXPORT char *cstring_array_get_string(cstring_array *self, uint32_t i); +LIBPOSTAL_EXPORT int64_t cstring_array_token_length(cstring_array *self, uint32_t i); -void cstring_array_destroy(cstring_array *self); +LIBPOSTAL_EXPORT void cstring_array_destroy(cstring_array *self); #define cstring_array_foreach(array, i, s, code) { \ for (int __si = 0; __si < array->indices->n; __si++) { \ @@ -246,16 +247,16 @@ typedef struct string_tree { cstring_array *strings; } string_tree_t; -string_tree_t *string_tree_new(void); +LIBPOSTAL_EXPORT string_tree_t *string_tree_new(void); string_tree_t *string_tree_new_size(size_t size); // get char *string_tree_get_alternative(string_tree_t *self, size_t token_index, uint32_t alternative); // finalize -void string_tree_finalize_token(string_tree_t *self); +LIBPOSTAL_EXPORT void string_tree_finalize_token(string_tree_t *self); // terminated -void string_tree_add_string(string_tree_t *self, char *str); +LIBPOSTAL_EXPORT void string_tree_add_string(string_tree_t *self, char *str); void string_tree_add_string_len(string_tree_t *self, char *str, size_t len); // 
unterminated void string_tree_append_string(string_tree_t *self, char *str); @@ -264,9 +265,9 @@ void string_tree_append_string_len(string_tree_t *self, char *str, size_t len); uint32_t string_tree_num_tokens(string_tree_t *self); uint32_t string_tree_num_strings(string_tree_t *self); -uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i); +LIBPOSTAL_EXPORT uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i); -void string_tree_destroy(string_tree_t *self); +LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self); typedef struct string_tree_iterator { string_tree_t *tree; @@ -275,11 +276,11 @@ typedef struct string_tree_iterator { uint32_t remaining; } string_tree_iterator_t; -string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree); -void string_tree_iterator_next(string_tree_iterator_t *self); +LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree); +LIBPOSTAL_EXPORT void string_tree_iterator_next(string_tree_iterator_t *self); char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i); -bool string_tree_iterator_done(string_tree_iterator_t *self); -void string_tree_iterator_destroy(string_tree_iterator_t *self); +LIBPOSTAL_EXPORT bool string_tree_iterator_done(string_tree_iterator_t *self); +LIBPOSTAL_EXPORT void string_tree_iterator_destroy(string_tree_iterator_t *self); #define string_tree_iterator_foreach_token(iter, s, code) { \ diff --git a/src/transliterate.c b/src/transliterate.c index 368356f3..7f25a449 100644 --- a/src/transliterate.c +++ b/src/transliterate.c @@ -665,7 +665,7 @@ static char *replace_groups(trie_t *trie, char *str, char *replacement, group_ca return char_array_to_string(ret); } -char *transliterate(char *trans_name, char *str, size_t len) { +LIBPOSTAL_EXPORT char *transliterate(char *trans_name, char *str, size_t len) { if (trans_name == NULL || str == NULL) return NULL; transliteration_table_t *trans_table = 
get_transliteration_table(); @@ -1977,7 +1977,7 @@ bool transliteration_module_init(void) { return trans_table != NULL; } -bool transliteration_module_setup(char *filename) { +LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename) { if (trans_table == NULL) { return transliteration_table_load(filename == NULL ? DEFAULT_TRANSLITERATION_PATH : filename); } @@ -1986,7 +1986,7 @@ bool transliteration_module_setup(char *filename) { } -void transliteration_module_teardown(void) { +LIBPOSTAL_EXPORT void transliteration_module_teardown(void) { transliteration_table_destroy(); trans_table = NULL; } diff --git a/src/transliterate.h b/src/transliterate.h index ffbf2e79..d1c5308f 100644 --- a/src/transliterate.h +++ b/src/transliterate.h @@ -13,6 +13,7 @@ #include "trie_search.h" #include "unicode_scripts.h" #include "strndup.h" +#include "export.h" #define LATIN_ASCII "latin-ascii" #define LATIN_ASCII_SIMPLE "latin-ascii-simple" @@ -152,7 +153,7 @@ void transliterator_destroy(transliterator_t *self); bool transliteration_table_add_transliterator(transliterator_t *trans); transliterator_t *get_transliterator(char *name); -char *transliterate(char *trans_name, char *str, size_t len); +LIBPOSTAL_EXPORT char *transliterate(char *trans_name, char *str, size_t len); bool transliteration_table_add_script_language(script_language_t script_language, transliterator_index_t index); transliterator_index_t get_transliterator_index_for_script_language(script_t script, char *language); @@ -172,7 +173,7 @@ bool transliteration_table_save(char *filename); // Module setup/teardown bool transliteration_module_init(void); -bool transliteration_module_setup(char *filename); -void transliteration_module_teardown(void); +LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename); +LIBPOSTAL_EXPORT void transliteration_module_teardown(void); #endif diff --git a/src/trie.c b/src/trie.c index 2e11ba6e..15db41e6 100644 --- a/src/trie.c +++ b/src/trie.c @@ -96,7 +96,7 @@ trie_t 
*trie_new_alphabet(uint8_t *alphabet, uint32_t alphabet_size) { return self; } -trie_t *trie_new(void) { +LIBPOSTAL_EXPORT trie_t *trie_new(void) { return trie_new_alphabet(DEFAULT_ALPHABET, sizeof(DEFAULT_ALPHABET)); } @@ -661,7 +661,7 @@ bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, size_t len, ui } -inline bool trie_add(trie_t *self, char *key, uint32_t data) { +LIBPOSTAL_EXPORT inline bool trie_add(trie_t *self, char *key, uint32_t data) { size_t len = strlen(key); if (len == 0) return false; return trie_add_at_index(self, ROOT_NODE_ID, key, len + 1, data); @@ -754,7 +754,7 @@ inline bool trie_get_data_at_index(trie_t *self, uint32_t index, uint32_t *data return true; } -inline bool trie_get_data(trie_t *self, char *key, uint32_t *data) { +LIBPOSTAL_EXPORT inline bool trie_get_data(trie_t *self, char *key, uint32_t *data) { uint32_t node_id = trie_get(self, key); return trie_get_data_at_index(self, node_id, data); } @@ -899,7 +899,7 @@ inline uint32_t trie_num_keys(trie_t *self) { /* Destructor */ -void trie_destroy(trie_t *self) { +LIBPOSTAL_EXPORT void trie_destroy(trie_t *self) { if (!self) return; diff --git a/src/trie.h b/src/trie.h index d2f8519e..be3d1c64 100644 --- a/src/trie.h +++ b/src/trie.h @@ -33,6 +33,7 @@ #include "klib/kvec.h" #include "log/log.h" #include "string_utils.h" +#include "export.h" #define TRIE_SIGNATURE 0xABABABAB #define NULL_NODE_ID 0 @@ -79,7 +80,7 @@ typedef struct trie { } trie_t; trie_t *trie_new_alphabet(uint8_t *alphabet, uint32_t alphabet_size); -trie_t *trie_new(void); +LIBPOSTAL_EXPORT trie_t *trie_new(void); uint32_t trie_get_char_index(trie_t *self, unsigned char c); uint32_t trie_get_transition_index(trie_t *self, trie_node_t node, unsigned char c); @@ -97,7 +98,7 @@ trie_data_node_t trie_get_data_node(trie_t *self, trie_node_t node); bool trie_set_data_node(trie_t *self, uint32_t index, trie_data_node_t data_node); bool trie_get_data_at_index(trie_t *self, uint32_t index, uint32_t *data); -bool 
trie_get_data(trie_t *self, char *key, uint32_t *data); +LIBPOSTAL_EXPORT bool trie_get_data(trie_t *self, char *key, uint32_t *data); bool trie_set_data_at_index(trie_t *self, uint32_t index, uint32_t data); bool trie_set_data(trie_t *self, char *key, uint32_t data); @@ -113,7 +114,7 @@ int32_t trie_separate_tail(trie_t *self, uint32_t from_index, unsigned char *tai void trie_tail_merge(trie_t *self, uint32_t old_node_id, unsigned char *suffix, uint32_t data); bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, size_t len, uint32_t data); -bool trie_add(trie_t *self, char *key, uint32_t data); +LIBPOSTAL_EXPORT bool trie_add(trie_t *self, char *key, uint32_t data); bool trie_add_len(trie_t *self, char *key, size_t len, uint32_t data); bool trie_add_suffix(trie_t *self, char *key, uint32_t data); bool trie_add_suffix_at_index(trie_t *self, char *key, uint32_t start_node_id, uint32_t data); @@ -146,7 +147,7 @@ bool trie_save(trie_t *self, char *path); trie_t *trie_read(FILE *file); trie_t *trie_load(char *path); -void trie_destroy(trie_t *self); +LIBPOSTAL_EXPORT void trie_destroy(trie_t *self); diff --git a/src/trie_search.c b/src/trie_search.c index 8518db89..233ab780 100644 --- a/src/trie_search.c +++ b/src/trie_search.c @@ -465,7 +465,7 @@ inline bool trie_search_tokens_with_phrases(trie_t *self, char *str, token_array return trie_search_tokens_from_index(self, str, tokens, ROOT_NODE_ID, phrases); } -inline phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) { +LIBPOSTAL_EXPORT inline phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) { phrase_array *phrases = NULL; if (!trie_search_tokens_with_phrases(self, str, tokens, &phrases)) { return NULL; diff --git a/src/trie_search.h b/src/trie_search.h index df1817e7..09cce48f 100644 --- a/src/trie_search.h +++ b/src/trie_search.h @@ -16,6 +16,7 @@ #include "tokens.h" #include "vector.h" #include "utf8proc/utf8proc.h" +#include "export.h" typedef 
struct phrase { uint32_t start; @@ -31,7 +32,7 @@ VECTOR_INIT(phrase_array, phrase_t) phrase_array *trie_search(trie_t *self, char *text); bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, phrase_array **phrases); bool trie_search_with_phrases(trie_t *self, char *text, phrase_array **phrases); -phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens); +LIBPOSTAL_EXPORT phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens); bool trie_search_tokens_from_index(trie_t *self, char *str, token_array *tokens, uint32_t start_node_id, phrase_array **phrases); bool trie_search_tokens_with_phrases(trie_t *self, char *text, token_array *tokens, phrase_array **phrases); phrase_t trie_search_suffixes_from_index(trie_t *self, char *word, size_t len, uint32_t start_node_id); From d205f4d2bb91136778c3305b64ef9ffd54c9e13c Mon Sep 17 00:00:00 2001 From: AeroXuk Date: Thu, 23 Nov 2017 02:24:06 +0000 Subject: [PATCH 05/11] Adding artifacts to AppVeyor config. 
--- .appveyor.yml | 7 +++++++ win_build.bat | 2 ++ 2 files changed, 9 insertions(+) diff --git a/.appveyor.yml b/.appveyor.yml index 59702dd3..7641ecaa 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -21,3 +21,10 @@ build_script: test_script: - '%APPVEYOR_BUILD_FOLDER%\test\test_libpostal.exe' + +after_build: + - 7z a libpostal.zip %APPVEYOR_BUILD_FOLDER%\libpostal.dll %APPVEYOR_BUILD_FOLDER%\libpostal.def %APPVEYOR_BUILD_FOLDER%\libpostal.exp %APPVEYOR_BUILD_FOLDER%\libpostal.lib + +artifacts: + - path: libpostal.zip + name: libpostal diff --git a/win_build.bat b/win_build.bat index f8f1b7b5..8b0db748 100644 --- a/win_build.bat +++ b/win_build.bat @@ -19,4 +19,6 @@ IF %COMPILER%==msys2 ( bash -lc "cd $APPVEYOR_BUILD_FOLDER && ./configure --datadir=/c" bash -lc "cd $APPVEYOR_BUILD_FOLDER && make" bash -lc "cd $APPVEYOR_BUILD_FOLDER && make install" + bash -lc "cd $APPVEYOR_BUILD_FOLDER && cp src/.libs/libpostal-*.dll libpostal.dll" + "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\lib.exe" /def:libpostal.def /out:libpostal.lib /machine:x64 ) From f0246e7333c379c3f0adb74a7e6928cc83aaf554 Mon Sep 17 00:00:00 2001 From: AeroXuk Date: Thu, 23 Nov 2017 19:11:25 +0000 Subject: [PATCH 06/11] Fix bug in strndup fix for windows. Move all includes out of headers and into code for strndup.h and move it to be the last include. --- src/Makefile.am | 18 +++++++++--------- src/normalize.c | 1 + src/normalize.h | 1 - src/string_utils.c | 1 + src/string_utils.h | 1 - src/strndup.c | 2 +- src/strndup.h | 9 +++++++-- src/tokens.c | 1 + src/tokens.h | 5 ++--- src/transliterate.c | 1 + src/transliterate.h | 1 - 11 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 15d90d79..b5d3a10c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -12,7 +12,7 @@ DEFAULT_INCLUDES = -I.. 
-I/usr/local/include CFLAGS = -D UTF8PROC_EXPORTS -D LIBPOSTAL_EXPORTS lib_LTLIBRARIES = libpostal.la -libpostal_la_SOURCES = strndup.c libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c +libpostal_la_SOURCES = libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS) libpostal_la_CFLAGS = $(CFLAGS_O2) libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ -no-undefined @@ -38,27 +38,27 @@ address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise. 
address_parser_LDADD = libscanner.la $(CBLAS_LIBS) address_parser_CFLAGS = $(CFLAGS_O3) -build_address_dictionary_SOURCES = strndup.c address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c +build_address_dictionary_SOURCES = address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c build_address_dictionary_CFLAGS = $(CFLAGS_O3) -build_numex_table_SOURCES = strndup.c numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c +build_numex_table_SOURCES = numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c build_numex_table_CFLAGS = $(CFLAGS_O3) -build_trans_table_SOURCES = strndup.c transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c +build_trans_table_SOURCES = transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c build_trans_table_CFLAGS = $(CFLAGS_O3) -address_parser_train_SOURCES = strndup.c address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c shuffle.c utf8proc/utf8proc.c ngrams.c +address_parser_train_SOURCES = address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c 
transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c shuffle.c utf8proc/utf8proc.c ngrams.c address_parser_train_LDADD = libscanner.la $(CBLAS_LIBS) address_parser_train_CFLAGS = $(CFLAGS_O3) -address_parser_test_SOURCES = strndup.c address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c +address_parser_test_SOURCES = address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS) address_parser_test_CFLAGS = $(CFLAGS_O3) -language_classifier_train_SOURCES = strndup.c language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c +language_classifier_train_SOURCES = language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c 
cartesian_product.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c language_classifier_train_LDADD = libscanner.la $(CBLAS_LIBS) language_classifier_train_CFLAGS = $(CFLAGS_O3) -language_classifier_SOURCES = strndup.c language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c +language_classifier_SOURCES = language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c language_classifier_LDADD = libscanner.la $(CBLAS_LIBS) language_classifier_CFLAGS = $(CFLAGS_O3) -language_classifier_test_SOURCES = strndup.c language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c +language_classifier_test_SOURCES = language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c language_classifier_test_LDADD = libscanner.la $(CBLAS_LIBS) language_classifier_test_CFLAGS = $(CFLAGS_O3) diff --git a/src/normalize.c b/src/normalize.c 
index 076b6e56..802c6d9a 100644 --- a/src/normalize.c +++ b/src/normalize.c @@ -1,4 +1,5 @@ #include "normalize.h" +#include "strndup.h" #define FULL_STOP_CODEPOINT 0x002e #define APOSTROPHE_CODEPOINT 0x0027 diff --git a/src/normalize.h b/src/normalize.h index ea5cf864..d485f67f 100644 --- a/src/normalize.h +++ b/src/normalize.h @@ -38,7 +38,6 @@ As well as normalizations for individual string tokens: #include "trie.h" #include "tokens.h" #include "vector.h" -#include "strndup.h" #define NORMALIZE_STRING_LATIN_ASCII 1 << 0 #define NORMALIZE_STRING_TRANSLITERATE 1 << 1 diff --git a/src/string_utils.c b/src/string_utils.c index 8bc81354..f18e7722 100644 --- a/src/string_utils.c +++ b/src/string_utils.c @@ -1,6 +1,7 @@ #include #include "log/log.h" #include "string_utils.h" +#include "strndup.h" #define INVALID_INDEX(i, n) ((i) < 0 || (i) >= (n)) diff --git a/src/string_utils.h b/src/string_utils.h index cf073922..9b771b31 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -16,7 +16,6 @@ Utilities for manipulating strings in C. 
#include "collections.h" #include "utf8proc/utf8proc.h" #include "vector.h" -#include "strndup.h" #include "export.h" #define MAX_UTF8_CHAR_SIZE 4 diff --git a/src/strndup.c b/src/strndup.c index 90feafe6..d02657d6 100644 --- a/src/strndup.c +++ b/src/strndup.c @@ -13,4 +13,4 @@ char *strndup(const char *s, size_t n) return new; } -#endif /* HAVE_STRNDUP */ \ No newline at end of file +#endif /* HAVE_STRNDUP */ diff --git a/src/strndup.h b/src/strndup.h index 893fbcbd..69dea1a7 100644 --- a/src/strndup.h +++ b/src/strndup.h @@ -1,6 +1,11 @@ +#ifndef STRNDUP_H +#define STRNDUP_H + +#include + #ifndef HAVE_STRNDUP -#define HAVE_STRNDUP char *strndup(const char *s, size_t n); -#endif /* HAVE_STRNDUP */ \ No newline at end of file +#endif /* HAVE_STRNDUP */ +#endif /* STRNDUP_H */ diff --git a/src/tokens.c b/src/tokens.c index e85183f1..310e861b 100644 --- a/src/tokens.c +++ b/src/tokens.c @@ -1,4 +1,5 @@ #include "tokens.h" +#include "strndup.h" tokenized_string_t *tokenized_string_new(void) { diff --git a/src/tokens.h b/src/tokens.h index 5b7739c5..045902b3 100644 --- a/src/tokens.h +++ b/src/tokens.h @@ -1,7 +1,7 @@ #ifndef TOKENS_H #define TOKENS_H - + #include #include @@ -11,7 +11,6 @@ #include "string_utils.h" #include "token_types.h" #include "vector.h" -#include "strndup.h" typedef struct token { size_t offset; @@ -36,6 +35,6 @@ char *tokenized_string_get_token(tokenized_string_t *self, uint32_t index); void tokenized_string_destroy(tokenized_string_t *self); - + #endif diff --git a/src/transliterate.c b/src/transliterate.c index 7f25a449..e8bc9805 100644 --- a/src/transliterate.c +++ b/src/transliterate.c @@ -3,6 +3,7 @@ #include "file_utils.h" #include "log/log.h" +#include "strndup.h" #define TRANSLITERATION_TABLE_SIGNATURE 0xAAAAAAAA diff --git a/src/transliterate.h b/src/transliterate.h index d1c5308f..1db351e6 100644 --- a/src/transliterate.h +++ b/src/transliterate.h @@ -12,7 +12,6 @@ #include "trie.h" #include "trie_search.h" #include 
"unicode_scripts.h" -#include "strndup.h" #include "export.h" #define LATIN_ASCII "latin-ascii" From 26ac9ab5c2a89c9b0e2ce5625e1249c5d3a3c722 Mon Sep 17 00:00:00 2001 From: AeroXuk Date: Sat, 25 Nov 2017 04:35:28 +0000 Subject: [PATCH 07/11] Removing EXPORT statements from all source files and most header files, leaving only the exports for the main API in libpostal.h. Modified Makefiles so that all the test apps build without having extra functions exported from libpostal. --- src/Makefile.am | 32 ++++++++++----------- src/export.h | 16 ----------- src/features.c | 2 +- src/features.h | 3 +- src/file_utils.c | 2 +- src/file_utils.h | 3 +- src/libpostal.c | 30 ++++++++++---------- src/libpostal.h | 13 ++++++++- src/numex.c | 6 ++-- src/numex.h | 9 +++--- src/scanner.c | 2 +- src/scanner.h | 3 +- src/scanner.re | 2 +- src/string_utils.c | 60 ++++++++++++++++++++-------------------- src/string_utils.h | 61 ++++++++++++++++++++--------------------- src/tokens.h | 4 +-- src/transliterate.c | 6 ++-- src/transliterate.h | 7 ++--- src/trie.c | 8 +++--- src/trie.h | 9 +++--- src/trie_search.c | 2 +- src/trie_search.h | 3 +- src/utf8proc/utf8proc.c | 42 ++++++++++++++-------------- src/utf8proc/utf8proc.h | 54 ++++++++++++++---------------------- test/Makefile.am | 4 +-- windows/src/Makefile.am | 14 +++++----- 26 files changed, 186 insertions(+), 211 deletions(-) delete mode 100644 src/export.h diff --git a/src/Makefile.am b/src/Makefile.am index b5d3a10c..6767219b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -9,12 +9,12 @@ DEFAULT_INCLUDES = -I.. -I/usr/local/include # Wonky but have to be able to override the user's optimization level to compile the scanner # as it takes an unreasonably long time to compile with the optimizer on. 
-CFLAGS = -D UTF8PROC_EXPORTS -D LIBPOSTAL_EXPORTS +CFLAGS = lib_LTLIBRARIES = libpostal.la -libpostal_la_SOURCES = libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c +libpostal_la_SOURCES = strndup.c libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS) -libpostal_la_CFLAGS = $(CFLAGS_O2) +libpostal_la_CFLAGS = $(CFLAGS_O2) -D LIBPOSTAL_EXPORTS libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ -no-undefined dist_bin_SCRIPTS = libpostal_data @@ -24,41 +24,41 @@ dist_bin_SCRIPTS = libpostal_data # -marm option. For that, CFLAGS_SCANNER_EXTRA is provided that can be filled during configuration stage (see ./configure --help). 
noinst_LTLIBRARIES = libscanner.la libscanner_la_SOURCES = klib/drand48.c scanner.c -libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA) +libscanner_la_CFLAGS = $(CFLAGS_O0) -D LIBPOSTAL_EXPORTS $(CFLAGS_SCANNER_EXTRA) noinst_PROGRAMS = libpostal bench address_parser address_parser_train address_parser_test build_address_dictionary build_numex_table build_trans_table address_parser_train address_parser_test language_classifier_train language_classifier language_classifier_test -libpostal_SOURCES = main.c json_encode.c +libpostal_SOURCES = strndup.c main.c json_encode.c file_utils.c string_utils.c utf8proc/utf8proc.c libpostal_LDADD = libpostal.la libpostal_CFLAGS = $(CFLAGS_O3) bench_SOURCES = bench.c bench_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS) bench_CFLAGS = $(CFLAGS_O3) -address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c libpostal.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c -address_parser_LDADD = libscanner.la $(CBLAS_LIBS) +address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c +address_parser_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS) address_parser_CFLAGS = 
$(CFLAGS_O3) -build_address_dictionary_SOURCES = address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c +build_address_dictionary_SOURCES = strndup.c address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c build_address_dictionary_CFLAGS = $(CFLAGS_O3) -build_numex_table_SOURCES = numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c +build_numex_table_SOURCES = strndup.c numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c build_numex_table_CFLAGS = $(CFLAGS_O3) -build_trans_table_SOURCES = transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c +build_trans_table_SOURCES = strndup.c transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c build_trans_table_CFLAGS = $(CFLAGS_O3) -address_parser_train_SOURCES = address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c shuffle.c utf8proc/utf8proc.c ngrams.c +address_parser_train_SOURCES = strndup.c address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c 
shuffle.c utf8proc/utf8proc.c ngrams.c address_parser_train_LDADD = libscanner.la $(CBLAS_LIBS) address_parser_train_CFLAGS = $(CFLAGS_O3) -address_parser_test_SOURCES = address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c -address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS) +address_parser_test_SOURCES = strndup.c address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c +address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS) address_parser_test_CFLAGS = $(CFLAGS_O3) -language_classifier_train_SOURCES = language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c +language_classifier_train_SOURCES = strndup.c language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c numex.c 
transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c language_classifier_train_LDADD = libscanner.la $(CBLAS_LIBS) language_classifier_train_CFLAGS = $(CFLAGS_O3) -language_classifier_SOURCES = language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c +language_classifier_SOURCES = strndup.c language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c language_classifier_LDADD = libscanner.la $(CBLAS_LIBS) language_classifier_CFLAGS = $(CFLAGS_O3) -language_classifier_test_SOURCES = language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c +language_classifier_test_SOURCES = strndup.c language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c language_classifier_test_LDADD = libscanner.la $(CBLAS_LIBS) language_classifier_test_CFLAGS = $(CFLAGS_O3) diff --git a/src/export.h b/src/export.h deleted file mode 100644 index 
2a5a490b..00000000 --- a/src/export.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef EXPORT_H -#define EXPORT_H - -#ifdef _WIN32 -#ifdef LIBPOSTAL_EXPORTS -#define LIBPOSTAL_EXPORT __declspec(dllexport) -#else -#define LIBPOSTAL_EXPORT __declspec(dllimport) -#endif -#elif __GNUC__ >= 4 -#define LIBPOSTAL_EXPORT __attribute__ ((visibility("default"))) -#else -#define LIBPOSTAL_EXPORT -#endif - -#endif //EXPORT_H diff --git a/src/features.c b/src/features.c index 066852bc..ada3586b 100644 --- a/src/features.c +++ b/src/features.c @@ -1,7 +1,7 @@ #include "features.h" -LIBPOSTAL_EXPORT void feature_array_add(cstring_array *features, size_t count, ...) { +void feature_array_add(cstring_array *features, size_t count, ...) { if (count <= 0) { return; } diff --git a/src/features.h b/src/features.h index fb551c83..6f99ae3e 100644 --- a/src/features.h +++ b/src/features.h @@ -5,13 +5,12 @@ #include #include "collections.h" #include "string_utils.h" -#include "export.h" #define FEATURE_SEPARATOR_CHAR "|" // Add feature to array -LIBPOSTAL_EXPORT void feature_array_add(cstring_array *features, size_t count, ...); +void feature_array_add(cstring_array *features, size_t count, ...); // Add feature using printf format void feature_array_add_printf(cstring_array *features, char *format, ...); diff --git a/src/file_utils.c b/src/file_utils.c index 497128f6..f25e5ee6 100644 --- a/src/file_utils.c +++ b/src/file_utils.c @@ -1,6 +1,6 @@ #include "file_utils.h" -LIBPOSTAL_EXPORT char *file_getline(FILE * f) +char *file_getline(FILE * f) { char buf[BUFSIZ]; diff --git a/src/file_utils.h b/src/file_utils.h index 85d7cdcd..ba9b9511 100644 --- a/src/file_utils.h +++ b/src/file_utils.h @@ -8,7 +8,6 @@ #include #include -#include "export.h" #include "libpostal_config.h" #include "string_utils.h" @@ -53,7 +52,7 @@ #define COMMA_SEPARATOR "," #define COMMA_SEPARATOR_LEN strlen(COMMA_SEPARATOR) -LIBPOSTAL_EXPORT char *file_getline(FILE * f); +char *file_getline(FILE * f); bool file_exists(char 
*filename); diff --git a/src/libpostal.c b/src/libpostal.c index 68dd01da..d226413e 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -57,7 +57,7 @@ static libpostal_normalize_options_t LIBPOSTAL_DEFAULT_OPTIONS = { .roman_numerals = true }; -LIBPOSTAL_EXPORT libpostal_normalize_options_t libpostal_get_default_options(void) { +libpostal_normalize_options_t libpostal_get_default_options(void) { return LIBPOSTAL_DEFAULT_OPTIONS; } @@ -942,7 +942,7 @@ static void expand_alternative(cstring_array *strings, khash_t(str_set) *unique_ char_array_destroy(temp_string); } -LIBPOSTAL_EXPORT char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n) { +char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n) { options.address_components |= LIBPOSTAL_ADDRESS_ANY; uint64_t normalize_string_options = get_normalize_string_options(options); @@ -1021,14 +1021,14 @@ LIBPOSTAL_EXPORT char **libpostal_expand_address(char *input, libpostal_normaliz } -LIBPOSTAL_EXPORT void libpostal_expansion_array_destroy(char **expansions, size_t n) { +void libpostal_expansion_array_destroy(char **expansions, size_t n) { for (size_t i = 0; i < n; i++) { free(expansions[i]); } free(expansions); } -LIBPOSTAL_EXPORT void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) { +void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) { if (self == NULL) return; for (size_t i = 0; i < self->num_components; i++) { @@ -1057,11 +1057,11 @@ static libpostal_address_parser_options_t LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIO .country = NULL }; -LIBPOSTAL_EXPORT inline libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void) { +inline libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void) { return LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS; } -LIBPOSTAL_EXPORT libpostal_address_parser_response_t 
*libpostal_parse_address(char *address, libpostal_address_parser_options_t options) { +libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options) { libpostal_address_parser_response_t *parsed = address_parser_parse(address, options.language, options.country); if (parsed == NULL) { @@ -1073,7 +1073,7 @@ LIBPOSTAL_EXPORT libpostal_address_parser_response_t *libpostal_parse_address(ch return parsed; } -LIBPOSTAL_EXPORT bool libpostal_setup_datadir(char *datadir) { +bool libpostal_setup_datadir(char *datadir) { char *transliteration_path = NULL; char *numex_path = NULL; char *address_dictionary_path = NULL; @@ -1114,11 +1114,11 @@ LIBPOSTAL_EXPORT bool libpostal_setup_datadir(char *datadir) { return true; } -LIBPOSTAL_EXPORT bool libpostal_setup(void) { +bool libpostal_setup(void) { return libpostal_setup_datadir(NULL); } -LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier_datadir(char *datadir) { +bool libpostal_setup_language_classifier_datadir(char *datadir) { char *language_classifier_dir = NULL; if (datadir != NULL) { @@ -1137,11 +1137,11 @@ LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier_datadir(char *datadir) return true; } -LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier(void) { +bool libpostal_setup_language_classifier(void) { return libpostal_setup_language_classifier_datadir(NULL); } -LIBPOSTAL_EXPORT bool libpostal_setup_parser_datadir(char *datadir) { +bool libpostal_setup_parser_datadir(char *datadir) { char *parser_dir = NULL; if (datadir != NULL) { @@ -1160,11 +1160,11 @@ LIBPOSTAL_EXPORT bool libpostal_setup_parser_datadir(char *datadir) { return true; } -LIBPOSTAL_EXPORT bool libpostal_setup_parser(void) { +bool libpostal_setup_parser(void) { return libpostal_setup_parser_datadir(NULL); } -LIBPOSTAL_EXPORT void libpostal_teardown(void) { +void libpostal_teardown(void) { transliteration_module_teardown(); numex_module_teardown(); @@ -1172,10 +1172,10 @@ 
LIBPOSTAL_EXPORT void libpostal_teardown(void) { address_dictionary_module_teardown(); } -LIBPOSTAL_EXPORT void libpostal_teardown_language_classifier(void) { +void libpostal_teardown_language_classifier(void) { language_classifier_module_teardown(); } -LIBPOSTAL_EXPORT void libpostal_teardown_parser(void) { +void libpostal_teardown_parser(void) { address_parser_module_teardown(); } diff --git a/src/libpostal.h b/src/libpostal.h index c844cfa2..4e62f745 100644 --- a/src/libpostal.h +++ b/src/libpostal.h @@ -9,7 +9,18 @@ extern "C" { #include #include #include -#include "export.h" + +#ifdef _WIN32 +#ifdef LIBPOSTAL_EXPORTS +#define LIBPOSTAL_EXPORT __declspec(dllexport) +#else +#define LIBPOSTAL_EXPORT __declspec(dllimport) +#endif +#elif __GNUC__ >= 4 +#define LIBPOSTAL_EXPORT __attribute__ ((visibility("default"))) +#else +#define LIBPOSTAL_EXPORT +#endif #define LIBPOSTAL_MAX_LANGUAGE_LEN 4 diff --git a/src/numex.c b/src/numex.c index 9c7e60e1..107768fa 100644 --- a/src/numex.c +++ b/src/numex.c @@ -599,7 +599,7 @@ bool numex_module_init(void) { Must be called only once before the module can be used */ -LIBPOSTAL_EXPORT bool numex_module_setup(char *filename) { +bool numex_module_setup(char *filename) { if (numex_table == NULL) { return numex_table_load(filename == NULL ? 
DEFAULT_NUMEX_PATH : filename); } @@ -610,7 +610,7 @@ LIBPOSTAL_EXPORT bool numex_module_setup(char *filename) { Called once when done with the module (usually at the end of a main method) */ -LIBPOSTAL_EXPORT void numex_module_teardown(void) { +void numex_module_teardown(void) { numex_table_destroy(); numex_table = NULL; } @@ -1101,7 +1101,7 @@ size_t ordinal_suffix_len(char *str, size_t len, char *lang) { return 0; } -LIBPOSTAL_EXPORT char *replace_numeric_expressions(char *str, char *lang) { +char *replace_numeric_expressions(char *str, char *lang) { numex_result_array *results = convert_numeric_expressions(str, lang); if (results == NULL) return NULL; diff --git a/src/numex.h b/src/numex.h index 7a97b237..d80f96e1 100644 --- a/src/numex.h +++ b/src/numex.h @@ -19,7 +19,6 @@ #include "tokens.h" #include "trie.h" #include "trie_search.h" -#include "export.h" #define NUMEX_DATA_FILE "numex.dat" #define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR NUMEX_DATA_FILE @@ -147,7 +146,7 @@ typedef struct numex_result { VECTOR_INIT(numex_result_array, numex_result_t) -LIBPOSTAL_EXPORT char *replace_numeric_expressions(char *str, char *lang); +char *replace_numeric_expressions(char *str, char *lang); numex_result_array *convert_numeric_expressions(char *str, char *lang); size_t ordinal_suffix_len(char *s, size_t len, char *lang); size_t possible_ordinal_digit_len(char *str, size_t len); @@ -156,9 +155,9 @@ bool numex_table_write(FILE *file); bool numex_table_save(char *filename); bool numex_module_init(void); -LIBPOSTAL_EXPORT bool numex_module_setup(char *filename); -LIBPOSTAL_EXPORT void numex_module_teardown(void); - +bool numex_module_setup(char *filename); +void numex_module_teardown(void); + #endif diff --git a/src/scanner.c b/src/scanner.c index 382c701c..736fb07a 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -310240,7 +310240,7 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, boo } -LIBPOSTAL_EXPORT 
token_array *tokenize_keep_whitespace(const char *input) { +token_array *tokenize_keep_whitespace(const char *input) { token_array *tokens = token_array_new(); tokenize_add_tokens(tokens, input, strlen(input), true); return tokens; diff --git a/src/scanner.h b/src/scanner.h index e528a7d0..ac113682 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -9,7 +9,6 @@ #include "token_types.h" #include "tokens.h" -#include "export.h" typedef struct scanner { unsigned char *src, *cursor, *start, *end; @@ -20,7 +19,7 @@ uint16_t scan_token(scanner_t *s); scanner_t scanner_from_string(const char *input, size_t len); void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace); -LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input); +token_array *tokenize_keep_whitespace(const char *input); token_array *tokenize(const char *input); diff --git a/src/scanner.re b/src/scanner.re index fd6d18c0..eae1286d 100644 --- a/src/scanner.re +++ b/src/scanner.re @@ -255,7 +255,7 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, boo } -LIBPOSTAL_EXPORT token_array *tokenize_keep_whitespace(const char *input) { +token_array *tokenize_keep_whitespace(const char *input) { token_array *tokens = token_array_new(); tokenize_add_tokens(tokens, input, strlen(input), true); return tokens; diff --git a/src/string_utils.c b/src/string_utils.c index f18e7722..b8f3abf0 100644 --- a/src/string_utils.c +++ b/src/string_utils.c @@ -58,7 +58,7 @@ inline size_t string_common_suffix(const char *str1, const char *str2) { return common_suffix; } -LIBPOSTAL_EXPORT inline bool string_starts_with(const char *str, const char *start) { +inline bool string_starts_with(const char *str, const char *start) { for (; *start; str++, start++) if (*str != *start) return false; @@ -72,7 +72,7 @@ inline bool string_ends_with(const char *str, const char *ending) { return str_len < end_len ? 
false : !strcmp(str + str_len - end_len, ending); } -LIBPOSTAL_EXPORT inline bool string_equals(const char *s1, const char *s2) { +inline bool string_equals(const char *s1, const char *s2) { if (s1 == NULL || s2 == NULL) return false; return strcmp(s1, s2) == 0; } @@ -169,7 +169,7 @@ uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_re return num_replacements; } -LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst) { +ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst) { ssize_t len = 0; const uint8_t *ptr = str + start; @@ -188,7 +188,7 @@ LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t s return ret_len; } -LIBPOSTAL_EXPORT char *utf8_reversed_string(const char *s) { +char *utf8_reversed_string(const char *s) { int32_t unich; ssize_t len, remaining; @@ -478,7 +478,7 @@ size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *st } -LIBPOSTAL_EXPORT inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2) { +inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2) { return utf8_common_prefix_len_ignore_separators(str1, str2, strlen(str2)); } @@ -606,7 +606,7 @@ size_t string_left_spaces_len(char *str, size_t len) { return spaces; } -LIBPOSTAL_EXPORT char *string_trim(char *str) { +char *string_trim(char *str) { size_t len = strlen(str); size_t left_spaces = string_left_spaces_len(str, len); size_t right_spaces = string_right_spaces_len(str, len); @@ -630,14 +630,14 @@ char_array *char_array_from_string_no_copy(char *str, size_t n) { return array; } -LIBPOSTAL_EXPORT inline char *char_array_get_string(char_array *array) { +inline char *char_array_get_string(char_array *array) { if (array->n == 0 || array->a[array->n - 1] != '\0') { char_array_terminate(array); } return array->a; } -LIBPOSTAL_EXPORT inline char *char_array_to_string(char_array *array) { 
+inline char *char_array_to_string(char_array *array) { if (array->n == 0 || array->a[array->n - 1] != '\0') { char_array_terminate(array); } @@ -662,7 +662,7 @@ inline size_t char_array_len(char_array *array) { } } -LIBPOSTAL_EXPORT inline void char_array_append(char_array *array, char *str) { +inline void char_array_append(char_array *array, char *str) { while(*str) { char_array_push(array, *str++); } @@ -696,11 +696,11 @@ inline void char_array_append_reversed(char_array *array, char *str) { char_array_append_reversed_len(array, str, len); } -LIBPOSTAL_EXPORT inline void char_array_terminate(char_array *array) { +inline void char_array_terminate(char_array *array) { char_array_push(array, '\0'); } -LIBPOSTAL_EXPORT inline void char_array_cat(char_array *array, char *str) { +inline void char_array_cat(char_array *array, char *str) { char_array_strip_nul_byte(array); char_array_append(array, str); char_array_terminate(array); @@ -713,7 +713,7 @@ inline void char_array_cat_len(char_array *array, char *str, size_t len) { } -LIBPOSTAL_EXPORT inline void char_array_cat_reversed(char_array *array, char *str) { +inline void char_array_cat_reversed(char_array *array, char *str) { char_array_strip_nul_byte(array); char_array_append_reversed(array, str); char_array_terminate(array); @@ -764,7 +764,7 @@ void char_array_add_vjoined(char_array *array, char *separator, bool strip_separ } -LIBPOSTAL_EXPORT inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) { +inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) { va_list args; va_start(args, count); char_array_add_vjoined(array, separator, strip_separator, count, args); @@ -808,14 +808,14 @@ void char_array_cat_vprintf(char_array *array, char *format, va_list args) { } } -LIBPOSTAL_EXPORT void char_array_cat_printf(char_array *array, char *format, ...) 
{ +void char_array_cat_printf(char_array *array, char *format, ...) { va_list args; va_start(args, format); char_array_cat_vprintf(array, format, args); va_end(args); } -LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void) { +cstring_array *cstring_array_new(void) { cstring_array *array = malloc(sizeof(cstring_array)); if (array == NULL) return NULL; @@ -834,7 +834,7 @@ LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void) { return array; } -LIBPOSTAL_EXPORT void cstring_array_destroy(cstring_array *self) { +void cstring_array_destroy(cstring_array *self) { if (self == NULL) return; if (self->indices) { uint32_array_destroy(self->indices); @@ -889,7 +889,7 @@ inline size_t cstring_array_used(cstring_array *self) { return self->str->n; } -LIBPOSTAL_EXPORT inline size_t cstring_array_num_strings(cstring_array *self) { +inline size_t cstring_array_num_strings(cstring_array *self) { if (self == NULL) return 0; return self->indices->n; } @@ -958,13 +958,13 @@ inline int32_t cstring_array_get_offset(cstring_array *self, uint32_t i) { return (int32_t)self->indices->a[i]; } -LIBPOSTAL_EXPORT inline char *cstring_array_get_string(cstring_array *self, uint32_t i) { +inline char *cstring_array_get_string(cstring_array *self, uint32_t i) { int32_t data_index = cstring_array_get_offset(self, i); if (data_index < 0) return NULL; return self->str->a + data_index; } -LIBPOSTAL_EXPORT inline int64_t cstring_array_token_length(cstring_array *self, uint32_t i) { +inline int64_t cstring_array_token_length(cstring_array *self, uint32_t i) { if (INVALID_INDEX(i, self->indices->n)) { return -1; } @@ -1015,7 +1015,7 @@ cstring_array *cstring_array_split_ignore_consecutive(char *str, const char *sep } -LIBPOSTAL_EXPORT cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count) { +cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count) { *count = 0; char *ptr = str; size_t len = strlen(str); @@ -1034,7 +1034,7 @@ LIBPOSTAL_EXPORT 
cstring_array *cstring_array_split_no_copy(char *str, char sepa } -LIBPOSTAL_EXPORT char **cstring_array_to_strings(cstring_array *self) { +char **cstring_array_to_strings(cstring_array *self) { char **strings = malloc(self->indices->n * sizeof(char *)); for (int i = 0; i < cstring_array_num_strings(self); i++) { @@ -1073,7 +1073,7 @@ string_tree_t *string_tree_new_size(size_t size) { #define DEFAULT_STRING_TREE_SIZE 8 -LIBPOSTAL_EXPORT string_tree_t *string_tree_new(void) { +string_tree_t *string_tree_new(void) { return string_tree_new_size((size_t)DEFAULT_STRING_TREE_SIZE); } @@ -1085,12 +1085,12 @@ inline char *string_tree_get_alternative(string_tree_t *self, size_t token_index return cstring_array_get_string(self->strings, token_start + alternative); } -LIBPOSTAL_EXPORT inline void string_tree_finalize_token(string_tree_t *self) { +inline void string_tree_finalize_token(string_tree_t *self) { uint32_array_push(self->token_indices, (uint32_t)cstring_array_num_strings(self->strings)); } // terminated -LIBPOSTAL_EXPORT inline void string_tree_add_string(string_tree_t *self, char *str) { +inline void string_tree_add_string(string_tree_t *self, char *str) { cstring_array_add_string(self->strings, str); } @@ -1115,13 +1115,13 @@ inline uint32_t string_tree_num_strings(string_tree_t *self) { return (uint32_t)cstring_array_num_strings(self->strings); } -LIBPOSTAL_EXPORT inline uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i) { +inline uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i) { if (i >= self->token_indices->n) return 0; uint32_t n = self->token_indices->a[i + 1] - self->token_indices->a[i]; return n > 0 ? 
n : 1; } -LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self) { +void string_tree_destroy(string_tree_t *self) { if (self == NULL) return; if (self->token_indices != NULL) { @@ -1135,7 +1135,7 @@ LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self) { free(self); } -LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) { +string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) { string_tree_iterator_t *self = malloc(sizeof(string_tree_iterator_t)); self->tree = tree; @@ -1166,7 +1166,7 @@ LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t return self; } -LIBPOSTAL_EXPORT void string_tree_iterator_next(string_tree_iterator_t *self) { +void string_tree_iterator_next(string_tree_iterator_t *self) { if (self->remaining > 0) { int i; for (i = self->num_tokens - 1; i >= 0; i--) { @@ -1195,11 +1195,11 @@ char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i) return cstring_array_get_string(self->tree->strings, base_index + offset); } -LIBPOSTAL_EXPORT bool string_tree_iterator_done(string_tree_iterator_t *self) { +bool string_tree_iterator_done(string_tree_iterator_t *self) { return self->remaining == 0; } -LIBPOSTAL_EXPORT void string_tree_iterator_destroy(string_tree_iterator_t *self) { +void string_tree_iterator_destroy(string_tree_iterator_t *self) { if (self == NULL) return; if (self->path) { diff --git a/src/string_utils.h b/src/string_utils.h index 9b771b31..0e7dd235 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -16,7 +16,6 @@ Utilities for manipulating strings in C. 
#include "collections.h" #include "utf8proc/utf8proc.h" #include "vector.h" -#include "export.h" #define MAX_UTF8_CHAR_SIZE 4 @@ -60,16 +59,16 @@ char *string_replace_char(char *str, char c1, char c2); bool string_replace_with_array(char *str, char *replace, char *with, char_array *result); char *string_replace(char *str, char *replace, char *with); -LIBPOSTAL_EXPORT bool string_starts_with(const char *str, const char *start); +bool string_starts_with(const char *str, const char *start); bool string_ends_with(const char *str, const char *ending); -LIBPOSTAL_EXPORT bool string_equals(const char *s1, const char *s2); +bool string_equals(const char *s1, const char *s2); uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_repls, size_t trans_len); // UTF-8 string methods -LIBPOSTAL_EXPORT char *utf8_reversed_string(const char *s); // returns a copy, caller frees -LIBPOSTAL_EXPORT ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst); +char *utf8_reversed_string(const char *s); // returns a copy, caller frees +ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst); // Casing functions return a copy, caller frees char *utf8_lower_options(const char *s, utf8proc_option_t options); @@ -81,7 +80,7 @@ int utf8_compare(const char *str1, const char *str2); int utf8_compare_len(const char *str1, const char *str2, size_t len); size_t utf8_common_prefix(const char *str1, const char *str2); size_t utf8_common_prefix_len(const char *str1, const char *str2, size_t len); -LIBPOSTAL_EXPORT size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2); +size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2); size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len); bool utf8_is_hyphen(int32_t ch); @@ -100,7 +99,7 @@ ssize_t string_next_hyphen_index(char *str, size_t len); bool string_contains_hyphen(char *str); bool 
string_contains_hyphen_len(char *str, size_t len); -LIBPOSTAL_EXPORT char *string_trim(char *str); +char *string_trim(char *str); /* char_array is a dynamic character array defined in collections.h but has a few additional methods related to string manipulation. @@ -113,40 +112,40 @@ char_array *char_array_from_string(char *str); char_array *char_array_from_string_no_copy(char *str, size_t n); // Gets the underlying C string for a char_array -LIBPOSTAL_EXPORT char *char_array_get_string(char_array *array); +char *char_array_get_string(char_array *array); // Frees the char_array and returns a standard NUL-terminated string -LIBPOSTAL_EXPORT char *char_array_to_string(char_array *array); +char *char_array_to_string(char_array *array); // Can use strlen(array->a) but this is faster size_t char_array_len(char_array *array); // append_* methods do not NUL-terminate -LIBPOSTAL_EXPORT void char_array_append(char_array *array, char *str); +void char_array_append(char_array *array, char *str); void char_array_append_len(char_array *array, char *str, size_t len); void char_array_append_reversed(char_array *array, char *str); void char_array_append_reversed_len(char_array *array, char *str, size_t len); // add NUL terminator to a char_array void char_array_strip_nul_byte(char_array *array); -LIBPOSTAL_EXPORT void char_array_terminate(char_array *array); +void char_array_terminate(char_array *array); // add_* methods NUL-terminate without stripping NUL-byte void char_array_add(char_array *array, char *str); void char_array_add_len(char_array *array, char *str, size_t len); // Similar to strcat but with dynamic resizing, guaranteed NUL-terminated -LIBPOSTAL_EXPORT void char_array_cat(char_array *array, char *str); +void char_array_cat(char_array *array, char *str); void char_array_cat_len(char_array *array, char *str, size_t len); -LIBPOSTAL_EXPORT void char_array_cat_reversed(char_array *array, char *str); +void char_array_cat_reversed(char_array *array, char *str); void 
char_array_cat_reversed_len(char_array *array, char *str, size_t len); // Similar to cat methods but with printf args void char_array_cat_vprintf(char_array *array, char *format, va_list args); -LIBPOSTAL_EXPORT void char_array_cat_printf(char_array *array, char *format, ...); +void char_array_cat_printf(char_array *array, char *format, ...); // Mainly for paths or delimited strings void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args); -LIBPOSTAL_EXPORT void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...); +void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...); void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...); @@ -171,13 +170,13 @@ typedef struct { char_array *str; } cstring_array; -LIBPOSTAL_EXPORT cstring_array *cstring_array_new(void); +cstring_array *cstring_array_new(void); cstring_array *cstring_array_new_size(size_t size); size_t cstring_array_capacity(cstring_array *self); size_t cstring_array_used(cstring_array *self); -LIBPOSTAL_EXPORT size_t cstring_array_num_strings(cstring_array *self); +size_t cstring_array_num_strings(cstring_array *self); void cstring_array_resize(cstring_array *self, size_t size); void cstring_array_clear(cstring_array *self); @@ -185,7 +184,7 @@ cstring_array *cstring_array_from_char_array(char_array *str); cstring_array *cstring_array_from_strings(char **strings, size_t n); // Convert cstring_array to an array of n C strings and destroy the cstring_array -LIBPOSTAL_EXPORT char **cstring_array_to_strings(cstring_array *self); +char **cstring_array_to_strings(cstring_array *self); // Split on delimiter cstring_array *cstring_array_split(char *str, const char *separator, size_t separator_len, size_t *count); @@ -193,7 +192,7 @@ cstring_array *cstring_array_split(char *str, const char *separator, size_t sepa cstring_array 
*cstring_array_split_ignore_consecutive(char *str, const char *separator, size_t separator_len, size_t *count); // Split on delimiter by replacing (single character) separator with the NUL byte in the original string -LIBPOSTAL_EXPORT cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count); +cstring_array *cstring_array_split_no_copy(char *str, char separator, size_t *count); uint32_t cstring_array_start_token(cstring_array *self); uint32_t cstring_array_add_string(cstring_array *self, char *str); @@ -207,10 +206,10 @@ void cstring_array_cat_string_len(cstring_array *self, char *str, size_t len); void cstring_array_terminate(cstring_array *self); int32_t cstring_array_get_offset(cstring_array *self, uint32_t i); -LIBPOSTAL_EXPORT char *cstring_array_get_string(cstring_array *self, uint32_t i); -LIBPOSTAL_EXPORT int64_t cstring_array_token_length(cstring_array *self, uint32_t i); +char *cstring_array_get_string(cstring_array *self, uint32_t i); +int64_t cstring_array_token_length(cstring_array *self, uint32_t i); -LIBPOSTAL_EXPORT void cstring_array_destroy(cstring_array *self); +void cstring_array_destroy(cstring_array *self); #define cstring_array_foreach(array, i, s, code) { \ for (int __si = 0; __si < array->indices->n; __si++) { \ @@ -246,16 +245,16 @@ typedef struct string_tree { cstring_array *strings; } string_tree_t; -LIBPOSTAL_EXPORT string_tree_t *string_tree_new(void); +string_tree_t *string_tree_new(void); string_tree_t *string_tree_new_size(size_t size); // get char *string_tree_get_alternative(string_tree_t *self, size_t token_index, uint32_t alternative); // finalize -LIBPOSTAL_EXPORT void string_tree_finalize_token(string_tree_t *self); +void string_tree_finalize_token(string_tree_t *self); // terminated -LIBPOSTAL_EXPORT void string_tree_add_string(string_tree_t *self, char *str); +void string_tree_add_string(string_tree_t *self, char *str); void string_tree_add_string_len(string_tree_t *self, char *str, size_t len); // 
unterminated void string_tree_append_string(string_tree_t *self, char *str); @@ -264,9 +263,9 @@ void string_tree_append_string_len(string_tree_t *self, char *str, size_t len); uint32_t string_tree_num_tokens(string_tree_t *self); uint32_t string_tree_num_strings(string_tree_t *self); -LIBPOSTAL_EXPORT uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i); +uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i); -LIBPOSTAL_EXPORT void string_tree_destroy(string_tree_t *self); +void string_tree_destroy(string_tree_t *self); typedef struct string_tree_iterator { string_tree_t *tree; @@ -275,11 +274,11 @@ typedef struct string_tree_iterator { uint32_t remaining; } string_tree_iterator_t; -LIBPOSTAL_EXPORT string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree); -LIBPOSTAL_EXPORT void string_tree_iterator_next(string_tree_iterator_t *self); +string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree); +void string_tree_iterator_next(string_tree_iterator_t *self); char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i); -LIBPOSTAL_EXPORT bool string_tree_iterator_done(string_tree_iterator_t *self); -LIBPOSTAL_EXPORT void string_tree_iterator_destroy(string_tree_iterator_t *self); +bool string_tree_iterator_done(string_tree_iterator_t *self); +void string_tree_iterator_destroy(string_tree_iterator_t *self); #define string_tree_iterator_foreach_token(iter, s, code) { \ diff --git a/src/tokens.h b/src/tokens.h index 045902b3..6b314417 100644 --- a/src/tokens.h +++ b/src/tokens.h @@ -1,7 +1,7 @@ #ifndef TOKENS_H #define TOKENS_H - + #include #include @@ -35,6 +35,6 @@ char *tokenized_string_get_token(tokenized_string_t *self, uint32_t index); void tokenized_string_destroy(tokenized_string_t *self); - + #endif diff --git a/src/transliterate.c b/src/transliterate.c index e8bc9805..bd8cb003 100644 --- a/src/transliterate.c +++ b/src/transliterate.c @@ -666,7 +666,7 @@ static char 
*replace_groups(trie_t *trie, char *str, char *replacement, group_ca return char_array_to_string(ret); } -LIBPOSTAL_EXPORT char *transliterate(char *trans_name, char *str, size_t len) { +char *transliterate(char *trans_name, char *str, size_t len) { if (trans_name == NULL || str == NULL) return NULL; transliteration_table_t *trans_table = get_transliteration_table(); @@ -1978,7 +1978,7 @@ bool transliteration_module_init(void) { return trans_table != NULL; } -LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename) { +bool transliteration_module_setup(char *filename) { if (trans_table == NULL) { return transliteration_table_load(filename == NULL ? DEFAULT_TRANSLITERATION_PATH : filename); } @@ -1987,7 +1987,7 @@ LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename) { } -LIBPOSTAL_EXPORT void transliteration_module_teardown(void) { +void transliteration_module_teardown(void) { transliteration_table_destroy(); trans_table = NULL; } diff --git a/src/transliterate.h b/src/transliterate.h index 1db351e6..ab559393 100644 --- a/src/transliterate.h +++ b/src/transliterate.h @@ -12,7 +12,6 @@ #include "trie.h" #include "trie_search.h" #include "unicode_scripts.h" -#include "export.h" #define LATIN_ASCII "latin-ascii" #define LATIN_ASCII_SIMPLE "latin-ascii-simple" @@ -152,7 +151,7 @@ void transliterator_destroy(transliterator_t *self); bool transliteration_table_add_transliterator(transliterator_t *trans); transliterator_t *get_transliterator(char *name); -LIBPOSTAL_EXPORT char *transliterate(char *trans_name, char *str, size_t len); +char *transliterate(char *trans_name, char *str, size_t len); bool transliteration_table_add_script_language(script_language_t script_language, transliterator_index_t index); transliterator_index_t get_transliterator_index_for_script_language(script_t script, char *language); @@ -172,7 +171,7 @@ bool transliteration_table_save(char *filename); // Module setup/teardown bool transliteration_module_init(void); 
-LIBPOSTAL_EXPORT bool transliteration_module_setup(char *filename); -LIBPOSTAL_EXPORT void transliteration_module_teardown(void); +bool transliteration_module_setup(char *filename); +void transliteration_module_teardown(void); #endif diff --git a/src/trie.c b/src/trie.c index 15db41e6..2e11ba6e 100644 --- a/src/trie.c +++ b/src/trie.c @@ -96,7 +96,7 @@ trie_t *trie_new_alphabet(uint8_t *alphabet, uint32_t alphabet_size) { return self; } -LIBPOSTAL_EXPORT trie_t *trie_new(void) { +trie_t *trie_new(void) { return trie_new_alphabet(DEFAULT_ALPHABET, sizeof(DEFAULT_ALPHABET)); } @@ -661,7 +661,7 @@ bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, size_t len, ui } -LIBPOSTAL_EXPORT inline bool trie_add(trie_t *self, char *key, uint32_t data) { +inline bool trie_add(trie_t *self, char *key, uint32_t data) { size_t len = strlen(key); if (len == 0) return false; return trie_add_at_index(self, ROOT_NODE_ID, key, len + 1, data); @@ -754,7 +754,7 @@ inline bool trie_get_data_at_index(trie_t *self, uint32_t index, uint32_t *data return true; } -LIBPOSTAL_EXPORT inline bool trie_get_data(trie_t *self, char *key, uint32_t *data) { +inline bool trie_get_data(trie_t *self, char *key, uint32_t *data) { uint32_t node_id = trie_get(self, key); return trie_get_data_at_index(self, node_id, data); } @@ -899,7 +899,7 @@ inline uint32_t trie_num_keys(trie_t *self) { /* Destructor */ -LIBPOSTAL_EXPORT void trie_destroy(trie_t *self) { +void trie_destroy(trie_t *self) { if (!self) return; diff --git a/src/trie.h b/src/trie.h index be3d1c64..d2f8519e 100644 --- a/src/trie.h +++ b/src/trie.h @@ -33,7 +33,6 @@ #include "klib/kvec.h" #include "log/log.h" #include "string_utils.h" -#include "export.h" #define TRIE_SIGNATURE 0xABABABAB #define NULL_NODE_ID 0 @@ -80,7 +79,7 @@ typedef struct trie { } trie_t; trie_t *trie_new_alphabet(uint8_t *alphabet, uint32_t alphabet_size); -LIBPOSTAL_EXPORT trie_t *trie_new(void); +trie_t *trie_new(void); uint32_t trie_get_char_index(trie_t 
*self, unsigned char c); uint32_t trie_get_transition_index(trie_t *self, trie_node_t node, unsigned char c); @@ -98,7 +97,7 @@ trie_data_node_t trie_get_data_node(trie_t *self, trie_node_t node); bool trie_set_data_node(trie_t *self, uint32_t index, trie_data_node_t data_node); bool trie_get_data_at_index(trie_t *self, uint32_t index, uint32_t *data); -LIBPOSTAL_EXPORT bool trie_get_data(trie_t *self, char *key, uint32_t *data); +bool trie_get_data(trie_t *self, char *key, uint32_t *data); bool trie_set_data_at_index(trie_t *self, uint32_t index, uint32_t data); bool trie_set_data(trie_t *self, char *key, uint32_t data); @@ -114,7 +113,7 @@ int32_t trie_separate_tail(trie_t *self, uint32_t from_index, unsigned char *tai void trie_tail_merge(trie_t *self, uint32_t old_node_id, unsigned char *suffix, uint32_t data); bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, size_t len, uint32_t data); -LIBPOSTAL_EXPORT bool trie_add(trie_t *self, char *key, uint32_t data); +bool trie_add(trie_t *self, char *key, uint32_t data); bool trie_add_len(trie_t *self, char *key, size_t len, uint32_t data); bool trie_add_suffix(trie_t *self, char *key, uint32_t data); bool trie_add_suffix_at_index(trie_t *self, char *key, uint32_t start_node_id, uint32_t data); @@ -147,7 +146,7 @@ bool trie_save(trie_t *self, char *path); trie_t *trie_read(FILE *file); trie_t *trie_load(char *path); -LIBPOSTAL_EXPORT void trie_destroy(trie_t *self); +void trie_destroy(trie_t *self); diff --git a/src/trie_search.c b/src/trie_search.c index 233ab780..8518db89 100644 --- a/src/trie_search.c +++ b/src/trie_search.c @@ -465,7 +465,7 @@ inline bool trie_search_tokens_with_phrases(trie_t *self, char *str, token_array return trie_search_tokens_from_index(self, str, tokens, ROOT_NODE_ID, phrases); } -LIBPOSTAL_EXPORT inline phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) { +inline phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) { 
phrase_array *phrases = NULL; if (!trie_search_tokens_with_phrases(self, str, tokens, &phrases)) { return NULL; diff --git a/src/trie_search.h b/src/trie_search.h index 09cce48f..df1817e7 100644 --- a/src/trie_search.h +++ b/src/trie_search.h @@ -16,7 +16,6 @@ #include "tokens.h" #include "vector.h" #include "utf8proc/utf8proc.h" -#include "export.h" typedef struct phrase { uint32_t start; @@ -32,7 +31,7 @@ VECTOR_INIT(phrase_array, phrase_t) phrase_array *trie_search(trie_t *self, char *text); bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, phrase_array **phrases); bool trie_search_with_phrases(trie_t *self, char *text, phrase_array **phrases); -LIBPOSTAL_EXPORT phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens); +phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens); bool trie_search_tokens_from_index(trie_t *self, char *str, token_array *tokens, uint32_t start_node_id, phrase_array **phrases); bool trie_search_tokens_with_phrases(trie_t *self, char *text, token_array *tokens, phrase_array **phrases); phrase_t trie_search_suffixes_from_index(trie_t *self, char *word, size_t len, uint32_t start_node_id); diff --git a/src/utf8proc/utf8proc.c b/src/utf8proc/utf8proc.c index c302b79e..34397d58 100644 --- a/src/utf8proc/utf8proc.c +++ b/src/utf8proc/utf8proc.c @@ -44,7 +44,7 @@ #include "utf8proc_data.c" -UTF8PROC_DLLEXPORT const utf8proc_int8_t utf8proc_utf8class[256] = { +const utf8proc_int8_t utf8proc_utf8class[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -87,11 +87,11 @@ UTF8PROC_DLLEXPORT const utf8proc_int8_t utf8proc_utf8class[256] = { be different, being based on ABI compatibility.): */ #define STRINGIZEx(x) #x #define STRINGIZE(x) STRINGIZEx(x) -UTF8PROC_DLLEXPORT const char *utf8proc_version(void) { +const char *utf8proc_version(void) { return 
STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) ""; } -UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) { +const char *utf8proc_errmsg(utf8proc_ssize_t errcode) { switch (errcode) { case UTF8PROC_ERROR_NOMEM: return "Memory for processing UTF-8 data could not be allocated."; @@ -109,7 +109,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) { } #define utf_cont(ch) (((ch) & 0xc0) == 0x80) -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate( +utf8proc_ssize_t utf8proc_iterate( const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst ) { utf8proc_uint32_t uc; @@ -157,11 +157,11 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate( return 4; } -UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t uc) { +utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t uc) { return (((utf8proc_uint32_t)uc)-0xd800 > 0x07ff) && ((utf8proc_uint32_t)uc < 0x110000); } -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) { +utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) { if (uc < 0x00) { return 0; } else if (uc < 0x80) { @@ -228,7 +228,7 @@ static const utf8proc_property_t *unsafe_get_property(utf8proc_int32_t uc) { ); } -UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t uc) { +const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t uc) { return uc < 0 || uc >= 0x110000 ? 
utf8proc_properties : unsafe_get_property(uc); } @@ -259,18 +259,18 @@ static utf8proc_bool grapheme_break(int lbc, int tbc) { } /* return whether there is a grapheme break between codepoints c1 and c2 */ -UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t c1, utf8proc_int32_t c2) { +utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t c1, utf8proc_int32_t c2) { return grapheme_break(utf8proc_get_property(c1)->boundclass, utf8proc_get_property(c2)->boundclass); } -UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c) +utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c) { utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_mapping; return cl >= 0 ? cl : c; } -UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c) +utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c) { utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_mapping; return cu >= 0 ? cu : c; @@ -278,15 +278,15 @@ UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c) /* return a character width analogous to wcwidth (except portable and hopefully less buggy than most system wcwidth functions). 
*/ -UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) { +int utf8proc_charwidth(utf8proc_int32_t c) { return utf8proc_get_property(c)->charwidth; } -UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t c) { +utf8proc_category_t utf8proc_category(utf8proc_int32_t c) { return utf8proc_get_property(c)->category; } -UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) { +const char *utf8proc_category_string(utf8proc_int32_t c) { static const char s[][3] = {"Cn","Lu","Ll","Lt","Lm","Lo","Mn","Mc","Me","Nd","Nl","No","Pc","Pd","Ps","Pe","Pi","Pf","Po","Sm","Sc","Sk","So","Zs","Zl","Zp","Cc","Cf","Cs","Co"}; return s[utf8proc_category(c)]; } @@ -295,7 +295,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) { return utf8proc_decompose_char((replacement_uc), dst, bufsize, \ options & ~UTF8PROC_LUMP, last_boundclass) -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) { +utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) { const utf8proc_property_t *property; utf8proc_propval_t category; utf8proc_int32_t hangul_sindex; @@ -399,7 +399,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, return 1; } -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( +utf8proc_ssize_t utf8proc_decompose( const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options ) { @@ -461,7 +461,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( return wpos; } -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) { +utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t 
length, utf8proc_option_t options) { /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored ASSERT: 'buffer' has one spare byte of free space at the end! */ if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) { @@ -583,7 +583,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, } } -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( +utf8proc_ssize_t utf8proc_map( const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options ) { utf8proc_int32_t *buffer; @@ -612,28 +612,28 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( return result; } -UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str) { +utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str) { utf8proc_uint8_t *retval; utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_DECOMPOSE); return retval; } -UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str) { +utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str) { utf8proc_uint8_t *retval; utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_COMPOSE); return retval; } -UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str) { +utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str) { utf8proc_uint8_t *retval; utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT); return retval; } -UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str) { +utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str) { utf8proc_uint8_t *retval; utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT); diff --git a/src/utf8proc/utf8proc.h b/src/utf8proc/utf8proc.h index 00f10c80..6080b029 100644 --- a/src/utf8proc/utf8proc.h +++ b/src/utf8proc/utf8proc.h @@ -111,18 +111,6 @@ typedef bool utf8proc_bool; #endif 
#include -#ifdef _WIN32 -# ifdef UTF8PROC_EXPORTS -# define UTF8PROC_DLLEXPORT __declspec(dllexport) -# else -# define UTF8PROC_DLLEXPORT __declspec(dllimport) -# endif -#elif __GNUC__ >= 4 -# define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default"))) -#else -# define UTF8PROC_DLLEXPORT -#endif - #ifdef __cplusplus extern "C" { #endif @@ -365,20 +353,20 @@ typedef enum { * Array containing the byte lengths of a UTF-8 encoded codepoint based * on the first byte. */ -UTF8PROC_DLLEXPORT extern const utf8proc_int8_t utf8proc_utf8class[256]; +extern const utf8proc_int8_t utf8proc_utf8class[256]; /** * Returns the utf8proc API version as a string MAJOR.MINOR.PATCH * (http://semver.org format), possibly with a "-dev" suffix for * development versions. */ -UTF8PROC_DLLEXPORT const char *utf8proc_version(void); +const char *utf8proc_version(void); /** * Returns an informative error string for the given utf8proc error code * (e.g. the error codes returned by @ref utf8proc_map). */ -UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode); +const char *utf8proc_errmsg(utf8proc_ssize_t errcode); /** * Reads a single codepoint from the UTF-8 sequence being pointed to by `str`. @@ -390,7 +378,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode); * In case of success, the number of bytes read is returned; otherwise, a * negative error code is returned. */ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref); +utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref); /** * Check if a codepoint is valid (regardless of whether it has been @@ -398,7 +386,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str * * @return 1 if the given `codepoint` is valid and otherwise return 0. 
*/ -UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint); +utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint); /** * Encodes the codepoint as an UTF-8 string in the byte array pointed @@ -409,7 +397,7 @@ UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codep * * This function does not check whether `codepoint` is valid Unicode. */ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst); +utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst); /** * Look up the properties for a given codepoint. @@ -423,7 +411,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepo * If the codepoint is unassigned or invalid, a pointer to a special struct is * returned in which `category` is 0 (@ref UTF8PROC_CATEGORY_CN). */ -UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t codepoint); +const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t codepoint); /** Decompose a codepoint into an array of codepoints. * @@ -452,7 +440,7 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int * required buffer size is returned, while the buffer will be overwritten with * undefined data. */ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char( +utf8proc_ssize_t utf8proc_decompose_char( utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass ); @@ -473,7 +461,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char( * required buffer size is returned, while the buffer will be overwritten with * undefined data. 
*/ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( +utf8proc_ssize_t utf8proc_decompose( const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options ); @@ -503,13 +491,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( * entries of the array pointed to by `str` have to be in the * range `0x0000` to `0x10FFFF`. Otherwise, the program might crash! */ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options); +utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options); /** * Given a pair of consecutive codepoints, return whether a grapheme break is * permitted between them (as defined by the extended grapheme clusters in UAX#29). */ -UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2); +utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2); /** @@ -517,14 +505,14 @@ UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepo * lower-case character, if any; otherwise (if there is no lower-case * variant, or if `c` is not a valid codepoint) return `c`. */ -UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c); +utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c); /** * Given a codepoint `c`, return the codepoint of the corresponding * upper-case character, if any; otherwise (if there is no upper-case * variant, or if `c` is not a valid codepoint) return `c`. 
*/ -UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c); +utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c); /** * Given a codepoint, return a character width analogous to `wcwidth(codepoint)`, @@ -534,19 +522,19 @@ UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c); * @note * If you want to check for particular types of non-printable characters, * (analogous to `isprint` or `iscntrl`), use @ref utf8proc_category. */ -UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t codepoint); +int utf8proc_charwidth(utf8proc_int32_t codepoint); /** * Return the Unicode category for the codepoint (one of the * @ref utf8proc_category_t constants.) */ -UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint); +utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint); /** * Return the two-letter (nul-terminated) Unicode category string for * the codepoint (e.g. `"Lu"` or `"Co"`). */ -UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoint); +const char *utf8proc_category_string(utf8proc_int32_t codepoint); /** * Maps the given UTF-8 string pointed to by `str` to a new UTF-8 @@ -566,7 +554,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi * @note The memory of the new UTF-8 string will have been allocated * with `malloc`, and should therefore be deallocated with `free`. */ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( +utf8proc_ssize_t utf8proc_map( const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options ); @@ -579,13 +567,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( */ /** @{ */ /** NFD normalization (@ref UTF8PROC_DECOMPOSE). */ -UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str); +utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str); /** NFC normalization (@ref UTF8PROC_COMPOSE). 
*/ -UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str); +utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str); /** NFD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */ -UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str); +utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str); /** NFD normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */ -UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str); +utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str); /** @} */ #ifdef __cplusplus diff --git a/test/Makefile.am b/test/Makefile.am index b35f6110..f2e911f2 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -9,6 +9,6 @@ CFLAGS = $(CFLAGS_BASE) TESTS = test_libpostal noinst_PROGRAMS = test_libpostal -test_libpostal_SOURCES = test.c test_expand.c test_parser.c test_transliterate.c test_numex.c test_trie.c test_string_utils.c test_crf_context.c -test_libpostal_LDADD = ../src/libpostal.la $(CBLAS_LIBS) +test_libpostal_SOURCES = test.c test_expand.c test_parser.c test_transliterate.c test_numex.c test_trie.c test_string_utils.c test_crf_context.c ../src/strndup.c ../src/file_utils.c ../src/string_utils.c ../src/utf8proc/utf8proc.c ../src/trie.c ../src/trie_search.c ../src/transliterate.c ../src/numex.c ../src/features.c +test_libpostal_LDADD = ../src/libpostal.la ../src/libscanner.la $(CBLAS_LIBS) test_libpostal_CFLAGS = $(CFLAGS_O3) diff --git a/windows/src/Makefile.am b/windows/src/Makefile.am index 906211fa..1112f09a 100644 --- a/windows/src/Makefile.am +++ b/windows/src/Makefile.am @@ -9,12 +9,12 @@ DEFAULT_INCLUDES = -I.. -I/usr/local/include # Wonky but have to be able to override the user's optimization level to compile the scanner # as it takes an unreasonably long time to compile with the optimizer on. 
-CFLAGS = -D UTF8PROC_EXPORTS -D LIBPOSTAL_EXPORTS +CFLAGS = lib_LTLIBRARIES = libpostal.la libpostal_la_SOURCES = strndup.c libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS) -libpostal_la_CFLAGS = $(CFLAGS_O2) +libpostal_la_CFLAGS = $(CFLAGS_O2) -D LIBPOSTAL_EXPORTS libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ -no-undefined dist_bin_SCRIPTS = libpostal_data @@ -24,18 +24,18 @@ dist_bin_SCRIPTS = libpostal_data # -marm option. For that, CFLAGS_SCANNER_EXTRA is provided that can be filled during configuration stage (see ./configure --help). 
noinst_LTLIBRARIES = libscanner.la libscanner_la_SOURCES = klib/drand48.c scanner.c -libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA) +libscanner_la_CFLAGS = $(CFLAGS_O0) -D LIBPOSTAL_EXPORTS $(CFLAGS_SCANNER_EXTRA) noinst_PROGRAMS = libpostal bench address_parser_train address_parser_test build_address_dictionary build_numex_table build_trans_table address_parser_train address_parser_test language_classifier_train language_classifier language_classifier_test -libpostal_SOURCES = main.c json_encode.c +libpostal_SOURCES = strndup.c main.c json_encode.c file_utils.c string_utils.c utf8proc/utf8proc.c libpostal_LDADD = libpostal.la libpostal_CFLAGS = $(CFLAGS_O3) bench_SOURCES = bench.c bench_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS) bench_CFLAGS = $(CFLAGS_O3) -#address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c libpostal.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c -#address_parser_LDADD = libscanner.la $(CBLAS_LIBS) +#address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c +#address_parser_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS) #address_parser_CFLAGS = $(CFLAGS_O3) 
build_address_dictionary_SOURCES = strndup.c address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c @@ -49,7 +49,7 @@ address_parser_train_LDADD = libscanner.la $(CBLAS_LIBS) address_parser_train_CFLAGS = $(CFLAGS_O3) address_parser_test_SOURCES = strndup.c address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c -address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS) +address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS) address_parser_test_CFLAGS = $(CFLAGS_O3) language_classifier_train_SOURCES = strndup.c language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c From bb5535602ab0975a418dcccecce11551ef23daa2 Mon Sep 17 00:00:00 2001 From: AeroXuk Date: Sat, 25 Nov 2017 10:13:14 +0000 Subject: [PATCH 08/11] Adding libpostal.h to the AppVeyor package. 
--- .appveyor.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 7641ecaa..eac7a36c 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,9 +1,5 @@ version: 1.0.{build} -branches: - only: - - master - image: Visual Studio 2015 platform: x64 @@ -23,7 +19,7 @@ test_script: - '%APPVEYOR_BUILD_FOLDER%\test\test_libpostal.exe' after_build: - - 7z a libpostal.zip %APPVEYOR_BUILD_FOLDER%\libpostal.dll %APPVEYOR_BUILD_FOLDER%\libpostal.def %APPVEYOR_BUILD_FOLDER%\libpostal.exp %APPVEYOR_BUILD_FOLDER%\libpostal.lib + - 7z a libpostal.zip %APPVEYOR_BUILD_FOLDER%\libpostal.dll %APPVEYOR_BUILD_FOLDER%\libpostal.def %APPVEYOR_BUILD_FOLDER%\libpostal.exp %APPVEYOR_BUILD_FOLDER%\libpostal.lib %APPVEYOR_BUILD_FOLDER%\src\libpostal.h artifacts: - path: libpostal.zip From 69e0d5d963213fd930fc1449767ff515c1bae605 Mon Sep 17 00:00:00 2001 From: AeroXuk Date: Mon, 27 Nov 2017 01:42:25 +0000 Subject: [PATCH 09/11] Updated linenoise to be MSys2/MinGW compatible. Updated address_parser app to use the defined libpostal api and not include internal components directly. Removed windows src Makefile as it is now the same as the standard one. 
--- README.md | 37 ++++++++++++++++++- src/Makefile.am | 4 +-- src/address_parser_cli.c | 37 +++++-------------- src/linenoise/linenoise.c | 69 +++++++++++++++++++++++------------ windows/src/Makefile.am | 76 --------------------------------------- 5 files changed, 94 insertions(+), 129 deletions(-) delete mode 100644 windows/src/Makefile.am diff --git a/README.md b/README.md index a4849ad5..cc2c5630 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # libpostal: international street address NLP -[![Build Status](https://travis-ci.org/openvenues/libpostal.svg?branch=master)](https://travis-ci.org/openvenues/libpostal) [![License](https://img.shields.io/github/license/openvenues/libpostal.svg)](https://github.com/openvenues/libpostal/blob/master/LICENSE) +[![Build Status](https://travis-ci.org/openvenues/libpostal.svg?branch=master)](https://travis-ci.org/openvenues/libpostal) +[![Build Status](https://ci.appveyor.com/api/projects/status/github/openvenues/libpostal?branch=master&svg=true)](https://ci.appveyor.com/project/albarrentine/libpostal/branch/master) +[![License](https://img.shields.io/github/license/openvenues/libpostal.svg)](https://github.com/openvenues/libpostal/blob/master/LICENSE) [![OpenCollective Sponsors](https://opencollective.com/libpostal/sponsors/badge.svg)](#sponsors) [![OpenCollective Backers](https://opencollective.com/libpostal/backers/badge.svg)](#backers) @@ -137,6 +139,39 @@ For example, if you write a program called app.c, you can compile it like this: gcc app.c `pkg-config --cflags --libs libpostal` ``` +**On Windows (MSys2/MinGW)** + +For Windows the build procedure currently requires MSys2 and MinGW. This can be downloaded from http://msys2.org. Please follow the instructions on the MSys2 website for installation. 
+ +Please ensure MSys2 is up-to-date by running: +``` +pacman -Syu +``` + +Install the following prerequisites: +``` +pacman -S autoconf automake curl git make libtool gcc mingw-w64-x86_64-gcc +``` + +Then to build the C library: +``` +git clone https://github.com/openvenues/libpostal +cd libpostal +cp -rf windows/* ./ +./bootstrap.sh +./configure --datadir=[...some dir with a few GB of space...] +make +make install +``` +Notes: When setting the datadir, the `C:` drive would be entered as `/c`. The libpostal build script automatically adds `libpostal` to the end of the path, so `/c` would become `C:\libpostal\` on Windows. + +The compiled .dll will be in the `src/.libs/` directory and should be called `libpostal-1.dll`. + +If you require a .lib import library to link this to your application, you can generate one using the Visual Studio `lib.exe` tool and the `libpostal.def` definition file: +``` +lib.exe /def:libpostal.def /out:libpostal.lib /machine:x64 +``` + Examples of parsing ------------------- diff --git a/src/Makefile.am b/src/Makefile.am index 6767219b..6a13fce6 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -34,8 +34,8 @@ libpostal_CFLAGS = $(CFLAGS_O3) bench_SOURCES = bench.c bench_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS) bench_CFLAGS = $(CFLAGS_O3) -address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c -address_parser_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS) +address_parser_SOURCES = strndup.c address_parser_cli.c json_encode.c linenoise/linenoise.c string_utils.c
utf8proc/utf8proc.c +address_parser_LDADD = libpostal.la $(CBLAS_LIBS) address_parser_CFLAGS = $(CFLAGS_O3) build_address_dictionary_SOURCES = strndup.c address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c diff --git a/src/address_parser_cli.c b/src/address_parser_cli.c index 71f1856c..a314f2f1 100644 --- a/src/address_parser_cli.c +++ b/src/address_parser_cli.c @@ -1,35 +1,15 @@ #include #include -#include "address_parser.h" -#include "averaged_perceptron_tagger.h" -#include "address_dictionary.h" -#include "collections.h" -#include "constants.h" -#include "file_utils.h" #include "json_encode.h" #include "libpostal.h" -#include "normalize.h" -#include "scanner.h" -#include "shuffle.h" -#include "tokens.h" #include "linenoise/linenoise.h" #include "log/log.h" - -bool load_address_parser_dependencies(void) { - if (!address_dictionary_module_setup(NULL)) { - log_error("Could not load address dictionaries\n"); - return false; - } - - log_info("address dictionary module loaded\n"); - - return true; -} +#include "strndup.h" int main(int argc, char **argv) { - char *address_parser_dir = LIBPOSTAL_ADDRESS_PARSER_DIR; + char *address_parser_dir = NULL; char *history_file = "address_parser.history"; if (argc > 1) { @@ -38,7 +18,7 @@ int main(int argc, char **argv) { printf("Loading models...\n"); - if (!libpostal_setup() || !address_parser_module_setup(address_parser_dir)) { + if (!libpostal_setup() || !libpostal_setup_parser_datadir(address_parser_dir)) { exit(EXIT_FAILURE); } @@ -54,8 +34,6 @@ int main(int argc, char **argv) { char *input = NULL; - address_parser_t *parser = get_address_parser(); - while((input = linenoise("> ")) != NULL) { if (input[0] != '\0') { @@ -63,7 +41,7 @@ int main(int argc, char **argv) { linenoiseHistorySave(history_file); /* Save the history on disk. 
*/ } - if (strcmp(input, ".exit") == 0) { + if (strncmp(input, ".exit", 5) == 0) { printf("Fin!\n"); free(input); break; @@ -95,7 +73,7 @@ int main(int argc, char **argv) { cstring_array_destroy(command); goto next_input; - } else if (string_starts_with(input, ".print_features")) { + } /*else if (string_starts_with(input, ".print_features")) { size_t num_tokens = 0; cstring_array *command = cstring_array_split(input, " ", 1, &num_tokens); if (cstring_array_num_strings(command) > 1) { @@ -111,13 +89,16 @@ int main(int argc, char **argv) { cstring_array_destroy(command); goto next_input; - } else if (strlen(input) == 0) { + }*/ else if (strlen(input) == 0) { goto next_input; } libpostal_address_parser_response_t *parsed; libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options(); + if (country != NULL) options.country = country; + if (language != NULL) options.language = language; + if ((parsed = libpostal_parse_address(input, options))) { printf("\n"); printf("Result:\n\n"); diff --git a/src/linenoise/linenoise.c b/src/linenoise/linenoise.c index c10557d0..c0a06588 100644 --- a/src/linenoise/linenoise.c +++ b/src/linenoise/linenoise.c @@ -105,7 +105,6 @@ * */ -#include #include #include #include @@ -114,7 +113,12 @@ #include #include #include + +#ifndef _WIN32 +#include #include +#endif //_WIN32 + #include #include "linenoise.h" @@ -123,8 +127,10 @@ static char *unsupported_term[] = {"dumb","cons25","emacs",NULL}; static linenoiseCompletionCallback *completionCallback = NULL; +#ifndef _WIN32 static struct termios orig_termios; /* In order to restore at exit.*/ static int rawmode = 0; /* For atexit() function to check if restore is needed*/ +#endif //_WIN32 static int mlmode = 0; /* Multi line mode. Default is single line. */ static int atexit_registered = 0; /* Register atexit just 1 time. 
*/ static int history_max_len = LINENOISE_DEFAULT_HISTORY_MAX_LEN; @@ -150,25 +156,25 @@ struct linenoiseState { }; enum KEY_ACTION{ - KEY_NULL = 0, /* NULL */ - CTRL_A = 1, /* Ctrl+a */ - CTRL_B = 2, /* Ctrl-b */ - CTRL_C = 3, /* Ctrl-c */ - CTRL_D = 4, /* Ctrl-d */ - CTRL_E = 5, /* Ctrl-e */ - CTRL_F = 6, /* Ctrl-f */ - CTRL_H = 8, /* Ctrl-h */ - TAB = 9, /* Tab */ - CTRL_K = 11, /* Ctrl+k */ - CTRL_L = 12, /* Ctrl+l */ - ENTER = 13, /* Enter */ - CTRL_N = 14, /* Ctrl-n */ - CTRL_P = 16, /* Ctrl-p */ - CTRL_T = 20, /* Ctrl-t */ - CTRL_U = 21, /* Ctrl+u */ - CTRL_W = 23, /* Ctrl+w */ - ESC = 27, /* Escape */ - BACKSPACE = 127 /* Backspace */ + KEY_NULL = 0, /* NULL */ + CTRL_A = 1, /* Ctrl+a */ + CTRL_B = 2, /* Ctrl-b */ + CTRL_C = 3, /* Ctrl-c */ + CTRL_D = 4, /* Ctrl-d */ + CTRL_E = 5, /* Ctrl-e */ + CTRL_F = 6, /* Ctrl-f */ + CTRL_H = 8, /* Ctrl-h */ + TAB = 9, /* Tab */ + CTRL_K = 11, /* Ctrl+k */ + CTRL_L = 12, /* Ctrl+l */ + ENTER = 13, /* Enter */ + CTRL_N = 14, /* Ctrl-n */ + CTRL_P = 16, /* Ctrl-p */ + CTRL_T = 20, /* Ctrl-t */ + CTRL_U = 21, /* Ctrl+u */ + CTRL_W = 23, /* Ctrl+w */ + ESC = 27, /* Escape */ + BACKSPACE = 127 /* Backspace */ }; static void linenoiseAtExit(void); @@ -207,7 +213,13 @@ static int isUnsupportedTerm(void) { char *term = getenv("TERM"); int j; - if (term == NULL) return 0; + if (term == NULL) { +#ifdef _WIN32 + return 1; +#else + return 0; +#endif // _WIN32 + } for (j = 0; unsupported_term[j]; j++) if (!strcasecmp(term,unsupported_term[j])) return 1; return 0; @@ -215,6 +227,7 @@ static int isUnsupportedTerm(void) { /* Raw mode: 1960 magic shit. */ static int enableRawMode(int fd) { +#ifndef _WIN32 struct termios raw; if (!isatty(STDIN_FILENO)) goto fatal; @@ -247,12 +260,17 @@ static int enableRawMode(int fd) { fatal: errno = ENOTTY; return -1; +#else + return 0; +#endif //_WIN32 } static void disableRawMode(int fd) { +#ifndef _WIN32 /* Don't even check the return value as it's too late. 
*/ if (rawmode && tcsetattr(fd,TCSAFLUSH,&orig_termios) != -1) rawmode = 0; +#endif //_WIN32 } /* Use the ESC [6n escape sequence to query the horizontal cursor position @@ -283,9 +301,13 @@ static int getCursorPosition(int ifd, int ofd) { /* Try to get the number of columns in the current terminal, or assume 80 * if it fails. */ static int getColumns(int ifd, int ofd) { +#ifndef _WIN32 struct winsize ws; if (ioctl(1, TIOCGWINSZ, &ws) == -1 || ws.ws_col == 0) { +#else + if(1) { +#endif //_WIN32 /* ioctl() failed. Try to query the terminal itself. */ int start, cols; @@ -307,9 +329,12 @@ static int getColumns(int ifd, int ofd) { } } return cols; - } else { + } +#ifndef _WIN32 + else { return ws.ws_col; } +#endif //_WIN32 failed: return 80; diff --git a/windows/src/Makefile.am b/windows/src/Makefile.am deleted file mode 100644 index 1112f09a..00000000 --- a/windows/src/Makefile.am +++ /dev/null @@ -1,76 +0,0 @@ -# Inherited from autoconf / user-specified -CFLAGS_CONF = @CFLAGS@ -CFLAGS_BASE = -Wall -Wextra -Wno-unused-function -Wformat -Werror=format-security -Winit-self -Wno-sign-compare -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g $(CFLAGS_CONF) -CFLAGS_O0 = $(CFLAGS_BASE) -O0 -CFLAGS_O1 = $(CFLAGS_BASE) -O1 -CFLAGS_O2 = $(CFLAGS_BASE) -O2 -CFLAGS_O3 = $(CFLAGS_BASE) -O3 -DEFAULT_INCLUDES = -I.. -I/usr/local/include - -# Wonky but have to be able to override the user's optimization level to compile the scanner -# as it takes an unreasonably long time to compile with the optimizer on. 
-CFLAGS = - -lib_LTLIBRARIES = libpostal.la -libpostal_la_SOURCES = strndup.c libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c -libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS) -libpostal_la_CFLAGS = $(CFLAGS_O2) -D LIBPOSTAL_EXPORTS -libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ -no-undefined - -dist_bin_SCRIPTS = libpostal_data - -# Scanner can take a very long time to compile with higher optimization levels, so always use -O0, scanner is fast enough -# On cross-compilation for ARM using gcc-4.7, there are "out of range" errors during compilation that can be fixed by adding -# -marm option. For that, CFLAGS_SCANNER_EXTRA is provided that can be filled during configuration stage (see ./configure --help). 
-noinst_LTLIBRARIES = libscanner.la -libscanner_la_SOURCES = klib/drand48.c scanner.c -libscanner_la_CFLAGS = $(CFLAGS_O0) -D LIBPOSTAL_EXPORTS $(CFLAGS_SCANNER_EXTRA) - -noinst_PROGRAMS = libpostal bench address_parser_train address_parser_test build_address_dictionary build_numex_table build_trans_table address_parser_train address_parser_test language_classifier_train language_classifier language_classifier_test - -libpostal_SOURCES = strndup.c main.c json_encode.c file_utils.c string_utils.c utf8proc/utf8proc.c -libpostal_LDADD = libpostal.la -libpostal_CFLAGS = $(CFLAGS_O3) -bench_SOURCES = bench.c -bench_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS) -bench_CFLAGS = $(CFLAGS_O3) -#address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c -#address_parser_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS) -#address_parser_CFLAGS = $(CFLAGS_O3) - -build_address_dictionary_SOURCES = strndup.c address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c -build_address_dictionary_CFLAGS = $(CFLAGS_O3) -build_numex_table_SOURCES = strndup.c numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c -build_numex_table_CFLAGS = $(CFLAGS_O3) -build_trans_table_SOURCES = strndup.c transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c -build_trans_table_CFLAGS = $(CFLAGS_O3) -address_parser_train_SOURCES = strndup.c address_parser_train.c 
address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c shuffle.c utf8proc/utf8proc.c ngrams.c -address_parser_train_LDADD = libscanner.la $(CBLAS_LIBS) -address_parser_train_CFLAGS = $(CFLAGS_O3) - -address_parser_test_SOURCES = strndup.c address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c numex.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c -address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS) -address_parser_test_CFLAGS = $(CFLAGS_O3) - -language_classifier_train_SOURCES = strndup.c language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c -language_classifier_train_LDADD = libscanner.la $(CBLAS_LIBS) -language_classifier_train_CFLAGS = $(CFLAGS_O3) -language_classifier_SOURCES = strndup.c language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c 
utf8proc/utf8proc.c unicode_scripts.c -language_classifier_LDADD = libscanner.la $(CBLAS_LIBS) -language_classifier_CFLAGS = $(CFLAGS_O3) -language_classifier_test_SOURCES = strndup.c language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c numex.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c -language_classifier_test_LDADD = libscanner.la $(CBLAS_LIBS) -language_classifier_test_CFLAGS = $(CFLAGS_O3) - - -pkginclude_HEADERS = libpostal.h - -if DOWNLOAD_DATA -all-local: - ${srcdir}/libpostal_data download all $(datadir)/libpostal -endif - -lexer: scanner.re - re2c -F -s -b -8 -o scanner.c scanner.re - -.PHONY: lexer From 90908118269e3c5c39e707f7bc371aa2e6f669ea Mon Sep 17 00:00:00 2001 From: AeroXuk Date: Mon, 27 Nov 2017 19:20:37 +0000 Subject: [PATCH 10/11] Modified the libpostal API to add an extra function libpostal_parser_print_features to toggle debugging info. Updated address_parser app to use the new function. 
--- libpostal.def | 1 + src/address_parser.c | 7 +++++++ src/address_parser.h | 1 + src/address_parser_cli.c | 10 +++++----- src/libpostal.c | 4 ++++ src/libpostal.h | 2 ++ 6 files changed, 20 insertions(+), 5 deletions(-) diff --git a/libpostal.def b/libpostal.def index 5db887c2..7deba4f7 100644 --- a/libpostal.def +++ b/libpostal.def @@ -5,6 +5,7 @@ libpostal_expansion_array_destroy libpostal_address_parser_response_destroy libpostal_get_address_parser_default_options libpostal_parse_address +libpostal_parser_print_features libpostal_setup libpostal_setup_datadir libpostal_teardown diff --git a/src/address_parser.c b/src/address_parser.c index 7e6de097..613b619d 100644 --- a/src/address_parser.c +++ b/src/address_parser.c @@ -49,6 +49,13 @@ address_parser_t *get_address_parser(void) { return parser; } +bool address_parser_print_features(bool print_features) { + if (parser == NULL) return false; + + parser->options.print_features = print_features; + return true; +} + bool address_parser_save(address_parser_t *self, char *output_dir) { if (self == NULL || output_dir == NULL) return false; diff --git a/src/address_parser.h b/src/address_parser.h index 2518a9ef..4c5e699f 100644 --- a/src/address_parser.h +++ b/src/address_parser.h @@ -215,6 +215,7 @@ address_parser_t *address_parser_new_options(parser_options_t options); address_parser_t *get_address_parser(void); bool address_parser_load(char *dir); +bool address_parser_print_features(bool print_features); libpostal_address_parser_response_t *address_parser_parse(char *address, char *language, char *country); void address_parser_destroy(address_parser_t *self); diff --git a/src/address_parser_cli.c b/src/address_parser_cli.c index a314f2f1..9c50a8c9 100644 --- a/src/address_parser_cli.c +++ b/src/address_parser_cli.c @@ -73,23 +73,23 @@ int main(int argc, char **argv) { cstring_array_destroy(command); goto next_input; - } /*else if (string_starts_with(input, ".print_features")) { + } else if 
(string_starts_with(input, ".print_features")) { size_t num_tokens = 0; cstring_array *command = cstring_array_split(input, " ", 1, &num_tokens); if (cstring_array_num_strings(command) > 1) { char *flag = cstring_array_get_string(command, 1); if (string_compare_case_insensitive(flag, "off") == 0) { - parser->options.print_features = false; + libpostal_parser_print_features(false); } else if (string_compare_case_insensitive(flag, "on") == 0) { - parser->options.print_features = true; + libpostal_parser_print_features(true); } } else { - parser->options.print_features = true; + libpostal_parser_print_features(true); } cstring_array_destroy(command); goto next_input; - }*/ else if (strlen(input) == 0) { + } else if (strlen(input) == 0) { goto next_input; } diff --git a/src/libpostal.c b/src/libpostal.c index d226413e..152cf77b 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -1073,6 +1073,10 @@ libpostal_address_parser_response_t *libpostal_parse_address(char *address, libp return parsed; } +bool libpostal_parser_print_features(bool print_features) { + return address_parser_print_features(print_features); +} + bool libpostal_setup_datadir(char *datadir) { char *transliteration_path = NULL; char *numex_path = NULL; diff --git a/src/libpostal.h b/src/libpostal.h index 4e62f745..ce428e62 100644 --- a/src/libpostal.h +++ b/src/libpostal.h @@ -101,6 +101,8 @@ LIBPOSTAL_EXPORT libpostal_address_parser_options_t libpostal_get_address_parser LIBPOSTAL_EXPORT libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options); +LIBPOSTAL_EXPORT bool libpostal_parser_print_features(bool print_features); + // Setup/teardown methods LIBPOSTAL_EXPORT bool libpostal_setup(void); From 19ae97d52792b56353c52deecab145aa5ccb71bc Mon Sep 17 00:00:00 2001 From: AeroXuk Date: Mon, 27 Nov 2017 23:40:46 +0000 Subject: [PATCH 11/11] Adding include config.h to strndup.c so that the function is not compiled and doesn't cause errors when 
the system has its own implementation. --- src/strndup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/strndup.c b/src/strndup.c index d02657d6..61f605b6 100644 --- a/src/strndup.c +++ b/src/strndup.c @@ -1,3 +1,4 @@ +#include #ifndef HAVE_STRNDUP #include