Merging changes from AeroXuk/libpostal_windows.

This commit is contained in:
AeroXuk
2017-11-19 12:44:38 +00:00
parent 7d6e648fc3
commit 2d3b420d35
17 changed files with 398 additions and 30 deletions

105
windows/configure.ac Normal file
View File

@@ -0,0 +1,105 @@
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
# The three version components are defined once here; they form the package
# version passed to AC_INIT and the LIBPOSTAL_SO_VERSION substitution made
# near the end of this file.
m4_define(LIBPOSTAL_MAJOR_VERSION, [1])
m4_define(LIBPOSTAL_MINOR_VERSION, [0])
m4_define(LIBPOSTAL_PATCH_VERSION, [0])
AC_INIT([libpostal], LIBPOSTAL_MAJOR_VERSION.LIBPOSTAL_MINOR_VERSION.LIBPOSTAL_PATCH_VERSION)
# Project-local macros are looked up in the m4 directory.
AC_CONFIG_MACRO_DIR([m4])
# foreign: do not require the GNU standard files; subdir-objects: place each
# object file next to its source (some sources live in subdirectories).
AM_INIT_AUTOMAKE([foreign subdir-objects])
AC_CONFIG_SRCDIR([src])
# Initialise libtool with the win32-dll option for building DLLs on Windows.
LT_INIT([win32-dll])
# Results of the checks below are written to config.h.
AC_CONFIG_HEADERS([config.h])
# Checks for programs.
AC_PROG_CC_C99
AC_PROG_INSTALL
# Append /usr/local/lib to the linker search path; this affects the library
# checks that follow as well as the final link.
LDFLAGS="$LDFLAGS -L/usr/local/lib"
# Checks for libraries.
# Look for a library providing log, trying libm; configure stops with an
# error if none is found.
AC_SEARCH_LIBS([log],
[m],,[AC_MSG_ERROR([Could not find math library])])
# Checks for header files.
# Each header found gets a HAVE_<HEADER>_H define in config.h.
AC_HEADER_STDC
AC_HEADER_TIME
AC_HEADER_DIRENT
AC_HEADER_STDBOOL
AC_CHECK_HEADERS([fcntl.h float.h inttypes.h limits.h locale.h malloc.h memory.h stddef.h stdint.h stdlib.h string.h unistd.h])
# Checks for typedefs, structures, and compiler characteristics.
# The type checks supply a fallback definition when the platform lacks the type.
AC_C_INLINE
AC_TYPE_INT16_T
AC_TYPE_INT32_T
AC_TYPE_INT64_T
AC_TYPE_INT8_T
AC_TYPE_OFF_T
AC_TYPE_SIZE_T
AC_TYPE_SSIZE_T
AC_TYPE_UINT16_T
AC_TYPE_UINT32_T
AC_TYPE_UINT64_T
AC_TYPE_UINT8_T
AC_CHECK_TYPES([ptrdiff_t])
# Checks for library functions.
# Each function found gets a HAVE_<FUNCTION> define in config.h.
AC_CHECK_FUNCS([malloc realloc getcwd gettimeofday memmove memset regcomp setlocale sqrt strdup strndup])
# Output files to be generated from their .in templates.
AC_CONFIG_FILES([Makefile
libpostal.pc
src/Makefile
test/Makefile])
# Look for shuf and gshuf on PATH; each variable is set to yes when the
# program is found, and the matching HAVE_SHUF / HAVE_GSHUF define is then
# added to config.h.
AC_CHECK_PROG([FOUND_SHUF], [shuf], [yes])
AC_CHECK_PROG([FOUND_GSHUF], [gshuf], [yes])
AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf available])])
AS_IF([test "x$FOUND_GSHUF" = xyes], [AC_DEFINE([HAVE_GSHUF], [1], [gshuf available])])
# ------------------------------------------------------------------
# SSE2 code paths: enabled by default, switched off with --disable-sse2
# ------------------------------------------------------------------
AC_ARG_ENABLE([sse2],
  [AS_HELP_STRING([--disable-sse2],
                  [disable SSE2 optimization routines])])
# Anything other than an explicit "no" keeps SSE2 on; the SSE flags are
# placed ahead of whatever CFLAGS already holds.
AS_IF([test "x$enable_sse2" != "xno"],
      [CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}"])
# Run the CBLAS detection macro only when the cblas.h header is available.
AC_CHECK_HEADER([cblas.h], [AX_CBLAS])
# --disable-data-download: do not fetch the libpostal data files during the
# build. DOWNLOAD_DATA defaults to true, accepts only yes/no on the command
# line, and is exposed to the Makefiles as an Automake conditional.
# The help text goes through AS_HELP_STRING so it is aligned and wrapped like
# the other options in the configure --help output.
AC_ARG_ENABLE([data-download],
  [AS_HELP_STRING([--disable-data-download],
                  [Disable downloading data])],
  [case "${enableval}" in
     yes) DOWNLOAD_DATA=true ;;
     no)  DOWNLOAD_DATA=false ;;
     *)   AC_MSG_ERROR([bad value ${enableval} for --disable-data-download]) ;;
   esac],
  [DOWNLOAD_DATA=true])
AM_CONDITIONAL([DOWNLOAD_DATA], [test "x$DOWNLOAD_DATA" = "xtrue"])
# --with-cflags-scanner-extra=VALUE: extra compiler options applied only when
# building scanner.c. The value is exported as CFLAGS_SCANNER_EXTRA.
#   no / --without-cflags-scanner-extra  -> no extra flags
#   option given without a value         -> withval is "yes"; warn and ignore
#                                           it instead of handing the literal
#                                           word to the compiler
#   anything else                        -> used verbatim
AC_ARG_WITH([cflags-scanner-extra],
  [AS_HELP_STRING([--with-cflags-scanner-extra@<:@=VALUE@:>@],
                  [Extra compilation options for scanner.c])],
  [if test "x$withval" = "xno"; then
     CFLAGS_SCANNER_EXTRA=""
   elif test "x$withval" = "xyes"; then
     AC_MSG_WARN([--with-cflags-scanner-extra given without a value; ignoring it])
     CFLAGS_SCANNER_EXTRA=""
   else
     CFLAGS_SCANNER_EXTRA="$withval"
   fi],
  [CFLAGS_SCANNER_EXTRA=""])
AC_MSG_NOTICE([extra cflags for scanner.c: $CFLAGS_SCANNER_EXTRA])
AC_SUBST([CFLAGS_SCANNER_EXTRA])
# Export the version components joined as major:minor:patch; src/Makefile.am
# passes this value to libtool through -version-info.
# NOTE(review): libtool interprets -version-info as current:revision:age, not
# major:minor:patch - confirm this mapping is intended before bumping versions.
AC_SUBST(LIBPOSTAL_SO_VERSION, LIBPOSTAL_MAJOR_VERSION:LIBPOSTAL_MINOR_VERSION:LIBPOSTAL_PATCH_VERSION)
# Create config.status and generate the configured output files.
AC_OUTPUT

45
windows/src/Makefile.am Normal file
View File

@@ -0,0 +1,45 @@
# This version of the makefile skips building the programs. It only builds the libraries and downloads data so you can use the API.
# Inherited from autoconf / user-specified
CFLAGS_CONF = @CFLAGS@
# Warnings, debug info and the compiled-in data directory, followed by the
# configure-time flags; shared by every optimization level below.
CFLAGS_BASE = -Wall -Wextra -Wno-unused-function -Wformat -Werror=format-security -Winit-self -Wno-sign-compare -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g $(CFLAGS_CONF)
# One variant of the base flags per optimization level.
CFLAGS_O0 = $(CFLAGS_BASE) -O0
CFLAGS_O1 = $(CFLAGS_BASE) -O1
CFLAGS_O2 = $(CFLAGS_BASE) -O2
CFLAGS_O3 = $(CFLAGS_BASE) -O3
# Header search path: the parent directory plus /usr/local/include.
DEFAULT_INCLUDES = -I.. -I/usr/local/include
# Wonky but have to be able to override the user's optimization level to compile the scanner
# as it takes an unreasonably long time to compile with the optimizer on.
# Windows change: CFLAGS is overridden to hold only the UTF8PROC_EXPORTS and
# LIBPOSTAL_EXPORTS defines so the library builds on Windows. The flags chosen
# at configure time reach the compiler through CFLAGS_CONF instead.
CFLAGS = -D UTF8PROC_EXPORTS -D LIBPOSTAL_EXPORTS
# The installable libpostal library, built with the -O2 flag set.
lib_LTLIBRARIES = libpostal.la
libpostal_la_SOURCES = \
	strndup.c \
	libpostal.c \
	address_dictionary.c \
	transliterate.c \
	tokens.c \
	trie.c \
	trie_search.c \
	trie_utils.c \
	string_utils.c \
	file_utils.c \
	utf8proc/utf8proc.c \
	cmp/cmp.c \
	normalize.c \
	numex.c \
	features.c \
	unicode_scripts.c \
	address_parser.c \
	address_parser_io.c \
	averaged_perceptron.c \
	crf.c \
	crf_context.c \
	sparse_matrix.c \
	averaged_perceptron_tagger.c \
	graph.c \
	graph_builder.c \
	language_classifier.c \
	language_features.c \
	logistic_regression.c \
	logistic.c \
	minibatch.c \
	float_utils.c \
	ngrams.c
# The scanner library is linked in, followed by the contents of CBLAS_LIBS.
libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS)
libpostal_la_CFLAGS = $(CFLAGS_O2)
# The library version comes from configure; -no-undefined is passed to libtool.
libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ -no-undefined
# The data helper script is installed alongside the binaries and distributed.
dist_bin_SCRIPTS = libpostal_data
# The scanner is always compiled at -O0: higher optimization levels make it
# take a very long time to compile, and it is fast enough without them.
# Cross-compiling for ARM with gcc-4.7 can produce "out of range" errors that
# the -marm option fixes; CFLAGS_SCANNER_EXTRA exists for such flags and is
# filled in at configure time (see ./configure --help).
noinst_LTLIBRARIES = libscanner.la
libscanner_la_SOURCES = \
	klib/drand48.c \
	scanner.c
libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA)
# No programs are built in this version of the makefile; only the public
# header is installed.
pkginclude_HEADERS = libpostal.h
# When data download is enabled at configure time, the default build also runs
# the libpostal_data script to download all data into the libpostal data
# directory. Recipe lines must begin with a tab. Both paths are quoted so that
# source or install locations containing spaces still work.
if DOWNLOAD_DATA
all-local:
	"$(srcdir)/libpostal_data" download all "$(datadir)/libpostal"
endif
# Regenerate scanner.c from scanner.re with re2c. Run explicitly via
# "make lexer"; the target is phony, so it reruns every time it is requested.
lexer: scanner.re
	re2c -F -s -b -8 -o scanner.c scanner.re
.PHONY: lexer