From 3137ef5c6a88200afa97739ef7a6a0f1ab12623d Mon Sep 17 00:00:00 2001 From: Al Date: Sat, 6 Aug 2016 00:43:24 -0400 Subject: [PATCH] [build] configure/Makefile changes to use SIMD exp and BLAS when available --- configure.ac | 25 +++++ m4/ax_blas.m4 | 238 ++++++++++++++++++++++++++++++++++++++++++++++++ src/Makefile.am | 36 ++++---- 3 files changed, 283 insertions(+), 16 deletions(-) create mode 100644 m4/ax_blas.m4 diff --git a/configure.ac b/configure.ac index ae44cbfd..de9fd8a5 100644 --- a/configure.ac +++ b/configure.ac @@ -3,6 +3,8 @@ AC_INIT([libpostal], [0.3]) +AC_CONFIG_MACRO_DIRS([m4]) + AM_INIT_AUTOMAKE([foreign subdir-objects]) AC_CONFIG_SRCDIR([src]) LT_INIT([shared]) @@ -60,4 +62,27 @@ AC_CHECK_PROG([FOUND_SHUF], [shuf], [yes]) AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf available])]) +# ------------------------------------------------------------------ +# Checks for SSE2 build +# Enabled by default, but the flags are kept only if the compiler accepts +# them, so non-x86 hosts (ARM, POWER, ...) still configure and build. +# ------------------------------------------------------------------ +AC_ARG_ENABLE([sse2], + AS_HELP_STRING( + [--disable-sse2], + [disable SSE2 optimization routines] + ) + ) + +AS_IF([test "x$enable_sse2" != "xno"], [ + ax_sse2_save_CFLAGS="${CFLAGS}" + CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}" + AC_MSG_CHECKING([whether the C compiler supports SSE2]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <emmintrin.h>]], [[__m128d v = _mm_setzero_pd(); (void) v;]])], + [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]); CFLAGS="${ax_sse2_save_CFLAGS}"]) +]) + +AX_BLAS() + AC_OUTPUT diff --git a/m4/ax_blas.m4 b/m4/ax_blas.m4 new file mode 100644 index 00000000..0cd94c1b --- /dev/null +++ b/m4/ax_blas.m4 @@ -0,0 +1,238 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_blas.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_BLAS([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) +# +# DESCRIPTION +# +# This macro looks for a library that implements the BLAS linear-algebra +# interface (see http://www.netlib.org/blas/). On success, it sets the +# BLAS_LIBS output variable to hold the requisite library linkages. +# +# To link with BLAS, you should link with: +# +# $BLAS_LIBS $LIBS $FLIBS +# +# in that order.
FLIBS is the output variable of the +# AC_F77_LIBRARY_LDFLAGS macro (called if necessary by AX_BLAS), and is +# sometimes necessary in order to link with F77 libraries. Users will also +# need to use AC_F77_DUMMY_MAIN (see the autoconf manual), for the same +# reason. +# +# Many libraries are searched for, from ATLAS to CXML to ESSL. The user +# may also use --with-blas=<lib> in order to use some specific BLAS +# library <lib>. In order to link successfully, however, be aware that you +# will probably need to use the same Fortran compiler (which can be set +# via the F77 env. var.) as was used to compile the BLAS library. +# +# ACTION-IF-FOUND is a list of shell commands to run if a BLAS library is +# found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it is +# not found. If ACTION-IF-FOUND is not specified, the default action will +# define HAVE_BLAS. +# +# LICENSE +# +# Copyright (c) 2008 Steven G. Johnson +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them.
The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 14 + +AU_ALIAS([ACX_BLAS], [AX_BLAS]) +AC_DEFUN([AX_BLAS], [ +AC_PREREQ(2.50) +AC_REQUIRE([AC_F77_LIBRARY_LDFLAGS]) +AC_REQUIRE([AC_CANONICAL_HOST]) +ax_blas_ok=no + +AC_ARG_WITH(blas, + [AS_HELP_STRING([--with-blas=<lib>], [use BLAS library <lib>])]) +case $with_blas in + yes | "") ;; + no) ax_blas_ok=disable ;; + -* | */* | *.a | *.so | *.so.* | *.o) BLAS_LIBS="$with_blas" ;; + *) BLAS_LIBS="-l$with_blas" ;; +esac + +# Get fortran linker names of BLAS functions to check for. +AC_F77_FUNC(sgemm) +AC_F77_FUNC(dgemm) + +ax_blas_save_LIBS="$LIBS" +LIBS="$LIBS $FLIBS" + +# First, check BLAS_LIBS environment variable +if test $ax_blas_ok = no; then +if test "x$BLAS_LIBS" != x; then + save_LIBS="$LIBS"; LIBS="$BLAS_LIBS $LIBS" + AC_MSG_CHECKING([for $sgemm in $BLAS_LIBS]) + AC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes], [BLAS_LIBS=""]) + AC_MSG_RESULT($ax_blas_ok) + LIBS="$save_LIBS" +fi +fi + +# BLAS linked to by default? (happens on some supercomputers) +if test $ax_blas_ok = no; then + save_LIBS="$LIBS"; LIBS="$LIBS" + AC_MSG_CHECKING([if $sgemm is being linked in already]) + AC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes]) + AC_MSG_RESULT($ax_blas_ok) + LIBS="$save_LIBS" +fi + +# BLAS in OpenBLAS library? (http://xianyi.github.com/OpenBLAS/) +if test $ax_blas_ok = no; then + AC_CHECK_LIB(openblas, $sgemm, [ax_blas_ok=yes + BLAS_LIBS="-lopenblas"]) +fi + +# BLAS in ATLAS library?
(http://math-atlas.sourceforge.net/) +if test $ax_blas_ok = no; then + AC_CHECK_LIB(atlas, ATL_xerbla, + [AC_CHECK_LIB(f77blas, $sgemm, + [AC_CHECK_LIB(cblas, cblas_dgemm, + [ax_blas_ok=yes + BLAS_LIBS="-lcblas -lf77blas -latlas"], + [], [-lf77blas -latlas])], + [], [-latlas])]) +fi + +# BLAS in PhiPACK libraries? (requires generic BLAS lib, too) +if test $ax_blas_ok = no; then + AC_CHECK_LIB(blas, $sgemm, + [AC_CHECK_LIB(dgemm, $dgemm, + [AC_CHECK_LIB(sgemm, $sgemm, + [ax_blas_ok=yes; BLAS_LIBS="-lsgemm -ldgemm -lblas"], + [], [-lblas])], + [], [-lblas])]) +fi + +# BLAS in Intel MKL library? +if test $ax_blas_ok = no; then + # MKL for gfortran + if test x"$ac_cv_fc_compiler_gnu" = xyes; then + # 64 bit + if test $host_cpu = x86_64; then + AC_CHECK_LIB(mkl_gf_lp64, $sgemm, + [ax_blas_ok=yes;BLAS_LIBS="-lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread"],, + [-lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread]) + # 32 bit + elif test $host_cpu = i686; then + AC_CHECK_LIB(mkl_gf, $sgemm, + [ax_blas_ok=yes;BLAS_LIBS="-lmkl_gf -lmkl_sequential -lmkl_core -lpthread"],, + [-lmkl_gf -lmkl_sequential -lmkl_core -lpthread]) + fi + # MKL for other compilers (Intel, PGI, ...?) + else + # 64-bit + if test $host_cpu = x86_64; then + AC_CHECK_LIB(mkl_intel_lp64, $sgemm, + [ax_blas_ok=yes;BLAS_LIBS="-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread"],, + [-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread]) + # 32-bit + elif test $host_cpu = i686; then + AC_CHECK_LIB(mkl_intel, $sgemm, + [ax_blas_ok=yes;BLAS_LIBS="-lmkl_intel -lmkl_sequential -lmkl_core -lpthread"],, + [-lmkl_intel -lmkl_sequential -lmkl_core -lpthread]) + fi + fi +fi +# Old versions of MKL +if test $ax_blas_ok = no; then + AC_CHECK_LIB(mkl, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-lmkl -lguide -lpthread"],,[-lguide -lpthread]) +fi + +# BLAS in Apple vecLib library? 
+if test $ax_blas_ok = no; then + save_LIBS="$LIBS"; LIBS="-framework vecLib $LIBS" + AC_MSG_CHECKING([for $sgemm in -framework vecLib]) + AC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes;BLAS_LIBS="-framework vecLib"]) + AC_MSG_RESULT($ax_blas_ok) + LIBS="$save_LIBS" +fi + +# BLAS in Alpha CXML library? +if test $ax_blas_ok = no; then + AC_CHECK_LIB(cxml, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-lcxml"]) +fi + +# BLAS in Alpha DXML library? (now called CXML, see above) +if test $ax_blas_ok = no; then + AC_CHECK_LIB(dxml, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-ldxml"]) +fi + +# BLAS in Sun Performance library? +if test $ax_blas_ok = no; then + if test "x$GCC" != xyes; then # only works with Sun CC + AC_CHECK_LIB(sunmath, acosp, + [AC_CHECK_LIB(sunperf, $sgemm, + [BLAS_LIBS="-xlic_lib=sunperf -lsunmath" + ax_blas_ok=yes],[],[-lsunmath])]) + fi +fi + +# BLAS in SCSL library? (SGI/Cray Scientific Library) +if test $ax_blas_ok = no; then + AC_CHECK_LIB(scs, $sgemm, [ax_blas_ok=yes; BLAS_LIBS="-lscs"]) +fi + +# BLAS in SGIMATH library? +if test $ax_blas_ok = no; then + AC_CHECK_LIB(complib.sgimath, $sgemm, + [ax_blas_ok=yes; BLAS_LIBS="-lcomplib.sgimath"]) +fi + +# BLAS in IBM ESSL library? (requires generic BLAS lib, too) +if test $ax_blas_ok = no; then + AC_CHECK_LIB(blas, $sgemm, + [AC_CHECK_LIB(essl, $sgemm, + [ax_blas_ok=yes; BLAS_LIBS="-lessl -lblas"], + [], [-lblas $FLIBS])]) +fi + +# Generic BLAS library? 
+if test $ax_blas_ok = no; then + AC_CHECK_LIB(blas, $sgemm, [ax_blas_ok=yes; BLAS_LIBS="-lblas"]) +fi + +AC_SUBST(BLAS_LIBS) + +LIBS="$ax_blas_save_LIBS" + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$ax_blas_ok" = xyes; then + ifelse([$1],,AC_DEFINE(HAVE_BLAS,1,[Define if you have a BLAS library.]),[$1]) + : +else + ax_blas_ok=no + $2 +fi +])dnl AX_BLAS diff --git a/src/Makefile.am b/src/Makefile.am index 6268c87b..c7d9e9a0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,17 +1,21 @@ SUBDIRS = sparkey -CFLAGS_BASE = -Wfloat-equal -Wpointer-arith -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' +# Inherited from autoconf / user-specified +CFLAGS_CONF = @CFLAGS@ +CFLAGS_BASE = -Wall -Wextra -Wno-unused-function -Wdeclaration-after-statement -Wformat -Werror=format-security -Winit-self -Wno-sign-compare -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g $(CFLAGS_CONF) CFLAGS_O0 = $(CFLAGS_BASE) -O0 CFLAGS_O1 = $(CFLAGS_BASE) -O1 CFLAGS_O2 = $(CFLAGS_BASE) -O2 CFLAGS_O3 = $(CFLAGS_BASE) -O3 DEFAULT_INCLUDES = -I.. -I/usr/local/include -CFLAGS = $(CFLAGS_BASE) +# Wonky but have to be able to override the user's optimization level to compile the scanner +# as it takes an unreasonably long time to compile with the optimizer on. 
+CFLAGS = lib_LTLIBRARIES = libpostal.la -libpostal_la_SOURCES = libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c numex.c utf8proc/utf8proc.c cmp/cmp.c geodb.c geo_disambiguation.c normalize.c bloom.c features.c geonames.c geohash/geohash.c unicode_scripts.c msgpack_utils.c address_parser.c address_parser_io.c averaged_perceptron.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c matrix.c minibatch.c float_utils.c -libpostal_la_LIBADD = libscanner.la sparkey/libsparkey.la +libpostal_la_SOURCES = libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c numex.c utf8proc/utf8proc.c cmp/cmp.c geodb.c geo_disambiguation.c normalize.c bloom.c features.c geonames.c geohash/geohash.c unicode_scripts.c msgpack_utils.c address_parser.c address_parser_io.c averaged_perceptron.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c +libpostal_la_LIBADD = libscanner.la sparkey/libsparkey.la $(BLAS_LIBS) libpostal_la_CFLAGS = $(CFLAGS_O2) dist_bin_SCRIPTS = libpostal_data @@ -26,7 +30,7 @@ libpostal_SOURCES = main.c json_encode.c libpostal_LDADD = libpostal.la libpostal_CFLAGS = $(CFLAGS_O3) bench_SOURCES = bench.c -bench_LDADD = libpostal.la libscanner.la +bench_LDADD = libpostal.la libscanner.la $(BLAS_LIBS) bench_CFLAGS = $(CFLAGS_O3) build_address_dictionary_SOURCES = address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c build_address_dictionary_CFLAGS = $(CFLAGS_O3) @@ -37,23 +41,23 @@ build_numex_table_SOURCES = numex_table_builder.c numex.c file_utils.c string_ut build_numex_table_CFLAGS = $(CFLAGS_O3) build_trans_table_SOURCES = transliteration_table_builder.c 
transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c build_trans_table_CFLAGS = $(CFLAGS_O3) -address_parser_train_SOURCES = address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c sparse_matrix.c matrix.c float_utils.c averaged_perceptron_trainer.c averaged_perceptron_tagger.c address_dictionary.c geodb.c geo_disambiguation.c graph.c graph_builder.c normalize.c features.c geonames.c geohash/geohash.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c msgpack_utils.c file_utils.c shuffle.c utf8proc/utf8proc.c cmp/cmp.c -address_parser_train_LDADD = sparkey/libsparkey.la libscanner.la +address_parser_train_SOURCES = address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c sparse_matrix.c float_utils.c averaged_perceptron_trainer.c averaged_perceptron_tagger.c address_dictionary.c geodb.c geo_disambiguation.c graph.c graph_builder.c normalize.c features.c geonames.c geohash/geohash.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c msgpack_utils.c file_utils.c shuffle.c utf8proc/utf8proc.c cmp/cmp.c +address_parser_train_LDADD = sparkey/libsparkey.la libscanner.la $(BLAS_LIBS) address_parser_train_CFLAGS = $(CFLAGS_O3) -address_parser_test_SOURCES = address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c sparse_matrix.c matrix.c float_utils.c averaged_perceptron_trainer.c averaged_perceptron_tagger.c address_dictionary.c geodb.c geo_disambiguation.c graph.c graph_builder.c normalize.c features.c geonames.c geohash/geohash.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c msgpack_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c -address_parser_test_LDADD = sparkey/libsparkey.la libscanner.la +address_parser_test_SOURCES = address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c sparse_matrix.c float_utils.c 
averaged_perceptron_trainer.c averaged_perceptron_tagger.c address_dictionary.c geodb.c geo_disambiguation.c graph.c graph_builder.c normalize.c features.c geonames.c geohash/geohash.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c msgpack_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c +address_parser_test_LDADD = sparkey/libsparkey.la libscanner.la $(BLAS_LIBS) address_parser_test_CFLAGS = $(CFLAGS_O3) address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c -address_parser_LDADD = sparkey/libsparkey.la libscanner.la libpostal.la +address_parser_LDADD = libpostal.la address_parser_CFLAGS = $(CFLAGS_O3) -language_classifier_train_SOURCES = language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c matrix.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c normalize.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c -language_classifier_train_LDADD = libscanner.la +language_classifier_train_SOURCES = language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c normalize.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c +language_classifier_train_LDADD = libscanner.la $(BLAS_LIBS) language_classifier_train_CFLAGS = $(CFLAGS_O3) -language_classifier_SOURCES = language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c matrix.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c transliterate.c trie.c 
trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c -language_classifier_LDADD = libscanner.la +language_classifier_SOURCES = language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c +language_classifier_LDADD = libscanner.la $(BLAS_LIBS) language_classifier_CFLAGS = $(CFLAGS_O3) -language_classifier_test_SOURCES = language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c matrix.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c -language_classifier_test_LDADD = libscanner.la +language_classifier_test_SOURCES = language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c +language_classifier_test_LDADD = libscanner.la $(BLAS_LIBS) language_classifier_test_CFLAGS = $(CFLAGS_O3)