From 57eaa414ceadb48d5922099eeaa446b02894a2e4 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 6 Jul 2023 01:49:02 -0400 Subject: [PATCH] [revert] reverting the commits from #578, leaving header file in repo for the moment --- README.md | 2 ++ configure.ac | 53 +++++++------------------------------------- src/crf_context.c | 16 ++++++------- src/vector_math.h | 6 ++--- test/Makefile.am | 2 +- windows/configure.ac | 53 +++++++------------------------------------- 6 files changed, 29 insertions(+), 103 deletions(-) diff --git a/README.md b/README.md index d8e2cb9c..3d641717 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,8 @@ brew install curl autoconf automake libtool pkg-config Then to install the C library: +If you're using an M1 Mac, add `--disable-sse2` to the `./configure` command. This will result in poorer performance but the build will succeed. + ``` git clone https://github.com/openvenues/libpostal cd libpostal diff --git a/configure.ac b/configure.ac index ed997e32..b7339215 100644 --- a/configure.ac +++ b/configure.ac @@ -84,57 +84,20 @@ AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf availabl AS_IF([test "x$FOUND_GSHUF" = xyes], [AC_DEFINE([HAVE_GSHUF], [1], [gshuf available])]) # ------------------------------------------------------------------ -# Architecture-specific options +# Checks for SSE2 build # ------------------------------------------------------------------ -# allow enabling hardware optimization on any system: -case "$host_cpu" in - arm*|aarch64*) - enable_arm_neon=yes - enable_intel_sse=no - AC_DEFINE([ARM_NEON], [1], - [Enable ARM_NEON optimizations]) - ;; - i?86|x86_64) - enable_intel_sse=yes - enable_arm_neon=no - AC_DEFINE([INTEL_SSE], [1], - [Enable Intel SSE optimizations]) - ;; -esac - -AC_ARG_ENABLE([neon], - AS_HELP_STRING([[[--disable-neon]]], - [Disable ARM NEON hardware optimizations]), - [ - enable_arm_neon=no - AC_DEFINE([ARM_NEON], [0], - [Disable ARM_NEON optimizations]) - ]) - AC_ARG_ENABLE([sse2], - AS_HELP_STRING([[[--disable-sse2]]], - [Disable Intel SSE2 hardware optimizations]), - [ - enable_intel_sse=no - AC_DEFINE([INTEL_SSE], [0], - [Disable INTEL_SSE optimizations]) - ]) + AS_HELP_STRING( + [--disable-sse2], + [disable SSE2 optimization routines] + ) + ) -SIMDFLAGS="" - -AS_IF([test "x$enable_intel_sse" != "xno"], [ - SIMDFLAGS="-mfpmath=sse -msse2 -DINTEL_SSE" +AS_IF([test "x$enable_sse2" != "xno"], [ + CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}" ]) -AS_IF([test "x$enable_arm_neon" != "xno"], [ - SIMDFLAGS="-march=armv8-a+fp+simd+crypto+crc -DARM_NEON" -]) - -CFLAGS="${SIMDFLAGS} ${CFLAGS}" - -AC_SUBST([SIMDFLAGS], [$SIMDFLAGS]) - AC_CHECK_HEADER(cblas.h, [AX_CBLAS]) AC_ARG_ENABLE([data-download], diff --git a/src/crf_context.c b/src/crf_context.c index e8635e28..8e1a759e 100644 --- a/src/crf_context.c +++ b/src/crf_context.c @@ -40,7 +40,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { } if (context->flag & CRF_CONTEXT_MARGINALS) { -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) context->exp_state = double_matrix_new_aligned(T, L, 32); if (context->exp_state == NULL) goto exit_context_created; double_matrix_zero(context->exp_state); @@ -52,7 +52,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { context->mexp_state = double_matrix_new_zeros(T, L); if (context->mexp_state == NULL) goto exit_context_created; -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) context->exp_state_trans = double_matrix_new_aligned(T, L * L, 32); if (context->exp_state_trans == NULL) goto exit_context_created; double_matrix_zero(context->exp_state_trans); @@ -64,7 +64,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { context->mexp_state_trans = double_matrix_new_zeros(T, L * L); if (context->mexp_state_trans == NULL) goto exit_context_created; -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) context->exp_trans = double_matrix_new_aligned(L, L, 32); if (context->exp_trans == NULL) goto exit_context_created; double_matrix_zero(context->exp_trans); @@ -130,13 +130,13 @@ bool crf_context_set_num_items(crf_context_t *self, size_t T) { if (self->flag & CRF_CONTEXT_MARGINALS && ( -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) !double_matrix_resize_aligned(self->exp_state, T, L, 32) || #else !double_matrix_resize(self->exp_state, T, L) || #endif !double_matrix_resize(self->mexp_state, T, L) || -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) !double_matrix_resize_aligned(self->exp_state_trans, T, L * L, 32) || #else !double_matrix_resize(self->exp_state_trans, T, L * L) || @@ -184,7 +184,7 @@ void crf_context_destroy(crf_context_t *self) { } if (self->exp_state != NULL) { -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) double_matrix_destroy_aligned(self->exp_state); #else double_matrix_destroy(self->exp_state); @@ -200,7 +200,7 @@ void crf_context_destroy(crf_context_t *self) { } if (self->exp_state_trans != NULL) { -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) double_matrix_destroy_aligned(self->exp_state_trans); #else double_matrix_destroy(self->exp_state_trans); @@ -216,7 +216,7 @@ void crf_context_destroy(crf_context_t *self) { } if (self->exp_trans != NULL) { -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) double_matrix_destroy_aligned(self->exp_trans); #else double_matrix_destroy(self->exp_trans); diff --git a/src/vector_math.h b/src/vector_math.h index 7dbdb049..eff90466 100644 --- a/src/vector_math.h +++ b/src/vector_math.h @@ -8,10 +8,8 @@ #define ks_lt_index(a, b) ((a).value < (b).value) -#if defined(INTEL_SSE) +#if defined(USE_SSE) #include -#elif defined(ARM_NEON) -#include "sse2neon.h" #endif /* @@ -340,7 +338,7 @@ -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) /* From https://github.com/herumi/fmath/blob/master/fastexp.cpp diff --git a/test/Makefile.am b/test/Makefile.am index 5289e3c2..f2e911f2 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -5,7 +5,7 @@ CFLAGS_O2 = $(CFLAGS_BASE) -O2 CFLAGS_O3 = $(CFLAGS_BASE) -O3 DEFAULT_INCLUDES = -I.. -I/usr/local/include -CFLAGS = $(SIMDFLAGS) $(CFLAGS_BASE) +CFLAGS = $(CFLAGS_BASE) TESTS = test_libpostal noinst_PROGRAMS = test_libpostal diff --git a/windows/configure.ac b/windows/configure.ac index d19cd967..24e73fec 100644 --- a/windows/configure.ac +++ b/windows/configure.ac @@ -73,57 +73,20 @@ AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf availabl AS_IF([test "x$FOUND_GSHUF" = xyes], [AC_DEFINE([HAVE_GSHUF], [1], [gshuf available])]) # ------------------------------------------------------------------ -# Architecture-specific options +# Checks for SSE2 build # ------------------------------------------------------------------ -# allow enabling hardware optimization on any system: -case "$host_cpu" in - arm*|aarch64*) - enable_arm_neon=yes - enable_intel_sse=no - AC_DEFINE([ARM_NEON], [1], - [Enable ARM_NEON optimizations]) - ;; - i?86|x86_64) - enable_intel_sse=yes - enable_arm_neon=no - AC_DEFINE([INTEL_SSE], [1], - [Enable Intel SSE optimizations]) - ;; -esac - -AC_ARG_ENABLE([neon], - AS_HELP_STRING([[[--disable-neon]]], - [Disable ARM NEON hardware optimizations]), - [ - enable_arm_neon=no - AC_DEFINE([ARM_NEON], [0], - [Disable ARM_NEON optimizations]) - ]) - AC_ARG_ENABLE([sse2], - AS_HELP_STRING([[[--disable-sse2]]], - [Disable Intel SSE2 hardware optimizations]), - [ - enable_intel_sse=no - AC_DEFINE([INTEL_SSE], [0], - [Disable INTEL_SSE optimizations]) - ]) + AS_HELP_STRING( + [--disable-sse2], + [disable SSE2 optimization routines] + ) + ) -SIMDFLAGS="" - -AS_IF([test "x$enable_intel_sse" != "xno"], [ - SIMDFLAGS="-mfpmath=sse -msse2 -DINTEL_SSE" +AS_IF([test "x$enable_sse2" != "xno"], [ + CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}" ]) -AS_IF([test "x$enable_arm_neon" != "xno"], [ - SIMDFLAGS="-march=armv8-a+fp+simd+crypto+crc -DARM_NEON" -]) - -CFLAGS="${SIMDFLAGS} ${CFLAGS}" - -AC_SUBST([SIMDFLAGS], [$SIMDFLAGS]) - AC_CHECK_HEADER(cblas.h, [AX_CBLAS]) AC_ARG_ENABLE([data-download],