Use NEON on ARM hardware via sse2neon.h
The autoconf changes were adapted from: https://github.com/glennrp/libpng/blob/libpng16/configure.ac
This commit is contained in:
@@ -73,19 +73,126 @@ AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf availabl
|
||||
# Define HAVE_GSHUF in the configuration header when FOUND_GSHUF is "yes".
AS_IF([test "x$FOUND_GSHUF" = xyes],
      [AC_DEFINE([HAVE_GSHUF], [1], [gshuf available])])
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Checks for SSE2 build
|
||||
# Architecture-specific options
|
||||
# ------------------------------------------------------------------
|
||||
# Registers the --disable-sse2 configure switch and its --help entry; the
# result is read elsewhere through $enable_sse2.
AC_ARG_ENABLE([sse2],
  AS_HELP_STRING([--disable-sse2],
                 [disable SSE2 optimization routines])
)
|
||||
|
||||
AS_IF([test "x$enable_sse2" != "xno"], [
|
||||
CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}"
|
||||
# Pick per-architecture SIMD defaults from the host CPU.
#
# configure stores any --enable-*/--disable-* switch given on the command line
# in enable_arm_neon / enable_intel_sse before this code runs, so only fill in
# the values the user left unset.  Assigning unconditionally would silently
# override an explicit request such as --disable-intel-sse on an x86 host.
case "$host_cpu" in
  arm*|aarch64*)
    test "${enable_arm_neon+set}" = set || enable_arm_neon=yes
    test "${enable_intel_sse+set}" = set || enable_intel_sse=no
    ;;
  i?86|x86_64)
    test "${enable_intel_sse+set}" = set || enable_intel_sse=yes
    test "${enable_arm_neon+set}" = set || enable_arm_neon=no
    ;;
  *)
    # Neither instruction set is implied by this CPU: leave both off unless
    # one was requested explicitly, so that later "is it enabled?" tests never
    # see an empty value.
    test "${enable_intel_sse+set}" = set || enable_intel_sse=no
    test "${enable_arm_neon+set}" = set || enable_arm_neon=no
    ;;
esac

# Record the resulting defaults in the configuration header.  Only the "yes"
# case is defined here; the per-feature options handle explicit values.
AS_IF([test "x$enable_arm_neon" = xyes],
      [AC_DEFINE([ARM_NEON], [1],
         [Enable ARM_NEON optimizations])])
AS_IF([test "x$enable_intel_sse" = xyes],
      [AC_DEFINE([INTEL_SSE], [1],
         [Enable Intel SSE optimizations])])
|
||||
|
||||
# --disable-hardware-optimizations: master switch that turns off both SIMD
# code paths (Intel SSE2 and ARM NEON).
#
# The action below runs whenever the option appears on the command line, in
# either its --enable or its --disable form, so it has to inspect $enableval.
# Without that check --enable-hardware-optimizations would disable everything
# as well.
AC_ARG_ENABLE([hardware-optimizations],
  AS_HELP_STRING([[[--disable-hardware-optimizations]]],
    [Disable hardware optimizations (Intel SSE2 / ARM NEON)]),
  [case "$enableval" in
     no|off)
       # disable hardware optimization on all systems:
       enable_arm_neon=no
       AC_DEFINE([ARM_NEON], [0],
         [Disable ARM_NEON optimizations])
       enable_intel_sse=no
       AC_DEFINE([INTEL_SSE], [0],
         [Disable INTEL_SSE optimizations])
       ;;
     yes|on)
       # keep whatever the architecture-specific settings select
       :
       ;;
     *)
       AC_MSG_ERROR([--enable-hardware-optimizations=${enableval}: invalid value])
       ;;
   esac])
|
||||
|
||||
# INTEL
|
||||
# =====
|
||||
#
|
||||
# INTEL SSE (SIMD) support.
|
||||
|
||||
# Per-feature switch for the Intel SSE code path.  The handler maps the
# accepted spellings (no/off, yes/on) onto enable_intel_sse=no|yes, records
# the choice through the INTEL_SSE define, and aborts configure on any other
# value.
AC_ARG_ENABLE([intel-sse],
  AS_HELP_STRING([[[--enable-intel-sse]]],
    [Enable Intel SSE optimizations: =no/off, yes/on:]
    [no/off: disable the optimizations;]
    [yes/on: enable the optimizations.]
    [If not specified: determined by the compiler.]),
  [AS_CASE(["$enableval"],
     [yes|on], [
       enable_intel_sse=yes
       AC_DEFINE([INTEL_SSE], [1],
         [Enable Intel SSE optimizations])],
     [no|off], [
       # Turn off the default enabling; enable_intel_sse=no is what the
       # INTEL_SSE conditional further down keys on.
       AC_DEFINE([INTEL_SSE], [0],
         [Disable Intel SSE optimizations])
       enable_intel_sse=no],
     [AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}: invalid value])])])
|
||||
|
||||
# INTEL_SSE automake conditional (used to add the Intel-specific files).
#   - enable_intel_sse = no        -> false, whatever the host is
#   - host_cpu is i?86 or x86_64   -> true
#   - any other host               -> true only if enable_intel_sse is
#                                     non-empty, i.e. it was requested; this
#                                     is the fallback for future CPU names
#                                     that do not match 'x86*'
AM_CONDITIONAL([INTEL_SSE],
  [if test "$enable_intel_sse" = 'no'; then
     false
   else
     case "$host_cpu" in
       i?86|x86_64) : ;;
       *) test "$enable_intel_sse" != '' ;;
     esac
   fi])
|
||||
|
||||
# ARM
|
||||
# ===
|
||||
#
|
||||
# ARM NEON (SIMD) support.
|
||||
|
||||
# Per-feature switch for the ARM NEON code path.
#
# Only no/off and yes/on are accepted; every other value aborts configure.
# The help text therefore lists exactly those values.  The "check" and "api"
# modes and the png_set_option reference inherited from the libpng template
# are not implemented here and were rejected as invalid.
AC_ARG_ENABLE([arm-neon],
  AS_HELP_STRING([[[--enable-arm-neon]]],
    [Enable ARM NEON optimizations: =no/off, yes/on:]
    [no/off: disable the optimizations;]
    [yes/on: enable the optimizations.]
    [If not specified: determined by the host CPU.]),
  [case "$enableval" in
     no|off)
       # disable the default enabling on ARM hosts:
       AC_DEFINE([ARM_NEON], [0],
         [Disable ARM Neon optimizations])
       # Make the ARM_NEON conditional below evaluate to false:
       enable_arm_neon=no;;
     yes|on)
       enable_arm_neon=yes
       AC_DEFINE([ARM_NEON], [1],
         [Enable ARM Neon optimizations]);;
     *)
       AC_MSG_ERROR([--enable-arm-neon=${enable_arm_neon}: invalid value])
   esac])
|
||||
|
||||
# ARM_NEON automake conditional (used to add the ARM-specific files).
#   - enable_arm_neon = no          -> false, whatever the host is
#   - host_cpu is arm* or aarch64*  -> true
#   - any other host                -> true only if enable_arm_neon is
#                                      non-empty, i.e. it was requested; this
#                                      is the fallback for future CPU names
#                                      that do not match 'arm*'
AM_CONDITIONAL([ARM_NEON],
  [if test "$enable_arm_neon" = 'no'; then
     false
   else
     case "$host_cpu" in
       arm*|aarch64*) : ;;
       *) test "$enable_arm_neon" != '' ;;
     esac
   fi])
|
||||
|
||||
# Compiler flags for the selected SIMD back end.  They are prepended to CFLAGS
# and substituted into the generated files as @SIMDFLAGS@.
#
# The tests compare against "yes" rather than "not no".  On a host that is
# neither x86 nor ARM both enable_* variables can be empty, and a "!= no" test
# would then push x86 and ARMv8 machine flags onto a compiler that cannot
# accept them.
SIMDFLAGS=""

AS_IF([test "x$enable_intel_sse" = "xyes"], [
  SIMDFLAGS="-mfpmath=sse -msse2 -DINTEL_SSE"
])

# NEON is tested last, so it takes precedence if both back ends are enabled.
# NOTE(review): the -march string requests the crypto and CRC extensions and
# uses ARMv8 syntax; confirm it is acceptable for 32-bit arm* hosts and for
# ARMv8 cores that lack those extensions.
AS_IF([test "x$enable_arm_neon" = "xyes"], [
  SIMDFLAGS="-march=armv8-a+fp+simd+crypto+crc -DARM_NEON"
])

CFLAGS="${SIMDFLAGS} ${CFLAGS}"

AC_SUBST([SIMDFLAGS], [$SIMDFLAGS])
|
||||
|
||||
# Run the AX_CBLAS macro only when cblas.h is found.
AC_CHECK_HEADER([cblas.h],
                [AX_CBLAS])
|
||||
|
||||
AC_ARG_ENABLE([data-download],
|
||||
|
||||
Reference in New Issue
Block a user