Use NEON on ARM hardware via sse2neon.h

The autoconf changes were adapted from:
https://github.com/glennrp/libpng/blob/libpng16/configure.ac
This commit is contained in:
Dino Kovač
2022-04-16 22:48:59 +02:00
parent 893745f09b
commit 6064bc6c06
7 changed files with 9025 additions and 32 deletions

View File

@@ -40,7 +40,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) {
}
if (context->flag & CRF_CONTEXT_MARGINALS) {
-#ifdef USE_SSE
+#if defined(INTEL_SSE) || defined(ARM_NEON)
context->exp_state = double_matrix_new_aligned(T, L, 16);
if (context->exp_state == NULL) goto exit_context_created;
double_matrix_zero(context->exp_state);
@@ -52,7 +52,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) {
context->mexp_state = double_matrix_new_zeros(T, L);
if (context->mexp_state == NULL) goto exit_context_created;
-#ifdef USE_SSE
+#if defined(INTEL_SSE) || defined(ARM_NEON)
context->exp_state_trans = double_matrix_new_aligned(T, L * L, 16);
if (context->exp_state_trans == NULL) goto exit_context_created;
double_matrix_zero(context->exp_state_trans);
@@ -64,7 +64,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) {
context->mexp_state_trans = double_matrix_new_zeros(T, L * L);
if (context->mexp_state_trans == NULL) goto exit_context_created;
-#ifdef USE_SSE
+#if defined(INTEL_SSE) || defined(ARM_NEON)
context->exp_trans = double_matrix_new_aligned(L, L, 16);
if (context->exp_trans == NULL) goto exit_context_created;
double_matrix_zero(context->exp_trans);
@@ -130,13 +130,13 @@ bool crf_context_set_num_items(crf_context_t *self, size_t T) {
if (self->flag & CRF_CONTEXT_MARGINALS &&
(
-#ifdef USE_SSE
+#if defined(INTEL_SSE) || defined(ARM_NEON)
!double_matrix_resize_aligned(self->exp_state, T, L, 16) ||
#else
!double_matrix_resize(self->exp_state, T, L) ||
#endif
!double_matrix_resize(self->mexp_state, T, L) ||
-#ifdef USE_SSE
+#if defined(INTEL_SSE) || defined(ARM_NEON)
!double_matrix_resize_aligned(self->exp_state_trans, T, L * L, 16) ||
#else
!double_matrix_resize(self->exp_state_trans, T, L * L) ||
@@ -184,7 +184,7 @@ void crf_context_destroy(crf_context_t *self) {
}
if (self->exp_state != NULL) {
-#ifdef USE_SSE
+#if defined(INTEL_SSE) || defined(ARM_NEON)
double_matrix_destroy_aligned(self->exp_state);
#else
double_matrix_destroy(self->exp_state);
@@ -200,7 +200,7 @@ void crf_context_destroy(crf_context_t *self) {
}
if (self->exp_state_trans != NULL) {
-#ifdef USE_SSE
+#if defined(INTEL_SSE) || defined(ARM_NEON)
double_matrix_destroy_aligned(self->exp_state_trans);
#else
double_matrix_destroy(self->exp_state_trans);
@@ -216,7 +216,7 @@ void crf_context_destroy(crf_context_t *self) {
}
if (self->exp_trans != NULL) {
-#ifdef USE_SSE
+#if defined(INTEL_SSE) || defined(ARM_NEON)
double_matrix_destroy_aligned(self->exp_trans);
#else
double_matrix_destroy(self->exp_trans);

8853
src/sse2neon.h Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -8,8 +8,10 @@
#define ks_lt_index(a, b) ((a).value < (b).value)
-#ifdef USE_SSE
+#if defined(INTEL_SSE)
#include <emmintrin.h>
+#elif defined(ARM_NEON)
+#include "sse2neon.h"
#endif
/*
@@ -338,7 +340,7 @@
-#ifdef USE_SSE
+#if defined(INTEL_SSE) || defined(ARM_NEON)
/*
From https://github.com/herumi/fmath/blob/master/fastexp.cpp
@@ -524,4 +526,4 @@ static inline void remez9_0_log2_sse(double *values, size_t num)
#endif
#endif