Merge branch 'openvenues:master' into master
This commit is contained in:
36
.github/workflows/test.yml
vendored
Normal file
36
.github/workflows/test.yml
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
# CI workflow: builds libpostal and runs its test suite on Linux and macOS.
name: Test

on:
  push:
    branches: [master]
  pull_request:
    branches: [master]
  # Allows the workflow to be started manually.
  workflow_dispatch:

jobs:
  build_and_test:
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - name: Install Dependencies Linux
        if: runner.os == 'Linux'
        run: |
          sudo apt-get update -y
          # -y keeps the install non-interactive; there is no TTY to confirm on.
          sudo apt-get install -y curl autoconf automake libtool pkg-config
      - name: Install Dependencies MacOS
        if: runner.os == 'macOS'
        run: |
          brew update
          brew install curl autoconf automake libtool pkg-config
      - name: Build
        env:
          # `env:` values are not shell-expanded, so the workspace path must
          # come from an Actions expression rather than ${GITHUB_WORKSPACE}.
          LIBPOSTAL_DATA_DIR: ${{ github.workspace }}/data
        run: |
          ./bootstrap.sh
          ./configure --datadir="$LIBPOSTAL_DATA_DIR"
          make
      - name: Test
        run: make check
|
||||
83
.travis.yml
83
.travis.yml
@@ -1,83 +0,0 @@
|
||||
language: c
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
env:
|
||||
global:
|
||||
- secure: "bHrAu46oecEj3gjamT+XWXtf2J0ZJCFa8tUdgM4evscaJiiwv1TtsGXyhIj/ai7DlRIPVJUtBUy6uoGGjr6GT43zTrzSxYAOMdVXZYsnTDcdL1/0dbwcIK6/u0EI377s1buGIxG1fHveWKXuXwJWDAw4KS+5HU88a42+zMbhKe4="
|
||||
- secure: "SkvNYucKVns9qDjOEW2WIhDlOMKBOwhzVcwY++HWTRtn04ErrqR4k01Mmho0jGBQD9JrPLhDgnX1BNy5s+Kmq/bxn9OZm7K1z24qBKb0mBBiNEnf2jvT0AvF5xxM+cJf4KKNL+CC0MwNf5y7HVPq1xibOV4/CNIrc1ZZc9aqdkE="
|
||||
- secure: "am/rRca5akv7gSSMeNQfHnWiTHhk8fQhOZvZ0Ut+PezkQlLgKp7bzmMFkkuQ4L5hpJU40kFzuWmIPgO33dacgq69Vx/Xct1bEnxGBGjriI5qOhMizmzLYPs5uWiRjtJnBqb4JOUh5K7JBlwrgvD72fY5ZK2lwtzTksfWo8N+ahU="
|
||||
- secure: "mh/WDQapGJb6MAFvgCjiMAAv1aa8gUaIs2Ohtx7yPrDBwsD8UqlyEM7ktGLZGQ1q/7OJ/Z6QfDMfJQwDKzxyUSY1yHZTNkP3QzkTt2D1Qyvi++O6EkGqSdSS6Lb3aID3IsEaye/yasJ+rxiRSp05O9+OYvhJlqRZnzaimiAv5KI="
|
||||
- secure: "OGNJ6Cj3trq4nASgm4BK331aij+FZ11St7/YF9rfxeQBwg4MCPH2+D0jvAULBHvJR7K2RmepX/FG5d4S+rtwKNGngg3ovPdd1MbwFltHpn5/KM+hxe7kCZx2+V9/FN+4YSyO0zSUDra6AXHOs72mfyrZoB3a36SS4lg2sAp33gU="
|
||||
- GH_REF=github.com/openvenues/libpostal
|
||||
- DICTIONARIES_CHANGED=$(git diff --name-only $TRAVIS_COMMIT_RANGE | grep "resources/dictionaries/.*/.*.txt\|src/gazetteer_data.c" | wc -l)
|
||||
- NUMEX_CHANGED=$(git diff --name-only $TRAVIS_COMMIT_RANGE | grep "resources/numex\|src/numex_table_builder.c" | wc -l)
|
||||
- TRANSLIT_CHANGED=$(git diff --name-only $TRAVIS_COMMIT_RANGE | grep "src/transliteration_data.c" | wc -l)
|
||||
- TAG_VERSION=$(cat ./versions/base).$TRAVIS_BUILD_NUMBER
|
||||
- SRC_TARBALL_FILENAME=libpostal-$(cat ./versions/base).tar.gz
|
||||
- LIBPOSTAL_DATA_DIR=$(pwd)/data
|
||||
- LIBPOSTAL_DATA_FILENAME=libpostal_data.tar.gz
|
||||
compiler:
|
||||
- clang
|
||||
- gcc
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- gcc-4.8
|
||||
- pkg-config
|
||||
before_script:
|
||||
- ./bootstrap.sh
|
||||
- if [[ $DICTIONARIES_CHANGED -ne 0 || $NUMEX_CHANGED -ne 0 ]]; then git clone https://github.com/pypa/virtualenv; cd virtualenv; git checkout master; python virtualenv.py ../env; cd ..; env/bin/pip install -r scripts/requirements-simple.txt; fi;
|
||||
- if [ $NUMEX_CHANGED -ne 0 ]; then env/bin/python scripts/geodata/numbers/numex.py; fi;
|
||||
- if [ $DICTIONARIES_CHANGED -ne 0 ]; then env/bin/python scripts/geodata/address_expansions/address_dictionaries.py; fi;
|
||||
install:
|
||||
- if [ "$CC" = "gcc" ]; then export CC="gcc-4.8"; fi
|
||||
script:
|
||||
- ./configure --datadir=$LIBPOSTAL_DATA_DIR
|
||||
- make -j4
|
||||
- if [[ $DICTIONARIES_CHANGED -ne 0 ]]; then ./src/build_address_dictionary; fi;
|
||||
- if [[ $NUMEX_CHANGED -ne 0 ]]; then ./src/build_numex_table; fi;
|
||||
- if [[ $TRANSLIT_CHANGED -ne 0 ]]; then ./src/build_trans_table; fi;
|
||||
- make check
|
||||
|
||||
after_success:
|
||||
- |
|
||||
if [[ "$CC" == "gcc" && "$TRAVIS_PULL_REQUEST" = "false" && "$TRAVIS_BRANCH" = "master" ]]; then
|
||||
if [[ ( $DICTIONARIES_CHANGED -ne 0 || $NUMEX_CHANGED -ne 0 || $TRANSLIT_CHANGED -ne 0 ) ]]; then
|
||||
export PATH=$PATH:env/bin/;
|
||||
git clone -b master "https://${GH_TOKEN}@${GH_REF}" _travis > /dev/null 2>&1 || exit 1
|
||||
cp src/*_data.c _travis/src
|
||||
echo "$TAG_VERSION" > _travis/versions/base_data
|
||||
cd _travis
|
||||
git config user.name "$GIT_COMMITTER_NAME";
|
||||
git config user.email "$GIT_COMMITTER_EMAIL";
|
||||
git commit -a -m "[auto][ci skip] Adding data files from Travis build #$TRAVIS_BUILD_NUMBER";
|
||||
git push --quiet origin master;
|
||||
|
||||
tar -C $LIBPOSTAL_DATA_DIR -cvzf $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_DATA_FILENAME $BASIC_MODULE_DIRS
|
||||
fi
|
||||
git tag $TAG_VERSION -a -m "[auto][ci skip] Generating tag for Travis build #$TRAVIS_BUILD_NUMBER";
|
||||
git push --tags --quiet origin master;
|
||||
fi;
|
||||
|
||||
before_deploy:
|
||||
- make dist
|
||||
|
||||
deploy:
|
||||
- provider: releases
|
||||
file:
|
||||
- "$SRC_TARBALL_FILENAME"
|
||||
on:
|
||||
tags: true
|
||||
branch: master
|
||||
skip_cleanup: true
|
||||
- provider: releases
|
||||
file:
|
||||
- "$LIBPOSTAL_DATA_DIR/$LIBPOSTAL_DATA_FILENAME"
|
||||
on:
|
||||
tags: true
|
||||
branch: master
|
||||
condition: "$CC = gcc && ( $DICTIONARIES_CHANGED -ne 0 || $NUMEX_CHANGED -ne 0 || $TRANSLIT_CHANGED -ne 0 )"
|
||||
skip_cleanup: true
|
||||
@@ -1,6 +1,6 @@
|
||||
## Submitting Issues
|
||||
|
||||
When submitting issues to libpostal, please repeect these guildelines:
|
||||
When submitting issues to libpostal, please respect these guidelines:
|
||||
|
||||
- Be constructive. Try to help solve the problem.
|
||||
- Always search for existing issues before submitting one.
|
||||
|
||||
20
README.md
20
README.md
@@ -113,6 +113,8 @@ brew install curl autoconf automake libtool pkg-config
|
||||
|
||||
Then to install the C library:
|
||||
|
||||
If you're using an M1 Mac, add `--disable-sse2` to the `./configure` command. This will result in poorer performance but the build will succeed.
|
||||
|
||||
```
|
||||
git clone https://github.com/openvenues/libpostal
|
||||
cd libpostal
|
||||
@@ -175,6 +177,24 @@ If you require a .lib import library to link this to your application. You can g
|
||||
lib.exe /def:libpostal.def /out:libpostal.lib /machine:x64
|
||||
```
|
||||
|
||||
Installation with an alternative data model
|
||||
-------------------------------------------
|
||||
|
||||
An alternative data model is available for libpostal. It is created by Senzing Inc. for improved parsing on US, UK and Singapore addresses and improved US rural route address handling.
|
||||
To enable this, add `MODEL=senzing` to the configure line during installation:
|
||||
```
|
||||
./configure --datadir=[...some dir with a few GB of space...] MODEL=senzing
|
||||
```
|
||||
|
||||
The data for this model comes from [OpenAddresses](https://openaddresses.io/), [OpenStreetMap](https://www.openstreetmap.org/) and data generated by Senzing based on customer feedback (a few hundred records): a total of about 1.2 billion records from over 230 countries, in 100+ languages. The data from OpenStreetMap and OpenAddresses is good but not perfect, so the data set was modified by filtering out badly formed addresses, correcting misclassified address tokens and removing tokens that didn't belong in the addresses, whenever these conditions were encountered.
|
||||
|
||||
Senzing created a data set of 12950 addresses from 89 countries that it uses to test and verify the quality of its models. The data set was generated using random addresses from OSM, with a minimum of 50 per country. Hard-to-parse addresses were collected from the Senzing support team and customers and from the libpostal GitHub page, and added to this set. The Senzing model got 4.3% better parsing results than the default model on this test set.
|
||||
|
||||
The size of this model is about 2.2GB compared to 1.8GB for the default model, so keep that in mind if storage space is important.
|
||||
|
||||
Further information about this data model can be found at: https://github.com/Senzing/libpostal-data
|
||||
If you run into any issues with this model, whether they have to do with parses, installation or any other problems, then please report them at https://github.com/Senzing/libpostal-data
|
||||
|
||||
Examples of parsing
|
||||
-------------------
|
||||
|
||||
|
||||
67
configure.ac
67
configure.ac
@@ -60,6 +60,17 @@ AC_SUBST([LIBPOSTAL_DATA_FILE_LATEST_VERSION], [$DATA_FILE_LATEST_VERSION])
|
||||
AC_SUBST([LIBPOSTAL_PARSER_MODEL_LATEST_VERSION], [$PARSER_MODEL_LATEST_VERSION])
|
||||
AC_SUBST([LIBPOSTAL_LANG_CLASS_MODEL_LATEST_VERSION], [$LANG_CLASS_MODEL_LATEST_VERSION])
|
||||
|
||||
# Senzing data
|
||||
AC_SUBST([LIBPOSTAL_SENZING_DATA_DIR_VERSION_STRING], [v1])
|
||||
|
||||
SENZING_DATA_FILE_LATEST_VERSION=$(cat $srcdir/versions/senzing/base_data)
|
||||
SENZING_PARSER_MODEL_LATEST_VERSION=$(cat $srcdir/versions/senzing/parser)
|
||||
SENZING_LANG_CLASS_MODEL_LATEST_VERSION=$(cat $srcdir/versions/senzing/language_classifier)
|
||||
|
||||
AC_SUBST([LIBPOSTAL_SENZING_DATA_FILE_LATEST_VERSION], [$SENZING_DATA_FILE_LATEST_VERSION])
|
||||
AC_SUBST([LIBPOSTAL_SENZING_PARSER_MODEL_LATEST_VERSION], [$SENZING_PARSER_MODEL_LATEST_VERSION])
|
||||
AC_SUBST([LIBPOSTAL_SENZING_LANG_CLASS_MODEL_LATEST_VERSION], [$SENZING_LANG_CLASS_MODEL_LATEST_VERSION])
|
||||
|
||||
AC_CONFIG_FILES([Makefile
|
||||
libpostal.pc
|
||||
src/Makefile
|
||||
@@ -73,57 +84,20 @@ AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf availabl
|
||||
AS_IF([test "x$FOUND_GSHUF" = xyes], [AC_DEFINE([HAVE_GSHUF], [1], [gshuf available])])
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Architecture-specific options
|
||||
# Checks for SSE2 build
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
# allow enabling hardware optimization on any system:
|
||||
case "$host_cpu" in
|
||||
arm*|aarch64*)
|
||||
enable_arm_neon=yes
|
||||
enable_intel_sse=no
|
||||
AC_DEFINE([ARM_NEON], [1],
|
||||
[Enable ARM_NEON optimizations])
|
||||
;;
|
||||
i?86|x86_64)
|
||||
enable_intel_sse=yes
|
||||
enable_arm_neon=no
|
||||
AC_DEFINE([INTEL_SSE], [1],
|
||||
[Enable Intel SSE optimizations])
|
||||
;;
|
||||
esac
|
||||
|
||||
AC_ARG_ENABLE([neon],
|
||||
AS_HELP_STRING([[[--disable-neon]]],
|
||||
[Disable ARM NEON hardware optimizations]),
|
||||
[
|
||||
enable_arm_neon=no
|
||||
AC_DEFINE([ARM_NEON], [0],
|
||||
[Disable ARM_NEON optimizations])
|
||||
])
|
||||
|
||||
AC_ARG_ENABLE([sse2],
|
||||
AS_HELP_STRING([[[--disable-sse2]]],
|
||||
[Disable Intel SSE2 hardware optimizations]),
|
||||
[
|
||||
enable_intel_sse=no
|
||||
AC_DEFINE([INTEL_SSE], [0],
|
||||
[Disable INTEL_SSE optimizations])
|
||||
])
|
||||
AS_HELP_STRING(
|
||||
[--disable-sse2],
|
||||
[disable SSE2 optimization routines]
|
||||
)
|
||||
)
|
||||
|
||||
SIMDFLAGS=""
|
||||
|
||||
AS_IF([test "x$enable_intel_sse" != "xno"], [
|
||||
SIMDFLAGS="-mfpmath=sse -msse2 -DINTEL_SSE"
|
||||
AS_IF([test "x$enable_sse2" != "xno"], [
|
||||
CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}"
|
||||
])
|
||||
|
||||
AS_IF([test "x$enable_arm_neon" != "xno"], [
|
||||
SIMDFLAGS="-march=armv8-a+fp+simd+crypto+crc -DARM_NEON"
|
||||
])
|
||||
|
||||
CFLAGS="${SIMDFLAGS} ${CFLAGS}"
|
||||
|
||||
AC_SUBST([SIMDFLAGS], [$SIMDFLAGS])
|
||||
|
||||
AC_CHECK_HEADER(cblas.h, [AX_CBLAS])
|
||||
|
||||
AC_ARG_ENABLE([data-download],
|
||||
@@ -134,6 +108,9 @@ AC_ARG_ENABLE([data-download],
|
||||
*) AC_MSG_ERROR([bad value ${enableval} for --disable-data-download]) ;;
|
||||
esac], [DOWNLOAD_DATA=true])
|
||||
|
||||
AC_ARG_VAR(MODEL, [Option to use alternative data models. Currently available is "senzing" (MODEL=senzing). If this option is not set the default libpostal data model is used.])
|
||||
AS_VAR_IF([MODEL], [], [],
|
||||
[AS_VAR_IF([MODEL], [senzing], [], [AC_MSG_FAILURE([Invalid MODEL value set])])])
|
||||
|
||||
AM_CONDITIONAL([DOWNLOAD_DATA], [test "x$DOWNLOAD_DATA" = "xtrue"])
|
||||
|
||||
|
||||
@@ -152,11 +152,21 @@ if test $ax_cblas_ok = no; then
|
||||
[], [-lblas])])
|
||||
fi
|
||||
|
||||
# BLAS in OpenBLAS library?
|
||||
if test $ax_cblas_ok = no; then
|
||||
AC_CHECK_LIB(openblas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lopenblas"])
|
||||
fi
|
||||
|
||||
# Generic CBLAS library?
|
||||
if test $ax_cblas_ok = no; then
|
||||
AC_CHECK_LIB(cblas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lcblas"])
|
||||
fi
|
||||
|
||||
# Generic BLAS library?
|
||||
if test $ax_cblas_ok = no; then
|
||||
AC_CHECK_LIB(blas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lblas"])
|
||||
fi
|
||||
|
||||
AC_SUBST(CBLAS_LIBS)
|
||||
|
||||
LIBS="$ax_cblas_save_LIBS"
|
||||
|
||||
@@ -40,8 +40,8 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) {
|
||||
}
|
||||
|
||||
if (context->flag & CRF_CONTEXT_MARGINALS) {
|
||||
#if defined(INTEL_SSE) || defined(ARM_NEON)
|
||||
context->exp_state = double_matrix_new_aligned(T, L, 16);
|
||||
#if defined(USE_SSE)
|
||||
context->exp_state = double_matrix_new_aligned(T, L, 32);
|
||||
if (context->exp_state == NULL) goto exit_context_created;
|
||||
double_matrix_zero(context->exp_state);
|
||||
#else
|
||||
@@ -52,8 +52,8 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) {
|
||||
context->mexp_state = double_matrix_new_zeros(T, L);
|
||||
if (context->mexp_state == NULL) goto exit_context_created;
|
||||
|
||||
#if defined(INTEL_SSE) || defined(ARM_NEON)
|
||||
context->exp_state_trans = double_matrix_new_aligned(T, L * L, 16);
|
||||
#if defined(USE_SSE)
|
||||
context->exp_state_trans = double_matrix_new_aligned(T, L * L, 32);
|
||||
if (context->exp_state_trans == NULL) goto exit_context_created;
|
||||
double_matrix_zero(context->exp_state_trans);
|
||||
#else
|
||||
@@ -64,8 +64,8 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) {
|
||||
context->mexp_state_trans = double_matrix_new_zeros(T, L * L);
|
||||
if (context->mexp_state_trans == NULL) goto exit_context_created;
|
||||
|
||||
#if defined(INTEL_SSE) || defined(ARM_NEON)
|
||||
context->exp_trans = double_matrix_new_aligned(L, L, 16);
|
||||
#if defined(USE_SSE)
|
||||
context->exp_trans = double_matrix_new_aligned(L, L, 32);
|
||||
if (context->exp_trans == NULL) goto exit_context_created;
|
||||
double_matrix_zero(context->exp_trans);
|
||||
#else
|
||||
@@ -130,14 +130,14 @@ bool crf_context_set_num_items(crf_context_t *self, size_t T) {
|
||||
|
||||
if (self->flag & CRF_CONTEXT_MARGINALS &&
|
||||
(
|
||||
#if defined(INTEL_SSE) || defined(ARM_NEON)
|
||||
!double_matrix_resize_aligned(self->exp_state, T, L, 16) ||
|
||||
#if defined(USE_SSE)
|
||||
!double_matrix_resize_aligned(self->exp_state, T, L, 32) ||
|
||||
#else
|
||||
!double_matrix_resize(self->exp_state, T, L) ||
|
||||
#endif
|
||||
!double_matrix_resize(self->mexp_state, T, L) ||
|
||||
#if defined(INTEL_SSE) || defined(ARM_NEON)
|
||||
!double_matrix_resize_aligned(self->exp_state_trans, T, L * L, 16) ||
|
||||
#if defined(USE_SSE)
|
||||
!double_matrix_resize_aligned(self->exp_state_trans, T, L * L, 32) ||
|
||||
#else
|
||||
!double_matrix_resize(self->exp_state_trans, T, L * L) ||
|
||||
#endif
|
||||
@@ -184,7 +184,7 @@ void crf_context_destroy(crf_context_t *self) {
|
||||
}
|
||||
|
||||
if (self->exp_state != NULL) {
|
||||
#if defined(INTEL_SSE) || defined(ARM_NEON)
|
||||
#if defined(USE_SSE)
|
||||
double_matrix_destroy_aligned(self->exp_state);
|
||||
#else
|
||||
double_matrix_destroy(self->exp_state);
|
||||
@@ -200,7 +200,7 @@ void crf_context_destroy(crf_context_t *self) {
|
||||
}
|
||||
|
||||
if (self->exp_state_trans != NULL) {
|
||||
#if defined(INTEL_SSE) || defined(ARM_NEON)
|
||||
#if defined(USE_SSE)
|
||||
double_matrix_destroy_aligned(self->exp_state_trans);
|
||||
#else
|
||||
double_matrix_destroy(self->exp_state_trans);
|
||||
@@ -216,7 +216,7 @@ void crf_context_destroy(crf_context_t *self) {
|
||||
}
|
||||
|
||||
if (self->exp_trans != NULL) {
|
||||
#if defined(INTEL_SSE) || defined(ARM_NEON)
|
||||
#if defined(USE_SSE)
|
||||
double_matrix_destroy_aligned(self->exp_trans);
|
||||
#else
|
||||
double_matrix_destroy(self->exp_trans);
|
||||
|
||||
@@ -198,7 +198,7 @@ bool file_write_float(FILE *file, float value) {
|
||||
}
|
||||
|
||||
inline uint32_t file_deserialize_uint32(unsigned char *buf) {
|
||||
return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
|
||||
return ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) | ((uint32_t)buf[2] << 8) | (uint32_t)buf[3];
|
||||
}
|
||||
|
||||
bool file_read_uint32(FILE *file, uint32_t *value) {
|
||||
@@ -243,7 +243,7 @@ bool file_write_uint32(FILE *file, uint32_t value) {
|
||||
|
||||
|
||||
inline uint16_t file_deserialize_uint16(unsigned char *buf) {
|
||||
return (buf[0] << 8) | buf[1];
|
||||
return ((uint16_t)buf[0] << 8) | buf[1];
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -14,6 +14,8 @@ LIBPOSTAL_DATA_DIR=$3
|
||||
MB=$((1024*1024))
|
||||
CHUNK_SIZE=$((64*$MB))
|
||||
|
||||
DATAMODEL="@MODEL@"
|
||||
|
||||
# Not loving this approach but there appears to be no way to query the size
|
||||
# of a release asset without using the Github API
|
||||
LIBPOSTAL_DATA_FILE_CHUNKS=1
|
||||
@@ -34,6 +36,20 @@ LIBPOSTAL_LANG_CLASS_FILE="language_classifier.tar.gz"
|
||||
|
||||
LIBPOSTAL_BASE_URL="https://github.com/$LIBPOSTAL_REPO_NAME/releases/download"
|
||||
|
||||
if [ "$DATAMODEL" = "senzing" ]; then
|
||||
LIBPOSTAL_DATA_FILE_CHUNKS=1
|
||||
LIBPOSTAL_PARSER_MODEL_CHUNKS=1
|
||||
LIBPOSTAL_LANG_CLASS_MODEL_CHUNKS=1
|
||||
|
||||
LIBPOSTAL_DATA_DIR_VERSION_STRING="@LIBPOSTAL_SENZING_DATA_DIR_VERSION_STRING@"
|
||||
|
||||
LIBPOSTAL_DATA_FILE_LATEST_VERSION="@LIBPOSTAL_SENZING_DATA_FILE_LATEST_VERSION@"
|
||||
LIBPOSTAL_PARSER_MODEL_LATEST_VERSION="@LIBPOSTAL_SENZING_PARSER_MODEL_LATEST_VERSION@"
|
||||
LIBPOSTAL_LANG_CLASS_MODEL_LATEST_VERSION="@LIBPOSTAL_SENZING_LANG_CLASS_MODEL_LATEST_VERSION@"
|
||||
|
||||
LIBPOSTAL_BASE_URL="https://public-read-libpostal-data.s3.amazonaws.com"
|
||||
fi
|
||||
|
||||
LIBPOSTAL_DATA_VERSION_FILE=$LIBPOSTAL_DATA_DIR/data_version
|
||||
LIBPOSTAL_DATA_DIR_VERSION=
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ typedef enum {
|
||||
matrix->m = m; \
|
||||
matrix->n = n; \
|
||||
\
|
||||
matrix->values = _aligned_malloc(sizeof(type) * m * n, alignment); \
|
||||
matrix->values = aligned_malloc(sizeof(type) * m * n, alignment); \
|
||||
if (matrix->values == NULL) { \
|
||||
free(matrix); \
|
||||
return NULL; \
|
||||
@@ -86,7 +86,7 @@ typedef enum {
|
||||
if (self == NULL) return; \
|
||||
\
|
||||
if (self->values != NULL) { \
|
||||
_aligned_free(self->values); \
|
||||
aligned_free(self->values); \
|
||||
} \
|
||||
\
|
||||
free(self); \
|
||||
@@ -118,7 +118,7 @@ typedef enum {
|
||||
if (self == NULL) return false; \
|
||||
\
|
||||
if (m * n > (self->m * self->n)) { \
|
||||
type *ptr = _aligned_realloc(self->values, sizeof(type) * m * n, alignment); \
|
||||
type *ptr = aligned_resize(self->values, sizeof(type) * self->m * self->n, sizeof(type) * m * n, alignment); \
|
||||
if (ptr == NULL) { \
|
||||
return false; \
|
||||
} \
|
||||
|
||||
47
src/vector.h
47
src/vector.h
@@ -7,43 +7,44 @@
|
||||
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__)
|
||||
#include <malloc.h>
|
||||
static inline void *aligned_malloc(size_t size, size_t alignment) {
|
||||
return _aligned_malloc(size, alignment);
|
||||
}
|
||||
static inline void *aligned_resize(void *p, size_t old_size, size_t new_size, size_t alignment) {
|
||||
return _aligned_realloc(p, new_size, alignment);
|
||||
}
|
||||
static inline void aligned_free(void *p) {
|
||||
_aligned_free(p);
|
||||
}
|
||||
#else
|
||||
#include <stdlib.h>
|
||||
static inline void *_aligned_malloc(size_t size, size_t alignment)
|
||||
static inline void *aligned_malloc(size_t size, size_t alignment)
|
||||
{
|
||||
void *p;
|
||||
int ret = posix_memalign(&p, alignment, size);
|
||||
return (ret == 0) ? p : NULL;
|
||||
}
|
||||
static inline void *_aligned_realloc(void *p, size_t size, size_t alignment)
|
||||
static inline void *aligned_resize(void *p, size_t old_size, size_t new_size, size_t alignment)
|
||||
{
|
||||
if ((alignment == 0) || ((alignment & (alignment - 1)) != 0) || (alignment < sizeof(void *))) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (size == 0) {
|
||||
if (p == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void *rp = realloc(p, size);
|
||||
|
||||
/* If realloc result is not already at an aligned boundary,
|
||||
_aligned_malloc a new block and copy the contents of the realloc'd
|
||||
pointer to the aligned block, free the realloc'd pointer and return
|
||||
the aligned pointer.
|
||||
*/
|
||||
if ( ((size_t)rp & (alignment - 1)) != 0) {
|
||||
void *p1 = _aligned_malloc(size, alignment);
|
||||
if (p1 != NULL) {
|
||||
memcpy(p1, rp, size);
|
||||
}
|
||||
free(rp);
|
||||
rp = p1;
|
||||
void *p1 = aligned_malloc(new_size, alignment);
|
||||
if (p1 == NULL) {
|
||||
free(p);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return rp;
|
||||
memcpy(p1, p, old_size);
|
||||
free(p);
|
||||
return p1;
|
||||
}
|
||||
static inline void _aligned_free(void *p)
|
||||
static inline void aligned_free(void *p)
|
||||
{
|
||||
free(p);
|
||||
}
|
||||
@@ -79,7 +80,7 @@ static inline void _aligned_free(void *p)
|
||||
name *array = malloc(sizeof(name)); \
|
||||
if (array == NULL) return NULL; \
|
||||
array->n = array->m = 0; \
|
||||
array->a = _aligned_malloc(size * sizeof(type), alignment); \
|
||||
array->a = aligned_malloc(size * sizeof(type), alignment); \
|
||||
if (array->a == NULL) return NULL; \
|
||||
array->m = size; \
|
||||
return array; \
|
||||
@@ -94,7 +95,7 @@ static inline void _aligned_free(void *p)
|
||||
} \
|
||||
static inline bool name##_resize_aligned(name *array, size_t size, size_t alignment) { \
|
||||
if (size <= array->m) return true; \
|
||||
type *ptr = _aligned_realloc(array->a, sizeof(type) * size, alignment); \
|
||||
type *ptr = aligned_resize(array->a, sizeof(type) * array->m, sizeof(type) * size, alignment); \
|
||||
if (ptr == NULL) return false; \
|
||||
array->a = ptr; \
|
||||
array->m = size; \
|
||||
@@ -160,7 +161,7 @@ static inline void _aligned_free(void *p)
|
||||
} \
|
||||
static inline void name##_destroy_aligned(name *array) { \
|
||||
if (array == NULL) return; \
|
||||
if (array->a != NULL) _aligned_free(array->a); \
|
||||
if (array->a != NULL) aligned_free(array->a); \
|
||||
free(array); \
|
||||
}
|
||||
|
||||
@@ -182,7 +183,7 @@ static inline void _aligned_free(void *p)
|
||||
free_func(array->a[i]); \
|
||||
} \
|
||||
} \
|
||||
_aligned_free(array->a); \
|
||||
aligned_free(array->a); \
|
||||
free(array); \
|
||||
}
|
||||
|
||||
|
||||
@@ -8,10 +8,8 @@
|
||||
|
||||
#define ks_lt_index(a, b) ((a).value < (b).value)
|
||||
|
||||
#if defined(INTEL_SSE)
|
||||
#if defined(USE_SSE)
|
||||
#include <emmintrin.h>
|
||||
#elif defined(ARM_NEON)
|
||||
#include "sse2neon.h"
|
||||
#endif
|
||||
|
||||
/*
|
||||
@@ -340,7 +338,7 @@
|
||||
|
||||
|
||||
|
||||
#if defined(INTEL_SSE) || defined(ARM_NEON)
|
||||
#if defined(USE_SSE)
|
||||
/*
|
||||
From https://github.com/herumi/fmath/blob/master/fastexp.cpp
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ CFLAGS_O2 = $(CFLAGS_BASE) -O2
|
||||
CFLAGS_O3 = $(CFLAGS_BASE) -O3
|
||||
DEFAULT_INCLUDES = -I.. -I/usr/local/include
|
||||
|
||||
CFLAGS = $(SIMDFLAGS) $(CFLAGS_BASE)
|
||||
CFLAGS = $(CFLAGS_BASE)
|
||||
|
||||
TESTS = test_libpostal
|
||||
noinst_PROGRAMS = test_libpostal
|
||||
|
||||
1
versions/senzing/base_data
Normal file
1
versions/senzing/base_data
Normal file
@@ -0,0 +1 @@
|
||||
v1.0.0
|
||||
1
versions/senzing/language_classifier
Normal file
1
versions/senzing/language_classifier
Normal file
@@ -0,0 +1 @@
|
||||
v1.0.0
|
||||
1
versions/senzing/parser
Normal file
1
versions/senzing/parser
Normal file
@@ -0,0 +1 @@
|
||||
v1.0.0
|
||||
@@ -73,57 +73,20 @@ AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf availabl
|
||||
AS_IF([test "x$FOUND_GSHUF" = xyes], [AC_DEFINE([HAVE_GSHUF], [1], [gshuf available])])
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Architecture-specific options
|
||||
# Checks for SSE2 build
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
# allow enabling hardware optimization on any system:
|
||||
case "$host_cpu" in
|
||||
arm*|aarch64*)
|
||||
enable_arm_neon=yes
|
||||
enable_intel_sse=no
|
||||
AC_DEFINE([ARM_NEON], [1],
|
||||
[Enable ARM_NEON optimizations])
|
||||
;;
|
||||
i?86|x86_64)
|
||||
enable_intel_sse=yes
|
||||
enable_arm_neon=no
|
||||
AC_DEFINE([INTEL_SSE], [1],
|
||||
[Enable Intel SSE optimizations])
|
||||
;;
|
||||
esac
|
||||
|
||||
AC_ARG_ENABLE([neon],
|
||||
AS_HELP_STRING([[[--disable-neon]]],
|
||||
[Disable ARM NEON hardware optimizations]),
|
||||
[
|
||||
enable_arm_neon=no
|
||||
AC_DEFINE([ARM_NEON], [0],
|
||||
[Disable ARM_NEON optimizations])
|
||||
])
|
||||
|
||||
AC_ARG_ENABLE([sse2],
|
||||
AS_HELP_STRING([[[--disable-sse2]]],
|
||||
[Disable Intel SSE2 hardware optimizations]),
|
||||
[
|
||||
enable_intel_sse=no
|
||||
AC_DEFINE([INTEL_SSE], [0],
|
||||
[Disable INTEL_SSE optimizations])
|
||||
])
|
||||
AS_HELP_STRING(
|
||||
[--disable-sse2],
|
||||
[disable SSE2 optimization routines]
|
||||
)
|
||||
)
|
||||
|
||||
SIMDFLAGS=""
|
||||
|
||||
AS_IF([test "x$enable_intel_sse" != "xno"], [
|
||||
SIMDFLAGS="-mfpmath=sse -msse2 -DINTEL_SSE"
|
||||
AS_IF([test "x$enable_sse2" != "xno"], [
|
||||
CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}"
|
||||
])
|
||||
|
||||
AS_IF([test "x$enable_arm_neon" != "xno"], [
|
||||
SIMDFLAGS="-march=armv8-a+fp+simd+crypto+crc -DARM_NEON"
|
||||
])
|
||||
|
||||
CFLAGS="${SIMDFLAGS} ${CFLAGS}"
|
||||
|
||||
AC_SUBST([SIMDFLAGS], [$SIMDFLAGS])
|
||||
|
||||
AC_CHECK_HEADER(cblas.h, [AX_CBLAS])
|
||||
|
||||
AC_ARG_ENABLE([data-download],
|
||||
|
||||
Reference in New Issue
Block a user