From ec9e0e341fb40c70f0b2a8909e295086197e21f1 Mon Sep 17 00:00:00 2001 From: Oskar Thorbjornsson Date: Sun, 12 Feb 2023 17:58:36 -0800 Subject: [PATCH 01/24] Enable downloading of Senzing data model. --- configure.ac | 18 ++++++++++++++++++ src/libpostal_data.in | 16 ++++++++++++++++ versions/senzing/base_data | 1 + versions/senzing/language_classifier | 1 + versions/senzing/parser | 1 + 5 files changed, 37 insertions(+) create mode 100644 versions/senzing/base_data create mode 100644 versions/senzing/language_classifier create mode 100644 versions/senzing/parser diff --git a/configure.ac b/configure.ac index f740be12..4363c0fa 100644 --- a/configure.ac +++ b/configure.ac @@ -60,6 +60,17 @@ AC_SUBST([LIBPOSTAL_DATA_FILE_LATEST_VERSION], [$DATA_FILE_LATEST_VERSION]) AC_SUBST([LIBPOSTAL_PARSER_MODEL_LATEST_VERSION], [$PARSER_MODEL_LATEST_VERSION]) AC_SUBST([LIBPOSTAL_LANG_CLASS_MODEL_LATEST_VERSION], [$LANG_CLASS_MODEL_LATEST_VERSION]) +# Senzing data +AC_SUBST([LIBPOSTAL_SENZING_DATA_DIR_VERSION_STRING], [v1]) + +SENZING_DATA_FILE_LATEST_VERSION=$(cat $srcdir/versions/senzing/base_data) +SENZING_PARSER_MODEL_LATEST_VERSION=$(cat $srcdir/versions/senzing/parser) +SENZING_LANG_CLASS_MODEL_LATEST_VERSION=$(cat $srcdir/versions/senzing/language_classifier) + +AC_SUBST([LIBPOSTAL_SENZING_DATA_FILE_LATEST_VERSION], [$SENZING_DATA_FILE_LATEST_VERSION]) +AC_SUBST([LIBPOSTAL_SENZING_PARSER_MODEL_LATEST_VERSION], [$SENZING_PARSER_MODEL_LATEST_VERSION]) +AC_SUBST([LIBPOSTAL_SENZING_LANG_CLASS_MODEL_LATEST_VERSION], [$SENZING_LANG_CLASS_MODEL_LATEST_VERSION]) + AC_CONFIG_FILES([Makefile libpostal.pc src/Makefile @@ -134,6 +145,13 @@ AC_ARG_ENABLE([data-download], *) AC_MSG_ERROR([bad value ${enableval} for --disable-data-download]) ;; esac], [DOWNLOAD_DATA=true]) +AC_ARG_ENABLE([senzing-datamodel], + AS_HELP_STRING([[[--enable-senzing-datamodel]]], + [Use Senzing data model in lieu of the default one]), + [ + DATAMODEL="senzing" + AC_SUBST([LIBPOSTAL_DATA_MODEL], [$DATAMODEL]) 
+ ]) AM_CONDITIONAL([DOWNLOAD_DATA], [test "x$DOWNLOAD_DATA" = "xtrue"]) diff --git a/src/libpostal_data.in b/src/libpostal_data.in index 0a3d27f2..6b0c04e9 100755 --- a/src/libpostal_data.in +++ b/src/libpostal_data.in @@ -14,6 +14,8 @@ LIBPOSTAL_DATA_DIR=$3 MB=$((1024*1024)) CHUNK_SIZE=$((64*$MB)) +DATAMODEL="@LIBPOSTAL_DATA_MODEL@" + # Not loving this approach but there appears to be no way to query the size # of a release asset without using the Github API LIBPOSTAL_DATA_FILE_CHUNKS=1 @@ -34,6 +36,20 @@ LIBPOSTAL_LANG_CLASS_FILE="language_classifier.tar.gz" LIBPOSTAL_BASE_URL="https://github.com/$LIBPOSTAL_REPO_NAME/releases/download" +if [ $DATAMODEL == "senzing" ]; then + LIBPOSTAL_DATA_FILE_CHUNKS=1 + LIBPOSTAL_PARSER_MODEL_CHUNKS=1 + LIBPOSTAL_LANG_CLASS_MODEL_CHUNKS=1 + + LIBPOSTAL_DATA_DIR_VERSION_STRING="@LIBPOSTAL_SENZING_DATA_DIR_VERSION_STRING@" + + LIBPOSTAL_DATA_FILE_LATEST_VERSION="@LIBPOSTAL_SENZING_DATA_FILE_LATEST_VERSION@" + LIBPOSTAL_PARSER_MODEL_LATEST_VERSION="@LIBPOSTAL_SENZING_PARSER_MODEL_LATEST_VERSION@" + LIBPOSTAL_LANG_CLASS_MODEL_LATEST_VERSION="@LIBPOSTAL_SENZING_LANG_CLASS_MODEL_LATEST_VERSION@" + + LIBPOSTAL_BASE_URL="https://public-read-libpostal-data.s3.amazonaws.com" +fi + LIBPOSTAL_DATA_VERSION_FILE=$LIBPOSTAL_DATA_DIR/data_version LIBPOSTAL_DATA_DIR_VERSION= diff --git a/versions/senzing/base_data b/versions/senzing/base_data new file mode 100644 index 00000000..60453e69 --- /dev/null +++ b/versions/senzing/base_data @@ -0,0 +1 @@ +v1.0.0 \ No newline at end of file diff --git a/versions/senzing/language_classifier b/versions/senzing/language_classifier new file mode 100644 index 00000000..60453e69 --- /dev/null +++ b/versions/senzing/language_classifier @@ -0,0 +1 @@ +v1.0.0 \ No newline at end of file diff --git a/versions/senzing/parser b/versions/senzing/parser new file mode 100644 index 00000000..0ec25f75 --- /dev/null +++ b/versions/senzing/parser @@ -0,0 +1 @@ +v1.0.0 From c4c636febdd160ad437f61c062e8f0d1eb82d108 Mon 
Sep 17 00:00:00 2001 From: Oskar Thorbjornsson Date: Sun, 12 Feb 2023 18:04:10 -0800 Subject: [PATCH 02/24] Adding directions to the readme on how to download Senzing datamodel. --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 5cd327bc..58ad3ce8 100644 --- a/README.md +++ b/README.md @@ -175,6 +175,16 @@ If you require a .lib import library to link this to your application. You can g lib.exe /def:libpostal.def /out:libpostal.lib /machine:x64 ``` +Installation with an alternative data model +------------------------------------------- + +An alternative data model is available for libposta. It is created by Senzing Inc. for improved parsing on US, UK and Singapore addresses and improved US rural route address handling. +To enable this add `--enable-senzing-datamodel` to the conigure line during installation: +``` +./configure --datadir=[...some dir with a few GB of space...] --enable-senzing-datamodel +``` + + Examples of parsing ------------------- From a11f33fb3d84b0a41334e10ce8a5583f514177fd Mon Sep 17 00:00:00 2001 From: Oskar Thorbjornsson Date: Mon, 13 Feb 2023 13:32:38 -0800 Subject: [PATCH 03/24] Add a link to info about Senzing data model. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 58ad3ce8..9fa3bf89 100644 --- a/README.md +++ b/README.md @@ -183,6 +183,7 @@ To enable this add `--enable-senzing-datamodel` to the conigure line during inst ``` ./configure --datadir=[...some dir with a few GB of space...] --enable-senzing-datamodel ``` +Further information about this data model can be found at: https://github.com/Senzing/libpostal Examples of parsing From 0c0818c683716b35245210c0df320258c36bc3c4 Mon Sep 17 00:00:00 2001 From: Oskar Thorbjornsson Date: Mon, 13 Feb 2023 17:03:42 -0800 Subject: [PATCH 04/24] Update Senzing link. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9fa3bf89..2ec9f509 100644 --- a/README.md +++ b/README.md @@ -183,7 +183,7 @@ To enable this add `--enable-senzing-datamodel` to the conigure line during inst ``` ./configure --datadir=[...some dir with a few GB of space...] --enable-senzing-datamodel ``` -Further information about this data model can be found at: https://github.com/Senzing/libpostal +Further information about this data model can be found at: https://github.com/Senzing/libpostal-data Examples of parsing From 00568da290bb175d6d1ed28140e94453fe4547fc Mon Sep 17 00:00:00 2001 From: Oskar Thorbjornsson Date: Tue, 14 Feb 2023 21:02:51 -0800 Subject: [PATCH 05/24] Modifying README and config parameter, based on code review. --- README.md | 15 +++++++++++---- configure.ac | 10 +++------- src/libpostal_data.in | 2 +- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 2ec9f509..d8e2cb9c 100644 --- a/README.md +++ b/README.md @@ -178,13 +178,20 @@ lib.exe /def:libpostal.def /out:libpostal.lib /machine:x64 Installation with an alternative data model ------------------------------------------- -An alternative data model is available for libposta. It is created by Senzing Inc. for improved parsing on US, UK and Singapore addresses and improved US rural route address handling. -To enable this add `--enable-senzing-datamodel` to the conigure line during installation: +An alternative data model is available for libpostal. It is created by Senzing Inc. for improved parsing on US, UK and Singapore addresses and improved US rural route address handling. +To enable this add `MODEL=senzing` to the conigure line during installation: ``` -./configure --datadir=[...some dir with a few GB of space...] --enable-senzing-datamodel +./configure --datadir=[...some dir with a few GB of space...] 
MODEL=senzing ``` -Further information about this data model can be found at: https://github.com/Senzing/libpostal-data +The data for this model is gotten from [OpenAddress](https://openaddresses.io/), [OpenStreetMap](https://www.openstreetmap.org/) and data generated by Senzing based on customer feedback (a few hundred records), a total of about 1.2 billion records of data from over 230 countries, in 100+ languages. The data from OpenStreetMap and OpenAddress is good but not perfect so the data set was modified by filtering out badly formed addresses, correcting misclassified address tokens and removing tokens that didn't belong in the addresses, whenever these conditions were encountered. + +Senzing created a data set of 12950 addresses from 89 countries that it uses to test and verify the quality of its models. The data set was generated using random addresses from OSM, minimally 50 per country. Hard-to-parse addresses were gotten from Senzing support team and customers and from the libpostal github page and added to this set. The Senzing model got 4.3% better parsing results than the default model, using this test set. + +The size of this model is about 2.2GB compared to 1.8GB for the default model so keep that in mind if storages space is important. 
+ +Further information about this data model can be found at: https://github.com/Senzing/libpostal-data +If you run into any issues with this model, whether they have to do with parses, installation or any other problems, then please report them at https://github.com/Senzing/libpostal-data Examples of parsing ------------------- diff --git a/configure.ac b/configure.ac index 4363c0fa..ed997e32 100644 --- a/configure.ac +++ b/configure.ac @@ -145,13 +145,9 @@ AC_ARG_ENABLE([data-download], *) AC_MSG_ERROR([bad value ${enableval} for --disable-data-download]) ;; esac], [DOWNLOAD_DATA=true]) -AC_ARG_ENABLE([senzing-datamodel], - AS_HELP_STRING([[[--enable-senzing-datamodel]]], - [Use Senzing data model in lieu of the default one]), - [ - DATAMODEL="senzing" - AC_SUBST([LIBPOSTAL_DATA_MODEL], [$DATAMODEL]) - ]) +AC_ARG_VAR(MODEL, [Option to use alternative data models. Currently available is "senzing" (MODEL=senzing). If this option is not set the default libpostal data model is used.]) +AS_VAR_IF([MODEL], [], [], + [AS_VAR_IF([MODEL], [senzing], [], [AC_MSG_FAILURE([Invalid MODEL value set])])]) AM_CONDITIONAL([DOWNLOAD_DATA], [test "x$DOWNLOAD_DATA" = "xtrue"]) diff --git a/src/libpostal_data.in b/src/libpostal_data.in index 6b0c04e9..8c18270f 100755 --- a/src/libpostal_data.in +++ b/src/libpostal_data.in @@ -14,7 +14,7 @@ LIBPOSTAL_DATA_DIR=$3 MB=$((1024*1024)) CHUNK_SIZE=$((64*$MB)) -DATAMODEL="@LIBPOSTAL_DATA_MODEL@" +DATAMODEL="@MODEL@" # Not loving this approach but there appears to be no way to query the size # of a release asset without using the Github API From e2590bca9764e34cb916470ad6e8e2c0759bd244 Mon Sep 17 00:00:00 2001 From: PIT-Development Date: Thu, 13 Apr 2023 08:38:52 +0200 Subject: [PATCH 06/24] docs: fix typos in contributing.md (#622) * Respect typo Repeect should be respect * Update CONTRIBUTING.md Also include guildelines to guidelines --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md 
b/CONTRIBUTING.md index 26bd9bdb..7e5d2804 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ ## Submitting Issues -When submitting issues to libpostal, please repeect these guildelines: +When submitting issues to libpostal, please respect these guidelines: - Be constructive. Try to help solve the problem. - Always search for existing issues before submitting one. From 0ad268f991f6f3191e9fdeeb65d9731b7c542a80 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Tue, 18 Apr 2023 10:57:53 +0200 Subject: [PATCH 07/24] Add support for OpenBLAS --- m4/ax_cblas.m4 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/m4/ax_cblas.m4 b/m4/ax_cblas.m4 index 0c87c29f..4acda990 100644 --- a/m4/ax_cblas.m4 +++ b/m4/ax_cblas.m4 @@ -157,6 +157,11 @@ if test $ax_cblas_ok = no; then AC_CHECK_LIB(cblas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lcblas"]) fi +# Generic BLAS library? (for instance OpenBLAS) +if test $ax_cblas_ok = no; then + AC_CHECK_LIB(cblas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lblas"]) +fi + AC_SUBST(CBLAS_LIBS) LIBS="$ax_cblas_save_LIBS" From 8eb721f6a1bfc358e2ad7763921756af866ebb91 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Tue, 18 Apr 2023 11:19:31 +0200 Subject: [PATCH 08/24] Fix typo --- m4/ax_cblas.m4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m4/ax_cblas.m4 b/m4/ax_cblas.m4 index 4acda990..0e0d68d3 100644 --- a/m4/ax_cblas.m4 +++ b/m4/ax_cblas.m4 @@ -159,7 +159,7 @@ fi # Generic BLAS library? 
(for instance OpenBLAS) if test $ax_cblas_ok = no; then - AC_CHECK_LIB(cblas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lblas"]) + AC_CHECK_LIB(blas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lblas"]) fi AC_SUBST(CBLAS_LIBS) From 6f9567742711dbd929ab6df7eb2caf01ab6325e0 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Tue, 18 Apr 2023 12:00:10 +0200 Subject: [PATCH 09/24] Explicit -lopenblas --- m4/ax_cblas.m4 | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/m4/ax_cblas.m4 b/m4/ax_cblas.m4 index 0e0d68d3..da89cab9 100644 --- a/m4/ax_cblas.m4 +++ b/m4/ax_cblas.m4 @@ -152,12 +152,17 @@ if test $ax_cblas_ok = no; then [], [-lblas])]) fi +# BLAS in OpenBLAS library? +if test $ax_cblas_ok = no; then + AC_CHECK_LIB(openblas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lopenblas"]) +fi + # Generic CBLAS library? if test $ax_cblas_ok = no; then AC_CHECK_LIB(cblas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lcblas"]) fi -# Generic BLAS library? (for instance OpenBLAS) +# Generic BLAS library? if test $ax_cblas_ok = no; then AC_CHECK_LIB(blas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lblas"]) fi From 5d77298e88ebdd0ba5c20be2191cc67eff444c0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Fri, 23 Jun 2023 12:16:35 +0300 Subject: [PATCH 10/24] avoid UB in bit shifts unsigned char* gets promoted to `int`, which cannot always be shifted by 24 bits. 
Justine Tunney blogs about it here: https://justine.lol/endian.html Example: ```deserialize.c #include #include #include uint32_t file_deserialize_uint32_ok(unsigned char *buf) { return ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) | ((uint32_t)buf[2] << 8) | (uint32_t)buf[3]; } uint32_t file_deserialize_uint32(unsigned char *buf) { return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; } int main() { unsigned char arr[4] = {0xaa, 0xaa, 0xaa, 0xaa}; printf("%d\n", file_deserialize_uint32_ok((unsigned char*)arr)); printf("%d\n", file_deserialize_uint32((unsigned char*)arr)); } ``` Output: ``` $ clang-16 -fsanitize=undefined ./deserialize.c -o deserialize && ./deserialize -1431655766 deserialize.c:10:20: runtime error: left shift of 170 by 24 places cannot be represented in type 'int' SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior deserialize.c:10:20 in -1431655766 ``` --- src/file_utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/file_utils.c b/src/file_utils.c index f25e5ee6..5fc2dfbe 100644 --- a/src/file_utils.c +++ b/src/file_utils.c @@ -198,7 +198,7 @@ bool file_write_float(FILE *file, float value) { } inline uint32_t file_deserialize_uint32(unsigned char *buf) { - return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; + return ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) | ((uint32_t)buf[2] << 8) | (uint32_t)buf[3]; } bool file_read_uint32(FILE *file, uint32_t *value) { @@ -243,7 +243,7 @@ bool file_write_uint32(FILE *file, uint32_t value) { inline uint16_t file_deserialize_uint16(unsigned char *buf) { - return (buf[0] << 8) | buf[1]; + return ((uint16_t)buf[0] << 8) | buf[1]; } From dcb63d8768a5e187025de338fa99cc10f44469ba Mon Sep 17 00:00:00 2001 From: Davide Madrisan Date: Thu, 29 Jun 2023 14:36:10 +0200 Subject: [PATCH 11/24] Fix dash syntax error in libpostal_data Fix the syntax error reported by dash: ./src/libpostal_data: 39: [: ==: unexpected operator when the variable DATAMODEL 
is empty. Signed-off-by: Davide Madrisan --- src/libpostal_data.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libpostal_data.in b/src/libpostal_data.in index 8c18270f..a749a623 100755 --- a/src/libpostal_data.in +++ b/src/libpostal_data.in @@ -36,7 +36,7 @@ LIBPOSTAL_LANG_CLASS_FILE="language_classifier.tar.gz" LIBPOSTAL_BASE_URL="https://github.com/$LIBPOSTAL_REPO_NAME/releases/download" -if [ $DATAMODEL == "senzing" ]; then +if [ "$DATAMODEL" = "senzing" ]; then LIBPOSTAL_DATA_FILE_CHUNKS=1 LIBPOSTAL_PARSER_MODEL_CHUNKS=1 LIBPOSTAL_LANG_CLASS_MODEL_CHUNKS=1 From 2f20c9359e17a65ed847a395f16c5f5b21b15b0c Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 5 Jul 2023 20:38:48 -0400 Subject: [PATCH 12/24] [github] adding Github action to run tests on mac and ubuntu initially --- .github/workflows/test.yml | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..36799bc7 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,36 @@ +name: Test + +on: + push: + branches: [master] + pull_request: + branches: [master] + workflow_dispatch: + +jobs: + build_and_test: + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + - name: Install Dependencies Linux + if: matrix.os == 'ubuntu-latest' + run: | + apt-get update -y + apt-get install curl autoconf automake libtool pkg-config + - name: Install Dependencies MacOS + if: runner.os == 'macOS' + run: | + brew update + brew install curl autoconf automake libtool pkg-config + - name: Build + env: + LIBPOSTAL_DATA_DIR: ${GITHUB_WORKSPACE}/data + run: | + ./bootstrap.sh + ./configure --datadir=$LIBPOSTAL_DATA_DIR + make + - name: Test + run: make check From 5669372a90113fa949c79ce3d45d083780396968 Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 5 Jul 2023 20:42:50 -0400 
Subject: [PATCH 13/24] [fix] sudo in github actions for build tool installs --- .github/workflows/test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 36799bc7..ff2c99c9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,13 +18,13 @@ jobs: - name: Install Dependencies Linux if: matrix.os == 'ubuntu-latest' run: | - apt-get update -y - apt-get install curl autoconf automake libtool pkg-config + sudo apt-get update -y + sudo apt-get install curl autoconf automake libtool pkg-config - name: Install Dependencies MacOS if: runner.os == 'macOS' run: | - brew update - brew install curl autoconf automake libtool pkg-config + sudo brew update + sudo brew install curl autoconf automake libtool pkg-config - name: Build env: LIBPOSTAL_DATA_DIR: ${GITHUB_WORKSPACE}/data From 2b93af09d959c132f2e97f92dcf0278a433073b2 Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 5 Jul 2023 20:43:18 -0400 Subject: [PATCH 14/24] [build] removing travis build --- .travis.yml | 83 ----------------------------------------------------- 1 file changed, 83 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 1f3dbacf..00000000 --- a/.travis.yml +++ /dev/null @@ -1,83 +0,0 @@ -language: c -branches: - only: - - master -env: - global: - - secure: "bHrAu46oecEj3gjamT+XWXtf2J0ZJCFa8tUdgM4evscaJiiwv1TtsGXyhIj/ai7DlRIPVJUtBUy6uoGGjr6GT43zTrzSxYAOMdVXZYsnTDcdL1/0dbwcIK6/u0EI377s1buGIxG1fHveWKXuXwJWDAw4KS+5HU88a42+zMbhKe4=" - - secure: "SkvNYucKVns9qDjOEW2WIhDlOMKBOwhzVcwY++HWTRtn04ErrqR4k01Mmho0jGBQD9JrPLhDgnX1BNy5s+Kmq/bxn9OZm7K1z24qBKb0mBBiNEnf2jvT0AvF5xxM+cJf4KKNL+CC0MwNf5y7HVPq1xibOV4/CNIrc1ZZc9aqdkE=" - - secure: "am/rRca5akv7gSSMeNQfHnWiTHhk8fQhOZvZ0Ut+PezkQlLgKp7bzmMFkkuQ4L5hpJU40kFzuWmIPgO33dacgq69Vx/Xct1bEnxGBGjriI5qOhMizmzLYPs5uWiRjtJnBqb4JOUh5K7JBlwrgvD72fY5ZK2lwtzTksfWo8N+ahU=" - - secure: 
"mh/WDQapGJb6MAFvgCjiMAAv1aa8gUaIs2Ohtx7yPrDBwsD8UqlyEM7ktGLZGQ1q/7OJ/Z6QfDMfJQwDKzxyUSY1yHZTNkP3QzkTt2D1Qyvi++O6EkGqSdSS6Lb3aID3IsEaye/yasJ+rxiRSp05O9+OYvhJlqRZnzaimiAv5KI=" - - secure: "OGNJ6Cj3trq4nASgm4BK331aij+FZ11St7/YF9rfxeQBwg4MCPH2+D0jvAULBHvJR7K2RmepX/FG5d4S+rtwKNGngg3ovPdd1MbwFltHpn5/KM+hxe7kCZx2+V9/FN+4YSyO0zSUDra6AXHOs72mfyrZoB3a36SS4lg2sAp33gU=" - - GH_REF=github.com/openvenues/libpostal - - DICTIONARIES_CHANGED=$(git diff --name-only $TRAVIS_COMMIT_RANGE | grep "resources/dictionaries/.*/.*.txt\|src/gazetteer_data.c" | wc -l) - - NUMEX_CHANGED=$(git diff --name-only $TRAVIS_COMMIT_RANGE | grep "resources/numex\|src/numex_table_builder.c" | wc -l) - - TRANSLIT_CHANGED=$(git diff --name-only $TRAVIS_COMMIT_RANGE | grep "src/transliteration_data.c" | wc -l) - - TAG_VERSION=$(cat ./versions/base).$TRAVIS_BUILD_NUMBER - - SRC_TARBALL_FILENAME=libpostal-$(cat ./versions/base).tar.gz - - LIBPOSTAL_DATA_DIR=$(pwd)/data - - LIBPOSTAL_DATA_FILENAME=libpostal_data.tar.gz -compiler: - - clang - - gcc -addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - gcc-4.8 - - pkg-config -before_script: - - ./bootstrap.sh - - if [[ $DICTIONARIES_CHANGED -ne 0 || $NUMEX_CHANGED -ne 0 ]]; then git clone https://github.com/pypa/virtualenv; cd virtualenv; git checkout master; python virtualenv.py ../env; cd ..; env/bin/pip install -r scripts/requirements-simple.txt; fi; - - if [ $NUMEX_CHANGED -ne 0 ]; then env/bin/python scripts/geodata/numbers/numex.py; fi; - - if [ $DICTIONARIES_CHANGED -ne 0 ]; then env/bin/python scripts/geodata/address_expansions/address_dictionaries.py; fi; -install: - - if [ "$CC" = "gcc" ]; then export CC="gcc-4.8"; fi -script: - - ./configure --datadir=$LIBPOSTAL_DATA_DIR - - make -j4 - - if [[ $DICTIONARIES_CHANGED -ne 0 ]]; then ./src/build_address_dictionary; fi; - - if [[ $NUMEX_CHANGED -ne 0 ]]; then ./src/build_numex_table; fi; - - if [[ $TRANSLIT_CHANGED -ne 0 ]]; then ./src/build_trans_table; fi; - - make check - 
-after_success: - - | - if [[ "$CC" == "gcc" && "$TRAVIS_PULL_REQUEST" = "false" && "$TRAVIS_BRANCH" = "master" ]]; then - if [[ ( $DICTIONARIES_CHANGED -ne 0 || $NUMEX_CHANGED -ne 0 || $TRANSLIT_CHANGED -ne 0 ) ]]; then - export PATH=$PATH:env/bin/; - git clone -b master "https://${GH_TOKEN}@${GH_REF}" _travis > /dev/null 2>&1 || exit 1 - cp src/*_data.c _travis/src - echo "$TAG_VERSION" > _travis/versions/base_data - cd _travis - git config user.name "$GIT_COMMITTER_NAME"; - git config user.email "$GIT_COMMITTER_EMAIL"; - git commit -a -m "[auto][ci skip] Adding data files from Travis build #$TRAVIS_BUILD_NUMBER"; - git push --quiet origin master; - - tar -C $LIBPOSTAL_DATA_DIR -cvzf $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_DATA_FILENAME $BASIC_MODULE_DIRS - fi - git tag $TAG_VERSION -a -m "[auto][ci skip] Generating tag for Travis build #$TRAVIS_BUILD_NUMBER"; - git push --tags --quiet origin master; - fi; - -before_deploy: - - make dist - -deploy: - - provider: releases - file: - - "$SRC_TARBALL_FILENAME" - on: - tags: true - branch: master - skip_cleanup: true - - provider: releases - file: - - "$LIBPOSTAL_DATA_DIR/$LIBPOSTAL_DATA_FILENAME" - on: - tags: true - branch: master - condition: "$CC = gcc && ( $DICTIONARIES_CHANGED -ne 0 || $NUMEX_CHANGED -ne 0 || $TRANSLIT_CHANGED -ne 0 )" - skip_cleanup: true From b65e7d5bce82a30dec221ce144ee1a19c5fc617c Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 5 Jul 2023 20:47:14 -0400 Subject: [PATCH 15/24] [fix] no sudo on brew on Mac in github actions, just like on a regular machine/in the docs --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ff2c99c9..ea9dca07 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,8 +23,8 @@ jobs: - name: Install Dependencies MacOS if: runner.os == 'macOS' run: | - sudo brew update - sudo brew install curl autoconf automake libtool pkg-config + brew update + 
brew install curl autoconf automake libtool pkg-config - name: Build env: LIBPOSTAL_DATA_DIR: ${GITHUB_WORKSPACE}/data From 7a448b718d58cece0384d540067d48bbe4a73774 Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 5 Jul 2023 21:02:41 -0400 Subject: [PATCH 16/24] [crf] using 32 bytes for posix_memalign to align blocks of 4 doubles for remez algorithm to fix test which uses an odd-sized context --- src/crf_context.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/crf_context.c b/src/crf_context.c index 0f399a1a..e8635e28 100644 --- a/src/crf_context.c +++ b/src/crf_context.c @@ -41,7 +41,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { if (context->flag & CRF_CONTEXT_MARGINALS) { #if defined(INTEL_SSE) || defined(ARM_NEON) - context->exp_state = double_matrix_new_aligned(T, L, 16); + context->exp_state = double_matrix_new_aligned(T, L, 32); if (context->exp_state == NULL) goto exit_context_created; double_matrix_zero(context->exp_state); #else @@ -53,7 +53,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { if (context->mexp_state == NULL) goto exit_context_created; #if defined(INTEL_SSE) || defined(ARM_NEON) - context->exp_state_trans = double_matrix_new_aligned(T, L * L, 16); + context->exp_state_trans = double_matrix_new_aligned(T, L * L, 32); if (context->exp_state_trans == NULL) goto exit_context_created; double_matrix_zero(context->exp_state_trans); #else @@ -65,7 +65,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { if (context->mexp_state_trans == NULL) goto exit_context_created; #if defined(INTEL_SSE) || defined(ARM_NEON) - context->exp_trans = double_matrix_new_aligned(L, L, 16); + context->exp_trans = double_matrix_new_aligned(L, L, 32); if (context->exp_trans == NULL) goto exit_context_created; double_matrix_zero(context->exp_trans); #else @@ -131,13 +131,13 @@ bool crf_context_set_num_items(crf_context_t *self, size_t T) { if (self->flag & CRF_CONTEXT_MARGINALS && ( #if 
defined(INTEL_SSE) || defined(ARM_NEON) - !double_matrix_resize_aligned(self->exp_state, T, L, 16) || + !double_matrix_resize_aligned(self->exp_state, T, L, 32) || #else !double_matrix_resize(self->exp_state, T, L) || #endif !double_matrix_resize(self->mexp_state, T, L) || #if defined(INTEL_SSE) || defined(ARM_NEON) - !double_matrix_resize_aligned(self->exp_state_trans, T, L * L, 16) || + !double_matrix_resize_aligned(self->exp_state_trans, T, L * L, 32) || #else !double_matrix_resize(self->exp_state_trans, T, L * L) || #endif From 59325c3b13cb941aa0938af22a02294093de728d Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 6 Jul 2023 01:16:22 -0400 Subject: [PATCH 17/24] [test] testing with sse2 disabled to see if the build is working generally --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ea9dca07..b9b0b0c1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,7 +30,7 @@ jobs: LIBPOSTAL_DATA_DIR: ${GITHUB_WORKSPACE}/data run: | ./bootstrap.sh - ./configure --datadir=$LIBPOSTAL_DATA_DIR + ./configure --datadir=$LIBPOSTAL_DATA_DIR --disable-sse2 make - name: Test run: make check From d979fbb779bfa3236999107d91afb2aa8c5a10c5 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 6 Jul 2023 01:28:49 -0400 Subject: [PATCH 18/24] [test] trying make check in the same step, to see if that makes a difference --- .github/workflows/test.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b9b0b0c1..141f37da 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,12 +25,11 @@ jobs: run: | brew update brew install curl autoconf automake libtool pkg-config - - name: Build + - name: Build and Test env: LIBPOSTAL_DATA_DIR: ${GITHUB_WORKSPACE}/data run: | ./bootstrap.sh ./configure --datadir=$LIBPOSTAL_DATA_DIR --disable-sse2 make - - name: Test - run: 
make check + make check From c76d020c18ce03b0e35f0dc281038a1e8abdcd86 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 6 Jul 2023 01:36:23 -0400 Subject: [PATCH 19/24] [fix] same result running test as a separate step --- .github/workflows/test.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 141f37da..b9b0b0c1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,11 +25,12 @@ jobs: run: | brew update brew install curl autoconf automake libtool pkg-config - - name: Build and Test + - name: Build env: LIBPOSTAL_DATA_DIR: ${GITHUB_WORKSPACE}/data run: | ./bootstrap.sh ./configure --datadir=$LIBPOSTAL_DATA_DIR --disable-sse2 make - make check + - name: Test + run: make check From 57eaa414ceadb48d5922099eeaa446b02894a2e4 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 6 Jul 2023 01:49:02 -0400 Subject: [PATCH 20/24] [revert] reverting the commits from #578, leaving header file in repo for the moment --- README.md | 2 ++ configure.ac | 53 +++++++------------------------------------- src/crf_context.c | 16 ++++++------- src/vector_math.h | 6 ++--- test/Makefile.am | 2 +- windows/configure.ac | 53 +++++++------------------------------------- 6 files changed, 29 insertions(+), 103 deletions(-) diff --git a/README.md b/README.md index d8e2cb9c..3d641717 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,8 @@ brew install curl autoconf automake libtool pkg-config Then to install the C library: +If you're using an M1 Mac, add `--disable-sse2` to the `./configure` command. This will result in poorer performance but the build will succeed. 
+ ``` git clone https://github.com/openvenues/libpostal cd libpostal diff --git a/configure.ac b/configure.ac index ed997e32..b7339215 100644 --- a/configure.ac +++ b/configure.ac @@ -84,57 +84,20 @@ AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf availabl AS_IF([test "x$FOUND_GSHUF" = xyes], [AC_DEFINE([HAVE_GSHUF], [1], [gshuf available])]) # ------------------------------------------------------------------ -# Architecture-specific options +# Checks for SSE2 build # ------------------------------------------------------------------ -# allow enabling hardware optimization on any system: -case "$host_cpu" in - arm*|aarch64*) - enable_arm_neon=yes - enable_intel_sse=no - AC_DEFINE([ARM_NEON], [1], - [Enable ARM_NEON optimizations]) - ;; - i?86|x86_64) - enable_intel_sse=yes - enable_arm_neon=no - AC_DEFINE([INTEL_SSE], [1], - [Enable Intel SSE optimizations]) - ;; -esac - -AC_ARG_ENABLE([neon], - AS_HELP_STRING([[[--disable-neon]]], - [Disable ARM NEON hardware optimizations]), - [ - enable_arm_neon=no - AC_DEFINE([ARM_NEON], [0], - [Disable ARM_NEON optimizations]) - ]) - AC_ARG_ENABLE([sse2], - AS_HELP_STRING([[[--disable-sse2]]], - [Disable Intel SSE2 hardware optimizations]), - [ - enable_intel_sse=no - AC_DEFINE([INTEL_SSE], [0], - [Disable INTEL_SSE optimizations]) - ]) + AS_HELP_STRING( + [--disable-sse2], + [disable SSE2 optimization routines] + ) + ) -SIMDFLAGS="" - -AS_IF([test "x$enable_intel_sse" != "xno"], [ - SIMDFLAGS="-mfpmath=sse -msse2 -DINTEL_SSE" +AS_IF([test "x$enable_sse2" != "xno"], [ + CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}" ]) -AS_IF([test "x$enable_arm_neon" != "xno"], [ - SIMDFLAGS="-march=armv8-a+fp+simd+crypto+crc -DARM_NEON" -]) - -CFLAGS="${SIMDFLAGS} ${CFLAGS}" - -AC_SUBST([SIMDFLAGS], [$SIMDFLAGS]) - AC_CHECK_HEADER(cblas.h, [AX_CBLAS]) AC_ARG_ENABLE([data-download], diff --git a/src/crf_context.c b/src/crf_context.c index e8635e28..8e1a759e 100644 --- a/src/crf_context.c +++ b/src/crf_context.c @@ 
-40,7 +40,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { } if (context->flag & CRF_CONTEXT_MARGINALS) { -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) context->exp_state = double_matrix_new_aligned(T, L, 32); if (context->exp_state == NULL) goto exit_context_created; double_matrix_zero(context->exp_state); @@ -52,7 +52,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { context->mexp_state = double_matrix_new_zeros(T, L); if (context->mexp_state == NULL) goto exit_context_created; -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) context->exp_state_trans = double_matrix_new_aligned(T, L * L, 32); if (context->exp_state_trans == NULL) goto exit_context_created; double_matrix_zero(context->exp_state_trans); @@ -64,7 +64,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { context->mexp_state_trans = double_matrix_new_zeros(T, L * L); if (context->mexp_state_trans == NULL) goto exit_context_created; -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) context->exp_trans = double_matrix_new_aligned(L, L, 32); if (context->exp_trans == NULL) goto exit_context_created; double_matrix_zero(context->exp_trans); @@ -130,13 +130,13 @@ bool crf_context_set_num_items(crf_context_t *self, size_t T) { if (self->flag & CRF_CONTEXT_MARGINALS && ( -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) !double_matrix_resize_aligned(self->exp_state, T, L, 32) || #else !double_matrix_resize(self->exp_state, T, L) || #endif !double_matrix_resize(self->mexp_state, T, L) || -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) !double_matrix_resize_aligned(self->exp_state_trans, T, L * L, 32) || #else !double_matrix_resize(self->exp_state_trans, T, L * L) || @@ -184,7 +184,7 @@ void crf_context_destroy(crf_context_t *self) { } if (self->exp_state != NULL) { -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) 
double_matrix_destroy_aligned(self->exp_state); #else double_matrix_destroy(self->exp_state); @@ -200,7 +200,7 @@ void crf_context_destroy(crf_context_t *self) { } if (self->exp_state_trans != NULL) { -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) double_matrix_destroy_aligned(self->exp_state_trans); #else double_matrix_destroy(self->exp_state_trans); @@ -216,7 +216,7 @@ void crf_context_destroy(crf_context_t *self) { } if (self->exp_trans != NULL) { -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) double_matrix_destroy_aligned(self->exp_trans); #else double_matrix_destroy(self->exp_trans); diff --git a/src/vector_math.h b/src/vector_math.h index 7dbdb049..eff90466 100644 --- a/src/vector_math.h +++ b/src/vector_math.h @@ -8,10 +8,8 @@ #define ks_lt_index(a, b) ((a).value < (b).value) -#if defined(INTEL_SSE) +#if defined(USE_SSE) #include -#elif defined(ARM_NEON) -#include "sse2neon.h" #endif /* @@ -340,7 +338,7 @@ -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) /* From https://github.com/herumi/fmath/blob/master/fastexp.cpp diff --git a/test/Makefile.am b/test/Makefile.am index 5289e3c2..f2e911f2 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -5,7 +5,7 @@ CFLAGS_O2 = $(CFLAGS_BASE) -O2 CFLAGS_O3 = $(CFLAGS_BASE) -O3 DEFAULT_INCLUDES = -I.. 
-I/usr/local/include -CFLAGS = $(SIMDFLAGS) $(CFLAGS_BASE) +CFLAGS = $(CFLAGS_BASE) TESTS = test_libpostal noinst_PROGRAMS = test_libpostal diff --git a/windows/configure.ac b/windows/configure.ac index d19cd967..24e73fec 100644 --- a/windows/configure.ac +++ b/windows/configure.ac @@ -73,57 +73,20 @@ AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf availabl AS_IF([test "x$FOUND_GSHUF" = xyes], [AC_DEFINE([HAVE_GSHUF], [1], [gshuf available])]) # ------------------------------------------------------------------ -# Architecture-specific options +# Checks for SSE2 build # ------------------------------------------------------------------ -# allow enabling hardware optimization on any system: -case "$host_cpu" in - arm*|aarch64*) - enable_arm_neon=yes - enable_intel_sse=no - AC_DEFINE([ARM_NEON], [1], - [Enable ARM_NEON optimizations]) - ;; - i?86|x86_64) - enable_intel_sse=yes - enable_arm_neon=no - AC_DEFINE([INTEL_SSE], [1], - [Enable Intel SSE optimizations]) - ;; -esac - -AC_ARG_ENABLE([neon], - AS_HELP_STRING([[[--disable-neon]]], - [Disable ARM NEON hardware optimizations]), - [ - enable_arm_neon=no - AC_DEFINE([ARM_NEON], [0], - [Disable ARM_NEON optimizations]) - ]) - AC_ARG_ENABLE([sse2], - AS_HELP_STRING([[[--disable-sse2]]], - [Disable Intel SSE2 hardware optimizations]), - [ - enable_intel_sse=no - AC_DEFINE([INTEL_SSE], [0], - [Disable INTEL_SSE optimizations]) - ]) + AS_HELP_STRING( + [--disable-sse2], + [disable SSE2 optimization routines] + ) + ) -SIMDFLAGS="" - -AS_IF([test "x$enable_intel_sse" != "xno"], [ - SIMDFLAGS="-mfpmath=sse -msse2 -DINTEL_SSE" +AS_IF([test "x$enable_sse2" != "xno"], [ + CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}" ]) -AS_IF([test "x$enable_arm_neon" != "xno"], [ - SIMDFLAGS="-march=armv8-a+fp+simd+crypto+crc -DARM_NEON" -]) - -CFLAGS="${SIMDFLAGS} ${CFLAGS}" - -AC_SUBST([SIMDFLAGS], [$SIMDFLAGS]) - AC_CHECK_HEADER(cblas.h, [AX_CBLAS]) AC_ARG_ENABLE([data-download], From 
5a1f6df5a90aaad0da81fec20a1f0c11d869a438 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 6 Jul 2023 02:05:17 -0400 Subject: [PATCH 21/24] [sse] ok something about that PR breaks either way. Now trying it with SSE --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b9b0b0c1..ea9dca07 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,7 +30,7 @@ jobs: LIBPOSTAL_DATA_DIR: ${GITHUB_WORKSPACE}/data run: | ./bootstrap.sh - ./configure --datadir=$LIBPOSTAL_DATA_DIR --disable-sse2 + ./configure --datadir=$LIBPOSTAL_DATA_DIR make - name: Test run: make check From 7bdcf96c9d9c61811ffd4570ba9fbbac5ffd237f Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 6 Jul 2023 16:00:55 -0400 Subject: [PATCH 22/24] [memalign] no more realloc on aligned pointers, just do an aligned malloc and copy to it. Slower but safe and this is not called that often in practice, usually to resize larger matrices. --- src/vector.h | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/vector.h b/src/vector.h index 78a0fad4..462a8baf 100644 --- a/src/vector.h +++ b/src/vector.h @@ -21,27 +21,19 @@ static inline void *_aligned_realloc(void *p, size_t size, size_t alignment) return NULL; } - if (size == 0) { + if (p == NULL) { return NULL; } - void *rp = realloc(p, size); - - /* If realloc result is not already at an aligned boundary, - _aligned_malloc a new block and copy the contents of the realloc'd - pointer to the aligned block, free the realloc'd pointer and return - the aligned pointer. 
- */ - if ( ((size_t)rp & (alignment - 1)) != 0) { - void *p1 = _aligned_malloc(size, alignment); - if (p1 != NULL) { - memcpy(p1, rp, size); - } - free(rp); - rp = p1; + void *p1 = _aligned_malloc(size, alignment); + if (p1 == NULL) { + free(p); + return NULL; } - return rp; + memcpy(p1, p, size); + free(p); + return p1; } static inline void _aligned_free(void *p) { From e4982b733fc864ac940918c437c7fa83223edc77 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 18 Aug 2023 13:39:36 -0400 Subject: [PATCH 23/24] [fix] memcpy in aligned vector/matrix resize needs to know the original size. Since this is an internal static function and does not affect client APIs, changing it to aligned_malloc, aligned_free, and aligned_resize, which takes the original pointer size as well as the new. On Windows it simply passes the pointer, new size, and alignment through _aligned_realloc, whereas on other platforms only the aligned_malloc is for new_size bytes and uses old_size bytes for memcpy --- src/matrix.h | 8 ++++---- src/vector.h | 27 ++++++++++++++++++--------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/matrix.h b/src/matrix.h index f6a31f1a..05b43db2 100644 --- a/src/matrix.h +++ b/src/matrix.h @@ -33,7 +33,7 @@ typedef enum { } name##_t; \ \ static name##_t *name##_new(size_t m, size_t n) { \ - name##_t *matrix = malloc(sizeof(name##_t)); \ + name##_t *matrix = malloc(sizeof(name##_t)); \ \ if (matrix == NULL) { \ return NULL; \ @@ -62,7 +62,7 @@ typedef enum { matrix->m = m; \ matrix->n = n; \ \ - matrix->values = _aligned_malloc(sizeof(type) * m * n, alignment); \ + matrix->values = aligned_malloc(sizeof(type) * m * n, alignment); \ if (matrix->values == NULL) { \ free(matrix); \ return NULL; \ @@ -86,7 +86,7 @@ typedef enum { if (self == NULL) return; \ \ if (self->values != NULL) { \ - _aligned_free(self->values); \ + aligned_free(self->values); \ } \ \ free(self); \ @@ -118,7 +118,7 @@ typedef enum { if (self == NULL) return false; \ \ if (m * n > 
(self->m * self->n)) { \ - type *ptr = _aligned_realloc(self->values, sizeof(type) * m * n, alignment); \ + type *ptr = aligned_resize(self->values, sizeof(type) * self->m * self->n, sizeof(type) * m * n, alignment); \ if (ptr == NULL) { \ return false; \ } \ diff --git a/src/vector.h b/src/vector.h index 462a8baf..562ed6cb 100644 --- a/src/vector.h +++ b/src/vector.h @@ -7,15 +7,24 @@ #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__) #include +static inline void *aligned_alloc(size_t size, size_t alignment) { + return _aligned_malloc(size, alignment); +} +static inline void *aligned_resize(void *p, size_t old_size, size_t new_size, size_t alignment) { + return _aligned_realloc(p, new_size, alignment); +} +static inline void aligned_free(void *p) { + _aligned_free(p); +} #else #include -static inline void *_aligned_malloc(size_t size, size_t alignment) +static inline void *aligned_malloc(size_t size, size_t alignment) { void *p; int ret = posix_memalign(&p, alignment, size); return (ret == 0) ? 
p : NULL; } -static inline void *_aligned_realloc(void *p, size_t size, size_t alignment) +static inline void *aligned_resize(void *p, size_t old_size, size_t new_size, size_t alignment) { if ((alignment == 0) || ((alignment & (alignment - 1)) != 0) || (alignment < sizeof(void *))) { return NULL; @@ -25,17 +34,17 @@ static inline void *_aligned_realloc(void *p, size_t size, size_t alignment) return NULL; } - void *p1 = _aligned_malloc(size, alignment); + void *p1 = aligned_malloc(new_size, alignment); if (p1 == NULL) { free(p); return NULL; } - memcpy(p1, p, size); + memcpy(p1, p, old_size); free(p); return p1; } -static inline void _aligned_free(void *p) +static inline void aligned_free(void *p) { free(p); } @@ -71,7 +80,7 @@ static inline void _aligned_free(void *p) name *array = malloc(sizeof(name)); \ if (array == NULL) return NULL; \ array->n = array->m = 0; \ - array->a = _aligned_malloc(size * sizeof(type), alignment); \ + array->a = aligned_malloc(size * sizeof(type), alignment); \ if (array->a == NULL) return NULL; \ array->m = size; \ return array; \ @@ -86,7 +95,7 @@ static inline void _aligned_free(void *p) } \ static inline bool name##_resize_aligned(name *array, size_t size, size_t alignment) { \ if (size <= array->m) return true; \ - type *ptr = _aligned_realloc(array->a, sizeof(type) * size, alignment); \ + type *ptr = aligned_resize(array->a, sizeof(type) * array->m, sizeof(type) * size, alignment); \ if (ptr == NULL) return false; \ array->a = ptr; \ array->m = size; \ @@ -152,7 +161,7 @@ static inline void _aligned_free(void *p) } \ static inline void name##_destroy_aligned(name *array) { \ if (array == NULL) return; \ - if (array->a != NULL) _aligned_free(array->a); \ + if (array->a != NULL) aligned_free(array->a); \ free(array); \ } @@ -174,7 +183,7 @@ static inline void _aligned_free(void *p) free_func(array->a[i]); \ } \ } \ - _aligned_free(array->a); \ + aligned_free(array->a); \ free(array); \ } From 330bd2e158846a52d04ed040503cba4e1087d82e 
Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 18 Aug 2023 22:37:50 -0400 Subject: [PATCH 24/24] [fix] Windows name --- src/vector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vector.h b/src/vector.h index 562ed6cb..52b8b8d0 100644 --- a/src/vector.h +++ b/src/vector.h @@ -7,7 +7,7 @@ #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__) #include -static inline void *aligned_alloc(size_t size, size_t alignment) { +static inline void *aligned_malloc(size_t size, size_t alignment) { return _aligned_malloc(size, alignment); } static inline void *aligned_resize(void *p, size_t old_size, size_t new_size, size_t alignment) {