Enable downloading of Senzing data model.

This commit is contained in:
Oskar Thorbjornsson
2023-02-12 17:58:36 -08:00
parent cb80555b24
commit ec9e0e341f
5 changed files with 37 additions and 0 deletions

View File

@@ -60,6 +60,17 @@ AC_SUBST([LIBPOSTAL_DATA_FILE_LATEST_VERSION], [$DATA_FILE_LATEST_VERSION])
AC_SUBST([LIBPOSTAL_PARSER_MODEL_LATEST_VERSION], [$PARSER_MODEL_LATEST_VERSION]) AC_SUBST([LIBPOSTAL_PARSER_MODEL_LATEST_VERSION], [$PARSER_MODEL_LATEST_VERSION])
AC_SUBST([LIBPOSTAL_LANG_CLASS_MODEL_LATEST_VERSION], [$LANG_CLASS_MODEL_LATEST_VERSION]) AC_SUBST([LIBPOSTAL_LANG_CLASS_MODEL_LATEST_VERSION], [$LANG_CLASS_MODEL_LATEST_VERSION])
# Senzing data
AC_SUBST([LIBPOSTAL_SENZING_DATA_DIR_VERSION_STRING], [v1])
SENZING_DATA_FILE_LATEST_VERSION=$(cat $srcdir/versions/senzing/base_data)
SENZING_PARSER_MODEL_LATEST_VERSION=$(cat $srcdir/versions/senzing/parser)
SENZING_LANG_CLASS_MODEL_LATEST_VERSION=$(cat $srcdir/versions/senzing/language_classifier)
AC_SUBST([LIBPOSTAL_SENZING_DATA_FILE_LATEST_VERSION], [$SENZING_DATA_FILE_LATEST_VERSION])
AC_SUBST([LIBPOSTAL_SENZING_PARSER_MODEL_LATEST_VERSION], [$SENZING_PARSER_MODEL_LATEST_VERSION])
AC_SUBST([LIBPOSTAL_SENZING_LANG_CLASS_MODEL_LATEST_VERSION], [$SENZING_LANG_CLASS_MODEL_LATEST_VERSION])
AC_CONFIG_FILES([Makefile AC_CONFIG_FILES([Makefile
libpostal.pc libpostal.pc
src/Makefile src/Makefile
@@ -134,6 +145,13 @@ AC_ARG_ENABLE([data-download],
*) AC_MSG_ERROR([bad value ${enableval} for --disable-data-download]) ;; *) AC_MSG_ERROR([bad value ${enableval} for --disable-data-download]) ;;
esac], [DOWNLOAD_DATA=true]) esac], [DOWNLOAD_DATA=true])
AC_ARG_ENABLE([senzing-datamodel],
AS_HELP_STRING([[[--enable-senzing-datamodel]]],
[Use Senzing data model in lieu of the default one]),
[
DATAMODEL="senzing"
AC_SUBST([LIBPOSTAL_DATA_MODEL], [$DATAMODEL])
])
AM_CONDITIONAL([DOWNLOAD_DATA], [test "x$DOWNLOAD_DATA" = "xtrue"]) AM_CONDITIONAL([DOWNLOAD_DATA], [test "x$DOWNLOAD_DATA" = "xtrue"])

View File

@@ -14,6 +14,8 @@ LIBPOSTAL_DATA_DIR=$3
MB=$((1024*1024)) MB=$((1024*1024))
CHUNK_SIZE=$((64*$MB)) CHUNK_SIZE=$((64*$MB))
DATAMODEL="@LIBPOSTAL_DATA_MODEL@"
# Not loving this approach but there appears to be no way to query the size # Not loving this approach but there appears to be no way to query the size
# of a release asset without using the Github API # of a release asset without using the Github API
LIBPOSTAL_DATA_FILE_CHUNKS=1 LIBPOSTAL_DATA_FILE_CHUNKS=1
@@ -34,6 +36,20 @@ LIBPOSTAL_LANG_CLASS_FILE="language_classifier.tar.gz"
LIBPOSTAL_BASE_URL="https://github.com/$LIBPOSTAL_REPO_NAME/releases/download" LIBPOSTAL_BASE_URL="https://github.com/$LIBPOSTAL_REPO_NAME/releases/download"
if [ $DATAMODEL == "senzing" ]; then
LIBPOSTAL_DATA_FILE_CHUNKS=1
LIBPOSTAL_PARSER_MODEL_CHUNKS=1
LIBPOSTAL_LANG_CLASS_MODEL_CHUNKS=1
LIBPOSTAL_DATA_DIR_VERSION_STRING="@LIBPOSTAL_SENZING_DATA_DIR_VERSION_STRING@"
LIBPOSTAL_DATA_FILE_LATEST_VERSION="@LIBPOSTAL_SENZING_DATA_FILE_LATEST_VERSION@"
LIBPOSTAL_PARSER_MODEL_LATEST_VERSION="@LIBPOSTAL_SENZING_PARSER_MODEL_LATEST_VERSION@"
LIBPOSTAL_LANG_CLASS_MODEL_LATEST_VERSION="@LIBPOSTAL_SENZING_LANG_CLASS_MODEL_LATEST_VERSION@"
LIBPOSTAL_BASE_URL="https://public-read-libpostal-data.s3.amazonaws.com"
fi
LIBPOSTAL_DATA_VERSION_FILE=$LIBPOSTAL_DATA_DIR/data_version LIBPOSTAL_DATA_VERSION_FILE=$LIBPOSTAL_DATA_DIR/data_version
LIBPOSTAL_DATA_DIR_VERSION= LIBPOSTAL_DATA_DIR_VERSION=

View File

@@ -0,0 +1 @@
v1.0.0

View File

@@ -0,0 +1 @@
v1.0.0

1
versions/senzing/parser Normal file
View File

@@ -0,0 +1 @@
v1.0.0