diff --git a/configure.ac b/configure.ac index f740be12..4363c0fa 100644 --- a/configure.ac +++ b/configure.ac @@ -60,6 +60,17 @@ AC_SUBST([LIBPOSTAL_DATA_FILE_LATEST_VERSION], [$DATA_FILE_LATEST_VERSION]) AC_SUBST([LIBPOSTAL_PARSER_MODEL_LATEST_VERSION], [$PARSER_MODEL_LATEST_VERSION]) AC_SUBST([LIBPOSTAL_LANG_CLASS_MODEL_LATEST_VERSION], [$LANG_CLASS_MODEL_LATEST_VERSION]) +# Senzing data +AC_SUBST([LIBPOSTAL_SENZING_DATA_DIR_VERSION_STRING], [v1]) + +SENZING_DATA_FILE_LATEST_VERSION=$(cat $srcdir/versions/senzing/base_data) +SENZING_PARSER_MODEL_LATEST_VERSION=$(cat $srcdir/versions/senzing/parser) +SENZING_LANG_CLASS_MODEL_LATEST_VERSION=$(cat $srcdir/versions/senzing/language_classifier) + +AC_SUBST([LIBPOSTAL_SENZING_DATA_FILE_LATEST_VERSION], [$SENZING_DATA_FILE_LATEST_VERSION]) +AC_SUBST([LIBPOSTAL_SENZING_PARSER_MODEL_LATEST_VERSION], [$SENZING_PARSER_MODEL_LATEST_VERSION]) +AC_SUBST([LIBPOSTAL_SENZING_LANG_CLASS_MODEL_LATEST_VERSION], [$SENZING_LANG_CLASS_MODEL_LATEST_VERSION]) + AC_CONFIG_FILES([Makefile libpostal.pc src/Makefile @@ -134,6 +145,13 @@ AC_ARG_ENABLE([data-download], *) AC_MSG_ERROR([bad value ${enableval} for --disable-data-download]) ;; esac], [DOWNLOAD_DATA=true]) +AC_ARG_ENABLE([senzing-datamodel], + AS_HELP_STRING([[[--enable-senzing-datamodel]]], + [Use Senzing data model in lieu of the default one]), + [ + AS_IF([test "x$enableval" != "xno"], [DATAMODEL="senzing"]) # this action also runs for --disable-senzing-datamodel, so check enableval + AC_SUBST([LIBPOSTAL_DATA_MODEL], [$DATAMODEL]) + ]) AM_CONDITIONAL([DOWNLOAD_DATA], [test "x$DOWNLOAD_DATA" = "xtrue"]) diff --git a/src/libpostal_data.in b/src/libpostal_data.in index 0a3d27f2..6b0c04e9 100755 --- a/src/libpostal_data.in +++ b/src/libpostal_data.in @@ -14,6 +14,8 @@ LIBPOSTAL_DATA_DIR=$3 MB=$((1024*1024)) CHUNK_SIZE=$((64*$MB)) +DATAMODEL="@LIBPOSTAL_DATA_MODEL@" + # Not loving this approach but there appears to be no way to query the size # of a release asset without using the Github API LIBPOSTAL_DATA_FILE_CHUNKS=1 @@ -34,6 +36,20 @@ LIBPOSTAL_LANG_CLASS_FILE="language_classifier.tar.gz" 
LIBPOSTAL_BASE_URL="https://github.com/$LIBPOSTAL_REPO_NAME/releases/download" +if [ "$DATAMODEL" = "senzing" ]; then # quoted operand and POSIX "=" so an empty DATAMODEL compares cleanly + LIBPOSTAL_DATA_FILE_CHUNKS=1 + LIBPOSTAL_PARSER_MODEL_CHUNKS=1 + LIBPOSTAL_LANG_CLASS_MODEL_CHUNKS=1 + + LIBPOSTAL_DATA_DIR_VERSION_STRING="@LIBPOSTAL_SENZING_DATA_DIR_VERSION_STRING@" + + LIBPOSTAL_DATA_FILE_LATEST_VERSION="@LIBPOSTAL_SENZING_DATA_FILE_LATEST_VERSION@" + LIBPOSTAL_PARSER_MODEL_LATEST_VERSION="@LIBPOSTAL_SENZING_PARSER_MODEL_LATEST_VERSION@" + LIBPOSTAL_LANG_CLASS_MODEL_LATEST_VERSION="@LIBPOSTAL_SENZING_LANG_CLASS_MODEL_LATEST_VERSION@" + + LIBPOSTAL_BASE_URL="https://public-read-libpostal-data.s3.amazonaws.com" +fi + LIBPOSTAL_DATA_VERSION_FILE=$LIBPOSTAL_DATA_DIR/data_version LIBPOSTAL_DATA_DIR_VERSION= diff --git a/versions/senzing/base_data b/versions/senzing/base_data new file mode 100644 index 00000000..60453e69 --- /dev/null +++ b/versions/senzing/base_data @@ -0,0 +1 @@ +v1.0.0 \ No newline at end of file diff --git a/versions/senzing/language_classifier b/versions/senzing/language_classifier new file mode 100644 index 00000000..60453e69 --- /dev/null +++ b/versions/senzing/language_classifier @@ -0,0 +1 @@ +v1.0.0 \ No newline at end of file diff --git a/versions/senzing/parser b/versions/senzing/parser new file mode 100644 index 00000000..0ec25f75 --- /dev/null +++ b/versions/senzing/parser @@ -0,0 +1 @@ +v1.0.0