[parser] gshuf (the Mac equivalent of shuf) is quite a bit slower than shuf, so support for it is removed. Training needs to be done on Linux unless a better alternative is found for shuffling large files on Mac.
This commit is contained in:
@@ -52,10 +52,8 @@ AC_CONFIG_FILES([Makefile
|
|||||||
src/sparkey/Makefile])
|
src/sparkey/Makefile])
|
||||||
|
|
||||||
AC_CHECK_PROG([FOUND_SHUF], [shuf], [yes])
|
AC_CHECK_PROG([FOUND_SHUF], [shuf], [yes])
|
||||||
AC_CHECK_PROG([FOUND_GSHUF], [gshuf], [yes])
|
|
||||||
|
|
||||||
AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf available])])
|
AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf available])])
|
||||||
AS_IF([test "x$FOUND_GSHUF" = xyes], [AC_DEFINE([HAVE_GSHUF], [1], [gshuf available])])
|
|
||||||
|
|
||||||
LIBPOSTAL_DATA_DIR=$datadir/libpostal
|
LIBPOSTAL_DATA_DIR=$datadir/libpostal
|
||||||
AC_DEFINE_UNQUOTED([LIBPOSTAL_DATA_DIR], ["$LIBPOSTAL_DATA_DIR"], [Data directory for libpostsal])
|
AC_DEFINE_UNQUOTED([LIBPOSTAL_DATA_DIR], ["$LIBPOSTAL_DATA_DIR"], [Data directory for libpostsal])
|
||||||
|
|||||||
@@ -216,17 +216,18 @@ bool address_parser_train(address_parser_t *self, char *filename, uint32_t num_i
|
|||||||
|
|
||||||
trainer->iterations = iter;
|
trainer->iterations = iter;
|
||||||
|
|
||||||
log_debug("Shuffling\n");
|
#if defined(HAVE_SHUF)
|
||||||
|
log_info("Shuffling\n");
|
||||||
|
|
||||||
/*
|
|
||||||
if (!shuffle_file(filename)) {
|
if (!shuffle_file(filename)) {
|
||||||
log_error("Error in shuffle\n");
|
log_error("Error in shuffle\n");
|
||||||
averaged_perceptron_trainer_destroy(trainer);
|
averaged_perceptron_trainer_destroy(trainer);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
log_debug("Shuffle complete\n");
|
log_info("Shuffle complete\n");
|
||||||
*/
|
#endif
|
||||||
|
|
||||||
if (!address_parser_train_epoch(self, trainer, filename)) {
|
if (!address_parser_train_epoch(self, trainer, filename)) {
|
||||||
log_error("Error in epoch\n");
|
log_error("Error in epoch\n");
|
||||||
averaged_perceptron_trainer_destroy(trainer);
|
averaged_perceptron_trainer_destroy(trainer);
|
||||||
@@ -249,9 +250,8 @@ int main(int argc, char **argv) {
|
|||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(HAVE_SHUF) && !defined(HAVE_GSHUF)
|
#if !defined(HAVE_SHUF)
|
||||||
log_error("shuf or gshuf must be installed to train address parser. Please install and reconfigure libpostal\n");
|
log_warn("shuf must be installed to train address parser effectively. If this is a production machine, please install shuf. No shuffling will be performed.\n");
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
char *filename = argv[1];
|
char *filename = argv[1];
|
||||||
|
|||||||
Reference in New Issue
Block a user