[parser] gshuf (the Mac equivalent of shuf) is considerably slower than shuf, so support for it has been removed. Training must be done on Linux unless a better alternative is found for shuffling large files on Mac.
This commit is contained in:
@@ -52,10 +52,8 @@ AC_CONFIG_FILES([Makefile
|
||||
src/sparkey/Makefile])
|
||||
|
||||
AC_CHECK_PROG([FOUND_SHUF], [shuf], [yes])
|
||||
AC_CHECK_PROG([FOUND_GSHUF], [gshuf], [yes])
|
||||
|
||||
AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf available])])
|
||||
AS_IF([test "x$FOUND_GSHUF" = xyes], [AC_DEFINE([HAVE_GSHUF], [1], [gshuf available])])
|
||||
|
||||
LIBPOSTAL_DATA_DIR=$datadir/libpostal
|
||||
AC_DEFINE_UNQUOTED([LIBPOSTAL_DATA_DIR], ["$LIBPOSTAL_DATA_DIR"], [Data directory for libpostsal])
|
||||
|
||||
@@ -216,17 +216,18 @@ bool address_parser_train(address_parser_t *self, char *filename, uint32_t num_i
|
||||
|
||||
trainer->iterations = iter;
|
||||
|
||||
log_debug("Shuffling\n");
|
||||
#if defined(HAVE_SHUF)
|
||||
log_info("Shuffling\n");
|
||||
|
||||
/*
|
||||
if (!shuffle_file(filename)) {
|
||||
log_error("Error in shuffle\n");
|
||||
averaged_perceptron_trainer_destroy(trainer);
|
||||
return false;
|
||||
}
|
||||
|
||||
log_debug("Shuffle complete\n");
|
||||
*/
|
||||
log_info("Shuffle complete\n");
|
||||
#endif
|
||||
|
||||
if (!address_parser_train_epoch(self, trainer, filename)) {
|
||||
log_error("Error in epoch\n");
|
||||
averaged_perceptron_trainer_destroy(trainer);
|
||||
@@ -249,9 +250,8 @@ int main(int argc, char **argv) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
#if !defined(HAVE_SHUF) && !defined(HAVE_GSHUF)
|
||||
log_error("shuf or gshuf must be installed to train address parser. Please install and reconfigure libpostal\n");
|
||||
exit(EXIT_FAILURE);
|
||||
#if !defined(HAVE_SHUF)
|
||||
log_warn("shuf must be installed to train address parser effectively. If this is a production machine, please install shuf. No shuffling will be performed.\n");
|
||||
#endif
|
||||
|
||||
char *filename = argv[1];
|
||||
|
||||
Reference in New Issue
Block a user