[parser] uint64_t chunk size, no warning if gshuf is available
This commit is contained in:
@@ -1,3 +1,5 @@
|
|||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "address_parser.h"
|
#include "address_parser.h"
|
||||||
#include "address_parser_io.h"
|
#include "address_parser_io.h"
|
||||||
#include "address_dictionary.h"
|
#include "address_dictionary.h"
|
||||||
@@ -20,9 +22,9 @@ KHASH_MAP_INIT_STR(phrase_stats, phrase_stats_t)
|
|||||||
KHASH_MAP_INIT_STR(postal_code_context_phrases, khash_t(str_set) *)
|
KHASH_MAP_INIT_STR(postal_code_context_phrases, khash_t(str_set) *)
|
||||||
KHASH_MAP_INIT_STR(phrase_types, address_parser_types_t)
|
KHASH_MAP_INIT_STR(phrase_types, address_parser_types_t)
|
||||||
|
|
||||||
#define CHUNK_SIZE_MB 1024 * 1024
|
/* 1024 * 1024 as a 64-bit unsigned constant. The whole expansion is
 * parenthesized so the macro behaves as a single operand in expressions
 * such as `x / CHUNK_SIZE_MB`. */
#define CHUNK_SIZE_MB (UINT64_C(1024) * UINT64_C(1024))
|
||||||
#define CHUNK_SIZE_GB 1024 * (CHUNK_SIZE_MB)
|
/* 1024 * CHUNK_SIZE_MB, evaluated in 64-bit unsigned arithmetic. The whole
 * expansion is parenthesized so the macro behaves as a single operand in
 * expressions such as `x / CHUNK_SIZE_GB`. */
#define CHUNK_SIZE_GB (UINT64_C(1024) * (CHUNK_SIZE_MB))
|
||||||
#define DEFAULT_SHUFFLE_CHUNK_SIZE 2 * (CHUNK_SIZE_GB)
|
/* 2 * CHUNK_SIZE_GB, evaluated in 64-bit unsigned arithmetic so the value
 * (2^31) does not overflow a 32-bit int. The whole expansion is
 * parenthesized so the macro behaves as a single operand wherever it is
 * used. */
#define DEFAULT_SHUFFLE_CHUNK_SIZE (UINT64_C(2) * (CHUNK_SIZE_GB))
|
||||||
|
|
||||||
// Training
|
// Training
|
||||||
|
|
||||||
@@ -1053,7 +1055,7 @@ int main(int argc, char **argv) {
|
|||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(HAVE_SHUF)
|
#if !defined(HAVE_SHUF) && !defined(HAVE_GSHUF)
|
||||||
log_warn("shuf must be installed to train address parser effectively. If this is a production machine, please install shuf. No shuffling will be performed.\n");
|
log_warn("shuf must be installed to train address parser effectively. If this is a production machine, please install shuf. No shuffling will be performed.\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user