Files
libpostal/src/sparkey/sparkey.h
2015-07-09 15:26:11 -04:00

674 lines
27 KiB
C

/*
* Copyright (c) 2012-2013 Spotify AB
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
#ifndef SPOTIFY_SPARKEY_H_INCLUDED
#define SPOTIFY_SPARKEY_H_INCLUDED
/**
* \mainpage Sparkey C API
* \section intro_sec Getting started
*
* For a complete listing of available functions, see sparkey.h .
*
* \section logwriter Writing to a log file
*
* This section contains all functions relevant for writing entries to a log.
* Writing to the same log file is not thread safe. Only use the writer objects
* from one thread at a time, and make sure to only write to a file from one process.
* The library will not do any form of locking or checking for other writers, so be
* careful.
*
* Basic workflow:
* - Create and initialize the logwriter:
* \code
* sparkey_logwriter *mywriter;
* sparkey_returncode returncode = sparkey_logwriter_create(&mywriter, "mylog.spl", SPARKEY_COMPRESSION_NONE, 0);
* // TODO: check the returncode
* \endcode
* - Write to the log:
* \code
* const char *mykey = "mykey";
* const char *myvalue = "this is my value";
* sparkey_returncode returncode = sparkey_logwriter_put(mywriter, strlen(mykey), (uint8_t*)mykey, strlen(myvalue), (uint8_t*)myvalue);
* // TODO: check the returncode
* \endcode
* - Close it when you're done:
* \code
* sparkey_returncode returncode = sparkey_logwriter_close(&mywriter);
* // TODO: check the returncode
* \endcode
*
* \section logreader Reading from a log file
*
* This section contains all functions relevant for reading entries from a log.
* A sparkey_logreader may be shared between multiple threads, but \ref sparkey_logreader_open
* and \ref sparkey_logreader_close are not thread safe.
*
* The logreader is not useful by itself. You also need a sparkey_logiter to iterate through the entries.
* This is a highly mutable struct and should not be shared between threads. It is not threadsafe.
*
* Here is a basic workflow for iterating through all entries in a logfile:
* - Create a logreader
* \code
* sparkey_logreader *myreader;
* sparkey_returncode returncode = sparkey_logreader_open(&myreader, "mylog.spl");
* \endcode
* - Create a logiter
* \code
* sparkey_logiter *myiter;
* sparkey_returncode returncode = sparkey_logiter_create(&myiter, myreader);
* \endcode
* - Perform the iteration:
* \code
* while (1) {
* sparkey_returncode returncode = sparkey_logiter_next(myiter, myreader);
* // TODO: check the returncode
* if (sparkey_logiter_state(myiter) != SPARKEY_ITER_ACTIVE) {
* break;
* }
* uint64_t wanted_keylen = sparkey_logiter_keylen(myiter);
* uint8_t *keybuf = malloc(wanted_keylen);
* uint64_t actual_keylen;
* returncode = sparkey_logiter_fill_key(myiter, myreader, wanted_keylen, keybuf, &actual_keylen);
* // TODO: check the returncode
* // TODO: assert actual_keylen == wanted_keylen
* uint64_t wanted_valuelen = sparkey_logiter_valuelen(myiter);
* uint8_t *valuebuf = malloc(wanted_valuelen);
* uint64_t actual_valuelen;
* returncode = sparkey_logiter_fill_value(myiter, myreader, wanted_valuelen, valuebuf, &actual_valuelen);
* // TODO: check the returncode
* // TODO: assert actual_valuelen == wanted_valuelen
* // Do stuff with key and value
* free(keybuf);
* free(valuebuf);
* }
* \endcode
* Note that you have to allocate memory for the key and value manually - Sparkey does not allocate memory except for when
* creating readers, writers and iterators.
*
* - Alternatively, you can preallocate the buffers by using
* max_key_len and max_value_len provided by the log header:
* \code
* uint8_t *keybuf = malloc(sparkey_logreader_maxkeylen(myreader));
* uint8_t *valuebuf = malloc(sparkey_logreader_maxvaluelen(myreader));
* while (1) {
* sparkey_returncode returncode = sparkey_logiter_next(myiter, myreader);
* // TODO: check the returncode
* if (sparkey_logiter_state(myiter) != SPARKEY_ITER_ACTIVE) {
* break;
* }
* uint64_t wanted_keylen = sparkey_logiter_keylen(myiter);
* uint64_t actual_keylen;
* returncode = sparkey_logiter_fill_key(myiter, myreader, wanted_keylen, keybuf, &actual_keylen);
* // TODO: check the returncode
* // TODO: assert actual_keylen == wanted_keylen
* uint64_t wanted_valuelen = sparkey_logiter_valuelen(myiter);
* uint64_t actual_valuelen;
* returncode = sparkey_logiter_fill_value(myiter, myreader, wanted_valuelen, valuebuf, &actual_valuelen);
* // TODO: check the returncode
* // TODO: assert actual_valuelen == wanted_valuelen
* // Do stuff with key and value
* }
* free(keybuf);
* free(valuebuf);
* \endcode
* - You can also skip allocating at all, if you can process the key and/or value in chunks. Here's an example for processing a key in chunks,
* but the same can be applied for values:
* \code
* uint64_t total_len = sparkey_logiter_keylen(myiter);
* while (total_len > 0) {
* uint8_t *buf;
* uint64_t len;
* sparkey_returncode returncode = sparkey_logiter_keychunk(myiter, myreader, total_len, &buf, &len);
* // TODO: check the returncode
* // Example: use the chunks to write to standard out
* fwrite(buf, 1, len, stdout);
* total_len -= len;
* }
* \endcode
* - Close everything when you're done:
* \code
* sparkey_logreader_close(&myreader);
* sparkey_logiter_close(&myiter);
* \endcode
*
* \section hashwriter Creating hash files from log files
*
* This section only covers the function sparkey_hash_write, which creates a hash file.
*
* This is all you need to do to create a hash file based on an existing log file:
* \code
* sparkey_returncode returncode = sparkey_hash_write("mylog.spi", "mylog.spl", 0);
* // TODO: check the returncode
* \endcode
*
* \section hashreader Reading from a hash-file/log-file pair
*
* This section covers all functions relevant for reading live key/value pairs from a log and hash file.
* The documentation is very similar to the one for reading from a log file, because this api is an extension.
* Random lookups is the only feature that's added, and iteration simply skips dead entries.
*
* A sparkey_hashreader may be shared between multiple threads, but \ref sparkey_hash_open
* and \ref sparkey_hash_close are not thread safe.
*
* The hashreader is not useful by itself. You also need a sparkey_logiter to do random lookups and
* iterate through the entries.
* This is a highly mutable struct and should not be shared between threads. It is not threadsafe.
*
* Here is a basic workflow for iterating through all live entries in a log and hash file:
* - Create a hashreader
* \code
* sparkey_hashreader *myreader;
* sparkey_returncode returncode = sparkey_hash_open(&myreader, "mylog.spi", "mylog.spl");
* // TODO: check the returncode
* \endcode
* - Create a logiter
* \code
* sparkey_logiter *myiter;
* sparkey_returncode returncode = sparkey_logiter_create(&myiter, sparkey_hash_getreader(myreader));
* // TODO: check the returncode
* \endcode
* - Iteration is exactly as described previously, but uses \ref sparkey_logiter_hashnext instead of
* \ref sparkey_logiter_next.
* - Random lookup
* \code
* sparkey_returncode returncode = sparkey_hash_get(myreader, (uint8_t*)"mykey", 5, myiter);
* if (sparkey_logiter_state(myiter) != SPARKEY_ITER_ACTIVE) {
* // Entry not found;
* } else {
* // Extracting value is done the same as when iterating.
* uint64_t wanted_valuelen = sparkey_logiter_valuelen(myiter);
* uint8_t *valuebuf = malloc(wanted_valuelen);
* uint64_t actual_valuelen;
* returncode = sparkey_logiter_fill_value(myiter, sparkey_hash_getreader(myreader), wanted_valuelen, valuebuf, &actual_valuelen);
* }
* \endcode
* Note that this API allows you to do a random seek and then iterate through the following entries. This may be
* useful when you insert groups of entries in order and quickly want to access all of them.
* - Close everything when you're done:
* \code
* sparkey_hash_close(&myreader);
* sparkey_logiter_close(&myiter);
* \endcode
*/
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Return codes produced by the Sparkey API.
* SPARKEY_SUCCESS is zero; every error code is negative.
* The codes are numbered in ranges that follow their name prefixes:
* -1xx for file related errors, -2xx for log related errors and
* -3xx for hash related errors.
* Use sparkey_errstring to get a human readable string for a code.
*
* The enumerator list deliberately has no trailing comma, so that the header
* is also accepted by strict C89 and C++98/03 compilers.
*/
typedef enum {
  SPARKEY_SUCCESS = 0,
  SPARKEY_INTERNAL_ERROR = -1,

  /* File related errors. */
  SPARKEY_FILE_NOT_FOUND = -100,
  SPARKEY_PERMISSION_DENIED = -101,
  SPARKEY_TOO_MANY_OPEN_FILES = -102,
  SPARKEY_FILE_TOO_LARGE = -103,
  SPARKEY_FILE_ALREADY_EXISTS = -104,
  SPARKEY_FILE_BUSY = -105,
  SPARKEY_FILE_IS_DIRECTORY = -106,
  SPARKEY_FILE_SIZE_EXCEEDED = -107,
  SPARKEY_FILE_CLOSED = -108,
  SPARKEY_OUT_OF_DISK = -109,
  SPARKEY_UNEXPECTED_EOF = -110,
  SPARKEY_MMAP_FAILED = -111,

  /* Log related errors. */
  SPARKEY_WRONG_LOG_MAGIC_NUMBER = -200,
  SPARKEY_WRONG_LOG_MAJOR_VERSION = -201,
  SPARKEY_UNSUPPORTED_LOG_MINOR_VERSION = -202,
  SPARKEY_LOG_TOO_SMALL = -203,
  SPARKEY_LOG_CLOSED = -204,
  SPARKEY_LOG_ITERATOR_INACTIVE = -205,
  SPARKEY_LOG_ITERATOR_MISMATCH = -206,
  SPARKEY_LOG_ITERATOR_CLOSED = -207,
  SPARKEY_LOG_HEADER_CORRUPT = -208,
  SPARKEY_INVALID_COMPRESSION_BLOCK_SIZE = -209,
  SPARKEY_INVALID_COMPRESSION_TYPE = -210,

  /* Hash related errors. */
  SPARKEY_WRONG_HASH_MAGIC_NUMBER = -300,
  SPARKEY_WRONG_HASH_MAJOR_VERSION = -301,
  SPARKEY_UNSUPPORTED_HASH_MINOR_VERSION = -302,
  SPARKEY_HASH_TOO_SMALL = -303,
  SPARKEY_HASH_CLOSED = -304,
  SPARKEY_FILE_IDENTIFIER_MISMATCH = -305,
  SPARKEY_HASH_HEADER_CORRUPT = -306,
  SPARKEY_HASH_SIZE_INVALID = -307
} sparkey_returncode;
/**
* Get a human readable string from a return code.
* @param code a return code
* @returns a string representing the return code.
*/
const char * sparkey_errstring(sparkey_returncode code);
/* logwriter */
/**
* A structure holding all the data necessary to add entries to a log file.
*/
struct sparkey_logwriter;
typedef struct sparkey_logwriter sparkey_logwriter;
/**
* Selects whether block compression is used for a log file.
*/
typedef enum {
  SPARKEY_COMPRESSION_NONE = 0,   /* no block compression */
  SPARKEY_COMPRESSION_SNAPPY = 1  /* blocks are compressed with Snappy */
} sparkey_compression_type;
/**
* The kind of operation a log entry represents.
*/
typedef enum {
  SPARKEY_ENTRY_PUT = 0,    /* a key/value pair was appended */
  SPARKEY_ENTRY_DELETE = 1  /* a delete operation for a key was appended */
} sparkey_entry_type;
/**
* The state of a log iterator, as reported by sparkey_logiter_state.
*/
typedef enum {
  SPARKEY_ITER_NEW = 0,     /* presumably: created, not yet advanced - TODO confirm */
  SPARKEY_ITER_ACTIVE = 1,  /* positioned at an entry */
  SPARKEY_ITER_CLOSED = 2,  /* the last entry has been passed */
  SPARKEY_ITER_INVALID = 3  /* something went wrong, or a looked up key was not found */
} sparkey_iter_state;
struct sparkey_logreader;
typedef struct sparkey_logreader sparkey_logreader;
struct sparkey_logiter;
typedef struct sparkey_logiter sparkey_logiter;
struct sparkey_hashreader;
typedef struct sparkey_hashreader sparkey_hashreader;
/**
* Creates a new Sparkey log file, possibly overwriting an already existing.
* @param log a double reference to a sparkey_logwriter structure that gets allocated and initialized by this call.
* @param filename the file to create.
* @param compression_type NONE or SNAPPY, specifies if block compression should be used or not.
* @param compression_block_size is only relevant if compression type is not NONE.
* It represents the maximum number of bytes of an uncompressed block.
* @return SPARKEY_SUCCESS if all goes well.
*/
sparkey_returncode sparkey_logwriter_create(sparkey_logwriter **log, const char *filename, sparkey_compression_type compression_type, int compression_block_size);
/**
* Append to an existing Sparkey log file.
* NOTE(review): unlike sparkey_logwriter_create, no compression parameters are taken;
* presumably the settings of the existing file are reused - confirm.
* @param log a double reference to a sparkey_logwriter structure that gets allocated and initialized by this call.
* @param filename the file to open for appending.
* @return SPARKEY_SUCCESS if all goes well.
*/
sparkey_returncode sparkey_logwriter_append(sparkey_logwriter **log, const char *filename);
/**
* Append a key/value pair to the log file
* @param log a reference to an open log writer.
* @param keylen the number of bytes of the key data block
* @param key a pointer to a block of continuous data where the key can be found.
* Does not need to be NUL-terminated.
* @param valuelen the number of bytes of the value data block
* @param value a pointer to a block of continuous data where the value can be found.
* Does not need to be NUL-terminated.
* @return SPARKEY_SUCCESS if all goes well.
*/
sparkey_returncode sparkey_logwriter_put(sparkey_logwriter *log, uint64_t keylen, const uint8_t *key, uint64_t valuelen, const uint8_t *value);
/**
* Append a delete operation for a key to the log file
* @param log a reference to an open log writer.
* @param keylen the number of bytes of the key data block
* @param key a pointer to a block of continuous data where the key can be found.
* Does not need to be NUL-terminated.
* @return SPARKEY_SUCCESS if all goes well.
*/
sparkey_returncode sparkey_logwriter_delete(sparkey_logwriter *log, uint64_t keylen, const uint8_t *key);
/**
* Flush any open compression block to file buffer.
* Flush any open file buffer to disk.
* Rewrite the header on disk.
* This enables readers to read from the log.
* @param log a reference to an open log writer.
* @return SPARKEY_SUCCESS if all goes well.
*/
sparkey_returncode sparkey_logwriter_flush(sparkey_logwriter *log);
/**
* Flushes the log, then closes the file and marks the log as closed.
* The log will be closed after this, the sparkey_logwriter struct
* referenced will be freed and *log will be set to NULL.
* @param log a double reference to an open log writer.
* @return SPARKEY_SUCCESS if all goes well.
*/
sparkey_returncode sparkey_logwriter_close(sparkey_logwriter **log);
/* logreader */
/**
* Opens a log file for reading. The logreader is threadsafe, except during opening or closing.
* @param log a double reference to a logreader.
* @param filename a filename of a file containing a sparkey log.
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a return code indicating the error.
*/
sparkey_returncode sparkey_logreader_open(sparkey_logreader **log, const char *filename);
/**
* Closes a logreader.
* It's allowed to close a logreader while there are open logiterators.
* Further operations on such logiterators will fail.
* This is a failsafe operation.
* @param log a double reference to a logreader
* This will be set to NULL after close.
*/
void sparkey_logreader_close(sparkey_logreader **log);
/**
* Get the size of the largest key in the log.
* This can be used to preallocate a key buffer that fits every entry.
* @param log a reference to a logreader.
* @returns the size of the largest key in the log.
*/
uint64_t sparkey_logreader_maxkeylen(sparkey_logreader *log);
/**
* Get the size of the largest value in the log.
* This can be used to preallocate a value buffer that fits every entry.
* @param log a reference to a logreader.
* @returns the size of the largest value in the log.
*/
uint64_t sparkey_logreader_maxvaluelen(sparkey_logreader *log);
/**
* Get the blocksize for a reader
* @param log a reference to a logreader.
* @returns the blocksize
*/
int sparkey_logreader_get_compression_blocksize(sparkey_logreader *log);
/**
* Get the compression type for a reader
* @param log a reference to a logreader.
* @returns the compression type
*/
sparkey_compression_type sparkey_logreader_get_compression_type(sparkey_logreader *log);
/**
* Initializes a logiter and associates it with a logreader.
* The logreader must be open. The logiter is not threadsafe.
* @param iter a double reference to an uninitialized logiter. Will be set on success.
* @param log an open logreader
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a returncode indicating the error.
*/
sparkey_returncode sparkey_logiter_create(sparkey_logiter **iter, sparkey_logreader *log);
/**
* Closes a log iterator.
* This is a failsafe operation.
* @param iter a double reference to a log iterator.
* This will be set to NULL after close.
*/
void sparkey_logiter_close(sparkey_logiter **iter);
/**
* Skips to a specific block in the logfile.
* The position must be a valid block start, but that will not be verified by the function.
* If an illegal position is used, all other operations on this logiterator are undefined,
* and may even segfault.
* @param iter an open log iterator.
* @param log an open logreader associated with iter.
* @param position an offset into the logfile where a block begins.
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a returncode indicating the error.
*/
sparkey_returncode sparkey_logiter_seek(sparkey_logiter *iter, sparkey_logreader *log, uint64_t position);
/**
* Skip a number of entries.
* This is equivalent to calling sparkey_logiter_next count number of times.
* @param iter an open logiter
* @param log an open logreader associated with iter.
* @param count the number of entries to skip.
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a returncode indicating the error.
*/
sparkey_returncode sparkey_logiter_skip(sparkey_logiter *iter, sparkey_logreader *log, int count);
/**
* Prepares the logiter to start reading from the next entry.
* iter->state will be SPARKEY_ITER_CLOSED if the last entry has been passed.
* iter->state will be SPARKEY_ITER_INVALID if anything goes wrong.
* iter->state will be SPARKEY_ITER_ACTIVE if it successfully reached the next entry.
* @param iter an open logiter
* @param log an open logreader associated with iter.
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a returncode indicating the error.
*/
sparkey_returncode sparkey_logiter_next(sparkey_logiter *iter, sparkey_logreader *log);
/**
* Resets the iterator to the start of the current entry. This is only valid if
* iter->state is SPARKEY_ITER_ACTIVE.
* @param iter an open logiter
* @param log an open logreader associated with iter.
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a returncode indicating the error.
*/
sparkey_returncode sparkey_logiter_reset(sparkey_logiter *iter, sparkey_logreader *log);
/**
* Consumes and returns part of or all of the key of the current entry.
* Usage example:
* uint8_t *res;
* uint64_t len;
* sparkey_returncode code = sparkey_logiter_keychunk(iter, log, 1 << 30, &res, &len);
*
* @param iter an open logiter
* @param log an open logreader associated with iter.
* @param maxlen a limit for how much data you want to handle.
* @param res (output parameter) reference to a read only array of data. The array is of size *len, and is not NUL-terminated.
* You can not use this as a string, and you may not modify it. The data in the array is valid until the next operation on the
* logiter or until the log is closed.
* @param len (output parameter) reference to a variable holding the size of res.
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a returncode indicating the error.
*/
sparkey_returncode sparkey_logiter_keychunk(sparkey_logiter *iter, sparkey_logreader *log, uint64_t maxlen, uint8_t ** res, uint64_t *len);
/**
* First consumes and discards any remaining key parts.
* Then consumes and returns part of or all of the value of the current entry.
* Usage example:
* uint8_t *res;
* uint64_t len;
* sparkey_returncode code = sparkey_logiter_valuechunk(iter, log, 1 << 30, &res, &len);
*
* @param iter an open logiter
* @param log an open logreader associated with iter.
* @param maxlen a limit for how much data you want to handle.
* @param res (output parameter) reference to a read only array of data. The array is of size *len, and is not NUL-terminated.
* You can not use this as a string, and you may not modify it. The data in the array is valid until the next operation on the
* logiter or until the log is closed.
* @param len (output parameter) reference to a variable holding the size of res.
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a returncode indicating the error.
*/
sparkey_returncode sparkey_logiter_valuechunk(sparkey_logiter *iter, sparkey_logreader *log, uint64_t maxlen, uint8_t ** res, uint64_t *len);
/**
* Convenience function around sparkey_logiter_keychunk.
* Takes a user allocated buffer and fills it as much as possible by consuming parts of the key of the current entry.
* No NUL will be appended after the data, so you may not use it as a string unless you add the NUL manually.
* Usage example (sparkey_logiter is opaque in this header, so the key length is read through its accessor):
* uint64_t keylen = sparkey_logiter_keylen(iter);
* uint8_t *buf = malloc(keylen);
* uint64_t len;
* sparkey_returncode code = sparkey_logiter_fill_key(iter, log, keylen, buf, &len);
*
* @param iter an open logiter
* @param log an open logreader associated with iter.
* @param maxlen a limit for how much data you want to handle.
* @param buf a writable array of data. The array must at least be of size maxlen.
* @param len (output parameter) reference to a variable holding the amount of data written to buf.
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a returncode indicating the error.
*/
sparkey_returncode sparkey_logiter_fill_key(sparkey_logiter *iter, sparkey_logreader *log, uint64_t maxlen, uint8_t *buf, uint64_t *len);
/**
* Convenience function around sparkey_logiter_valuechunk.
* Takes a user allocated buffer and fills it as much as possible by consuming parts of the value of the current entry.
* No NUL will be appended after the data, so you may not use it as a string unless you add the NUL manually.
* Usage example (sparkey_logiter is opaque in this header, so the value length is read through its accessor):
* uint64_t valuelen = sparkey_logiter_valuelen(iter);
* uint8_t *buf = malloc(valuelen);
* uint64_t len;
* sparkey_returncode code = sparkey_logiter_fill_value(iter, log, valuelen, buf, &len);
*
* @param iter an open logiter
* @param log an open logreader associated with iter.
* @param maxlen a limit for how much data you want to handle.
* @param buf a writable array of data. The array must at least be of size maxlen.
* @param len (output parameter) reference to a variable holding the amount of data written to buf.
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a returncode indicating the error.
*/
sparkey_returncode sparkey_logiter_fill_value(sparkey_logiter *iter, sparkey_logreader *log, uint64_t maxlen, uint8_t *buf, uint64_t *len);
/**
* Compares the keys of two iterators pointing to the same log.
* It assumes that the iterators are both clean, i.e. nothing has been consumed from the current entry.
*
* @param iter1 an open logiter
* @param iter2 an open logiter
* @param log an open logreader associated with iter1 and iter2.
* @param res (output parameter) reference to a variable holding the result of the comparison.
* It will be zero if the keys are equal, negative if the key of iter1 is smaller than the key of iter2
* and positive if the key of iter1 is larger than the key of iter2.
* The behaviour is thus like memcmp.
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a returncode indicating the error.
*/
sparkey_returncode sparkey_logiter_keycmp(sparkey_logiter *iter1, sparkey_logiter *iter2, sparkey_logreader *log, int *res);
/**
* Get the state for an iterator.
* @param iter a reference to a logiter.
* @returns iter->state
*/
sparkey_iter_state sparkey_logiter_state(sparkey_logiter *iter);
/**
* Get the entry type of an iterator.
* @param iter a reference to a logiter.
* @returns iter->type
*/
sparkey_entry_type sparkey_logiter_type(sparkey_logiter *iter);
/**
* Get the keylen of an iterator.
* @param iter a reference to a logiter.
* @returns iter->keylen
*/
uint64_t sparkey_logiter_keylen(sparkey_logiter *iter);
/**
* Get the valuelen of an iterator.
* @param iter a reference to a logiter.
* @returns iter->valuelen
*/
uint64_t sparkey_logiter_valuelen(sparkey_logiter *iter);
/* hashwriter */
/**
* Creates a hash table for a specific log file.
* It's safe and efficient to run this multiple times.
* If the hash file already exists, it will be used to speed up the creation of the new file
* by reusing the existing entries, and only update the new hash table based on
* the entries in the log that are new since the last hash was built.
* Note that the hash file is never overwritten, instead the old file is unlinked from
* the filesystem and the new one is created. Thus, it's safe to rewrite the hash table while
* other processes are reading from it.
* @param hash_filename the file to create and put the sparkey hash table in.
* @param log_filename a file that must exist and be a sparkey log file.
* @param hash_size size of the hashes for keys.
Valid values are 4 (32 bit murmurhash3_x86_32) and
8 (lower 64-bit part of murmurhash3_x64_128).
A value of zero will make it autoselect hash size, depending on number of entries.
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a returncode indicating the error.
*/
sparkey_returncode sparkey_hash_write(const char *hash_filename, const char *log_filename, int hash_size);
/* hashreader */
/**
* Opens a hash file and a log file for reading. The hashreader is threadsafe, except during opening or closing.
* @param reader a double reference to an uninitialized hashreader. Will be set on success.
* @param hash_filename a filename of a file containing a sparkey hash table.
* @param log_filename a filename of a file containing a sparkey log.
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a return code indicating the error.
*/
sparkey_returncode sparkey_hash_open(sparkey_hashreader **reader, const char *hash_filename, const char *log_filename);
/**
* Gets the logreader that is referenced by the hashreader
* @param reader an open reader.
* @returns the associated logreader
*/
sparkey_logreader * sparkey_hash_getreader(sparkey_hashreader *reader);
/**
* Closes a hashreader.
* It's allowed to close a hashreader while there are open logiterators associated with it.
* Further operations on such logiterators will fail.
* This is a failsafe operation.
* @param reader a double reference to a hashreader
*/
void sparkey_hash_close(sparkey_hashreader **reader);
/**
* Performs a hash table lookup of a key. If the key is found,
* the iterator will have state SPARKEY_ITER_ACTIVE and the key chunk will be consumed.
* Otherwise, the iterator will have state SPARKEY_ITER_INVALID.
* @param reader an open reader.
* @param key a buffer containing the key. It does not have to be NUL-terminated.
* @param keylen the length of the key.
* @param iter an iterator associated with the reader. Will be mutated.
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a return code indicating the error.
*/
sparkey_returncode sparkey_hash_get(sparkey_hashreader *reader, const uint8_t *key, uint64_t keylen, sparkey_logiter *iter);
/**
* Works the same as sparkey_logiter_next, except it skips entries that are not of type SPARKEY_ENTRY_PUT
* and entries that have been overwritten or deleted. Thus it only stops at live entries.
* iter->state will be SPARKEY_ITER_CLOSED if the last entry has been passed.
* iter->state will be SPARKEY_ITER_INVALID if anything goes wrong.
* iter->state will be SPARKEY_ITER_ACTIVE if it successfully reached the next entry.
* @see sparkey_logiter_next
* @param iter an open logiter
* @param reader an open reader associated with iter.
* @returns SPARKEY_SUCCESS if all goes well. Otherwise a returncode indicating the error.
*/
sparkey_returncode sparkey_logiter_hashnext(sparkey_logiter *iter, sparkey_hashreader *reader);
/**
* Get the number of entries reported by a hashreader.
* NOTE(review): presumably the number of live entries in the hash table - confirm against the implementation.
* @param reader a reference to a hashreader.
* @returns the number of entries.
*/
uint64_t sparkey_hash_numentries(sparkey_hashreader *reader);
/**
* Get the number of collisions reported by a hashreader.
* NOTE(review): presumably the hash collisions recorded when the hash file was built - confirm against the implementation.
* @param reader a reference to a hashreader.
* @returns the number of collisions.
*/
uint64_t sparkey_hash_numcollisions(sparkey_hashreader *reader);
/* util */
/**
* Allocates and creates a string denoting a log file from an index file.
* This is simply a string replacement of .spi$ to .spl$
* NOTE(review): the string is allocated by this call; presumably the caller must free() it - confirm.
* @param index_filename the filename representing the index file
* @returns the newly allocated log filename, or NULL if the index_filename does not end with ".spi"
*/
char * sparkey_create_log_filename(const char *index_filename);
/**
* Allocates and creates a string denoting an index file from a log file.
* This is simply a string replacement of .spl$ to .spi$
* NOTE(review): the string is allocated by this call; presumably the caller must free() it - confirm.
* @param log_filename the filename representing the log file
* @returns the newly allocated index filename, or NULL if the log_filename does not end with ".spl"
*/
char * sparkey_create_index_filename(const char *log_filename);
#ifdef __cplusplus
}
#endif
#endif