[phrases] trie_add_prefix method and a schema for prefix keys, e.g. elisions in French and Italian, separable prefixes like Hinter in German, etc.
This commit is contained in:
25
src/trie.c
25
src/trie.c
@@ -667,16 +667,37 @@ inline bool trie_add_len(trie_t *self, char *key, size_t len, uint32_t data) {
|
||||
return trie_add_at_index(self, ROOT_NODE_ID, key, len, data);
|
||||
}
|
||||
|
||||
/**
 * Insert `key` as a prefix entry beneath `start_node_id`.
 *
 * Prefix keys are kept separate from ordinary keys by routing them through a
 * single TRIE_PREFIX_CHAR transition out of the start node; the bytes of
 * `key` are then added below that marker node with trie_add_at_index().
 *
 * @param self           trie to modify
 * @param key            NUL-terminated prefix string
 * @param start_node_id  node that owns (or will own) the marker transition
 * @param data           value stored with the key
 * @return false when `self` or `key` is NULL, `key` is empty, or
 *         `start_node_id` is NULL_NODE_ID; otherwise whatever
 *         trie_add_at_index() returns.
 */
bool trie_add_prefix_at_index(trie_t *self, char *key, uint32_t start_node_id, uint32_t data) {
    // Reject bad pointers before strlen(): calling it on NULL is undefined behavior.
    if (!self || !key || start_node_id == NULL_NODE_ID) return false;

    size_t len = strlen(key);
    if (len == 0) return false;

    trie_node_t start_node = trie_get_node(self, start_node_id);

    // Look at the slot the marker transition would occupy. If that slot's
    // check field does not refer back to the start node, the marker
    // transition is not present yet and has to be created.
    uint32_t node_id = trie_get_transition_index(self, start_node, TRIE_PREFIX_CHAR);
    trie_node_t node = trie_get_node(self, node_id);
    if (node.check != start_node_id) {
        node_id = trie_add_transition(self, start_node_id, TRIE_PREFIX_CHAR);
    }

    return trie_add_at_index(self, node_id, key, len, data);
}
|
||||
|
||||
/**
 * Add `key` as a prefix entry anchored at the root of the trie.
 *
 * Convenience wrapper: forwards to trie_add_prefix_at_index() with
 * ROOT_NODE_ID as the start node and returns its result unchanged.
 */
inline bool trie_add_prefix(trie_t *self, char *key, uint32_t data) {
    const uint32_t root_id = ROOT_NODE_ID;
    bool added = trie_add_prefix_at_index(self, key, root_id, data);
    return added;
}
|
||||
|
||||
bool trie_add_suffix_at_index(trie_t *self, char *key, uint32_t start_node_id, uint32_t data) {
|
||||
size_t len = strlen(key);
|
||||
if (start_node_id == NULL_NODE_ID || len == 0) return false;
|
||||
|
||||
trie_node_t start_node = trie_get_node(self, start_node_id);
|
||||
|
||||
uint32_t node_id = trie_get_transition_index(self, start_node, '\0');
|
||||
uint32_t node_id = trie_get_transition_index(self, start_node, TRIE_SUFFIX_CHAR);
|
||||
trie_node_t node = trie_get_node(self, node_id);
|
||||
if (node.check != start_node_id) {
|
||||
node_id = trie_add_transition(self, start_node_id, '\0');
|
||||
node_id = trie_add_transition(self, start_node_id, TRIE_SUFFIX_CHAR);
|
||||
}
|
||||
|
||||
char *suffix = utf8_reversed_string(key);
|
||||
|
||||
@@ -40,6 +40,9 @@ extern "C" {
|
||||
#define TRIE_INDEX_ERROR 0
|
||||
#define TRIE_MAX_INDEX 0x7fffffff
|
||||
|
||||
// Marker byte for prefix keys: trie_add_prefix_at_index() follows (or creates)
// a transition on this byte from the start node before inserting the key.
// NOTE(review): '\xff' is negative where plain char is signed; confirm that
// users of this macro convert it to unsigned char before comparing/indexing.
#define TRIE_PREFIX_CHAR '\xff'
|
||||
// Marker byte for suffix keys, used the same way by trie_add_suffix_at_index().
#define TRIE_SUFFIX_CHAR '\x00'
|
||||
|
||||
// Using 256 characters can fit all UTF-8 encoded strings
|
||||
#define NUM_CHARS 256
|
||||
|
||||
@@ -109,6 +112,8 @@ bool trie_add(trie_t *self, char *key, uint32_t data);
|
||||
bool trie_add_len(trie_t *self, char *key, size_t len, uint32_t data);
|
||||
bool trie_add_suffix(trie_t *self, char *key, uint32_t data);
|
||||
bool trie_add_suffix_at_index(trie_t *self, char *key, uint32_t start_node_id, uint32_t data);
|
||||
// Add `key` as a prefix entry starting from the root node (ROOT_NODE_ID).
bool trie_add_prefix(trie_t *self, char *key, uint32_t data);
|
||||
// Add `key` as a prefix entry beneath start_node_id, behind a TRIE_PREFIX_CHAR
// transition. Returns false when start_node_id is NULL_NODE_ID or key is empty.
bool trie_add_prefix_at_index(trie_t *self, char *key, uint32_t start_node_id, uint32_t data);
|
||||
|
||||
uint32_t trie_get_from_index(trie_t *self, char *word, size_t len, uint32_t i);
|
||||
uint32_t trie_get_len(trie_t *self, char *word, size_t len);
|
||||
|
||||
Reference in New Issue
Block a user