[utils] string utils, file utils, contiguous arrays of strings used for storing tokenized strings, klib for generic hashtables and vectors, antirez's sds for certain types of string building, utf8proc for iterating over utf-8 strings and unicode normalization

2015-03-03 12:27:19 -05:00
parent 27269e18ca
commit 5216aba1b6
16 changed files with 16961 additions and 0 deletions
--- a/src/tokens.c
+++ b/src/tokens.c
@@ -0,0 +1,40 @@
+#include "tokens.h"
+
+
+/*
+ * Allocate an empty tokenized string: the struct itself plus its character
+ * buffer (str) and its token array (tokens).
+ *
+ * Returns NULL if the struct cannot be allocated, or if either member comes
+ * back NULL; whatever was already acquired is released first. A successful
+ * result is released with tokenized_string_destroy().
+ */
+tokenized_string_t *tokenized_string_new(void) {
+    tokenized_string_t *self = malloc(sizeof(*self));
+    if (self == NULL) {
+        return NULL;
+    }
+
+    self->str = char_array_new();
+    self->tokens = token_array_new();
+
+    /* NOTE(review): assumes the *_new() constructors report failure by
+     * returning NULL -- confirm against their definitions. */
+    if (self->str == NULL || self->tokens == NULL) {
+        if (self->str != NULL) {
+            char_array_destroy(self->str);
+        }
+        if (self->tokens != NULL) {
+            token_array_destroy(self->tokens);
+        }
+        free(self);
+        return NULL;
+    }
+
+    return self;
+}
+
+/*
+ * Append one token to a tokenized string.
+ *
+ * The len bytes starting at src + position are handed to
+ * contiguous_string_array_add_string_len() to be added to self->str, and a
+ * token_t initialized with {offset, len, token_type, position} is pushed onto
+ * self->tokens, where offset is the value of self->str->n before the append.
+ */
+void tokenized_string_add_token(tokenized_string_t *self, const char *src, size_t len, uint16_t token_type, uint64_t position) {
+    /* Must be read before appending: it marks where this token's bytes begin. */
+    size_t start = self->str->n;
+    char *text = (char *) (src + position);
+
+    contiguous_string_array_add_string_len(self->str, text, len);
+
+    token_t entry = {start, len, token_type, position};
+    token_array_push(self->tokens, entry);
+}
+
+/*
+ * Look up the text of the token at the given index.
+ *
+ * Returns a pointer into self->str->a at the offset recorded for that token,
+ * or NULL when index is not less than self->tokens->n. The pointer refers to
+ * storage owned by self; the caller must not free it.
+ * NOTE(review): presumably each stored token is NUL-terminated by the
+ * contiguous string array helper -- confirm before treating it as a C string.
+ */
+char *tokenized_string_get_token(tokenized_string_t *self, uint64_t index) {
+    if (index >= self->tokens->n) {
+        return NULL;
+    }
+
+    uint64_t start = self->tokens->a[index].offset;
+    return (char *)self->str->a + start;
+}
+
+/*
+ * Release a tokenized string and the arrays it owns.
+ *
+ * Passing NULL is a no-op. The str and tokens members are each destroyed
+ * only when non-NULL, then the struct itself is freed.
+ */
+void tokenized_string_destroy(tokenized_string_t *self) {
+    if (self != NULL) {
+        if (self->str != NULL) {
+            char_array_destroy(self->str);
+        }
+        if (self->tokens != NULL) {
+            token_array_destroy(self->tokens);
+        }
+        free(self);
+    }
+}