[tokenization/trie] Simplify the URL regex to reduce the scanner file size, account for a few more variations in word tokens, and make trie suffix search use iteration instead of malloc'ing a new string

This commit is contained in:
Al
2015-04-05 16:30:27 -04:00
parent 5f3d74de18
commit 79fd7a8ded
4 changed files with 155806 additions and 224455 deletions

View File

@@ -10,6 +10,7 @@ extern "C" {
#include "collections.h"
#include "klib/kvec.h"
#include "log/log.h"
#include "string_utils.h"
#include "tokens.h"
#include "vector.h"
#include "utf8proc/utf8proc.h"