[tokenization] Adding a version of tokenize which keeps whitespace tokens

2015-07-21 17:26:20 -04:00
parent 5d21cb1604
commit 71be52275d
3 changed files with 38 additions and 21 deletions
--- a/src/scanner.c
+++ b/src/scanner.c
@@ -347350,7 +347350,7 @@ inline scanner_t scanner_from_string(const char *input, size_t len) {
    return scanner;
 }
-void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) {
+void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace) {
    scanner_t scanner = scanner_from_string(input, len);
    size_t token_start, token_length;
@@ -347360,24 +347360,31 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) {
        token_start = scanner.start - scanner.src;
        token_length = scanner.cursor - scanner.start;
-        if (token_type != WHITESPACE) { 
+        if (token_type == WHITESPACE && !keep_whitespace) {
-            // Caller frees
+            continue;
            token_t token;
            token.offset = token_start;
            token.len = token_length;
            token.type = token_type;
            token_array_push(tokens, token);
        }
        token_t token;
        token.offset = token_start;
        token.len = token_length;
        token.type = token_type;
        token_array_push(tokens, token);
    }
 }
 token_array *tokenize_keep_whitespace(const char *input) {
    token_array *tokens = token_array_new();
    tokenize_add_tokens(tokens, input, strlen(input), true);
    return tokens;
 }
 token_array *tokenize(const char *input) {
    token_array *tokens = token_array_new();
-    tokenize_add_tokens(tokens, input, strlen(input));
+    tokenize_add_tokens(tokens, input, strlen(input), false);
    return tokens;
 }
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -5,6 +5,8 @@
 extern "C" {
 #endif
 #include <stdbool.h>
 #include "token_types.h"
 #include "tokens.h"
@@ -16,7 +18,8 @@ uint16_t scan_token(scanner_t *s);
 scanner_t scanner_from_string(const char *input, size_t len);
-void tokenize_add_tokens(token_array *tokens, const char *input, size_t len);
+void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace);
 token_array *tokenize_keep_whitespace(const char *input);
 token_array *tokenize(const char *input);
--- a/src/scanner.re
+++ b/src/scanner.re
@@ -219,7 +219,7 @@ inline scanner_t scanner_from_string(const char *input, size_t len) {
    return scanner;
 }
-void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) {
+void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace) {
    scanner_t scanner = scanner_from_string(input, len);
    size_t token_start, token_length;
@@ -229,24 +229,31 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) {
        token_start = scanner.start - scanner.src;
        token_length = scanner.cursor - scanner.start;
-        if (token_type != WHITESPACE) { 
+        if (token_type == WHITESPACE && !keep_whitespace) {
-            // Caller frees
+            continue;
            token_t token;
            token.offset = token_start;
            token.len = token_length;
            token.type = token_type;
            token_array_push(tokens, token);
        }
        token_t token;
        token.offset = token_start;
        token.len = token_length;
        token.type = token_type;
        token_array_push(tokens, token);
    }
 }
 token_array *tokenize_keep_whitespace(const char *input) {
    token_array *tokens = token_array_new();
    tokenize_add_tokens(tokens, input, strlen(input), true);
    return tokens;
 }
 token_array *tokenize(const char *input) {
    token_array *tokens = token_array_new();
-    tokenize_add_tokens(tokens, input, strlen(input));
+    tokenize_add_tokens(tokens, input, strlen(input), false);
    return tokens;
 }