From 71be52275d269518eaa2870abc90be7053ca4e5f Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 21 Jul 2015 17:26:20 -0400 Subject: [PATCH] [tokenization] Adding a version of tokenize which keeps whitespace tokens --- src/scanner.c | 27 +++++++++++++++++---------- src/scanner.h | 5 ++++- src/scanner.re | 27 +++++++++++++++++---------- 3 files changed, 38 insertions(+), 21 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 2be26487..b080bc06 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -347350,7 +347350,7 @@ inline scanner_t scanner_from_string(const char *input, size_t len) { return scanner; } -void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) { +void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace) { scanner_t scanner = scanner_from_string(input, len); size_t token_start, token_length; @@ -347360,24 +347360,31 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) { token_start = scanner.start - scanner.src; token_length = scanner.cursor - scanner.start; - if (token_type != WHITESPACE) { - // Caller frees - token_t token; - token.offset = token_start; - token.len = token_length; - token.type = token_type; - - token_array_push(tokens, token); + if (token_type == WHITESPACE && !keep_whitespace) { + continue; } + + token_t token; + token.offset = token_start; + token.len = token_length; + token.type = token_type; + + token_array_push(tokens, token); } } +token_array *tokenize_keep_whitespace(const char *input) { + token_array *tokens = token_array_new(); + tokenize_add_tokens(tokens, input, strlen(input), true); + return tokens; +} + token_array *tokenize(const char *input) { token_array *tokens = token_array_new(); - tokenize_add_tokens(tokens, input, strlen(input)); + tokenize_add_tokens(tokens, input, strlen(input), false); return tokens; } diff --git a/src/scanner.h b/src/scanner.h index cdee3465..4e567f68 100644 --- a/src/scanner.h +++ 
b/src/scanner.h @@ -5,6 +5,8 @@ extern "C" { #endif +#include <stdbool.h> + #include "token_types.h" #include "tokens.h" @@ -16,7 +18,8 @@ uint16_t scan_token(scanner_t *s); scanner_t scanner_from_string(const char *input, size_t len); -void tokenize_add_tokens(token_array *tokens, const char *input, size_t len); +void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace); +token_array *tokenize_keep_whitespace(const char *input); token_array *tokenize(const char *input); diff --git a/src/scanner.re b/src/scanner.re index 8a30f91d..18c6e6ed 100644 --- a/src/scanner.re +++ b/src/scanner.re @@ -219,7 +219,7 @@ inline scanner_t scanner_from_string(const char *input, size_t len) { return scanner; } -void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) { +void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace) { scanner_t scanner = scanner_from_string(input, len); size_t token_start, token_length; @@ -229,24 +229,31 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) { token_start = scanner.start - scanner.src; token_length = scanner.cursor - scanner.start; - if (token_type != WHITESPACE) { - // Caller frees - token_t token; - token.offset = token_start; - token.len = token_length; - token.type = token_type; - - token_array_push(tokens, token); + if (token_type == WHITESPACE && !keep_whitespace) { + continue; } + + token_t token; + token.offset = token_start; + token.len = token_length; + token.type = token_type; + + token_array_push(tokens, token); } } +token_array *tokenize_keep_whitespace(const char *input) { + token_array *tokens = token_array_new(); + tokenize_add_tokens(tokens, input, strlen(input), true); + return tokens; +} + token_array *tokenize(const char *input) { token_array *tokens = token_array_new(); - tokenize_add_tokens(tokens, input, strlen(input)); + tokenize_add_tokens(tokens, input, strlen(input), false); return tokens; }