[tokenization] Adding a version of tokenize which keeps whitespace tokens

This commit is contained in:
Al
2015-07-21 17:26:20 -04:00
parent 5d21cb1604
commit 71be52275d
3 changed files with 38 additions and 21 deletions

View File

@@ -347350,7 +347350,7 @@ inline scanner_t scanner_from_string(const char *input, size_t len) {
return scanner; return scanner;
} }
void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) { void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace) {
scanner_t scanner = scanner_from_string(input, len); scanner_t scanner = scanner_from_string(input, len);
size_t token_start, token_length; size_t token_start, token_length;
@@ -347360,24 +347360,31 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) {
token_start = scanner.start - scanner.src; token_start = scanner.start - scanner.src;
token_length = scanner.cursor - scanner.start; token_length = scanner.cursor - scanner.start;
if (token_type != WHITESPACE) { if (token_type == WHITESPACE && !keep_whitespace) {
// Caller frees continue;
token_t token;
token.offset = token_start;
token.len = token_length;
token.type = token_type;
token_array_push(tokens, token);
} }
token_t token;
token.offset = token_start;
token.len = token_length;
token.type = token_type;
token_array_push(tokens, token);
} }
} }
token_array *tokenize_keep_whitespace(const char *input) {
token_array *tokens = token_array_new();
tokenize_add_tokens(tokens, input, strlen(input), true);
return tokens;
}
token_array *tokenize(const char *input) { token_array *tokenize(const char *input) {
token_array *tokens = token_array_new(); token_array *tokens = token_array_new();
tokenize_add_tokens(tokens, input, strlen(input)); tokenize_add_tokens(tokens, input, strlen(input), false);
return tokens; return tokens;
} }

View File

@@ -5,6 +5,8 @@
extern "C" { extern "C" {
#endif #endif
#include <stdbool.h>
#include "token_types.h" #include "token_types.h"
#include "tokens.h" #include "tokens.h"
@@ -16,7 +18,8 @@ uint16_t scan_token(scanner_t *s);
scanner_t scanner_from_string(const char *input, size_t len); scanner_t scanner_from_string(const char *input, size_t len);
void tokenize_add_tokens(token_array *tokens, const char *input, size_t len); void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace);
token_array *tokenize_keep_whitespace(const char *input);
token_array *tokenize(const char *input); token_array *tokenize(const char *input);

View File

@@ -219,7 +219,7 @@ inline scanner_t scanner_from_string(const char *input, size_t len) {
return scanner; return scanner;
} }
void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) { void tokenize_add_tokens(token_array *tokens, const char *input, size_t len, bool keep_whitespace) {
scanner_t scanner = scanner_from_string(input, len); scanner_t scanner = scanner_from_string(input, len);
size_t token_start, token_length; size_t token_start, token_length;
@@ -229,24 +229,31 @@ void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) {
token_start = scanner.start - scanner.src; token_start = scanner.start - scanner.src;
token_length = scanner.cursor - scanner.start; token_length = scanner.cursor - scanner.start;
if (token_type != WHITESPACE) { if (token_type == WHITESPACE && !keep_whitespace) {
// Caller frees continue;
token_t token;
token.offset = token_start;
token.len = token_length;
token.type = token_type;
token_array_push(tokens, token);
} }
token_t token;
token.offset = token_start;
token.len = token_length;
token.type = token_type;
token_array_push(tokens, token);
} }
} }
token_array *tokenize_keep_whitespace(const char *input) {
token_array *tokens = token_array_new();
tokenize_add_tokens(tokens, input, strlen(input), true);
return tokens;
}
token_array *tokenize(const char *input) { token_array *tokenize(const char *input) {
token_array *tokens = token_array_new(); token_array *tokens = token_array_new();
tokenize_add_tokens(tokens, input, strlen(input)); tokenize_add_tokens(tokens, input, strlen(input), false);
return tokens; return tokens;
} }