From 2b69c185fae66889f8b9ebfe3edcacb10456c0b5 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 25 Jun 2015 10:03:34 -0400 Subject: [PATCH] [tokenization] Adding a tokenizer method for appending to an existing tokens array (e.g. can stop/start tokenizing on a script change) --- src/scanner.c | 23 ++++++++++++++--------- src/scanner.h | 3 ++- src/scanner.re | 23 ++++++++++++++--------- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 18d7c87f..92ecc645 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -339972,26 +339972,24 @@ yy19321: } -scanner_t scanner_from_string(const char *input) { +inline scanner_t scanner_from_string(const char *input, size_t len) { unsigned char *s = (unsigned char *)input; scanner_t scanner; scanner.src = s; scanner.cursor = s; scanner.start = s; - scanner.end = s + strlen(input); + scanner.end = s + len; return scanner; } -token_array *tokenize(const char *input) { +void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) { + scanner_t scanner = scanner_from_string(input, len); + size_t token_start, token_length; uint16_t token_type; - scanner_t scanner = scanner_from_string(input); - - token_array *tokens = token_array_new(); - while ( ( token_type = scan_token(&scanner)) != END ) { token_start = scanner.start - scanner.src; token_length = scanner.cursor - scanner.start; @@ -340007,6 +340005,13 @@ token_array *tokenize(const char *input) { } } - return tokens; - +} + +token_array *tokenize(const char *input) { + + token_array *tokens = token_array_new(); + + tokenize_add_tokens(tokens, input, strlen(input)); + + return tokens; } diff --git a/src/scanner.h b/src/scanner.h index 4ba474de..cdee3465 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -14,8 +14,9 @@ typedef struct scanner { uint16_t scan_token(scanner_t *s); -inline scanner_t scanner_from_string(const char *input); +scanner_t scanner_from_string(const char *input, size_t len); +void tokenize_add_tokens(token_array *tokens, const char *input, size_t len); token_array *tokenize(const char *input); diff --git a/src/scanner.re b/src/scanner.re index 6889aa04..91b771e3 100644 --- a/src/scanner.re +++ b/src/scanner.re @@ -206,26 +206,24 @@ email = ([a-zA-Z0-9\._%+\-]+"@"([a-zA-Z0-9]+[\.])+[a-zA-Z0-9]{2,3}); } -scanner_t scanner_from_string(const char *input) { +inline scanner_t scanner_from_string(const char *input, size_t len) { unsigned char *s = (unsigned char *)input; scanner_t scanner; scanner.src = s; scanner.cursor = s; scanner.start = s; - scanner.end = s + strlen(input); + scanner.end = s + len; return scanner; } -token_array *tokenize(const char *input) { +void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) { + scanner_t scanner = scanner_from_string(input, len); + size_t token_start, token_length; uint16_t token_type; - scanner_t scanner = scanner_from_string(input); - - token_array *tokens = token_array_new(); - while ( ( token_type = scan_token(&scanner)) != END ) { token_start = scanner.start - scanner.src; token_length = scanner.cursor - scanner.start; @@ -241,6 +239,13 @@ token_array *tokenize(const char *input) { } } - return tokens; - +} + +token_array *tokenize(const char *input) { + + token_array *tokens = token_array_new(); + + tokenize_add_tokens(tokens, input, strlen(input)); + + return tokens; }