[tokenization] Exposing some of the scanner's methods in the header for use in the Python scanner so it can avoid the additional allocation

This commit is contained in:
Al
2015-03-17 18:38:30 -04:00
parent daf3f8706b
commit f794ef7222
3 changed files with 29 additions and 11 deletions

View File

@@ -6,10 +6,6 @@
#include "scanner.h" #include "scanner.h"
/*
 * Scanner state: four byte pointers that scan_token() reads and updates
 * while walking the text being tokenized.
 */
typedef struct scanner {
    unsigned char *src;    /* presumably the first byte of the input -- TODO confirm */
    unsigned char *cursor; /* copied into `start` when scan_token() is entered */
    unsigned char *start;  /* overwritten with `cursor` at the top of scan_token() */
    unsigned char *end;    /* initialised to input + strlen(input) */
} scanner_t;
int scan_token(scanner_t *s) int scan_token(scanner_t *s)
{ {
s->start = s->cursor; s->start = s->cursor;
@@ -264787,10 +264783,8 @@ yy17828:
} }
tokenized_string_t *tokenize(const char *input) {
size_t token_start, token_length;
int token_type;
scanner_t scanner_from_string(const char *input) {
unsigned char *s = (unsigned char *)input; unsigned char *s = (unsigned char *)input;
scanner_t scanner; scanner_t scanner;
@@ -264799,6 +264793,15 @@ tokenized_string_t *tokenize(const char *input) {
scanner.start = s; scanner.start = s;
scanner.end = s + strlen(input); scanner.end = s + strlen(input);
return scanner;
}
tokenized_string_t *tokenize(const char *input) {
size_t token_start, token_length;
int token_type;
scanner_t scanner = scanner_from_string(input);
tokenized_string_t *response = tokenized_string_new(); tokenized_string_t *response = tokenized_string_new();
while ( ( token_type = scan_token(&scanner)) != END ) { while ( ( token_type = scan_token(&scanner)) != END ) {
@@ -264814,3 +264817,4 @@ tokenized_string_t *tokenize(const char *input) {
return response; return response;
} }

View File

@@ -8,6 +8,14 @@ extern "C" {
#include "token_types.h" #include "token_types.h"
#include "tokens.h" #include "tokens.h"
/*
 * Scanner state shared with callers that drive scan_token() directly:
 * four byte pointers into the text being tokenized.
 */
typedef struct scanner {
    unsigned char *src;    /* presumably the first byte of the input -- TODO confirm */
    unsigned char *cursor; /* copied into `start` when scan_token() is entered */
    unsigned char *start;  /* overwritten with `cursor` at the top of scan_token() */
    unsigned char *end;    /* initialised to input + strlen(input) */
} scanner_t;
/*
 * Scans the next token from `s`. On entry it sets s->start to s->cursor.
 * Returns the token type as an int; tokenize() keeps calling it until the
 * result is END.
 */
int scan_token(scanner_t *s);

/*
 * Returns, by value, a scanner_t over the NUL-terminated string `input`.
 * `start` is set to the first byte of `input` and `end` to
 * input + strlen(input), so the scanner points into the caller's buffer
 * rather than a copy -- presumably `input` must stay valid while the scanner
 * is in use (TODO confirm against callers).
 *
 * Deliberately NOT declared `inline`: the only definition lives in the
 * scanner's .c file, and C requires a function with external linkage that is
 * declared `inline` to be defined in every translation unit that declares it.
 * An `inline` prototype here would break that rule for every other file
 * including this header.
 */
scanner_t scanner_from_string(const char *input);
tokenized_string_t *tokenize(const char *str); tokenized_string_t *tokenize(const char *str);

View File

@@ -184,10 +184,7 @@ abbreviation = ({word})"\.";
} }
tokenized_string_t *tokenize(const char *input) { scanner_t scanner_from_string(const char *input) {
size_t token_start, token_length;
int token_type;
unsigned char *s = (unsigned char *)input; unsigned char *s = (unsigned char *)input;
scanner_t scanner; scanner_t scanner;
@@ -196,6 +193,15 @@ tokenized_string_t *tokenize(const char *input) {
scanner.start = s; scanner.start = s;
scanner.end = s + strlen(input); scanner.end = s + strlen(input);
return scanner;
}
tokenized_string_t *tokenize(const char *input) {
size_t token_start, token_length;
int token_type;
scanner_t scanner = scanner_from_string(input);
tokenized_string_t *response = tokenized_string_new(); tokenized_string_t *response = tokenized_string_new();
while ( ( token_type = scan_token(&scanner)) != END ) { while ( ( token_type = scan_token(&scanner)) != END ) {