[tokenization] Exposing some of the scanner's methods in the header for use in the Python scanner so it can avoid the additional allocation
@@ -6,10 +6,6 @@
 
 #include "scanner.h"
 
-typedef struct scanner {
-    unsigned char *src, *cursor, *start, *end;
-} scanner_t;
-
 int scan_token(scanner_t *s)
 {
     s->start = s->cursor;
@@ -264787,10 +264783,8 @@ yy17828:
 
 }
 
-tokenized_string_t *tokenize(const char *input) {
-    size_t token_start, token_length;
-    int token_type;
 
+scanner_t scanner_from_string(const char *input) {
     unsigned char *s = (unsigned char *)input;
 
     scanner_t scanner;
@@ -264799,6 +264793,15 @@ tokenized_string_t *tokenize(const char *input) {
     scanner.start = s;
     scanner.end = s + strlen(input);
 
+    return scanner;
+}
+
+tokenized_string_t *tokenize(const char *input) {
+    size_t token_start, token_length;
+    int token_type;
+
+    scanner_t scanner = scanner_from_string(input);
+
     tokenized_string_t *response = tokenized_string_new();
 
     while ( ( token_type = scan_token(&scanner)) != END ) {
@@ -264814,3 +264817,4 @@ tokenized_string_t *tokenize(const char *input) {
     return response;
 
 }
+
@@ -8,6 +8,14 @@ extern "C" {
 #include "token_types.h"
 #include "tokens.h"
 
+typedef struct scanner {
+    unsigned char *src, *cursor, *start, *end;
+} scanner_t;
+
+int scan_token(scanner_t *s);
+
+inline scanner_t scanner_from_string(const char *input);
+
 tokenized_string_t *tokenize(const char *str);
 
 
@@ -184,10 +184,7 @@ abbreviation = ({word})"\.";
 
 }
 
-tokenized_string_t *tokenize(const char *input) {
-    size_t token_start, token_length;
-    int token_type;
-
+scanner_t scanner_from_string(const char *input) {
     unsigned char *s = (unsigned char *)input;
 
     scanner_t scanner;
@@ -196,6 +193,15 @@ tokenized_string_t *tokenize(const char *input) {
     scanner.start = s;
     scanner.end = s + strlen(input);
 
+    return scanner;
+}
+
+tokenized_string_t *tokenize(const char *input) {
+    size_t token_start, token_length;
+    int token_type;
+
+    scanner_t scanner = scanner_from_string(input);
+
     tokenized_string_t *response = tokenized_string_new();
 
     while ( ( token_type = scan_token(&scanner)) != END ) {
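As a sketch of the call pattern this change enables, a C consumer of the now-public declarations could drive the scanner directly and skip the tokenized_string_t allocation that tokenize() performs. This is a minimal sketch, not code from the commit: print_token_offsets is a hypothetical helper, and it assumes, as the tokenize() loop above suggests, that after scan_token() returns, the bytes of the matched token lie between scanner.start and scanner.cursor.

#include <stdio.h>

#include "scanner.h"  /* scanner_t, scan_token(), scanner_from_string(); its includes provide END */

/* Hypothetical helper: walk the tokens of a NUL-terminated string without
 * allocating a tokenized_string_t. Offsets are derived from the scanner's
 * own start/cursor pointers, on the assumption that scan_token() leaves
 * them bracketing the token it just matched. */
static void print_token_offsets(const char *input) {
    scanner_t scanner = scanner_from_string(input);

    int token_type;
    while ((token_type = scan_token(&scanner)) != END) {
        size_t token_start = (size_t)(scanner.start - (unsigned char *)input);
        size_t token_length = (size_t)(scanner.cursor - scanner.start);
        printf("type=%d start=%zu len=%zu\n", token_type, token_start, token_length);
    }
}

A Python-side scanner could follow the same loop through its FFI layer, materializing tokens lazily rather than copying them into a response object first, which appears to be the extra allocation the commit message refers to.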