[tokenization] Exposing some of the scanner's methods in the header for use in the Python scanner so it can avoid the additional allocation
This commit is contained in:
@@ -184,10 +184,7 @@ abbreviation = ({word})"\.";
|
||||
|
||||
}
|
||||
|
||||
tokenized_string_t *tokenize(const char *input) {
|
||||
size_t token_start, token_length;
|
||||
int token_type;
|
||||
|
||||
scanner_t scanner_from_string(const char *input) {
|
||||
unsigned char *s = (unsigned char *)input;
|
||||
|
||||
scanner_t scanner;
|
||||
@@ -196,6 +193,15 @@ tokenized_string_t *tokenize(const char *input) {
|
||||
scanner.start = s;
|
||||
scanner.end = s + strlen(input);
|
||||
|
||||
return scanner;
|
||||
}
|
||||
|
||||
tokenized_string_t *tokenize(const char *input) {
|
||||
size_t token_start, token_length;
|
||||
int token_type;
|
||||
|
||||
scanner_t scanner = scanner_from_string(input);
|
||||
|
||||
tokenized_string_t *response = tokenized_string_new();
|
||||
|
||||
while ( ( token_type = scan_token(&scanner)) != END ) {
|
||||
|
||||
Reference in New Issue
Block a user