[tokenization] Adding a tokenizer method for appending to an existing tokens array (e.g. can stop/start tokenizing on a script change)
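A minimal sketch of the calling pattern this enables, assuming the declarations land in a header as in the hunks below (the header name tokenizer.h and the wrapper tokenize_two_runs are hypothetical; token_array, token_array_new, and tokenize_add_tokens are the API added by this commit):

#include <string.h>
#include "tokenizer.h" /* assumed header name for the declarations in this commit */

/* Append tokens from two separately scanned runs of text into one
 * shared array, as when tokenizing stops at a script change and
 * resumes on the next run. */
token_array *tokenize_two_runs(const char *run1, const char *run2) {
	token_array *tokens = token_array_new();

	tokenize_add_tokens(tokens, run1, strlen(run1));
	/* ...the caller could reconfigure scanner state between runs... */
	tokenize_add_tokens(tokens, run2, strlen(run2));

	return tokens;
}

Note that token_start is computed against scanner.src, which is reset on every call, so offsets in appended tokens are relative to the run that produced them rather than to the concatenated text.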
@@ -339972,26 +339972,24 @@ yy19321:
 
 }
 
-scanner_t scanner_from_string(const char *input) {
+inline scanner_t scanner_from_string(const char *input, size_t len) {
 	unsigned char *s = (unsigned char *)input;
 
 	scanner_t scanner;
 	scanner.src = s;
 	scanner.cursor = s;
 	scanner.start = s;
-	scanner.end = s + strlen(input);
+	scanner.end = s + len;
 
 	return scanner;
 }
 
-token_array *tokenize(const char *input) {
+void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) {
+	scanner_t scanner = scanner_from_string(input, len);
+
 	size_t token_start, token_length;
 	uint16_t token_type;
 
-	scanner_t scanner = scanner_from_string(input);
-
-	token_array *tokens = token_array_new();
-
 	while ( ( token_type = scan_token(&scanner)) != END ) {
 		token_start = scanner.start - scanner.src;
 		token_length = scanner.cursor - scanner.start;
@@ -340007,6 +340005,13 @@ token_array *tokenize(const char *input) {
 		}
 	}
 
-	return tokens;
+}
+
+token_array *tokenize(const char *input) {
+	token_array *tokens = token_array_new();
+
+	tokenize_add_tokens(tokens, input, strlen(input));
+
+	return tokens;
 }
 
@@ -14,8 +14,9 @@ typedef struct scanner {
 
 uint16_t scan_token(scanner_t *s);
 
-inline scanner_t scanner_from_string(const char *input);
+scanner_t scanner_from_string(const char *input, size_t len);
 
+void tokenize_add_tokens(token_array *tokens, const char *input, size_t len);
 token_array *tokenize(const char *input);
 
 
@@ -206,26 +206,24 @@ email = ([a-zA-Z0-9\._%+\-]+"@"([a-zA-Z0-9]+[\.])+[a-zA-Z0-9]{2,3});
 
 }
 
-scanner_t scanner_from_string(const char *input) {
+inline scanner_t scanner_from_string(const char *input, size_t len) {
 	unsigned char *s = (unsigned char *)input;
 
 	scanner_t scanner;
 	scanner.src = s;
 	scanner.cursor = s;
 	scanner.start = s;
-	scanner.end = s + strlen(input);
+	scanner.end = s + len;
 
 	return scanner;
 }
 
-token_array *tokenize(const char *input) {
+void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) {
+	scanner_t scanner = scanner_from_string(input, len);
+
 	size_t token_start, token_length;
 	uint16_t token_type;
 
-	scanner_t scanner = scanner_from_string(input);
-
-	token_array *tokens = token_array_new();
-
 	while ( ( token_type = scan_token(&scanner)) != END ) {
 		token_start = scanner.start - scanner.src;
 		token_length = scanner.cursor - scanner.start;
@@ -241,6 +239,13 @@ token_array *tokenize(const char *input) {
 		}
 	}
 
-	return tokens;
+}
+
+token_array *tokenize(const char *input) {
+	token_array *tokens = token_array_new();
+
+	tokenize_add_tokens(tokens, input, strlen(input));
+
+	return tokens;
 }
 