[tokenization] Adding a tokenizer method for appending to an existing tokens array (e.g. can stop/start tokenizing on a script change)

This commit is contained in:
Al
2015-06-25 10:03:34 -04:00
parent 581cf406a6
commit 2b69c185fa
3 changed files with 30 additions and 19 deletions

View File

@@ -339972,26 +339972,24 @@ yy19321:
}
scanner_t scanner_from_string(const char *input) {
inline scanner_t scanner_from_string(const char *input, size_t len) {
unsigned char *s = (unsigned char *)input;
scanner_t scanner;
scanner.src = s;
scanner.cursor = s;
scanner.start = s;
scanner.end = s + strlen(input);
scanner.end = s + len;
return scanner;
}
token_array *tokenize(const char *input) {
void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) {
scanner_t scanner = scanner_from_string(input, len);
size_t token_start, token_length;
uint16_t token_type;
scanner_t scanner = scanner_from_string(input);
token_array *tokens = token_array_new();
while ( ( token_type = scan_token(&scanner)) != END ) {
token_start = scanner.start - scanner.src;
token_length = scanner.cursor - scanner.start;
@@ -340007,6 +340005,13 @@ token_array *tokenize(const char *input) {
}
}
return tokens;
}
token_array *tokenize(const char *input) {
token_array *tokens = token_array_new();
tokenize_add_tokens(tokens, input, strlen(input));
return tokens;
}

View File

@@ -14,8 +14,9 @@ typedef struct scanner {
uint16_t scan_token(scanner_t *s);
inline scanner_t scanner_from_string(const char *input);
scanner_t scanner_from_string(const char *input, size_t len);
void tokenize_add_tokens(token_array *tokens, const char *input, size_t len);
token_array *tokenize(const char *input);

View File

@@ -206,26 +206,24 @@ email = ([a-zA-Z0-9\._%+\-]+"@"([a-zA-Z0-9]+[\.])+[a-zA-Z0-9]{2,3});
}
scanner_t scanner_from_string(const char *input) {
inline scanner_t scanner_from_string(const char *input, size_t len) {
unsigned char *s = (unsigned char *)input;
scanner_t scanner;
scanner.src = s;
scanner.cursor = s;
scanner.start = s;
scanner.end = s + strlen(input);
scanner.end = s + len;
return scanner;
}
token_array *tokenize(const char *input) {
void tokenize_add_tokens(token_array *tokens, const char *input, size_t len) {
scanner_t scanner = scanner_from_string(input, len);
size_t token_start, token_length;
uint16_t token_type;
scanner_t scanner = scanner_from_string(input);
token_array *tokens = token_array_new();
while ( ( token_type = scan_token(&scanner)) != END ) {
token_start = scanner.start - scanner.src;
token_length = scanner.cursor - scanner.start;
@@ -241,6 +239,13 @@ token_array *tokenize(const char *input) {
}
}
return tokens;
}
token_array *tokenize(const char *input) {
token_array *tokens = token_array_new();
tokenize_add_tokens(tokens, input, strlen(input));
return tokens;
}