[tokenization] Exposing some of the scanner's methods in the header for use in the Python scanner, so it can avoid the additional allocation

2015-03-17 18:38:30 -04:00
parent daf3f8706b
commit f794ef7222
3 changed files with 29 additions and 11 deletions
--- a/src/scanner.re
+++ b/src/scanner.re
@@ -184,10 +184,7 @@ abbreviation = ({word})"\.";

 }

-tokenized_string_t *tokenize(const char *input) {
-    size_t token_start, token_length;
-    int token_type;
-
+scanner_t scanner_from_string(const char *input) {
    unsigned char *s = (unsigned char *)input;

    scanner_t scanner;
@@ -196,6 +193,15 @@ tokenized_string_t *tokenize(const char *input) {
    scanner.start = s;
    scanner.end = s + strlen(input);

+    return scanner;
+}
+
+tokenized_string_t *tokenize(const char *input) {
+    size_t token_start, token_length;
+    int token_type;
+
+    scanner_t scanner = scanner_from_string(input);
+
    tokenized_string_t *response = tokenized_string_new();

    while ( ( token_type = scan_token(&scanner)) != END ) {