[merge] merging commit from v1.1

This commit is contained in:
Al
2017-08-14 04:04:58 -06:00
parent bb277fb326
commit 448ca6a61a
10 changed files with 374 additions and 294 deletions

View File

@@ -3,12 +3,9 @@ from geodata.text import _tokenize
from geodata.text.token_types import token_types
def tokenize_raw(s):
    """Return the native tokenizer's unprocessed output for ``s``.

    ``s`` is first normalised with ``safe_decode``; whatever
    ``_tokenize.tokenize`` returns for the decoded text is handed back
    unchanged.
    """
    decoded = safe_decode(s)
    return _tokenize.tokenize(decoded)
def tokenize(s, whitespace=False):
    """Tokenize ``s`` into ``(token_text, token_type)`` pairs.

    Args:
        s: The input string to tokenize.
        whitespace: Forwarded unchanged to ``_tokenize.tokenize``.
            Presumably controls whether whitespace runs are emitted as
            tokens -- confirm against the native tokenizer.

    Returns:
        A list of ``(text, token_type)`` tuples, where ``text`` is the
        ``safe_decode``-d slice of the input and ``token_type`` is the
        result of ``token_types.from_id`` for the id the tokenizer reported.
    """
    u = safe_decode(s)
    # The (start, length) pairs from the tokenizer are applied to the
    # *encoded* form of the input, so ``s`` is rebound to that form before
    # slicing (presumably UTF-8 byte offsets -- confirm against
    # ``safe_encode`` / the native tokenizer).
    s = safe_encode(s)
    return [
        (safe_decode(s[start:start + length]), token_types.from_id(token_type))
        for start, length, token_type in _tokenize.tokenize(u, whitespace)
    ]