[merge] merging commit from v1.1
This commit is contained in:
@@ -3,12 +3,9 @@ from geodata.text import _tokenize
|
||||
from geodata.text.token_types import token_types
|
||||
|
||||
|
||||
def tokenize_raw(s):
|
||||
return _tokenize.tokenize(safe_decode(s))
|
||||
|
||||
|
||||
def tokenize(s):
|
||||
def tokenize(s, whitespace=False):
|
||||
u = safe_decode(s)
|
||||
s = safe_encode(s)
|
||||
return [(safe_decode(s[start:start + length]), token_types.from_id(token_type))
|
||||
for start, length, token_type in _tokenize.tokenize(u)]
|
||||
for start, length, token_type in _tokenize.tokenize(u, whitespace)]
|
||||
|
||||
Reference in New Issue
Block a user