[python] Adding initial pypostal bindings for tokenize so we can remove address_normalizer dependency. Not tested on Python 3.
python/postal/text/tokenize.py (new normal file, 12 lines)
@@ -0,0 +1,12 @@
+from postal.text.encoding import safe_decode
+from postal.text import _tokenize
+from postal.text.token_types import token_types
+
+
+def tokenize_raw(s):
+    return _tokenize.tokenize(safe_decode(s))
+
+
+def tokenize(s):
+    return [(s[start:start + length], token_types.from_id(token_type))
+            for start, length, token_type in _tokenize.tokenize(safe_decode(s))]
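
For context, a minimal usage sketch (not part of the commit), assuming the _tokenize C extension has been built and that token_types.from_id maps the numeric token type IDs returned by the extension to token type objects, as the code above implies:

# Hypothetical usage; requires the compiled _tokenize extension.
from postal.text.tokenize import tokenize, tokenize_raw

# Raw form: (start, length, token_type_id) triples straight from the C extension.
raw = tokenize_raw('123 Main St')

# Convenience form: (substring, token_type) pairs sliced out of the input string.
tokens = tokenize('123 Main St')

# Passing unicode text is assumed here, since tokenize() slices the original
# input using offsets computed on the decoded string; the commit message notes
# this code is untested on Python 3.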