[fix] use tokenize_raw API

commit cee9da05d6
parent bbd10e97bd
Author: Al
Date:   2015-10-28 21:37:41 -04:00


@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 from postal.text import _normalize
-from postal.text import _tokenize
+from postal.text.tokenize import tokenize_raw
 from postal.text.token_types import token_types
 from postal.text.encoding import safe_decode
@@ -40,5 +40,5 @@ def normalized_tokens(s, string_options=DEFAULT_STRING_OPTIONS,
     normalized = _normalize.normalize_string_utf8(s, string_options)
     # Tuples of (offset, len, type)
-    tokens = _tokenize.tokenize(normalized)
+    tokens = tokenize_raw(normalized)
     return [(_normalize.normalize_token(normalized, t, token_options), token_types.from_id(t[-1])) for t in tokens]
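
For context, a minimal usage sketch of the new call path. This is not part of the commit: it assumes the libpostal Python bindings are importable, and that tokenize_raw yields (offset, length, token_type_id) tuples whose offsets index into the UTF-8 encoding of the input, per the "Tuples of (offset, len, type)" comment in the hunk above.

    # Hedged sketch, not from the commit: calls tokenize_raw directly.
    # Assumes (offset, length, token_type_id) tuples with byte offsets
    # into the UTF-8 encoded input, matching the diff's comment.
    from postal.text.tokenize import tokenize_raw
    from postal.text.token_types import token_types

    s = u'123 Main St'
    encoded = s.encode('utf-8')
    for offset, length, type_id in tokenize_raw(s):
        token = encoded[offset:offset + length].decode('utf-8')
        print('%s\t%s' % (token, token_types.from_id(type_id)))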