[fix] token_types.PHRASE
This commit is contained in:
@@ -63,7 +63,7 @@ def abbreviate(gazetteer, s, language, abbreviate_prob=0.3, separate_prob=0.2):
|
||||
i = 0
|
||||
|
||||
for t, c, length, data in gazetteer.filter(norm_tokens):
|
||||
-if c is PHRASE:
|
||||
+if c == token_types.PHRASE:
|
||||
valid = []
|
||||
data = [d.split('|') for d in data]
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ from geodata.string_utils import wide_iter, wide_ord
|
||||
from geodata.i18n.unicode_properties import get_chars_by_script, get_script_languages
|
||||
from geodata.text.normalize import normalized_tokens, normalize_string
|
||||
from geodata.text.tokenize import tokenize
|
||||
+from geodata.text.token_types import token_types
|
||||
|
||||
WELL_REPRESENTED_LANGUAGES = set(['en', 'fr', 'it', 'de', 'nl', 'es', 'pt'])
|
||||
|
||||
@@ -100,7 +101,7 @@ def disambiguate_language(text, languages):
|
||||
seen_languages = set()
|
||||
|
||||
for t, c, l, data in street_types_gazetteer.filter(tokens):
|
||||
-if c is PHRASE:
|
||||
+if c == token_types.PHRASE:
|
||||
valid = OrderedDict()
|
||||
data = [safe_decode(d).split(u'|') for d in data]
|
||||
potentials = set([l for l, d, i, c in data if l in valid_languages])
|
||||
|
||||
Reference in New Issue
Block a user