[fix] Spaces in abbreviation
This commit is contained in:
@@ -386,7 +386,7 @@ def osm_abbreviate(gazetteer, s, language, abbreviate_prob=0.3, separate_prob=0.
|
|||||||
if random.random() > abbreviate_prob:
|
if random.random() > abbreviate_prob:
|
||||||
for j, (t_i, c_i) in enumerate(t):
|
for j, (t_i, c_i) in enumerate(t):
|
||||||
abbreviated.append(tokens[i + j][0])
|
abbreviated.append(tokens[i + j][0])
|
||||||
if i + j < n - 1 and raw_tokens[i + j + 1][1] > raw_tokens[i + j][1] + 1:
|
if i + j < n - 1 and raw_tokens[i + j + 1][0] > sum(raw_tokens[i + j][:2]):
|
||||||
abbreviated.append(u' ')
|
abbreviated.append(u' ')
|
||||||
i += len(t)
|
i += len(t)
|
||||||
continue
|
continue
|
||||||
@@ -412,7 +412,7 @@ def osm_abbreviate(gazetteer, s, language, abbreviate_prob=0.3, separate_prob=0.
|
|||||||
token = random.choice(abbreviations) if abbreviations else canonical
|
token = random.choice(abbreviations) if abbreviations else canonical
|
||||||
token = recase_abbreviation(token, tokens[i:i + len(t)])
|
token = recase_abbreviation(token, tokens[i:i + len(t)])
|
||||||
abbreviated.append(token)
|
abbreviated.append(token)
|
||||||
if t[-1][1] != token_types.IDEOGRAPHIC_CHAR:
|
if i < n - 1 and raw_tokens[i + len(t)][0] > sum(raw_tokens[i + len(t) - 1][:2]):
|
||||||
abbreviated.append(u' ')
|
abbreviated.append(u' ')
|
||||||
break
|
break
|
||||||
elif is_prefix:
|
elif is_prefix:
|
||||||
@@ -464,13 +464,13 @@ def osm_abbreviate(gazetteer, s, language, abbreviate_prob=0.3, separate_prob=0.
|
|||||||
else:
|
else:
|
||||||
for j, (t_i, c_i) in enumerate(t):
|
for j, (t_i, c_i) in enumerate(t):
|
||||||
abbreviated.append(tokens[i + j][0])
|
abbreviated.append(tokens[i + j][0])
|
||||||
if i + j < n - 1 and raw_tokens[i + j + 1][1] > raw_tokens[i + j][1] + 1:
|
if i + j < n - 1 and raw_tokens[i + j + 1][0] > sum(raw_tokens[i + j][:2]):
|
||||||
abbreviated.append(u' ')
|
abbreviated.append(u' ')
|
||||||
i += len(t)
|
i += len(t)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
abbreviated.append(tokens[i][0])
|
abbreviated.append(tokens[i][0])
|
||||||
if i < n - 1 and raw_tokens[i + 1][1] > raw_tokens[i][1] + 1:
|
if i < n - 1 and raw_tokens[i + 1][0] > sum(raw_tokens[i][:2]):
|
||||||
abbreviated.append(u' ')
|
abbreviated.append(u' ')
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user