[abbreviations] Adding the ability to abbreviate within hyphenated phrases, e.g. Sint-Maarten => St.-Maarten
This commit is contained in:
@@ -1,6 +1,10 @@
|
||||
import re
|
||||
|
||||
from geodata.text.tokenize import tokenize
|
||||
from geodata.text.token_types import token_types
|
||||
|
||||
# Matches a single dash-like character: ASCII hyphen-minus plus the code
# points U+058A, U+05BE, U+1400, U+1806, the range U+2010-U+2013, U+2212,
# U+2E17, U+2E1A, U+FE32, U+FE63 and U+FF0D. U+2014 and above in that block
# are outside the range and therefore do not match.
# The backslash before the hyphen is doubled so the literal holds a real
# backslash without relying on the invalid string escape "\-"; the compiled
# pattern is unchanged.
non_breaking_dash_regex = re.compile(
    u'[\\-\u058a\u05be\u1400\u1806\u2010-\u2013\u2212\u2e17\u2e1a\ufe32\ufe63\uff0d]',
    re.UNICODE)
|
||||
|
||||
|
||||
def is_numeric(s):
|
||||
tokens = tokenize(s)
|
||||
|
||||
Reference in New Issue
Block a user