[abbreviations] Adding ability to abbreviate within hyphenated phrases e.g. Sint-Maarten => St.-Maarten

This commit is contained in:
Al
2016-08-24 17:32:28 -04:00
parent a6dad74a2b
commit dfa5c8e0a6
2 changed files with 145 additions and 84 deletions

View File

@@ -1,6 +1,10 @@
import re
from geodata.text.tokenize import tokenize
from geodata.text.token_types import token_types
# Matches a single hyphen- or dash-like character: ASCII hyphen-minus, U+058A,
# U+05BE, U+1400, U+1806, the range U+2010..U+2013 (which stops short of the
# em dash U+2014), U+2212, U+2E17, U+2E1A, U+FE32, U+FE63 and U+FF0D.
# The ASCII hyphen is placed first in the character class so it is literal
# without a backslash; writing '\-' in a non-raw string literal is an invalid
# string escape that newer Python versions warn about.
non_breaking_dash_regex = re.compile(u'[-\u058a\u05be\u1400\u1806\u2010-\u2013\u2212\u2e17\u2e1a\ufe32\ufe63\uff0d]', re.UNICODE)
def is_numeric(s):
tokens = tokenize(s)