[unicode] Wide version of word breaks

This commit is contained in:
Al
2015-09-22 18:55:33 -04:00
parent 25917cfb17
commit a76831df7a

View File

@@ -49,12 +49,7 @@ ideographic_scripts = set([
def regex_char_range(match):
    """Convert a ``START..END`` hex code point range to escape notation.

    ``match`` is a string holding one hexadecimal code point (``"00E9"``)
    or two joined by ``..`` (``"0041..005A"``). Each code point is
    lower-cased and rendered as a literal backslash escape: ``\\uXXXX``
    when it has fewer than five hex digits, otherwise ``\\UXXXXXXXX``
    zero-padded on the left to eight digits. The endpoints are joined
    with ``-``.

    Returns the escaped range as text, e.g. ``\\u0041-\\u005a`` or
    ``\\U0001f600-\\U0001f64f``.
    """
    endpoints = match.split('..')

    def escape(code_point):
        # Backslashes are doubled so the literal is valid on Python 3,
        # where a bare '\u{}' is a truncated-escape SyntaxError.
        code_point = code_point.lower()
        if len(code_point) < 5:
            return '\\u{}'.format(code_point)
        # Code points beyond the BMP need the 8-digit \U form.
        return '\\U{}'.format(code_point.rjust(8, '0'))

    return u'-'.join([escape(c) for c in endpoints])
def get_letter_range(text, *regexes):