def regex_char_range(match):
    """Render a Unicode code point range as regex character-range text.

    ``match`` is a string of hexadecimal code points: either a single
    value (``'00B5'``) or two endpoints separated by ``'..'``
    (``'0041..005A'``).

    Each endpoint is lower-cased and emitted as a textual escape sequence:
    a backslash, then a lowercase ``u`` and four hex digits for code points
    of up to four digits, or a backslash, an uppercase ``U`` and eight hex
    digits for longer ("wide") code points. The escaped endpoints are
    joined with ``'-'`` and returned as one string.
    """
    escaped = []
    for code_point in match.split('..'):
        digits = code_point.lower()
        # The backslash is written explicitly (doubled) so the literal is a
        # plain backslash followed by u/U. An undoubled backslash-u here is
        # an incomplete unicode escape and fails to compile on Python 3.
        if len(digits) < 5:
            # The short escape form takes exactly four hex digits.
            escaped.append(u'\\u{}'.format(digits.rjust(4, '0')))
        else:
            # The wide escape form takes exactly eight hex digits.
            escaped.append(u'\\U{}'.format(digits.rjust(8, '0')))
    return u'-'.join(escaped)