[utils] basic functions for wide char support for narrow Python builds (unichr, ord, unicode iteration)

This commit is contained in:
Al
2015-09-23 00:42:48 -04:00
parent 8562c7a5cb
commit 7e057b0fb8

View File

@@ -0,0 +1,36 @@
import sys
from encoding import safe_decode
# Size of the Unicode code space: code points run from U+0000 through
# U+10FFFF inclusive, so there are 0x110000 of them.
NUM_CODEPOINTS = 0x110000
try:
    _unichr = unichr  # Python 2 builtin
except NameError:
    # Interpreters without unichr(): chr() already returns text there.
    _unichr = chr


def wide_unichr(i):
    """Return the unicode string for code point ``i``.

    When ``i`` fits in the interpreter's native character range
    (``i <= sys.maxunicode``) the result is a single character.  On narrow
    builds, where ``sys.maxunicode`` is 0xFFFF, code points above that are
    returned as the two-character UTF-16 surrogate pair encoding ``i``.

    Args:
        i: integer code point, 0 through 0x10FFFF.

    Returns:
        A unicode string of length 1, or length 2 (surrogate pair) on
        narrow builds for code points above U+FFFF.

    Raises:
        ValueError: if ``i`` is outside the Unicode range.
    """
    if i <= sys.maxunicode:
        # Negative values are rejected by the builtin with ValueError.
        return _unichr(i)
    if i > 0x10FFFF:
        raise ValueError(
            'wide_unichr() arg not in range(0x110000): {0!r}'.format(i))
    # Build the surrogate pair arithmetically instead of round-tripping a
    # '\UXXXXXXXX' escape through the unicode-escape codec: the top 10 bits
    # of the offset go in the high surrogate, the low 10 bits in the low one.
    offset = i - 0x10000
    return _unichr(0xD800 + (offset >> 10)) + _unichr(0xDC00 + (offset & 0x3FF))
def wide_ord(c):
    """Return the integer code point of ``c``, or None if it is not one.

    ``c`` may be a single character, or a two-character UTF-16 surrogate
    pair (the way narrow builds represent code points above U+FFFF).

    Args:
        c: a string of length 1, or a high+low surrogate pair of length 2.

    Returns:
        The code point as an int.  None is returned for empty input, for
        input longer than two characters, and for two-character input that
        is not a valid high surrogate followed by a low surrogate.
    """
    length = len(c)
    if length == 1:
        return ord(c)
    if length == 2:
        high_char, low_char = c
        high, low = ord(high_char), ord(low_char)
        # Only a real surrogate pair decodes to a code point; without this
        # check an arbitrary two-character string yields a meaningless
        # (often negative) number.
        if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF:
            return ((high - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000
    return None
def wide_iter(s):
    """Iterate over ``s`` one code point at a time.

    ``s`` is first passed through ``safe_decode`` (imported from the
    project's ``encoding`` module; presumably it returns a unicode string --
    confirm against that module).  Each yielded item is either a single
    character or, where a high surrogate is immediately followed by a low
    surrogate, the two-character surrogate pair as one item.

    A high surrogate that is not followed by a low surrogate (including one
    at the very end of the string) is yielded on its own, so the character
    after it is never swallowed into a bogus pair.

    Args:
        s: the text to iterate over.

    Yields:
        Strings of length 1, or length 2 for surrogate pairs.
    """
    s = safe_decode(s)
    length = len(s)
    i = 0
    while i < length:
        c = s[i]
        is_pair = (
            0xD800 <= ord(c) <= 0xDBFF
            and i + 1 < length
            and 0xDC00 <= ord(s[i + 1]) <= 0xDFFF
        )
        if is_pair:
            yield s[i:i + 2]
            i += 2
        else:
            yield c
            i += 1