diff --git a/scripts/geodata/string_utils.py b/scripts/geodata/string_utils.py new file mode 100644 index 00000000..3812f112 --- /dev/null +++ b/scripts/geodata/string_utils.py @@ -0,0 +1,36 @@ +import sys +from encoding import safe_decode + +NUM_CODEPOINTS = 0x10FFFF + 1 + + +def wide_unichr(i): + if i <= sys.maxunicode: + return unichr(i) + else: + return '\U{0:08x}'.format(i).decode('unicode-escape') + + +def wide_ord(c): + if len(c) == 1: + return ord(c) + elif len(c) == 2: + h, l = c + return ((ord(h) - 0xD800) * 0x400) + (ord(l) - 0xDC00) + 0x10000 + + return None + + +def wide_iter(s): + skip = False + s = safe_decode(s) + for i, c in enumerate(s): + if skip: + skip = False + continue + + if 0xD800 <= ord(c) <= 0xDBFF: + yield s[i:i+2] + skip = True + continue + yield c