Files
libpostal-addrss/scripts/geodata/string_utils.py
2025-09-06 22:03:29 -04:00

37 lines
693 B
Python

import sys
from encoding import safe_decode
# Total number of Unicode code points: valid values run from 0 through
# 0x10FFFF inclusive, hence the + 1.
NUM_CODEPOINTS = 0x10FFFF + 1
def wide_unichr(i):
    """Return the unicode string for the code point ``i``.

    Code points up to ``sys.maxunicode`` are converted directly. Larger
    values (reachable when ``sys.maxunicode`` is 0xFFFF) are produced by
    decoding an eight-hex-digit unicode-escape sequence, which on such
    interpreters gives a two-unit surrogate pair.

    Works on both Python 2 and Python 3.

    :param i: integer code point
    :returns: unicode string representing the code point
    :raises ValueError: if ``i`` is not a valid code point
        (``UnicodeDecodeError`` from the escape path is a ``ValueError``)
    """
    try:
        to_char = unichr  # Python 2: plain chr() only covers 0-255 there
    except NameError:
        to_char = chr  # Python 3: chr() handles the full range

    if i <= sys.maxunicode:
        return to_char(i)

    # The backslash is escaped so that the literal is the 10-character text
    # \UXXXXXXXX on every Python version (an unescaped \U followed by a brace
    # is a syntax error on Python 3). Encoding to ASCII first gives a byte
    # string on both major versions, which is what unicode-escape decodes.
    escape = '\\U{0:08x}'.format(i)
    return escape.encode('ascii').decode('unicode-escape')
def wide_ord(c):
    """Return the integer code point for the character ``c``.

    ``c`` may be a single character, or a two-unit UTF-16 surrogate pair
    (high surrogate followed by low surrogate) as found on interpreters
    where ``sys.maxunicode`` is 0xFFFF.

    :param c: string of length 1, or length 2 holding a surrogate pair
    :returns: the code point as an int, or ``None`` when ``c`` is empty,
        longer than two units, or two units that do not form a valid
        high/low surrogate pair
    """
    size = len(c)
    if size == 1:
        return ord(c)

    if size == 2:
        high_unit, low_unit = c
        high, low = ord(high_unit), ord(low_unit)
        # Only combine genuine surrogate pairs; arbitrary two-character
        # input would otherwise produce a meaningless (negative) number.
        if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF:
            return ((high - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000

    return None
def wide_iter(s):
    """Iterate over ``s`` one code point at a time.

    The input is first passed through ``safe_decode`` (presumably yielding
    a unicode string -- confirm against the ``encoding`` module). A high
    surrogate (0xD800-0xDBFF) immediately followed by a low surrogate
    (0xDC00-0xDFFF) is yielded together as one two-unit string; everything
    else is yielded as a single-unit string.

    A high surrogate that is NOT followed by a low surrogate (malformed
    input, or at the very end of the string) is yielded on its own so the
    following character is not swallowed.

    :param s: string to iterate over
    :returns: generator of strings of length 1 or 2
    """
    s = safe_decode(s)
    length = len(s)
    pos = 0
    while pos < length:
        unit = s[pos]
        is_pair = (
            0xD800 <= ord(unit) <= 0xDBFF
            and pos + 1 < length
            and 0xDC00 <= ord(s[pos + 1]) <= 0xDFFF
        )
        if is_pair:
            yield s[pos:pos + 2]
            pos += 2
        else:
            yield unit
            pos += 1