[utils] function to create an array of uint32_t codepoints from a UTF-8 string, a few bug fixes to string_utils

This commit is contained in:
Al
2017-10-19 04:48:50 -04:00
parent c61007388b
commit 245aa226e0
2 changed files with 27 additions and 2 deletions

View File

@@ -361,12 +361,35 @@ ssize_t utf8_len(const char *str, size_t len) {
if (remaining == 0) break;
ptr += char_len;
num_utf8_chars += char_len;
num_utf8_chars++;
}
return num_utf8_chars;
}
/**
 * Decode a NUL-terminated UTF-8 string into an array of Unicode code points.
 *
 * Decoding stops at the terminating NUL (which is not stored) or at the first
 * byte sequence that utf8proc_iterate() rejects; code points decoded before
 * that point are still returned.
 *
 * @param str  NUL-terminated UTF-8 input; may be NULL.
 * @return     An array obtained from uint32_array_new() holding one element
 *             per decoded code point, or NULL if str is NULL or the array
 *             could not be created.
 *             NOTE(review): presumably the caller owns and must release the
 *             returned array -- confirm against the uint32_array API.
 */
uint32_array *unicode_codepoints(const char *str) {
    if (str == NULL) return NULL;

    uint32_array *a = uint32_array_new();
    /* NOTE(review): assumes uint32_array_new() reports failure as NULL. */
    if (a == NULL) return NULL;

    const uint8_t *ptr = (const uint8_t *)str;
    int32_t ch = 0;

    while (1) {
        /* A length of -1 asks utf8proc to decode one sequence without a
         * byte limit, relying on the NUL terminator to end the string. */
        ssize_t char_len = utf8proc_iterate(ptr, -1, &ch);

        /* A non-positive length signals a decoding error: never push the
         * error code point or step the cursor backwards. ch == 0 is the
         * terminating NUL. */
        if (char_len <= 0 || ch == 0) break;

        uint32_array_push(a, (uint32_t)ch);
        ptr += char_len;
    }

    return a;
}
int utf8_compare_len(const char *str1, const char *str2, size_t len) {
if (len == 0) return 0;