[utf8] Add a boolean member to the string_script_t return value, set to true if the string is entirely ASCII (no transliteration needed; this should be the common case for English addresses)
This commit is contained in:
@@ -2,6 +2,8 @@
|
||||
|
||||
#include "unicode_scripts_data.c"
|
||||
|
||||
#define MAX_ASCII 128
|
||||
|
||||
inline script_t get_char_script(uint32_t ch) {
|
||||
if (ch > NUM_CODEPOINTS - 1) return SCRIPT_UNKNOWN;
|
||||
return char_scripts[ch];
|
||||
@@ -21,6 +23,8 @@ string_script_t get_string_script(char *str, size_t len) {
|
||||
size_t script_len = 0;
|
||||
size_t idx = 0;
|
||||
|
||||
bool is_ascii = true;
|
||||
|
||||
while (idx < len) {
|
||||
ssize_t char_len = utf8proc_iterate(ptr, -1, &ch);
|
||||
|
||||
@@ -36,6 +40,8 @@ string_script_t get_string_script(char *str, size_t len) {
|
||||
break;
|
||||
}
|
||||
|
||||
is_ascii = is_ascii && ch < MAX_ASCII;
|
||||
|
||||
ptr += char_len;
|
||||
idx += char_len;
|
||||
script_len += char_len;
|
||||
@@ -46,5 +52,5 @@ string_script_t get_string_script(char *str, size_t len) {
|
||||
|
||||
}
|
||||
|
||||
return (string_script_t) {last_script, script_len};
|
||||
return (string_script_t) {last_script, script_len, is_ascii};
|
||||
}
|
||||
Reference in New Issue
Block a user