[utf8] Add a boolean struct member to the string_script_t return value; it is set to true if the string is entirely ASCII (no transliteration is needed in that case, which should be frequent for English addresses)
This commit is contained in:
@@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
#include "unicode_scripts_data.c"
|
#include "unicode_scripts_data.c"
|
||||||
|
|
||||||
|
#define MAX_ASCII 128
|
||||||
|
|
||||||
inline script_t get_char_script(uint32_t ch) {
|
inline script_t get_char_script(uint32_t ch) {
|
||||||
if (ch > NUM_CODEPOINTS - 1) return SCRIPT_UNKNOWN;
|
if (ch > NUM_CODEPOINTS - 1) return SCRIPT_UNKNOWN;
|
||||||
return char_scripts[ch];
|
return char_scripts[ch];
|
||||||
@@ -21,6 +23,8 @@ string_script_t get_string_script(char *str, size_t len) {
|
|||||||
size_t script_len = 0;
|
size_t script_len = 0;
|
||||||
size_t idx = 0;
|
size_t idx = 0;
|
||||||
|
|
||||||
|
bool is_ascii = true;
|
||||||
|
|
||||||
while (idx < len) {
|
while (idx < len) {
|
||||||
ssize_t char_len = utf8proc_iterate(ptr, -1, &ch);
|
ssize_t char_len = utf8proc_iterate(ptr, -1, &ch);
|
||||||
|
|
||||||
@@ -36,6 +40,8 @@ string_script_t get_string_script(char *str, size_t len) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
is_ascii = is_ascii && ch < MAX_ASCII;
|
||||||
|
|
||||||
ptr += char_len;
|
ptr += char_len;
|
||||||
idx += char_len;
|
idx += char_len;
|
||||||
script_len += char_len;
|
script_len += char_len;
|
||||||
@@ -46,5 +52,5 @@ string_script_t get_string_script(char *str, size_t len) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return (string_script_t) {last_script, script_len};
|
return (string_script_t) {last_script, script_len, is_ascii};
|
||||||
}
|
}
|
||||||
@@ -18,6 +18,7 @@ typedef struct script_languages {
|
|||||||
typedef struct string_script {
|
typedef struct string_script {
|
||||||
script_t script;
|
script_t script;
|
||||||
size_t len;
|
size_t len;
|
||||||
|
bool ascii;
|
||||||
} string_script_t;
|
} string_script_t;
|
||||||
|
|
||||||
script_t get_char_script(uint32_t ch);
|
script_t get_char_script(uint32_t ch);
|
||||||
|
|||||||
Reference in New Issue
Block a user