[utf8] Adding a boolean struct member for string_script_t return values, set to true if the string is ASCII (no transliteration needed, should be frequent for English addresses)

This commit is contained in:
Al
2015-06-28 19:37:53 -04:00
parent f0bf7e750c
commit cc0401a8d1
2 changed files with 8 additions and 1 deletions

View File

@@ -2,6 +2,8 @@
#include "unicode_scripts_data.c"
/* Exclusive upper bound used for the ASCII test: a decoded code point ch is
 * treated as ASCII when ch < MAX_ASCII (i.e. values 0..127). */
#define MAX_ASCII 128
inline script_t get_char_script(uint32_t ch) {
if (ch > NUM_CODEPOINTS - 1) return SCRIPT_UNKNOWN;
return char_scripts[ch];
@@ -21,6 +23,8 @@ string_script_t get_string_script(char *str, size_t len) {
size_t script_len = 0;
size_t idx = 0;
bool is_ascii = true;
while (idx < len) {
ssize_t char_len = utf8proc_iterate(ptr, -1, &ch);
@@ -36,6 +40,8 @@ string_script_t get_string_script(char *str, size_t len) {
break;
}
is_ascii = is_ascii && ch < MAX_ASCII;
ptr += char_len;
idx += char_len;
script_len += char_len;
@@ -46,5 +52,5 @@ string_script_t get_string_script(char *str, size_t len) {
}
return (string_script_t) {last_script, script_len};
return (string_script_t) {last_script, script_len, is_ascii};
}

View File

@@ -18,6 +18,7 @@ typedef struct script_languages {
/* Result type of get_string_script().
 *
 * Field order matters: get_string_script() builds its return value with a
 * positional compound literal, {script, len, ascii}.
 */
typedef struct string_script {
/* Script value reported by get_string_script() for the scanned text. */
script_t script;
/* Sum of the per-character lengths consumed during the scan.
 * NOTE(review): presumably a byte count, since the lengths come from
 * utf8proc_iterate() -- confirm against the caller. */
size_t len;
/* True when every code point examined by the scan was below MAX_ASCII.
 * NOTE(review): the scan loop can break early, so this appears to cover only
 * the code points processed before the break -- confirm. */
bool ascii;
} string_script_t;
script_t get_char_script(uint32_t ch);