[fix] Make a copy even for pure Latin-script transliteration, since string_trim modifies the string in place, which occasionally causes issues

This commit is contained in:
Al
2015-12-19 01:31:52 -05:00
parent 39e83961ef
commit f8da44e8b0

View File

@@ -83,7 +83,9 @@ void add_latin_alternatives(string_tree_t *tree, char *str, size_t len, uint64_t
} }
} }
utf8_normalized = normalize_string_utf8(str, options); char *str_copy = strndup(str, len);
utf8_normalized = normalize_string_utf8(str_copy, options);
free(str_copy);
if (options & NORMALIZE_STRING_LATIN_ASCII && utf8_normalized != NULL) { if (options & NORMALIZE_STRING_LATIN_ASCII && utf8_normalized != NULL) {
transliterated = transliterate(LATIN_ASCII, utf8_normalized, strlen(utf8_normalized)); transliterated = transliterate(LATIN_ASCII, utf8_normalized, strlen(utf8_normalized));
@@ -112,8 +114,9 @@ string_tree_t *normalize_string(char *str, uint64_t options) {
size_t consumed = 0; size_t consumed = 0;
while (consumed < len) { char *copy;
while (consumed < len) {
string_script_t script_span = get_string_script(str, len - consumed); string_script_t script_span = get_string_script(str, len - consumed);
script_t script = script_span.script; script_t script = script_span.script;
size_t script_len = script_span.len; size_t script_len = script_span.len;
@@ -127,7 +130,7 @@ string_tree_t *normalize_string(char *str, uint64_t options) {
if (utf8_normalized != NULL) { if (utf8_normalized != NULL) {
if (options & NORMALIZE_STRING_LATIN_ASCII) { if (options & NORMALIZE_STRING_LATIN_ASCII) {
transliterated = transliterate(LATIN_ASCII, utf8_normalized, len); transliterated = transliterate(LATIN_ASCII, utf8_normalized, strlen(utf8_normalized));
if (transliterated != NULL) { if (transliterated != NULL) {
string_tree_add_string(tree, transliterated); string_tree_add_string(tree, transliterated);
free(transliterated); free(transliterated);
@@ -141,13 +144,19 @@ string_tree_t *normalize_string(char *str, uint64_t options) {
} }
} else if (options & NORMALIZE_STRING_LATIN_ASCII && script == SCRIPT_LATIN && script_len > 0) { } else if (options & NORMALIZE_STRING_LATIN_ASCII && script == SCRIPT_LATIN && script_len > 0) {
add_latin_alternatives(tree, str, script_len, options); copy = strndup(str, script_len);
} else if (options & NORMALIZE_STRING_TRANSLITERATE && script != SCRIPT_UNKNOWN && script_len > 0) { if (copy != NULL) {
add_latin_alternatives(tree, str, script_len, options);
free(copy);
copy = NULL;
}
} else if (options & NORMALIZE_STRING_TRANSLITERATE && script != SCRIPT_UNKNOWN && script_len > 0) {
char *trans_name; char *trans_name;
char *original = strndup(str, script_len); copy = strndup(str, script_len);
if (original != NULL) { if (copy != NULL) {
add_latin_alternatives(tree, original, script_len, options); add_latin_alternatives(tree, copy, script_len, options);
free(original); free(copy);
copy = NULL;
} }
foreach_transliterator(script, "", trans_name, { foreach_transliterator(script, "", trans_name, {
@@ -166,7 +175,7 @@ string_tree_t *normalize_string(char *str, uint64_t options) {
consumed += script_len; consumed += script_len;
str += script_len; str += script_len;
} }
return tree; return tree;