From 4bc6adf6699f3a3a991abd6d585d818458f01d40 Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 10 Aug 2015 17:48:48 -0400 Subject: [PATCH] [normalize] Adding the original script as an alternative in transliteration mode as well --- src/normalize.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/normalize.c b/src/normalize.c index 6a0b2f43..5d21c8d0 100644 --- a/src/normalize.c +++ b/src/normalize.c @@ -120,12 +120,17 @@ string_tree_t *normalize_string(char *str, uint64_t options) { free(utf8_normalized); utf8_normalized = NULL; } - string_tree_finalize_token(tree); + } else if (options & NORMALIZE_STRING_LATIN_ASCII && script == SCRIPT_LATIN && script_len > 0) { add_latin_alternatives(tree, str, script_len, options); - string_tree_finalize_token(tree); } else if (options & NORMALIZE_STRING_TRANSLITERATE && script != SCRIPT_UNKNOWN && script_len > 0) { char *trans_name; + char *original = strndup(str, script_len); + if (original != NULL) { + add_latin_alternatives(tree, original, script_len, options); + free(original); + } + foreach_transliterator(script, "", trans_name, { transliterated = transliterate(trans_name, str, script_len); @@ -135,14 +140,16 @@ string_tree_t *normalize_string(char *str, uint64_t options) { } }) - string_tree_finalize_token(tree); } else { string_tree_add_string_len(tree, str, script_len); } + string_tree_finalize_token(tree); + consumed += script_len; str += script_len; } + return tree; }