diff --git a/src/libpostal.c b/src/libpostal.c index b68aacb7..d226413e 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -310,7 +310,7 @@ static string_tree_t *add_string_alternatives(char *str, libpostal_normalize_opt if (expansion.canonical_index != NULL_CANONICAL_INDEX) { char *canonical = address_dictionary_get_canonical(expansion.canonical_index); - char *canonical_normalized = normalize_string_utf8(canonical, normalize_string_options); + char *canonical_normalized = normalize_string_latin(canonical, strlen(canonical), normalize_string_options); canonical = canonical_normalized != NULL ? canonical_normalized : canonical; @@ -533,7 +533,7 @@ static inline void cat_affix_expansion(char_array *key, char *str, address_expan if (expansion.canonical_index != NULL_CANONICAL_INDEX) { char *canonical = address_dictionary_get_canonical(expansion.canonical_index); uint64_t normalize_string_options = get_normalize_string_options(options); - char *canonical_normalized = normalize_string_utf8(canonical, normalize_string_options); + char *canonical_normalized = normalize_string_latin(canonical, strlen(canonical), normalize_string_options); canonical = canonical_normalized != NULL ? canonical_normalized : canonical; char_array_cat(key, canonical); diff --git a/src/normalize.c b/src/normalize.c index 1adecdc6..076b6e56 100644 --- a/src/normalize.c +++ b/src/normalize.c @@ -114,13 +114,19 @@ char *normalize_string_utf8(char *str, uint64_t options) { char *normalize_string_latin_languages(char *str, size_t len, uint64_t options, size_t num_languages, char **languages) { - char *latin_transliterator = LATIN_ASCII; + char *transliterated = NULL; + char *latin_transliterator = NULL; + if (options & NORMALIZE_STRING_SIMPLE_LATIN_ASCII) { latin_transliterator = LATIN_ASCII_SIMPLE; + } else if (options & NORMALIZE_STRING_LATIN_ASCII) { + latin_transliterator = LATIN_ASCII; + } + + if (latin_transliterator != NULL) { + transliterated = transliterate(latin_transliterator, str, len); } - char *transliterated = transliterate(latin_transliterator, str, len); - char *utf8_normalized; if (transliterated == NULL) { utf8_normalized = normalize_string_utf8_languages(str, options, num_languages, languages);