diff --git a/src/expand.c b/src/expand.c index d4e5dc2d..622567d9 100644 --- a/src/expand.c +++ b/src/expand.c @@ -104,7 +104,7 @@ void add_normalized_strings_token(cstring_array *strings, char *str, token_t tok } } - if (is_numeric_token(token.type) && options.split_alpha_from_numeric && numeric_starts_with_alpha(str, token)) { + if (is_numeric_token(token.type) && options.split_alpha_from_numeric) { normalize_token_options |= NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC; normalize_token(strings, str, token, normalize_token_options); normalize_token_options ^= NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC; diff --git a/src/normalize.c b/src/normalize.c index ff21af9b..3e218e9d 100644 --- a/src/normalize.c +++ b/src/normalize.c @@ -423,15 +423,18 @@ void add_normalized_token(char_array *array, char *str, token_t token, uint64_t next_char_len = utf8proc_iterate(ptr + char_len, len, &next_ch); int next_cat = utf8proc_category(next_ch); bool next_is_number = utf8_is_number(next_cat); + bool next_is_letter = utf8_is_letter(next_cat); bool is_full_stop = ch == FULL_STOP_CODEPOINT; + bool is_hyphen_between_letter_and_number = is_hyphen && ((next_is_number && last_was_letter) || (next_is_letter && last_was_number)); + if (is_hyphen && options & NORMALIZE_TOKEN_REPLACE_HYPHENS && (!(last_was_number && next_is_number) || options & NORMALIZE_TOKEN_REPLACE_NUMERIC_HYPHENS)) { char_array_append(array, " "); append_char = false; } else if (is_hyphen && options & NORMALIZE_TOKEN_DELETE_HYPHENS) { - append_char = false; + append_char = !is_hyphen_between_letter_and_number; } if ((is_hyphen || is_full_stop) && token.type == NUMERIC && options & NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC && last_was_letter) { @@ -452,7 +455,7 @@ void add_normalized_token(char_array *array, char *str, token_t token, uint64_t append_char = false; } - if (options & NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC && token.type == NUMERIC && last_was_letter && is_number && !alpha_numeric_split) { + if (options & NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC && token.type == NUMERIC && ((last_was_letter && is_number) || (last_was_number && is_letter)) && !alpha_numeric_split) { char_array_append(array, " "); alpha_numeric_split = true; }