Collapse the REPEAT_ZERO_* / REPEAT_ONE_* control characters into a single
REPEAT_* set (codepoint 5), move GROUP_INDICATOR_* from codepoint 16 to
codepoint 6, and drop the star/plus/dollar branches from regex_string_tree().
REPEAT_CHAR_LEN is defined in terms of REPEAT_CHAR, since REPEAT_ZERO_CHAR no
longer exists once this patch is applied.

diff --git a/src/transliterate.h b/src/transliterate.h
index a4ff7715..739ce380 100644
--- a/src/transliterate.h
+++ b/src/transliterate.h
@@ -92,12 +92,12 @@ typedef struct transliteration_table {
 #define EMPTY_TRANSITION_CHAR "\x04"
 #define EMPTY_TRANSITION_CODEPOINT 4
 #define EMPTY_TRANSITION_CHAR_LEN strlen(EMPTY_TRANSITION_CHAR)
-#define REPEAT_ZERO_CHAR "\x05"
-#define REPEAT_ZERO_CODEPOINT 5
-#define REPEAT_ZERO_CHAR_LEN strlen(REPEAT_ZERO_CHAR)
-#define REPEAT_ONE_CHAR "\x06"
-#define REPEAT_ONE_CODEPOINT 6
-#define REPEAT_ONE_CHAR_LEN strlen(REPEAT_ONE_CHAR)
+#define REPEAT_CHAR "\x05"
+#define REPEAT_CODEPOINT 5
+#define REPEAT_CHAR_LEN strlen(REPEAT_CHAR)
+#define GROUP_INDICATOR_CHAR "\x06"
+#define GROUP_INDICATOR_CODEPOINT 6
+#define GROUP_INDICATOR_CHAR_LEN strlen(GROUP_INDICATOR_CHAR)
 #define BEGIN_SET_CHAR "\x0f"
 #define BEGIN_SET_CODEPOINT 15
 #define BEGIN_SET_CHAR_LEN strlen(BEGIN_SET_CHAR)
@@ -105,9 +105,6 @@ typedef struct transliteration_table {
 #define END_SET_CODEPOINT 14
 #define END_SET_CHAR_LEN strlen(END_SET_CHAR)
 
-#define GROUP_INDICATOR_CHAR "\x10"
-#define GROUP_INDICATOR_CODEPOINT 16
-#define GROUP_INDICATOR_CHAR_LEN strlen(GROUP_INDICATOR_CHAR)
 
 #define DOLLAR_CODEPOINT 36
 
diff --git a/src/transliteration_table_builder.c b/src/transliteration_table_builder.c
index 907e4f6a..2a6e45be 100644
--- a/src/transliteration_table_builder.c
+++ b/src/transliteration_table_builder.c
@@ -96,21 +96,6 @@ string_tree_t *regex_string_tree(char *regex, size_t len) {
         } else if ((codepoint == LPAREN_CODEPOINT || codepoint == RPAREN_CODEPOINT) && last_codepoint != BACKSLASH_CODEPOINT) {
             log_debug("group\n");
             add_to_index = false;
-        } else if (codepoint == STAR_CODEPOINT && last_codepoint != BACKSLASH_CODEPOINT) {
-            log_debug("star\n");
-            // For *, we add an optional transition to the empty
-            codepoint = REPEAT_ZERO_CODEPOINT;
-        } else if (codepoint == PLUS_CODEPOINT && last_codepoint != BACKSLASH_CODEPOINT) {
-            log_debug("plus\n");
-            codepoint = REPEAT_ONE_CODEPOINT;
-        } else if (codepoint == DOLLAR_CODEPOINT && last_codepoint != BACKSLASH_CODEPOINT) {
-            log_debug("dollar\n");
-            codepoint = WORD_BOUNDARY_CODEPOINT;
-
-            if (in_set) {
-                uint32_array_push(char_set, codepoint);
-                add_to_index = false;
-            }
         } else if (in_set) {
             log_debug("in set\n");
             // Queue node, we'll add them to the trie