[fix] merging repeat codepoints in trie builder

This commit is contained in:
Al
2015-05-22 22:45:23 -04:00
parent c00ecf6ea8
commit 31cc2bb5d1
2 changed files with 6 additions and 24 deletions

View File

@@ -92,12 +92,12 @@ typedef struct transliteration_table {
// Sentinel for an empty transition: control byte 0x04.
// _CHAR is the one-byte string form, _CODEPOINT the same value as an integer.
// _CHAR_LEN expands to a strlen() call at every use site, so <string.h> must be in scope there.
#define EMPTY_TRANSITION_CHAR "\x04"
#define EMPTY_TRANSITION_CODEPOINT 4
#define EMPTY_TRANSITION_CHAR_LEN strlen(EMPTY_TRANSITION_CHAR)
#define REPEAT_ZERO_CHAR "\x05"
#define REPEAT_ZERO_CODEPOINT 5
#define REPEAT_ZERO_CHAR_LEN strlen(REPEAT_ZERO_CHAR)
#define REPEAT_ONE_CHAR "\x06"
#define REPEAT_ONE_CODEPOINT 6
#define REPEAT_ONE_CHAR_LEN strlen(REPEAT_ONE_CHAR)
// Repeat sentinel: control byte 0x05.
// _CHAR is the one-byte string form, _CODEPOINT the same value as an integer.
// _CHAR_LEN is derived from REPEAT_CHAR itself so it does not depend on any other macro;
// it expands to a strlen() call at every use site, so <string.h> must be in scope there.
#define REPEAT_CHAR "\x05"
#define REPEAT_CODEPOINT 5
#define REPEAT_CHAR_LEN strlen(REPEAT_CHAR)
// Group indicator sentinel: control byte 0x06.
// _CHAR is the one-byte string form, _CODEPOINT the same value as an integer.
// _CHAR_LEN expands to a strlen() call at every use site, so <string.h> must be in scope there.
#define GROUP_INDICATOR_CHAR "\x06"
#define GROUP_INDICATOR_CODEPOINT 6
#define GROUP_INDICATOR_CHAR_LEN strlen(GROUP_INDICATOR_CHAR)
// Begin-set sentinel: control byte 0x0f (decimal 15).
// _CHAR is the one-byte string form, _CODEPOINT the same value as an integer.
// _CHAR_LEN expands to a strlen() call at every use site, so <string.h> must be in scope there.
#define BEGIN_SET_CHAR "\x0f"
#define BEGIN_SET_CODEPOINT 15
#define BEGIN_SET_CHAR_LEN strlen(BEGIN_SET_CHAR)
@@ -105,9 +105,6 @@ typedef struct transliteration_table {
#define END_SET_CODEPOINT 14
#define END_SET_CHAR_LEN strlen(END_SET_CHAR)
#define GROUP_INDICATOR_CHAR "\x10"
#define GROUP_INDICATOR_CODEPOINT 16
#define GROUP_INDICATOR_CHAR_LEN strlen(GROUP_INDICATOR_CHAR)
// 36 is the ASCII code of '$'.
#define DOLLAR_CODEPOINT 36

View File

@@ -96,21 +96,6 @@ string_tree_t *regex_string_tree(char *regex, size_t len) {
} else if ((codepoint == LPAREN_CODEPOINT || codepoint == RPAREN_CODEPOINT) && last_codepoint != BACKSLASH_CODEPOINT) {
log_debug("group\n");
add_to_index = false;
} else if (codepoint == STAR_CODEPOINT && last_codepoint != BACKSLASH_CODEPOINT) {
log_debug("star\n");
// For *, we add an optional transition to the empty
codepoint = REPEAT_ZERO_CODEPOINT;
} else if (codepoint == PLUS_CODEPOINT && last_codepoint != BACKSLASH_CODEPOINT) {
log_debug("plus\n");
codepoint = REPEAT_ONE_CODEPOINT;
} else if (codepoint == DOLLAR_CODEPOINT && last_codepoint != BACKSLASH_CODEPOINT) {
log_debug("dollar\n");
codepoint = WORD_BOUNDARY_CODEPOINT;
if (in_set) {
uint32_array_push(char_set, codepoint);
add_to_index = false;
}
} else if (in_set) {
log_debug("in set\n");
// Queue node, we'll add them to the trie