From 475aa3dbfa19bf053494807f9c52f9abf22d2a32 Mon Sep 17 00:00:00 2001 From: Al Date: Sat, 31 Dec 2016 03:22:17 -0500 Subject: [PATCH] [strings] fixing and simplifying string tree iterator. This version is inspired by Python's itertools.product (itertoolsmodule.c has so many goodies) --- src/string_utils.c | 80 ++++++++++++---------------------------------- src/string_utils.h | 4 --- 2 files changed, 21 insertions(+), 63 deletions(-) diff --git a/src/string_utils.c b/src/string_utils.c index dd4aaf16..7a847d7f 100644 --- a/src/string_utils.c +++ b/src/string_utils.c @@ -214,6 +214,11 @@ char *utf8_case(const char *s, casing_option_t casing, utf8proc_option_t options return (char *)buffer; } +typedef enum casing_option { + UTF8_LOWER, + UTF8_UPPER +} casing_option_t; + inline char *utf8_lower_options(const char *s, utf8proc_option_t options) { return utf8_case(s, UTF8_LOWER, options); } @@ -1030,7 +1035,8 @@ inline uint32_t string_tree_num_strings(string_tree_t *self) { inline uint32_t string_tree_num_alternatives(string_tree_t *self, uint32_t i) { if (i >= self->token_indices->n) return 0; - return self->token_indices->a[i + 1] - self->token_indices->a[i]; + uint32_t n = self->token_indices->a[i + 1] - self->token_indices->a[i]; + return n > 0 ? n : 1; } void string_tree_destroy(string_tree_t *self) { @@ -1047,10 +1053,6 @@ void string_tree_destroy(string_tree_t *self) { free(self); } -#define STRING_TREE_ITER_DIRECTION_LEFT -1 -#define STRING_TREE_ITER_DIRECTION_RIGHT 1 - - string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) { string_tree_iterator_t *self = malloc(sizeof(string_tree_iterator_t)); self->tree = tree; @@ -1061,8 +1063,6 @@ string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) { // calloc since the first path through the tree is all zeros self->path = calloc(num_tokens, sizeof(uint32_t)); - self->num_alternatives = calloc(num_tokens, sizeof(uint32_t)); - uint32_t permutations = 1; uint32_t num_strings; @@ -1070,7 +1070,6 @@ string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) { // N + 1 indices stored in token_indices, so this is always valid num_strings = string_tree_num_alternatives(tree, i); if (num_strings > 0) { - self->num_alternatives[i] = num_strings; // 1 or more strings in the string_tree means use those instead of the actual token permutations *= num_strings; } @@ -1078,66 +1077,33 @@ string_tree_iterator_t *string_tree_iterator_new(string_tree_t *tree) { if (permutations > 1) { self->remaining = (uint32_t)permutations; - self->single_path = false; } else{ self->remaining = 1; - self->single_path = true; } - // Start on the right going backward - self->cursor = self->num_tokens - 1; - self->direction = -1; - return self; } -static inline void string_tree_iterator_switch_direction(string_tree_iterator_t *self) { - self->direction *= -1; -} - -static inline void string_tree_iterator_reset_right(string_tree_iterator_t *self) { - size_t offset = self->cursor + 1; - size_t num_bytes = self->num_tokens - offset; - memset(self->path + offset, 0, sizeof(uint32_t) * num_bytes); -} - -static int string_tree_iterator_do_iteration(string_tree_iterator_t *self) { - uint32_t num_alternatives; - int32_t direction = self->direction; - - uint32_t sentinel = (direction == STRING_TREE_ITER_DIRECTION_LEFT ? -1 : self->num_tokens); - - for (uint32_t cursor = self->cursor; cursor != sentinel; cursor += direction) { - self->cursor = cursor; - num_alternatives = self->num_alternatives[cursor]; - if (num_alternatives > 1 && self->path[cursor] < num_alternatives - 1) { - self->path[cursor]++; - if (direction == STRING_TREE_ITER_DIRECTION_LEFT && self->cursor < self->num_tokens - 1) { - string_tree_iterator_reset_right(self); - self->cursor++; - } - string_tree_iterator_switch_direction(self); - self->remaining--; - return 1; - } - } - - return -1; -} - void string_tree_iterator_next(string_tree_iterator_t *self) { - if (!self->single_path && string_tree_iterator_do_iteration(self) == -1 && self->remaining > 0) { - string_tree_iterator_switch_direction(self); - if (string_tree_iterator_do_iteration(self) == -1) { + if (self->remaining > 0) { + int i; + for (i = self->num_tokens - 1; i >= 0; i--) { + self->path[i]++; + if (self->path[i] == string_tree_num_alternatives(self->tree, i)) { + self->path[i] = 0; + self->remaining--; + } else { + self->remaining--; + break; + } + } + + if (i < 0) { self->remaining = 0; } - } else if (self->single_path) { - self->remaining--; - return; } } - char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i) { if (i >= self->num_tokens) { return NULL; @@ -1159,9 +1125,5 @@ void string_tree_iterator_destroy(string_tree_iterator_t *self) { free(self->path); } - if (self->num_alternatives) { - free(self->num_alternatives); - } - free(self); } diff --git a/src/string_utils.h b/src/string_utils.h index be724252..706d3b8c 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -266,12 +266,8 @@ void string_tree_destroy(string_tree_t *self); typedef struct string_tree_iterator { string_tree_t *tree; - bool single_path; uint32_t *path; - uint32_t *num_alternatives; uint32_t num_tokens; - uint32_t cursor; - int8_t direction; // 1 or -1 uint32_t remaining; } string_tree_iterator_t;