[fix] Compiler warnings for casts/printf
This commit is contained in:
@@ -113,7 +113,7 @@ string_tree_t *add_string_alternatives(char *str, normalize_options_t options) {
|
||||
|
||||
size_t len = strlen(str);
|
||||
|
||||
log_debug("tokenized, num tokens=%d\n", tokens->n);
|
||||
log_debug("tokenized, num tokens=%zu\n", tokens->n);
|
||||
|
||||
phrase_language_array *phrases = NULL;
|
||||
phrase_array *lang_phrases = NULL;
|
||||
@@ -155,7 +155,7 @@ string_tree_t *add_string_alternatives(char *str, normalize_options_t options) {
|
||||
string_tree_t *tree = string_tree_new_size(len);
|
||||
|
||||
if (phrases != NULL) {
|
||||
log_debug("phrases not NULL, n=%d\n", phrases->n);
|
||||
log_debug("phrases not NULL, n=%zu\n", phrases->n);
|
||||
ks_introsort(phrase_language_array, phrases->n, phrases->a);
|
||||
|
||||
phrase_language_t phrase_lang;
|
||||
@@ -181,7 +181,7 @@ string_tree_t *add_string_alternatives(char *str, normalize_options_t options) {
|
||||
for (int j = start; j < end; j++) {
|
||||
token_t token = tokens->a[j];
|
||||
if (token.type != WHITESPACE) {
|
||||
log_debug("Adding previous token, %.*s\n", token.len, str + token.offset);
|
||||
log_debug("Adding previous token, %.*s\n", (int)token.len, str + token.offset);
|
||||
|
||||
string_tree_add_string_len(tree, str + token.offset, token.len);
|
||||
} else {
|
||||
@@ -250,7 +250,7 @@ string_tree_t *add_string_alternatives(char *str, normalize_options_t options) {
|
||||
for (int j = phrase.start; j < phrase.start + phrase.len; j++) {
|
||||
token = tokens->a[j];
|
||||
if (token.type != WHITESPACE) {
|
||||
log_debug("Adding previous token, %.*s\n", token.len, str + token.offset);
|
||||
log_debug("Adding previous token, %.*s\n", (int)token.len, str + token.offset);
|
||||
string_tree_add_string_len(tree, str + token.offset, token.len);
|
||||
} else {
|
||||
string_tree_add_string(tree, " ");
|
||||
@@ -271,7 +271,7 @@ string_tree_t *add_string_alternatives(char *str, normalize_options_t options) {
|
||||
for (int j = start; j < end; j++) {
|
||||
token_t token = tokens->a[j];
|
||||
if (token.type != WHITESPACE) {
|
||||
log_debug("Adding previous token, %.*s\n", token.len, str + token.offset);
|
||||
log_debug("Adding previous token, %.*s\n", (int)token.len, str + token.offset);
|
||||
|
||||
string_tree_add_string_len(tree, str + token.offset, token.len);
|
||||
} else {
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
char *normalize_string_utf8(char *str, uint64_t options) {
|
||||
int utf8proc_options = UTF8PROC_OPTIONS_BASE | UTF8PROC_IGNORE | UTF8PROC_NLF2LF | UTF8PROC_STRIPCC;
|
||||
uint8_t *utf8proc_normalized = NULL;
|
||||
ssize_t normalized_len = 0;
|
||||
|
||||
bool have_utf8proc_options = false;
|
||||
|
||||
@@ -31,7 +30,7 @@ char *normalize_string_utf8(char *str, uint64_t options) {
|
||||
}
|
||||
|
||||
if (have_utf8proc_options) {
|
||||
ssize_t normalized_len = utf8proc_map((uint8_t *)str, 0, &utf8proc_normalized, utf8proc_options);
|
||||
utf8proc_map((uint8_t *)str, 0, &utf8proc_normalized, utf8proc_options);
|
||||
return (char *)utf8proc_normalized;
|
||||
}
|
||||
|
||||
@@ -113,7 +112,6 @@ string_tree_t *normalize_string(char *str, uint64_t options) {
|
||||
|
||||
char *utf8_normalized = NULL;
|
||||
char *transliterated = NULL;
|
||||
char *ascii = NULL;
|
||||
|
||||
if (options & NORMALIZE_STRING_LOWERCASE && is_ascii) {
|
||||
utf8_normalized = normalize_string_utf8(str, NORMALIZE_STRING_LOWERCASE);
|
||||
|
||||
@@ -406,7 +406,7 @@ bool numex_table_read(FILE *f) {
|
||||
goto exit_numex_table_load_error;
|
||||
}
|
||||
|
||||
log_debug("read num_languages = %d\n", num_languages);
|
||||
log_debug("read num_languages = %zu\n", num_languages);
|
||||
|
||||
int i = 0;
|
||||
|
||||
@@ -745,7 +745,7 @@ numex_result_array *convert_numeric_expressions(char *str, char *lang) {
|
||||
bool set_rule = false;
|
||||
state.state = NUMEX_SEARCH_STATE_MATCH;
|
||||
|
||||
log_debug("phrase.len=%lld, phrase.data=%d\n", phrase.len, phrase.data);
|
||||
log_debug("phrase.len=%u, phrase.data=%d\n", phrase.len, phrase.data);
|
||||
|
||||
rule = get_numex_rule((size_t)phrase.data);
|
||||
log_debug("rule.value=%lld\n", rule.value);
|
||||
@@ -771,7 +771,7 @@ numex_result_array *convert_numeric_expressions(char *str, char *lang) {
|
||||
}
|
||||
result.len = idx + phrase.start + phrase.len - result.start;
|
||||
|
||||
log_debug("idx=%d, phrase.len=%d\n", idx, phrase.len);
|
||||
log_debug("idx=%zu, phrase.len=%d\n", idx, phrase.len);
|
||||
|
||||
log_debug("prev_rule.radix=%d\n", prev_rule.radix);
|
||||
|
||||
|
||||
@@ -744,7 +744,7 @@ char *transliterate(char *trans_name, char *str, size_t len) {
|
||||
int32_t ch = 0;
|
||||
ssize_t char_len = 0;
|
||||
uint8_t *ptr = (uint8_t *)str;
|
||||
uint64_t idx = 0;
|
||||
size_t idx = 0;
|
||||
|
||||
char *original_str = str;
|
||||
char_array *revisit = NULL;
|
||||
@@ -770,7 +770,7 @@ char *transliterate(char *trans_name, char *str, size_t len) {
|
||||
|
||||
if (ch == 0) break;
|
||||
|
||||
log_debug("Got char '%.*s' at idx=%llu\n", (int)char_len, str + idx, idx);
|
||||
log_debug("Got char '%.*s' at idx=%zu\n", (int)char_len, str + idx, idx);
|
||||
|
||||
state = state_transition(trie, str, idx, char_len, prev_state);
|
||||
set_match_if_any(trie, state, &match_state);
|
||||
@@ -783,7 +783,6 @@ char *transliterate(char *trans_name, char *str, size_t len) {
|
||||
log_debug("end of partial or last char, prev start=%zd, prev len=%zu\n", prev_state.phrase_start, prev_state.phrase_len);
|
||||
|
||||
bool context_no_match = false;
|
||||
bool empty_context_match = false;
|
||||
|
||||
bool is_last_char = idx + char_len == len;
|
||||
|
||||
@@ -1293,7 +1292,7 @@ bool transliterator_write(transliterator_t *trans, FILE *f) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!file_write_uint32(f, trans->steps_length)) {
|
||||
if (!file_write_uint32(f, (uint32_t)trans->steps_length)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1913,7 +1912,7 @@ bool transliteration_module_setup(char *filename) {
|
||||
return transliteration_table_load(filename == NULL ? DEFAULT_TRANSLITERATION_PATH : filename);
|
||||
}
|
||||
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -70,7 +70,7 @@ VECTOR_INIT_FREE_DATA(transliteration_replacement_array, transliteration_replace
|
||||
|
||||
KHASH_MAP_INIT_STR(str_transliterator, transliterator_t *)
|
||||
|
||||
#define kh_script_lang_hash(key) ((uint64_t)(key).script ^ (((key).language == NULL) ? 0 : kh_str_hash_func((key).language)))
|
||||
/* Hash for a script/language key: the language's string hash (0 when the
 * language pointer is NULL) XOR-ed with the script id cast to khint_t. */
#define kh_script_lang_hash(key) ((((key).language != NULL) ? kh_str_hash_func((key).language) : 0) ^ (khint_t)(key).script)
|
||||
/*
 * Equality predicate for script/language keys.
 *
 * Two keys are equal when their scripts match and their languages match.
 * A NULL language is only equal to another NULL language; strcmp() is never
 * handed a NULL pointer (which would be undefined behavior). This keeps the
 * predicate consistent with kh_script_lang_hash, which accepts NULL languages.
 *
 * Note: both arguments are evaluated more than once, so they must be free of
 * side effects.
 */
#define kh_script_lang_equal(a, b) (((a).script == (b).script) && \
    (((a).language == NULL || (b).language == NULL) \
        ? ((a).language == (b).language) \
        : (strcmp((a).language, (b).language) == 0)))
|
||||
|
||||
typedef struct transliterator_index {
|
||||
@@ -153,14 +153,14 @@ char *transliterate(char *trans_name, char *str, size_t len);
|
||||
bool transliteration_table_add_script_language(script_language_t script_language, transliterator_index_t index);
|
||||
transliterator_index_t get_transliterator_index_for_script_language(script_t script, char *language);
|
||||
|
||||
#define foreach_transliterator(script, language, transliterator_var, code) do { \
|
||||
transliteration_table_t *__trans_table = get_transliteration_table(); \
|
||||
transliterator_index_t __index = get_transliterator_index_for_script_language(script, language); \
|
||||
for (int __i = __index.transliterator_index; __i < __index.transliterator_index + __index.num_transliterators; __i++) { \
|
||||
transliterator_var = cstring_array_get_string(__trans_table->transliterator_names, __i); \
|
||||
if (transliterator_var == NULL) break; \
|
||||
code; \
|
||||
} \
|
||||
/*
 * foreach_transliterator(script, language, transliterator_var, code)
 *
 * Runs `code` once for each transliterator name registered for the given
 * script/language pair.
 *
 * The macro fetches the transliteration table and the index entry returned by
 * get_transliterator_index_for_script_language(script, language), then walks
 * positions [transliterator_index, transliterator_index + num_transliterators).
 * For each position the name returned by cstring_array_get_string() is
 * assigned to `transliterator_var`; iteration stops early if that name is NULL.
 *
 * `code` is expanded inside the for loop, so a `break`/`continue` in it acts
 * on this loop. The expansion already ends in `while (0);`, i.e. it carries
 * its own trailing semicolon.
 */
#define foreach_transliterator(script, language, transliterator_var, code) do { \
|
||||
transliteration_table_t *__trans_table = get_transliteration_table(); \
|
||||
transliterator_index_t __index = get_transliterator_index_for_script_language(script, language); \
|
||||
for (size_t __i = __index.transliterator_index; __i < __index.transliterator_index + __index.num_transliterators; __i++) { \
|
||||
transliterator_var = cstring_array_get_string(__trans_table->transliterator_names, (uint32_t)__i); \
|
||||
if (transliterator_var == NULL) break; \
|
||||
code; \
|
||||
} \
|
||||
} while (0);
|
||||
|
||||
bool transliteration_table_write(FILE *file);
|
||||
|
||||
24
src/trie.c
24
src/trie.c
@@ -192,7 +192,7 @@ static bool trie_extend(trie_t *self, uint32_t to_index) {
|
||||
if (to_index < self->nodes->n)
|
||||
return true;
|
||||
|
||||
new_begin = self->nodes->n;
|
||||
new_begin = (uint32_t)self->nodes->n;
|
||||
|
||||
for (i = new_begin; i < to_index + 1; i++) {
|
||||
trie_node_array_push(self->nodes, (trie_node_t){-(i-1), -(i+1)});
|
||||
@@ -346,7 +346,7 @@ static uint32_t trie_find_new_base(trie_t *self, unsigned char *transitions, int
|
||||
while (!trie_can_fit_transitions(self, index - first_char_index, transitions, num_transitions)) {
|
||||
trie_node_t node = trie_get_node(self, index);
|
||||
if (-node.check == FREE_LIST_ID) {
|
||||
if (!trie_extend(self, self->nodes->n+self->alphabet_size)) {
|
||||
if (!trie_extend(self, (uint32_t) self->nodes->n + self->alphabet_size)) {
|
||||
log_error("Trie index error extending to %d\n", index);
|
||||
return TRIE_INDEX_ERROR;
|
||||
}
|
||||
@@ -456,8 +456,8 @@ void trie_add_tail(trie_t *self, unsigned char *tail) {
|
||||
|
||||
void trie_set_tail(trie_t *self, unsigned char *tail, int32_t tail_pos) {
|
||||
log_debug("Setting tail: %s at pos %d\n", tail, tail_pos);
|
||||
int tail_len = strlen((char *)tail);
|
||||
int num_appends = (tail_pos + tail_len) - self->tail->n;
|
||||
size_t tail_len = strlen((char *)tail);
|
||||
size_t num_appends = ((size_t)tail_pos + tail_len) - self->tail->n;
|
||||
int i = 0;
|
||||
|
||||
// Pad with 0s if we're short
|
||||
@@ -536,9 +536,9 @@ int32_t trie_separate_tail(trie_t *self, uint32_t from_index, unsigned char *tai
|
||||
if (*tail != '\0') tail++;
|
||||
|
||||
log_debug("Separating node at index %d into char %c with tail %s\n", from_index, c, tail);
|
||||
trie_set_base(self, index, -1 * self->data->n);
|
||||
trie_set_base(self, index, -1 * (int32_t)self->data->n);
|
||||
|
||||
trie_data_array_push(self->data, (trie_data_node_t){self->tail->n, data});
|
||||
trie_data_array_push(self->data, (trie_data_node_t){(uint32_t)self->tail->n, data});
|
||||
trie_add_tail(self, tail);
|
||||
|
||||
return index;
|
||||
@@ -557,9 +557,9 @@ void trie_tail_merge(trie_t *self, uint32_t old_node_id, unsigned char *suffix,
|
||||
unsigned char *old_tail = original_tail;
|
||||
log_debug("Merging existing tail %s with new tail %s, node_id=%d\n", original_tail, suffix, old_node_id);
|
||||
|
||||
int common_prefix = string_common_prefix((char *)old_tail, (char *)suffix);
|
||||
int old_tail_len = strlen((char *)old_tail);
|
||||
int suffix_len = strlen((char *)suffix);
|
||||
size_t common_prefix = string_common_prefix((char *)old_tail, (char *)suffix);
|
||||
size_t old_tail_len = strlen((char *)old_tail);
|
||||
size_t suffix_len = strlen((char *)suffix);
|
||||
if (common_prefix == old_tail_len && old_tail_len == suffix_len) {
|
||||
log_debug("Key already exists, setting value to %d\n", data);
|
||||
self->data->a[old_data_index] = (trie_data_node_t) {old_tail_pos, data};
|
||||
@@ -567,7 +567,7 @@ void trie_tail_merge(trie_t *self, uint32_t old_node_id, unsigned char *suffix,
|
||||
}
|
||||
|
||||
uint32_t node_id = old_node_id;
|
||||
log_debug("common_prefix=%d\n", common_prefix);
|
||||
log_debug("common_prefix=%zu\n", common_prefix);
|
||||
|
||||
for (int i=0; i < common_prefix; i++) {
|
||||
c = old_tail[i];
|
||||
@@ -817,11 +817,11 @@ inline bool trie_set_data_at_index(trie_t *self, uint32_t index, uint32_t data)
|
||||
}
|
||||
|
||||
/*
 * Store `data` for `key`, inserting the key if it is not present yet.
 *
 * The key is looked up with trie_get(). When the lookup yields NULL_NODE_ID
 * the key is added via trie_add(); otherwise the data of the existing node is
 * overwritten via trie_set_data_at_index().
 *
 * Returns the result of whichever of those two calls was made.
 */
inline bool trie_set_data(trie_t *self, char *key, uint32_t data) {
    /* Look the key up first: the node id must be known before it can be
     * compared against NULL_NODE_ID (the earlier code tested an undeclared
     * `index` here). */
    uint32_t node_id = trie_get(self, key);
    if (node_id == NULL_NODE_ID) {
        return trie_add(self, key, data);
    }

    return trie_set_data_at_index(self, node_id, data);
}
|
||||
|
||||
|
||||
@@ -22,8 +22,9 @@ bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, ph
|
||||
uint32_t next_id;
|
||||
|
||||
bool match = false;
|
||||
uint64_t index = 0;
|
||||
int phrase_len = 0, phrase_start = 0;
|
||||
uint32_t index = 0;
|
||||
uint32_t phrase_len = 0;
|
||||
uint32_t phrase_start = 0;
|
||||
uint32_t data;
|
||||
|
||||
trie_search_state_t state = SEARCH_STATE_BEGIN, last_state = SEARCH_STATE_BEGIN;
|
||||
@@ -91,7 +92,7 @@ bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, ph
|
||||
log_debug("node.check == node_id\n");
|
||||
state = SEARCH_STATE_PARTIAL_MATCH;
|
||||
if (last_state == SEARCH_STATE_NO_MATCH || last_state == SEARCH_STATE_BEGIN) {
|
||||
log_debug("phrase_start=%llu\n", index);
|
||||
log_debug("phrase_start=%u\n", index);
|
||||
phrase_start = index;
|
||||
fail_ptr = ptr + remaining;
|
||||
}
|
||||
@@ -113,7 +114,7 @@ bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, ph
|
||||
log_debug("Tail matches\n");
|
||||
last_state = state;
|
||||
data = data_node.data;
|
||||
log_debug("%llu, %d, %zu\n", index, phrase_len, tail_len);
|
||||
log_debug("%u, %d, %zu\n", index, phrase_len, tail_len);
|
||||
ptr += tail_len;
|
||||
index += tail_len;
|
||||
advance_index = false;
|
||||
@@ -140,7 +141,7 @@ bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, ph
|
||||
log_debug("Transition to NUL byte matched\n");
|
||||
state = SEARCH_STATE_MATCH;
|
||||
match = true;
|
||||
phrase_len = index + len - phrase_start;
|
||||
phrase_len = index + (uint32_t)len - phrase_start;
|
||||
if (terminal_node.base < 0) {
|
||||
int32_t data_index = -1*terminal_node.base;
|
||||
trie_data_node_t data_node = self->data->a[data_index];
|
||||
@@ -168,7 +169,7 @@ bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, ph
|
||||
if (advance_index) index += len;
|
||||
|
||||
advance_index = true;
|
||||
log_debug("index now %llu\n", index);
|
||||
log_debug("index now %u\n", index);
|
||||
} // while
|
||||
|
||||
return true;
|
||||
@@ -186,7 +187,7 @@ inline phrase_array *trie_search(trie_t *self, char *text) {
|
||||
return phrases;
|
||||
}
|
||||
|
||||
int trie_node_search_tail_tokens(trie_t *self, trie_node_t node, char *str, token_array *tokens, int tail_index, int token_index) {
|
||||
int trie_node_search_tail_tokens(trie_t *self, trie_node_t node, char *str, token_array *tokens, size_t tail_index, int token_index) {
|
||||
int32_t data_index = -1*node.base;
|
||||
trie_data_node_t old_data_node = self->data->a[data_index];
|
||||
uint32_t current_tail_pos = old_data_node.tail;
|
||||
@@ -203,7 +204,7 @@ int trie_node_search_tail_tokens(trie_t *self, trie_node_t node, char *str, toke
|
||||
token_t token = tokens->a[i];
|
||||
|
||||
char *ptr = str + token.offset;
|
||||
int token_length = token.len;
|
||||
size_t token_length = token.len;
|
||||
|
||||
if (!(*tail_ptr)) {
|
||||
log_debug("tail matches!\n");
|
||||
@@ -242,7 +243,7 @@ bool trie_search_tokens_from_index(trie_t *self, char *str, token_array *tokens,
|
||||
trie_search_state_t state = SEARCH_STATE_BEGIN, last_state = SEARCH_STATE_BEGIN;
|
||||
|
||||
token_t token;
|
||||
size_t token_length, token_consumed;
|
||||
size_t token_length;
|
||||
|
||||
log_debug("num_tokens: %zu\n", tokens->n);
|
||||
for (int i = 0; i < tokens->n; i++, last_state = state) {
|
||||
@@ -460,7 +461,6 @@ phrase_t trie_search_suffixes_from_index(trie_t *self, char *word, size_t len, u
|
||||
const uint8_t *ptr = (const uint8_t *)word;
|
||||
const uint8_t *char_ptr;
|
||||
|
||||
bool done = false;
|
||||
bool in_tail = false;
|
||||
unsigned char *current_tail = (unsigned char *)"";
|
||||
size_t tail_remaining = 0;
|
||||
@@ -484,13 +484,13 @@ phrase_t trie_search_suffixes_from_index(trie_t *self, char *word, size_t len, u
|
||||
current_tail++;
|
||||
if (i == char_len - 1) {
|
||||
phrase_len += char_len;
|
||||
phrase_start = index;
|
||||
phrase_start = (uint32_t)index;
|
||||
}
|
||||
continue;
|
||||
} else if (in_tail && tail_remaining == 0 && i == char_len - 1) {
|
||||
log_debug("tail match!\n");
|
||||
phrase_start = index + char_len;
|
||||
phrase_len = len - index - char_len;
|
||||
phrase_start = (uint32_t)(index + char_len);
|
||||
phrase_len = (uint32_t)(len - index - char_len);
|
||||
value = tail_value;
|
||||
index = 0;
|
||||
break;
|
||||
@@ -523,8 +523,8 @@ phrase_t trie_search_suffixes_from_index(trie_t *self, char *word, size_t len, u
|
||||
in_tail = true;
|
||||
|
||||
if (tail_remaining == 0) {
|
||||
phrase_start = index;
|
||||
phrase_len = len - index;
|
||||
phrase_start = (uint32_t)index;
|
||||
phrase_len = (uint32_t)(len - index);
|
||||
value = tail_value;
|
||||
index = 0;
|
||||
break;
|
||||
@@ -581,7 +581,7 @@ phrase_t trie_search_prefixes_from_index(trie_t *self, char *word, size_t len, u
|
||||
|
||||
ssize_t char_len = 0;
|
||||
|
||||
size_t idx = 0;
|
||||
uint32_t idx = 0;
|
||||
|
||||
size_t separator_char_len = 0;
|
||||
|
||||
@@ -593,8 +593,6 @@ phrase_t trie_search_prefixes_from_index(trie_t *self, char *word, size_t len, u
|
||||
trie_node_t terminal_node;
|
||||
|
||||
for (; idx < len; last_node = node, last_node_id = node_id) {
|
||||
unsigned char ch = *ptr;
|
||||
|
||||
log_debug("Getting transition index for %d, (%d, %d)\n", last_node_id, last_node.base, last_node.check);
|
||||
node_id = trie_get_transition_index(self, last_node, *ptr);
|
||||
node = trie_get_node(self, node_id);
|
||||
@@ -648,7 +646,7 @@ phrase_t trie_search_prefixes_from_index(trie_t *self, char *word, size_t len, u
|
||||
|
||||
if (match_len >= current_tail_len) {
|
||||
if (first_char) phrase_start = idx;
|
||||
phrase_len = (idx + match_len + 1) - phrase_start;
|
||||
phrase_len = (uint32_t)(idx + match_len + 1) - phrase_start;
|
||||
|
||||
log_debug("tail match! phrase_len=%u\n", phrase_len);
|
||||
value = data_node.data;
|
||||
|
||||
Reference in New Issue
Block a user