[utils] Rename cstring_array_get_token to cstring_array_get_string
This commit is contained in:
@@ -614,7 +614,7 @@ inline int32_t cstring_array_get_offset(cstring_array *self, uint32_t i) {
|
||||
return (int32_t)self->indices->a[i];
|
||||
}
|
||||
|
||||
inline char *cstring_array_get_token(cstring_array *self, uint32_t i) {
|
||||
inline char *cstring_array_get_string(cstring_array *self, uint32_t i) {
|
||||
int32_t data_index = cstring_array_get_offset(self, i);
|
||||
if (data_index < 0) return NULL;
|
||||
return self->str->a + data_index;
|
||||
@@ -822,7 +822,7 @@ char *string_tree_iterator_get_string(string_tree_iterator_t *self, uint32_t i)
|
||||
uint32_t base_index = self->tree->token_indices->a[i];
|
||||
uint32_t offset = self->path[i];
|
||||
|
||||
return cstring_array_get_token(self->tree->strings, base_index + offset);
|
||||
return cstring_array_get_string(self->tree->strings, base_index + offset);
|
||||
}
|
||||
|
||||
bool string_tree_iterator_done(string_tree_iterator_t *self) {
|
||||
|
||||
@@ -145,7 +145,7 @@ uint32_t cstring_array_add_string_len(cstring_array *self, char *str, size_t len
|
||||
void cstring_array_append_string(cstring_array *self, char *str);
|
||||
void cstring_array_append_string_len(cstring_array *self, char *str, size_t len);
|
||||
int32_t cstring_array_get_offset(cstring_array *self, uint32_t i);
|
||||
char *cstring_array_get_token(cstring_array *self, uint32_t i);
|
||||
char *cstring_array_get_string(cstring_array *self, uint32_t i);
|
||||
int64_t cstring_array_token_length(cstring_array *self, uint32_t i);
|
||||
|
||||
void cstring_array_destroy(cstring_array *self);
|
||||
|
||||
@@ -37,7 +37,7 @@ tokenized_string_t *tokenized_string_from_tokens(char *src, token_array *tokens)
|
||||
|
||||
char *tokenized_string_get_token(tokenized_string_t *self, uint32_t index) {
|
||||
if (index < self->tokens->n) {
|
||||
return cstring_array_get_token(self->str, index);
|
||||
return cstring_array_get_string(self->str, index);
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -611,7 +611,7 @@ static char *replace_groups(trie_t *trie, char *str, char *replacement, group_ca
|
||||
log_debug("in group ref\n");
|
||||
sscanf((char *)replacement_ptr, "%d", &group_ref);
|
||||
log_debug("Got group_ref=%d\n", group_ref);
|
||||
char *group = cstring_array_get_token(group_strings, group_ref-1);
|
||||
char *group = cstring_array_get_string(group_strings, group_ref-1);
|
||||
log_debug("Got group=%s\n", group);
|
||||
if (group != NULL) {
|
||||
char_array_cat(ret, group);
|
||||
@@ -817,11 +817,11 @@ char *transliterate(char *trans_name, char *str, size_t len) {
|
||||
|
||||
|
||||
if (replacement != NULL) {
|
||||
char *replacement_string = cstring_array_get_token(trans_table->replacement_strings, replacement->string_index);
|
||||
char *replacement_string = cstring_array_get_string(trans_table->replacement_strings, replacement->string_index);
|
||||
char *revisit_string = NULL;
|
||||
if (replacement->revisit_index != 0) {
|
||||
log_debug("revisit_index = %d\n", replacement->revisit_index);
|
||||
revisit_string = cstring_array_get_token(trans_table->revisit_strings, replacement->revisit_index);
|
||||
revisit_string = cstring_array_get_string(trans_table->revisit_strings, replacement->revisit_index);
|
||||
}
|
||||
|
||||
bool free_revisit = false;
|
||||
@@ -936,17 +936,17 @@ char *transliterate(char *trans_name, char *str, size_t len) {
|
||||
|
||||
} else if (step->type == STEP_UNICODE_NORMALIZATION) {
|
||||
log_debug("unicode normalization\n");
|
||||
int utf8proc_options = UTF8PROC_NULLTERM | UTF8PROC_STABLE;
|
||||
int utf8proc_options = UTF8PROC_OPTIONS_BASE;
|
||||
if (strcmp(step->name, NFD) == 0) {
|
||||
utf8proc_options = utf8proc_options | UTF8PROC_DECOMPOSE;
|
||||
utf8proc_options = UTF8PROC_OPTIONS_NFD;
|
||||
} else if (strcmp(step->name, NFC) == 0) {
|
||||
utf8proc_options = utf8proc_options | UTF8PROC_COMPOSE;
|
||||
utf8proc_options = UTF8PROC_OPTIONS_NFC;
|
||||
} else if (strcmp(step->name, NFKD) == 0) {
|
||||
utf8proc_options = utf8proc_options | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT;
|
||||
utf8proc_options = UTF8PROC_OPTIONS_NFKD;
|
||||
} else if (strcmp(step->name, NFKC) == 0) {
|
||||
utf8proc_options = utf8proc_options | UTF8PROC_COMPOSE | UTF8PROC_COMPAT;
|
||||
utf8proc_options = UTF8PROC_OPTIONS_NKFC;
|
||||
} else if (strcmp(step->name, STRIP_MARK) == 0) {
|
||||
utf8proc_options = utf8proc_options | UTF8PROC_STRIPMARK;
|
||||
utf8proc_options = UTF8PROC_OPTIONS_STRIP_ACCENTS;
|
||||
}
|
||||
|
||||
uint8_t *utf8proc_normalized = NULL;
|
||||
@@ -1199,7 +1199,7 @@ char *transliterator_replace_strings(trie_t *trie, cstring_array *replacements,
|
||||
phrase = phrases->a[i];
|
||||
end = phrase.start;
|
||||
char_array_append_len(str, input + start, end - start);
|
||||
char_array_append(str, cstring_array_get_token(replacements, phrase.data));
|
||||
char_array_append(str, cstring_array_get_string(replacements, phrase.data));
|
||||
start = phrase.start + phrase.len;
|
||||
}
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
#include "trie_search.h"
|
||||
#include "unicode_scripts.h"
|
||||
|
||||
#define LATIN_ASCII "latin-ascii"
|
||||
|
||||
#define DEFAULT_TRANSLITERATION_PATH "../data/transliteration/transliteration.dat"
|
||||
|
||||
#define MAX_TRANS_NAME_LEN 100
|
||||
@@ -160,7 +162,7 @@ transliterator_index_t get_transliterator_index_for_script_language(script_t scr
|
||||
#define foreach_transliterator(script, language, transliterator_var, code) do { \
|
||||
transliterator_index_t __index = get_transliterator_index_for_script_language(script, language); \
|
||||
for (int __i = __index.transliterator_index; __i < __index.transliterator_index + __index.num_transliterators; __i++) { \
|
||||
transliterator_var = cstring_array_get_token(trans_table->transliterator_names, __i); \
|
||||
transliterator_var = cstring_array_get_string(trans_table->transliterator_names, __i); \
|
||||
if (transliterator_var == NULL) break; \
|
||||
code; \
|
||||
} \
|
||||
|
||||
@@ -457,7 +457,7 @@ int main(int argc, char **argv) {
|
||||
for (ante = 0; ante < num_pre_context_strings; ante++) {
|
||||
char_array_clear(context);
|
||||
|
||||
token = cstring_array_get_token(pre_context_strings, ante);
|
||||
token = cstring_array_get_string(pre_context_strings, ante);
|
||||
if (token == NULL || strlen(token) == 0) {
|
||||
log_error("pre_context token was NULL or 0 length\n");
|
||||
goto exit_teardown;
|
||||
@@ -469,7 +469,7 @@ int main(int argc, char **argv) {
|
||||
for (post = 0; post < num_post_context_strings; post++) {
|
||||
context->n = context_len;
|
||||
char_array_cat(context, POST_CONTEXT_CHAR);
|
||||
token = cstring_array_get_token(post_context_strings, post);
|
||||
token = cstring_array_get_string(post_context_strings, post);
|
||||
char_array_cat(context, token);
|
||||
if (token == NULL || strlen(token) == 0) {
|
||||
log_error("post_context token was NULL or 0 length\n");
|
||||
@@ -542,7 +542,7 @@ int main(int argc, char **argv) {
|
||||
|
||||
for (c = 0; c < num_context_strings; c++) {
|
||||
rule_key->n = context_key_len;
|
||||
token = cstring_array_get_token(context_strings, c);
|
||||
token = cstring_array_get_string(context_strings, c);
|
||||
if (token == NULL) {
|
||||
log_error("token was NULL for c=%d\n", c);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user