From b3f89a207a9dc6f257775301cf33158e7143428c Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 9 Sep 2015 17:41:23 -0700 Subject: [PATCH] [utils] Version of string_split for single character delimiters which modifies the input string directly rather than creating (essentially) a copy --- src/string_utils.c | 34 +++++++++++++++++++++++++++++++--- src/string_utils.h | 41 +++++++++++++++++++++++++++++------------ 2 files changed, 60 insertions(+), 15 deletions(-) diff --git a/src/string_utils.c b/src/string_utils.c index a5d6349e..3dd2a26f 100644 --- a/src/string_utils.c +++ b/src/string_utils.c @@ -21,8 +21,7 @@ int string_compare_len_case_insensitive(const char *str1, const char *str2, size unsigned char c1, c2; - if (!len) - return 0; + if (len == 0) return 0; do { c1 = *s1++; @@ -424,7 +423,15 @@ char_array *char_array_from_string(char *str) { size_t len = strlen(str); char_array *array = char_array_new_size(len+1); strcpy(array->a, str); - array->n = strlen(str); + array->n = len; + return array; +} + +char_array *char_array_from_string_no_copy(char *str, size_t n) { + char_array *array = malloc(sizeof(char_array)); + array->a = str; + array->m = n; + array->n = n; return array; } @@ -773,6 +780,27 @@ cstring_array *cstring_array_split(char *str, const char *separator, size_t sepa return string_array; } +cstring_array *cstring_array_split_no_copy(char *str, char separator, int *count) { + *count = 0; + char *ptr = str; + size_t len = strlen(str); + + size_t skip_len = 1; + + for (int i = 0; i < len; i++, ptr++) { + if (*ptr == separator) { + *ptr = '\0'; + } + } + + char_array *array = char_array_from_string_no_copy(str, len); + cstring_array *string_array = cstring_array_from_char_array(array); + *count = cstring_array_num_strings(string_array); + + return string_array; +} + + char **cstring_array_to_strings(cstring_array *self) { char **strings = malloc(self->indices->n * sizeof(char *)); diff --git a/src/string_utils.h b/src/string_utils.h index 399990f5..f89cd0f4 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -1,8 +1,12 @@ +/* +string_utils.h +-------------- + +Utilities for manipulating strings in C. +*/ #ifndef STRING_UTILS_H #define STRING_UTILS_H - - #include #include #include @@ -35,7 +39,7 @@ #define UTF8PROC_OPTIONS_LOWERCASE UTF8PROC_OPTIONS_BASE | UTF8PROC_CASEFOLD -// NOTE: this particular implementation works only for ASCII strings +// ASCII string methods int string_compare_case_insensitive(const char *str1, const char *str2); int string_compare_len_case_insensitive(const char *str1, const char *str2, size_t len); size_t string_common_prefix(const char *str1, const char *str2); @@ -51,6 +55,7 @@ bool string_ends_with(const char *str, const char *ending); uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_repls, size_t trans_len); +// UTF-8 string methods char *utf8_reversed_string(const char *s); // returns a copy, caller frees ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst); @@ -76,41 +81,49 @@ size_t string_ltrim(char *str); size_t string_rtrim(char *str); size_t string_trim(char *str); -/* Caller has to free the original string, - also keep in mind that after operating on a char array, - the pointer to the original string may get realloc'd and change - so need to set the char pointer to array.a when done. - Consider a macro which does this consistently +/* char_array is a dynamic character array defined in collections.h +but has a few additional methods related to string manipulation. + +The array pointer can be treated as a plain old C string for methods +expecting NUL-terminated char pointers, but operations like +concatenation are cheap and safe. */ char_array *char_array_from_string(char *str); +char_array *char_array_from_string_no_copy(char *str, size_t n); + +// Gets the underlying C string for a char_array char *char_array_get_string(char_array *array); // Frees the char_array and returns a standard NUL-terminated string char *char_array_to_string(char_array *array); +// Can use strlen(array->a) but this is faster size_t char_array_len(char_array *array); +// append_* methods do not NUL-terminate void char_array_append(char_array *array, char *str); void char_array_append_len(char_array *array, char *str, size_t len); void char_array_append_reversed(char_array *array, char *str); void char_array_append_reversed_len(char_array *array, char *str, size_t len); +// add NUL terminator to a char_array void char_array_terminate(char_array *array); -// Similar to strcat, strips NUL-byte and guarantees 0-terminated +// Similar to strcat but with dynamic resizing, guaranteed NUL-terminated void char_array_cat(char_array *array, char *str); void char_array_cat_len(char_array *array, char *str, size_t len); void char_array_cat_reversed(char_array *array, char *str); void char_array_cat_reversed_len(char_array *array, char *str, size_t len); -// Cat with printf args +// Similar to cat methods but with printf args void char_array_cat_printf(char_array *array, char *format, ...); +// Mainly for paths or delimited strings void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...); void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...); /* -cstring_arrays represent n strings stored contiguously, delimited by NUL-byte. +cstring_arrays represent n strings stored contiguously, delimited by the NUL byte. Instead of storing an array of char pointers (char **), cstring_arrays use this format: @@ -142,11 +155,15 @@ void cstring_array_clear(cstring_array *self); cstring_array *cstring_array_from_char_array(char_array *str); +// Convert cstring_array to an array of n C strings and destroy the cstring_array char **cstring_array_to_strings(cstring_array *self); +// Split on delimiter cstring_array *cstring_array_split(char *str, const char *separator, size_t separator_len, int *count); -void cstring_array_join_strings(cstring_array *self, char *separator, int count, ...); +// Split on delimiter by replacing (single character) separator with the NUL byte in the original string +cstring_array *cstring_array_split_no_copy(char *str, char separator, int *count); + uint32_t cstring_array_start_token(cstring_array *self); uint32_t cstring_array_add_string(cstring_array *self, char *str); uint32_t cstring_array_add_string_len(cstring_array *self, char *str, size_t len);