[utils] Version of string_split for single-character delimiters which modifies the input string directly rather than creating (essentially) a copy
This commit is contained in:
@@ -21,8 +21,7 @@ int string_compare_len_case_insensitive(const char *str1, const char *str2, size
|
|||||||
|
|
||||||
unsigned char c1, c2;
|
unsigned char c1, c2;
|
||||||
|
|
||||||
if (!len)
|
if (len == 0) return 0;
|
||||||
return 0;
|
|
||||||
|
|
||||||
do {
|
do {
|
||||||
c1 = *s1++;
|
c1 = *s1++;
|
||||||
@@ -424,7 +423,15 @@ char_array *char_array_from_string(char *str) {
|
|||||||
size_t len = strlen(str);
|
size_t len = strlen(str);
|
||||||
char_array *array = char_array_new_size(len+1);
|
char_array *array = char_array_new_size(len+1);
|
||||||
strcpy(array->a, str);
|
strcpy(array->a, str);
|
||||||
array->n = strlen(str);
|
array->n = len;
|
||||||
|
return array;
|
||||||
|
}
|
||||||
|
|
||||||
|
char_array *char_array_from_string_no_copy(char *str, size_t n) {
|
||||||
|
char_array *array = malloc(sizeof(char_array));
|
||||||
|
array->a = str;
|
||||||
|
array->m = n;
|
||||||
|
array->n = n;
|
||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -773,6 +780,27 @@ cstring_array *cstring_array_split(char *str, const char *separator, size_t sepa
|
|||||||
return string_array;
|
return string_array;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cstring_array *cstring_array_split_no_copy(char *str, char separator, int *count) {
|
||||||
|
*count = 0;
|
||||||
|
char *ptr = str;
|
||||||
|
size_t len = strlen(str);
|
||||||
|
|
||||||
|
size_t skip_len = 1;
|
||||||
|
|
||||||
|
for (int i = 0; i < len; i++, ptr++) {
|
||||||
|
if (*ptr == separator) {
|
||||||
|
*ptr = '\0';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
char_array *array = char_array_from_string_no_copy(str, len);
|
||||||
|
cstring_array *string_array = cstring_array_from_char_array(array);
|
||||||
|
*count = cstring_array_num_strings(string_array);
|
||||||
|
|
||||||
|
return string_array;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
char **cstring_array_to_strings(cstring_array *self) {
|
char **cstring_array_to_strings(cstring_array *self) {
|
||||||
char **strings = malloc(self->indices->n * sizeof(char *));
|
char **strings = malloc(self->indices->n * sizeof(char *));
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,12 @@
|
|||||||
|
/*
|
||||||
|
string_utils.h
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Utilities for manipulating strings in C.
|
||||||
|
*/
|
||||||
#ifndef STRING_UTILS_H
|
#ifndef STRING_UTILS_H
|
||||||
#define STRING_UTILS_H
|
#define STRING_UTILS_H
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@@ -35,7 +39,7 @@
|
|||||||
#define UTF8PROC_OPTIONS_LOWERCASE UTF8PROC_OPTIONS_BASE | UTF8PROC_CASEFOLD
|
#define UTF8PROC_OPTIONS_LOWERCASE UTF8PROC_OPTIONS_BASE | UTF8PROC_CASEFOLD
|
||||||
|
|
||||||
|
|
||||||
// NOTE: this particular implementation works only for ASCII strings
|
// ASCII string methods
|
||||||
int string_compare_case_insensitive(const char *str1, const char *str2);
|
int string_compare_case_insensitive(const char *str1, const char *str2);
|
||||||
int string_compare_len_case_insensitive(const char *str1, const char *str2, size_t len);
|
int string_compare_len_case_insensitive(const char *str1, const char *str2, size_t len);
|
||||||
size_t string_common_prefix(const char *str1, const char *str2);
|
size_t string_common_prefix(const char *str1, const char *str2);
|
||||||
@@ -51,6 +55,7 @@ bool string_ends_with(const char *str, const char *ending);
|
|||||||
|
|
||||||
uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_repls, size_t trans_len);
|
uint32_t string_translate(char *str, size_t len, char *word_chars, char *word_repls, size_t trans_len);
|
||||||
|
|
||||||
|
// UTF-8 string methods
|
||||||
char *utf8_reversed_string(const char *s); // returns a copy, caller frees
|
char *utf8_reversed_string(const char *s); // returns a copy, caller frees
|
||||||
ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst);
|
ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst);
|
||||||
|
|
||||||
@@ -76,41 +81,49 @@ size_t string_ltrim(char *str);
|
|||||||
size_t string_rtrim(char *str);
|
size_t string_rtrim(char *str);
|
||||||
size_t string_trim(char *str);
|
size_t string_trim(char *str);
|
||||||
|
|
||||||
/* Caller has to free the original string,
|
/* char_array is a dynamic character array defined in collections.h
|
||||||
also keep in mind that after operating on a char array,
|
but has a few additional methods related to string manipulation.
|
||||||
the pointer to the original string may get realloc'd and change
|
|
||||||
so need to set the char pointer to array.a when done.
|
The array pointer can be treated as a plain old C string for methods
|
||||||
Consider a macro which does this consistently
|
expecting NUL-terminated char pointers, but operations like
|
||||||
|
concatenation are cheap and safe.
|
||||||
*/
|
*/
|
||||||
char_array *char_array_from_string(char *str);
|
char_array *char_array_from_string(char *str);
|
||||||
|
char_array *char_array_from_string_no_copy(char *str, size_t n);
|
||||||
|
|
||||||
|
// Gets the underlying C string for a char_array
|
||||||
char *char_array_get_string(char_array *array);
|
char *char_array_get_string(char_array *array);
|
||||||
|
|
||||||
// Frees the char_array and returns a standard NUL-terminated string
|
// Frees the char_array and returns a standard NUL-terminated string
|
||||||
char *char_array_to_string(char_array *array);
|
char *char_array_to_string(char_array *array);
|
||||||
|
|
||||||
|
// Can use strlen(array->a) but this is faster
|
||||||
size_t char_array_len(char_array *array);
|
size_t char_array_len(char_array *array);
|
||||||
|
|
||||||
|
// append_* methods do not NUL-terminate
|
||||||
void char_array_append(char_array *array, char *str);
|
void char_array_append(char_array *array, char *str);
|
||||||
void char_array_append_len(char_array *array, char *str, size_t len);
|
void char_array_append_len(char_array *array, char *str, size_t len);
|
||||||
void char_array_append_reversed(char_array *array, char *str);
|
void char_array_append_reversed(char_array *array, char *str);
|
||||||
void char_array_append_reversed_len(char_array *array, char *str, size_t len);
|
void char_array_append_reversed_len(char_array *array, char *str, size_t len);
|
||||||
|
// add NUL terminator to a char_array
|
||||||
void char_array_terminate(char_array *array);
|
void char_array_terminate(char_array *array);
|
||||||
|
|
||||||
// Similar to strcat, strips NUL-byte and guarantees 0-terminated
|
// Similar to strcat but with dynamic resizing, guaranteed NUL-terminated
|
||||||
void char_array_cat(char_array *array, char *str);
|
void char_array_cat(char_array *array, char *str);
|
||||||
void char_array_cat_len(char_array *array, char *str, size_t len);
|
void char_array_cat_len(char_array *array, char *str, size_t len);
|
||||||
void char_array_cat_reversed(char_array *array, char *str);
|
void char_array_cat_reversed(char_array *array, char *str);
|
||||||
void char_array_cat_reversed_len(char_array *array, char *str, size_t len);
|
void char_array_cat_reversed_len(char_array *array, char *str, size_t len);
|
||||||
|
|
||||||
// Cat with printf args
|
// Similar to cat methods but with printf args
|
||||||
void char_array_cat_printf(char_array *array, char *format, ...);
|
void char_array_cat_printf(char_array *array, char *format, ...);
|
||||||
|
|
||||||
|
// Mainly for paths or delimited strings
|
||||||
void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
||||||
void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
cstring_arrays represent n strings stored contiguously, delimited by NUL-byte.
|
cstring_arrays represent n strings stored contiguously, delimited by the NUL byte.
|
||||||
|
|
||||||
Instead of storing an array of char pointers (char **), cstring_arrays use this format:
|
Instead of storing an array of char pointers (char **), cstring_arrays use this format:
|
||||||
|
|
||||||
@@ -142,11 +155,15 @@ void cstring_array_clear(cstring_array *self);
|
|||||||
|
|
||||||
cstring_array *cstring_array_from_char_array(char_array *str);
|
cstring_array *cstring_array_from_char_array(char_array *str);
|
||||||
|
|
||||||
|
// Convert cstring_array to an array of n C strings and destroy the cstring_array
|
||||||
char **cstring_array_to_strings(cstring_array *self);
|
char **cstring_array_to_strings(cstring_array *self);
|
||||||
|
|
||||||
|
// Split on delimiter
|
||||||
cstring_array *cstring_array_split(char *str, const char *separator, size_t separator_len, int *count);
|
cstring_array *cstring_array_split(char *str, const char *separator, size_t separator_len, int *count);
|
||||||
|
|
||||||
void cstring_array_join_strings(cstring_array *self, char *separator, int count, ...);
|
// Split on delimiter by replacing (single character) separator with the NUL byte in the original string
|
||||||
|
cstring_array *cstring_array_split_no_copy(char *str, char separator, int *count);
|
||||||
|
|
||||||
uint32_t cstring_array_start_token(cstring_array *self);
|
uint32_t cstring_array_start_token(cstring_array *self);
|
||||||
uint32_t cstring_array_add_string(cstring_array *self, char *str);
|
uint32_t cstring_array_add_string(cstring_array *self, char *str);
|
||||||
uint32_t cstring_array_add_string_len(cstring_array *self, char *str, size_t len);
|
uint32_t cstring_array_add_string_len(cstring_array *self, char *str, size_t len);
|
||||||
|
|||||||
Reference in New Issue
Block a user