ucx/string.h

Fri, 19 Jul 2013 14:17:12 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 19 Jul 2013 14:17:12 +0200
changeset 119
baa839a7633f
parent 118
151f5345f303
child 123
7fb0f74517c5
permissions
-rw-r--r--

completed documentation on sstr_t + sstrsplit overhaul + allocator version of sstrsplit

olaf@20 1 /*
universe@103 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
olaf@20 3 *
universe@103 4 * Copyright 2013 Olaf Wintermann. All rights reserved.
universe@103 5 *
universe@103 6 * Redistribution and use in source and binary forms, with or without
universe@103 7 * modification, are permitted provided that the following conditions are met:
universe@103 8 *
universe@103 9 * 1. Redistributions of source code must retain the above copyright
universe@103 10 * notice, this list of conditions and the following disclaimer.
universe@103 11 *
universe@103 12 * 2. Redistributions in binary form must reproduce the above copyright
universe@103 13 * notice, this list of conditions and the following disclaimer in the
universe@103 14 * documentation and/or other materials provided with the distribution.
universe@103 15 *
universe@103 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@103 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@103 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@103 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@103 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@103 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@103 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@103 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@103 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@103 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@103 26 * POSSIBILITY OF SUCH DAMAGE.
olaf@20 27 */
universe@116 28 /**
universe@116 29 * Bounded string implementation.
universe@116 30 *
universe@116 31 * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings.
universe@116 32 * The main difference to C strings is that <code>sstr_t</code> does <b>not
universe@116 33 * need to be <code>NULL</code>-terminated</b>. Instead the length is stored
universe@116 34 * within the structure.
universe@116 35 *
universe@116 36 * When using <code>sstr_t</code>, developers must be fully aware of what type
universe@116 37 * of string (<code>NULL</code>-terminated or not) they are using, when
universe@116 38 * accessing the <code>char* ptr</code> directly.
universe@116 39 *
universe@116 40 * The UCX string module provides some common string functions, known from
universe@116 41 * standard libc, working with <code>sstr_t</code>.
universe@116 42 *
universe@116 43 * @file string.h
universe@116 44 * @author Mike Becker
universe@116 45 * @author Olaf Wintermann
universe@116 46 */
olaf@20 47
universe@116 48 #ifndef UCX_STRING_H
universe@116 49 #define UCX_STRING_H
olaf@20 50
universe@69 51 #include "ucx.h"
olaf@109 52 #include "allocator.h"
universe@38 53 #include <stddef.h>
universe@38 54
universe@116 55 /** Shortcut for a <code>sstr_t struct</code> literal; <code>s</code> must be a string literal or char array (not a <code>char*</code>), as the length is <code>sizeof(s)-1</code>. */
universe@116 56 #define ST(s) { (char*)(s), sizeof(s)-1 }
universe@116 57 /** Shortcut for the conversion of a C string literal or char array (not a <code>char*</code>) to a <code>sstr_t</code>; the length is <code>sizeof(s)-1</code>. */
universe@116 58 #define S(s) sstrn((char*)(s), sizeof(s)-1)
olaf@20 59
olaf@20 60 #ifdef __cplusplus
olaf@20 61 extern "C" {
olaf@20 62 #endif
olaf@20 63
universe@116 64 /**
universe@116 65 * The UCX string structure: a character pointer paired with an explicit length.
universe@116 66 */
universe@116 67 typedef struct {
universe@116 68 /** A reference to the string data (<b>not necessarily <code>NULL</code>
universe@116 69 * -terminated</b>) */
olaf@20 70 char *ptr;
universe@116 71 /** The length of the string in characters, not counting any terminator */
olaf@20 72 size_t length;
olaf@20 73 } sstr_t;
olaf@20 74
universe@116 75 /**
universe@116 76 * Creates a new sstr_t based on a C string.
universe@116 77 *
universe@116 78 * The length is implicitly inferred by using a call to <code>strlen()</code>.
olaf@20 79 *
universe@116 80 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
universe@116 81 * do want a copy, use sstrdup() on the return value of this function.
universe@116 82 *
universe@116 83 * @param cstring the C string to wrap
universe@116 84 * @return a new sstr_t containing the C string
universe@116 85 *
universe@116 86 * @see sstrn()
olaf@20 87 */
universe@116 88 sstr_t sstr(char *cstring);
olaf@20 89
universe@116 90 /**
universe@116 91 * Creates a new sstr_t of the specified length based on a C string.
olaf@20 92 *
universe@116 93 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
universe@116 94 * do want a copy, use sstrdup() on the return value of this function.
universe@116 95 *
universe@116 96 * @param cstring the C string to wrap
universe@116 97 * @param length the length of the string
universe@116 98 * @return a new sstr_t containing the C string
universe@116 99 *
universe@116 100 * @see sstr()
universe@116 101 * @see S()
olaf@20 102 */
universe@116 103 sstr_t sstrn(char *cstring, size_t length);
olaf@20 104
olaf@20 105
universe@116 106 /**
universe@116 107 * Returns the cumulated length of all specified strings.
olaf@20 108 *
universe@116 109 * At least one string must be specified.
universe@116 110 *
universe@116 111 * <b>Attention:</b> if the count argument does not match the count of the
universe@116 112 * specified strings, the behavior is undefined.
universe@116 113 *
universe@116 114 * @param count the total number of specified strings (so at least 1)
universe@116 115 * @param string the first string
universe@116 116 * @param ... all other strings
universe@116 117 * @return the cumulated length of all strings
olaf@20 118 */
universe@116 119 size_t sstrnlen(size_t count, sstr_t string, ...);
olaf@20 120
olaf@20 121
universe@119 122 /**
universe@119 123 * Concatenates strings.
universe@119 124 *
universe@119 125 * At least one string must be specified and there must be enough memory
universe@119 126 * available referenced by the destination sstr_t.ptr for this function to
universe@119 127 * successfully concatenate all specified strings.
universe@119 128 *
universe@119 129 * The sstr_t.length of the destination string specifies the capacity and
universe@119 130 * should match the total memory available referenced by the destination
universe@119 131 * sstr_t.ptr. This function <i>never</i> copies data beyond the capacity and
universe@119 132 * does not modify any of the source strings.
universe@119 133 *
universe@119 134 * <b>Attention:</b>
universe@119 135 * <ul>
universe@119 136 * <li>Any content in the destination string will be overwritten</li>
universe@119 137 * <li>The destination sstr_t.ptr is <b>NOT</b>
universe@119 138 * <code>NULL</code>-terminated</li>
universe@119 139 * <li>The destination sstr_t.length is set to the total length of the
universe@119 140 * concatenated strings</li>
universe@119 141 * <li><i>Hint:</i> get a <code>NULL</code>-terminated string by performing
universe@119 142 * <code>mystring.ptr[mystring.length]='\0'</code> after calling this
universe@119 143 * function</li>
universe@119 144 * </ul>
universe@119 145 *
universe@119 146 * @param count the total number of strings to concatenate
universe@119 147 * @param dest new sstr_t with capacity information and allocated memory
universe@119 148 * @param src the first string
universe@119 149 * @param ... all other strings
universe@119 150 * @return the argument for <code>dest</code> is returned
universe@119 151 */
universe@119 152 sstr_t sstrncat(size_t count, sstr_t dest, sstr_t src, ...);
universe@119 153
universe@119 154
universe@119 155 /**
universe@119 156 * Returns a substring starting at the specified location.
universe@119 157 *
universe@119 158 * <b>Attention:</b> the new string references the same memory area as the
universe@119 159 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
universe@119 160 * Use sstrdup() to get a copy.
universe@119 161 *
universe@119 162 * @param string input string
universe@119 163 * @param start start location of the substring
universe@119 164 * @return a substring of <code>string</code> starting at <code>start</code>
universe@119 165 *
universe@119 166 * @see sstrsubsl()
universe@119 167 * @see sstrchr()
universe@119 168 */
universe@119 169 sstr_t sstrsubs(sstr_t string, size_t start);
universe@119 170
universe@119 171 /**
universe@119 172 * Returns a substring with a maximum length starting at the specified location.
universe@119 173 *
universe@119 174 * <b>Attention:</b> the new string references the same memory area as the
universe@119 175 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
universe@119 176 * Use sstrdup() to get a copy.
universe@119 177 *
universe@119 178 * @param string input string
universe@119 179 * @param start start location of the substring
universe@119 180 * @param length the maximum length of the substring
universe@119 181 * @return a substring of <code>string</code> starting at <code>start</code>
universe@119 182 * with a maximum length of <code>length</code>
universe@119 183 *
universe@119 184 * @see sstrsubs()
universe@119 185 * @see sstrchr()
universe@119 186 */
universe@119 187 sstr_t sstrsubsl(sstr_t string, size_t start, size_t length);
universe@119 188
universe@119 189 /**
universe@119 190 * Returns a substring starting at the location of the first occurrence of the
universe@119 191 * specified character.
universe@119 192 *
universe@119 193 * If the string does not contain the character, an empty string is returned.
universe@119 194 *
universe@119 195 * @param string the string where to locate the character
universe@119 196 * @param chr the character to locate
universe@119 197 * @return a substring starting at the first location of <code>chr</code>
universe@119 198 *
universe@119 199 * @see sstrsubs()
universe@119 200 */
universe@119 201 sstr_t sstrchr(sstr_t string, int chr);
universe@119 202
universe@119 203 /**
universe@119 204 * Splits a string into parts by using a delimiter string.
universe@119 205 *
universe@119 206 * This function will return <code>NULL</code>, if one of the following happens:
universe@119 207 * <ul>
universe@119 208 * <li>the string length is zero</li>
universe@119 209 * <li>the delimiter length is zero</li>
universe@119 210 * <li>the string equals the delimiter</li>
universe@119 211 * <li>memory allocation fails</li>
universe@119 212 * </ul>
universe@119 213 *
universe@119 214 * The integer referenced by <code>count</code> is used as input and determines
universe@119 215 * the maximum size of the resulting list, i.e. the maximum count of splits to
universe@119 216 * perform + 1.
universe@119 217 *
universe@119 218 * The integer referenced by <code>count</code> is also used as output and is
universe@119 219 * set to (negative values are stored converted to <code>size_t</code>, i.e. they wrap around)
universe@119 220 * <ul>
universe@119 221 * <li>-2, on memory allocation errors</li>
universe@119 222 * <li>-1, if either the string or the delimiter is an empty string</li>
universe@119 223 * <li>0, if the string equals the delimiter</li>
universe@119 224 * <li>1, if the string does not contain the delimiter</li>
universe@119 225 * <li>the count of list items, otherwise</li>
universe@119 226 * </ul>
universe@119 227 *
universe@119 228 * If the string starts with the delimiter, the first item of the resulting
universe@119 229 * list will be an empty string.
universe@119 230 *
universe@119 231 * If the string ends with the delimiter and the maximum list size is not
universe@119 232 * exceeded, the last list item will be an empty string.
universe@119 233 *
universe@119 234 * <b>Attention:</b> All list items <b>AND</b> all sstr_t.ptr of the list
universe@119 235 * items must be manually passed to <code>free()</code>. Use sstrsplita() with
universe@119 236 * an allocator for managed memory, to avoid this.
olaf@20 237 *
universe@119 238 * @param string the string to split
universe@119 239 * @param delim the delimiter string
universe@119 240 * @param count IN: the maximum size of the resulting list (0 for an
universe@119 241 * unbounded list), OUT: the actual size of the list
universe@119 242 * @return a list of the split strings as sstr_t array or
universe@119 243 * <code>NULL</code> on error
universe@119 244 *
universe@119 245 * @see sstrsplita()
olaf@20 246 */
universe@119 247 sstr_t* sstrsplit(sstr_t string, sstr_t delim, size_t *count);
olaf@20 248
universe@119 249 /**
universe@119 250 * Performs sstrsplit() using an UcxAllocator.
universe@119 251 *
universe@119 252 * <i>Read the description of sstrsplit() for details.</i>
universe@119 253 *
universe@119 254 * The memory for the sstr_t.ptr pointers of the list items and the memory for
universe@119 255 * the sstr_t array itself are allocated by using the UcxAllocator.malloc()
universe@119 256 * function.
universe@119 257 *
universe@119 258 * <b>Note:</b> the allocator is not used for memory that is freed within the
universe@119 259 * same call of this function (locally scoped variables).
universe@119 260 *
universe@119 261 * @param string the string to split
universe@119 262 * @param delim the delimiter string
universe@119 263 * @param count IN: the maximum size of the resulting list (0 for an
universe@119 264 * unbounded list), OUT: the actual size of the list
universe@119 265 * @param allocator the UcxAllocator used for allocating memory
universe@119 266 * @return a list of the split strings as sstr_t array or
universe@119 267 * <code>NULL</code> on error
universe@119 268 *
universe@119 269 * @see sstrsplit()
olaf@20 270 */
universe@119 271 sstr_t* sstrsplita(sstr_t string, sstr_t delim, size_t *count,
universe@119 272 UcxAllocator *allocator);
olaf@20 273
universe@116 274 /**
universe@116 275 * Compares two UCX strings with standard <code>memcmp()</code>.
universe@116 276 *
universe@116 277 * At first it compares the sstr_t.length attribute of the two strings. The
universe@116 278 * <code>memcmp()</code> function is called, if and only if the lengths match.
universe@116 279 *
universe@116 280 * @param s1 the first string
universe@116 281 * @param s2 the second string
universe@116 282 * @return -1, if the length of s1 is less than the length of s2 or 1, if the
universe@116 283 * length of s1 is greater than the length of s2 or the result of
universe@116 284 * <code>memcmp()</code> otherwise (i.e. 0 if the strings match)
universe@116 285 */
olaf@68 286 int sstrcmp(sstr_t s1, sstr_t s2);
olaf@20 287
universe@116 288 /**
universe@116 289 * Creates a duplicate of the specified string.
universe@116 290 *
universe@116 291 * The new sstr_t will contain a copy allocated by standard
universe@116 292 * <code>malloc()</code>. So developers <b>MUST</b> pass the sstr_t.ptr to
universe@116 293 * <code>free()</code>.
universe@116 294 *
universe@118 295 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
universe@118 296 * terminated.
universe@118 297 *
universe@116 298 * @param string the string to duplicate
universe@118 299 * @return a duplicate of the string
universe@119 300 * @see sstrdupa()
universe@116 301 */
universe@116 302 sstr_t sstrdup(sstr_t string);
olaf@20 303
universe@118 304 /**
universe@118 305 * Creates a duplicate of the specified string using an UcxAllocator.
universe@118 306 *
universe@118 307 * The new sstr_t will contain a copy allocated by the allocator's
universe@118 308 * ucx_allocator_malloc function. So it is implementation-dependent whether the
universe@118 309 * returned sstr_t.ptr pointer must be passed to the allocator's
universe@118 310 * ucx_allocator_free function manually.
universe@118 311 *
universe@118 312 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
universe@118 313 * terminated.
universe@118 314 *
universe@118 315 * @param allocator a valid instance of an UcxAllocator
universe@118 316 * @param string the string to duplicate
universe@118 317 * @return a duplicate of the string
universe@119 318 * @see sstrdup()
universe@118 319 */
universe@118 320 sstr_t sstrdupa(UcxAllocator *allocator, sstr_t string);
universe@118 321
universe@118 322 /**
universe@118 323 * Omits leading and trailing spaces.
universe@118 324 *
universe@118 325 * This function returns a new sstr_t containing a trimmed version of the
universe@118 326 * specified string.
universe@118 327 *
universe@118 328 * <b>Note:</b> the new sstr_t references the same memory, thus you
universe@118 329 * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to
universe@118 330 * <code>free()</code>. It is also highly recommended to avoid assignments like
universe@118 331 * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the
universe@118 332 * source string. Assignments of this type are only permitted, if the
universe@118 333 * sstr_t.ptr of the source string does not need to be freed or if another
universe@118 334 * reference to the source string exists.
universe@118 335 *
universe@118 336 * @param string the string that shall be trimmed
universe@118 337 * @return a new sstr_t containing the trimmed string
universe@118 338 */
olaf@96 339 sstr_t sstrtrim(sstr_t string);
olaf@96 340
olaf@20 341 #ifdef __cplusplus
olaf@20 342 }
olaf@20 343 #endif
olaf@20 344
universe@116 345 #endif /* UCX_STRING_H */

mercurial