| 1 | // tString.cpp  |
| 2 | //  |
| 3 | // tString is a simple and readable string class that implements sensible operators and implicit casts. The text in a  |
| 4 | // tString is considered to be UTF-8 encoded. With UTF-8 encoding each character (code-point) may be encoded by 1 or more  |
| 5 | // code-units (a code-unit is 8 bits). The char8_t is used to represent a code-unit (as the C++ standard encourages).  |
| 6 | //  |
| 7 | // Externally a tString should be thought of as an array of code-units which may contain multiple null characters. A  |
| 8 | // valid string of length 5 could be "ab\0\0e" for example. Internally a tString is null-terminated, but that is for  |
| 9 | // implementational efficiency only -- many external functions require null-terminated strings, so it makes it easy to  |
| 10 | // return one if the internal representation already has a null terminator. For example the length-5 string "abcde" is  |
| 11 | // stored internally as 'a' 'b' 'c' 'd' 'e' '\0'.  |
| 12 | //  |
| 13 | // It can be inefficient (in time) to only maintain the exact amount of memory needed to store a particular string -- it  |
| 14 | // would require a new memory allocation every time a string changes size. For this reason tStrings have a 'capacity'.  |
| 15 | // The capacity of a tString is the number of code-units that can be stored without requiring additional memory  |
| 16 | // management calls. For example, a tString with capacity 10 could be storing "abcde". If you were to add "fghij" to the  |
| 17 | // string, it would be done without any delete[] or new calls. Note that internally a tString of capacity 10 actually  |
| 18 | // has malloced an array of 11 code-units, the 11th one being for the terminating null. Functions that affect capacity  |
| 19 | // (like Reserve) do not change the behaviour of a tString and are always safe, they simply affect the efficiency.  |
| 20 | //  |
| 21 | // When the tString does need to grow its capacity (perhaps another string is being added/appended to it) there is the  |
| 22 | // question of how much extra space to reserve. The SetGrowMethod may be used to set how much extra space is reserved  |
| 23 | // when a memory-size-changing operation takes place. By default a constant amount of extra memory is reserved.  |
| 24 | //  |
| 25 | // A few of the salient functions related to the above are:  |
| 26 | // Length : Returns how many code-units are used by the string. This is NOT like a strlen call as it does not  |
| 27 | // rely on nul-termination. It does not need to iterate as the length is stored explicitly.  |
| 28 | // Capacity : Returns the current capacity of the tString in code-units.  |
| 29 | // Reserve : This is instead of a SetCapacity call. There is no SetCapacity as we could not guarantee that a  |
| 30 | // requested capacity is non-destructive. Calling Reserve(5) on a string of Length 10 will not result  |
| 31 | // in a new capacity of 5 because it would require culling half of the code-units. Reserve can also be  |
| 32 | // used to shrink (release memory) if possible. See the comments before the function itself.  |
| 33 | // Shrink : Shrinks the tString to the least amount of memory used possible. Like calling Reserve(Length());  |
| 34 | // SetGrowMethod: Controls how much extra space (Capacity - Length) to reserve when performing a memory operation.  |
| 35 | //  |
| 36 | // For conversions of arbitrary types to tStrings, see tsPrint in the higher level System module.  |
| 37 | //  |
| 38 | // Copyright (c) 2004-2006, 2015, 2017, 2019-2023, 2025 Tristan Grimmer.  |
| 39 | // Copyright (c) 2020 Stefan Wessels.  |
| 40 | // Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby  |
| 41 | // granted, provided that the above copyright notice and this permission notice appear in all copies.  |
| 42 | //  |
| 43 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL  |
| 44 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,  |
| 45 | // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  |
| 46 | // AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR  |
| 47 | // PERFORMANCE OF THIS SOFTWARE.  |
| 48 |   |
| 49 | #include "Foundation/tString.h"  |
| 50 | #include "Foundation/tStandard.h"  |
| 51 | #include "Foundation/tHash.h"  |
| 52 |   |
| 53 |   |
| 54 | tString::operator uint32()  |
| 55 | {  |
| 56 | // This function deals with a StringLength of zero gracefully. It does not deref the data pointer in this case.  |
| 57 | return tHash::tHashDataFast32(data: (const uint8*)CodeUnits, length: StringLength);  |
| 58 | }  |
| 59 |   |
| 60 |   |
| 61 | tString::operator uint32() const  |
| 62 | {  |
| 63 | return tHash::tHashDataFast32(data: (const uint8*)CodeUnits, length: StringLength);  |
| 64 | }  |
| 65 |   |
| 66 |   |
| 67 | tString tString::Left(const char marker) const  |
| 68 | {  |
| 69 | int pos = FindChar(c: marker);  |
| 70 | if (pos <= 0)  |
| 71 | return tString();  |
| 72 |   |
| 73 | tString buf(pos);  |
| 74 | tStd::tMemcpy(dest: buf.CodeUnits, src: CodeUnits, numBytes: pos);  |
| 75 | return buf;  |
| 76 | }  |
| 77 |   |
| 78 |   |
| 79 | tString tString::Right(const char marker) const  |
| 80 | {  |
| 81 | int pos = FindChar(c: marker, reverse: true);  |
| 82 | if ((pos == -1) || (pos == (StringLength-1)))  |
| 83 | return tString();  |
| 84 |   |
| 85 | tString buf(StringLength - 1 - pos);  |
| 86 | tStd::tMemcpy(dest: buf.CodeUnits, src: CodeUnits + pos + 1, numBytes: StringLength - 1 - pos);  |
| 87 | return buf;  |
| 88 | }  |
| 89 |   |
| 90 |   |
| 91 | tString tString::Left(int count) const  |
| 92 | {  |
| 93 | if (count <= 0)  |
| 94 | return tString();  |
| 95 |   |
| 96 | int length = StringLength;  |
| 97 | if (count > length)  |
| 98 | count = length;  |
| 99 |   |
| 100 | tString buf(count);  |
| 101 | tStd::tMemcpy(dest: buf.CodeUnits, src: CodeUnits, numBytes: count);  |
| 102 | return buf;  |
| 103 | }  |
| 104 |   |
| 105 |   |
| 106 | tString tString::Mid(int start, int count) const  |
| 107 | {  |
| 108 | int length = StringLength;  |
| 109 | if ((start < 0) || (start >= length) || (count <= 0))  |
| 110 | return tString();  |
| 111 |   |
| 112 | if ((start + count) > length)  |
| 113 | count = length - start;  |
| 114 |   |
| 115 | tString buf(count);  |
| 116 | tStd::tMemcpy(dest: buf.CodeUnits, src: CodeUnits + start, numBytes: count);  |
| 117 | return buf;  |
| 118 | }  |
| 119 |   |
| 120 |   |
| 121 | tString tString::Right(int count) const  |
| 122 | {  |
| 123 | if (count <= 0)  |
| 124 | return tString();  |
| 125 |   |
| 126 | int length = StringLength;  |
| 127 | int start = length - count;  |
| 128 | if (start < 0)  |
| 129 | {  |
| 130 | start = 0;  |
| 131 | count = length;  |
| 132 | }  |
| 133 |   |
| 134 | tString buf(count);  |
| 135 | tStd::tMemcpy(dest: buf.CodeUnits, src: CodeUnits + start, numBytes: count);  |
| 136 | return buf;  |
| 137 | }  |
| 138 |   |
| 139 |   |
| 140 | tString tString::(const char divider)  |
| 141 | {  |
| 142 | int pos = FindChar(c: divider);  |
| 143 | if (pos == -1)  |
| 144 | return tString();  |
| 145 |   |
| 146 | int count = pos;  |
| 147 | tString left(count);  |
| 148 | tStd::tMemcpy(dest: left.CodeUnits, src: CodeUnits, numBytes: count);  |
| 149 |   |
| 150 | // We don't need to reallocate memory for this string. We can just do a memmove and adjust the StringLength.  |
| 151 | // Memmove is needed since src and dest overlap. Capacity can stay the same.  |
| 152 | StringLength -= count+1;  |
| 153 | if (StringLength > 0)  |
| 154 | tStd::tMemmov(dest: CodeUnits, src: CodeUnits+pos+1, numBytes: StringLength);  |
| 155 | CodeUnits[StringLength] = '\0';  |
| 156 |   |
| 157 | return left;  |
| 158 | }  |
| 159 |   |
| 160 |   |
| 161 | tString tString::(const char divider)  |
| 162 | {  |
| 163 | int pos = FindChar(c: divider, reverse: true);  |
| 164 | if (pos == -1)  |
| 165 | return tString();  |
| 166 |   |
| 167 | int count = StringLength - pos - 1;  |
| 168 | tString right(count);  |
| 169 | tStd::tMemcpy(dest: right.CodeUnits, src: CodeUnits+pos+1, numBytes: count);  |
| 170 |   |
| 171 | // We don't need to reallocate or move memory for this string. We can just adjust the StringLength.  |
| 172 | // Capacity can stay the same.  |
| 173 | StringLength -= count+1;  |
| 174 | CodeUnits[StringLength] = '\0';  |
| 175 |   |
| 176 | return right;  |
| 177 | }  |
| 178 |   |
| 179 |   |
| 180 | tString tString::(int count)  |
| 181 | {  |
| 182 | if (count >= StringLength)  |
| 183 | {  |
| 184 | tString left(*this);  |
| 185 | Clear();  |
| 186 | return left;  |
| 187 | }  |
| 188 |   |
| 189 | if (count <= 0)  |
| 190 | return tString();  |
| 191 |   |
| 192 | tString left(count);  |
| 193 | tStd::tMemcpy(dest: left.CodeUnits, src: CodeUnits, numBytes: count);  |
| 194 |   |
| 195 | // We don't need to reallocate memory for this string. We can just do a memmove and adjust the StringLength.  |
| 196 | // Memmove is needed since src and dest overlap. Capacity can stay the same.  |
| 197 | StringLength -= count;  |
| 198 | if (StringLength > 0)  |
| 199 | tStd::tMemmov(dest: CodeUnits, src: CodeUnits+count, numBytes: StringLength);  |
| 200 | CodeUnits[StringLength] = '\0';  |
| 201 |   |
| 202 | return left;  |
| 203 | }  |
| 204 |   |
| 205 |   |
| 206 | tString tString::(int start, int count)  |
| 207 | {  |
| 208 | int length = StringLength;  |
| 209 | if ((start < 0) || (start >= length) || (count <= 0))  |
| 210 | return tString();  |
| 211 |   |
| 212 | if ((start + count) > length)  |
| 213 | count = length - start;  |
| 214 |   |
| 215 | tString mid(count);  |
| 216 | tStd::tMemcpy(dest: mid.CodeUnits, src: CodeUnits + start, numBytes: count);  |
| 217 |   |
| 218 | // We don't need to reallocate memory for this string. We can just do a memmove and adjust the StringLength.  |
| 219 | // Memmove is needed since src and dest overlap. Capacity can stay the same.  |
| 220 | int numMove = length - (start + count);  |
| 221 | if (numMove > 0)  |
| 222 | tStd::tMemcpy(dest: CodeUnits + start, src: CodeUnits + start + count, numBytes: numMove);  |
| 223 | StringLength -= count;  |
| 224 | CodeUnits[StringLength] = '\0';  |
| 225 |   |
| 226 | return mid;  |
| 227 | }  |
| 228 |   |
| 229 |   |
| 230 | tString tString::(int count)  |
| 231 | {  |
| 232 | if (count >= StringLength)  |
| 233 | {  |
| 234 | tString right(*this);  |
| 235 | Clear();  |
| 236 | return right;  |
| 237 | }  |
| 238 |   |
| 239 | if (count <= 0)  |
| 240 | return tString();  |
| 241 |   |
| 242 | tString right(count);  |
| 243 | tStd::tMemcpy(dest: right.CodeUnits, src: CodeUnits+StringLength-count, numBytes: count);  |
| 244 |   |
| 245 | // We don't need to reallocate or move memory for this string. We can just adjust the StringLength.  |
| 246 | // Capacity can stay the same.  |
| 247 | StringLength -= count;  |
| 248 | CodeUnits[StringLength] = '\0';  |
| 249 |   |
| 250 | return right;  |
| 251 | }  |
| 252 |   |
| 253 |   |
| 254 | tString tString::(const char8_t* prefix)  |
| 255 | {  |
| 256 | if (IsEmpty() || !prefix)  |
| 257 | return tString();  |
| 258 |   |
| 259 | int len = tStd::tStrlen(s: prefix);  |
| 260 | if ((len <= 0) || (len > StringLength))  |
| 261 | return tString();  |
| 262 |   |
| 263 | if (tStd::tStrncmp(a: CodeUnits, b: prefix, n: len) == 0)  |
| 264 | {  |
| 265 | // We don't need to reallocate memory for this string. We can just do a memmove and adjust the StringLength.  |
| 266 | // Memmove is needed since src and dest overlap. Capacity can stay the same.  |
| 267 | if (StringLength > len)  |
| 268 | tStd::tMemmov(dest: CodeUnits, src: CodeUnits+len, numBytes: StringLength-len);  |
| 269 | StringLength -= len;  |
| 270 | CodeUnits[StringLength] = '\0';  |
| 271 | return tString(prefix);  |
| 272 | }  |
| 273 |   |
| 274 | return tString();  |
| 275 | }  |
| 276 |   |
| 277 |   |
| 278 | tString tString::(const char8_t* suffix)  |
| 279 | {  |
| 280 | if (IsEmpty() || !suffix)  |
| 281 | return tString();  |
| 282 |   |
| 283 | int len = tStd::tStrlen(s: suffix);  |
| 284 | if ((len <= 0) || (len > StringLength))  |
| 285 | return tString();  |
| 286 |   |
| 287 | if (tStd::tStrncmp(a: &CodeUnits[StringLength-len], b: suffix, n: len) == 0)  |
| 288 | {  |
| 289 | // We don't need to reallocate or move memory for this string. We can just adjust the StringLength.  |
| 290 | // Capacity can stay the same.  |
| 291 | StringLength -= len;  |
| 292 | CodeUnits[StringLength] = '\0';  |
| 293 | return tString(suffix);  |
| 294 | }  |
| 295 |   |
| 296 | return tString();  |
| 297 | }  |
| 298 |   |
| 299 |   |
| 300 | int tString::Replace(const char8_t* search, const char8_t* replace)  |
| 301 | {  |
| 302 | // Zeroth scenario (trivial) -- Search is empty. Definitely won't be able to find it.  |
| 303 | if (!search || (search[0] == '\0'))  |
| 304 | return 0;  |
| 305 |   |
| 306 | // First scenario (trivial) -- The search length is bigger than the string length. It simply can't be there.  |
| 307 | int searchLength = tStd::tStrlen(s: search);  |
| 308 | if (searchLength > StringLength)  |
| 309 | return 0;  |
| 310 |   |
| 311 | int replaceLength = replace ? tStd::tStrlen(s: replace) : 0;  |
| 312 | int replaceCount = 0;  |
| 313 |   |
| 314 | // Second scenario (easy) -- The search and replace string lengths are equal. We know in this case there will be no  |
| 315 | // need to mess with memory and we don't care how many replacements there will be. We can just go ahead and replace  |
| 316 | // them in one loop.  |
| 317 | if (replaceLength == searchLength)  |
| 318 | {  |
| 319 | char8_t* searchStart = CodeUnits;  |
| 320 | while (searchStart < (CodeUnits + StringLength))  |
| 321 | {  |
| 322 | char8_t* foundString = (char8_t*)tStd::tMemsrch(haystack: searchStart, haystackNumBytes: StringLength-(searchStart-CodeUnits), needle: search, needleNumBytes: searchLength);  |
| 323 | if (foundString)  |
| 324 | {  |
| 325 | tStd::tMemcpy(dest: foundString, src: replace, numBytes: replaceLength);  |
| 326 | replaceCount++;  |
| 327 | }  |
| 328 | else  |
| 329 | {  |
| 330 | break;  |
| 331 | }  |
| 332 | searchStart = foundString + searchLength;  |
| 333 | }  |
| 334 | return replaceCount;  |
| 335 | }  |
| 336 |   |
| 337 | // Third scenario (hard) -- Different search and replace sizes. Supports empty replace string as well.  |
| 338 | // The first step is to count how many replacements there are going to be so we can set the capacity properly.  |
| 339 | char8_t* searchStart = CodeUnits;  |
| 340 | while (searchStart < (CodeUnits + StringLength))  |
| 341 | {  |
| 342 | char8_t* foundString = (char8_t*)tStd::tMemsrch(haystack: searchStart, haystackNumBytes: StringLength-(searchStart-CodeUnits), needle: search, needleNumBytes: searchLength);  |
| 343 | if (!foundString)  |
| 344 | break;  |
| 345 |   |
| 346 | replaceCount++;  |
| 347 | searchStart = foundString + searchLength;  |
| 348 | }  |
| 349 |   |
| 350 | // The new length may be bigger or smaller than the original. If the capNeeded is precisely  |
| 351 | // 0, it means that the entire string is being replaced with nothing, so we can exit early.  |
| 352 | // eg. Replace "abcd" in "abcdabcd" with ""  |
| 353 | int newLength = StringLength - (replaceCount*searchLength) + (replaceCount*replaceLength);  |
| 354 | if (newLength == 0)  |
| 355 | {  |
| 356 | Clear();  |
| 357 | return replaceCount;  |
| 358 | }  |
| 359 |   |
| 360 | // The easiest way of doing this is to have a scratchpad we can write the new string into.  |
| 361 | char8_t* newText = new char8_t[newLength];  |
| 362 | int newWritePos = 0;  |
| 363 |   |
| 364 | searchStart = CodeUnits;  |
| 365 | while (searchStart < (CodeUnits + StringLength))  |
| 366 | {  |
| 367 | char8_t* foundString = (char8_t*)tStd::tMemsrch(haystack: searchStart, haystackNumBytes: StringLength-(searchStart-CodeUnits), needle: search, needleNumBytes: searchLength);  |
| 368 | if (foundString)  |
| 369 | {  |
| 370 | // Copy the stuff before the found string.  |
| 371 | int lenBeforeFound = int(foundString-searchStart);  |
| 372 | if (lenBeforeFound > 0)  |
| 373 | tStd::tMemcpy(dest: newText+newWritePos, src: searchStart, numBytes: lenBeforeFound);  |
| 374 | newWritePos += int(foundString-searchStart);  |
| 375 |   |
| 376 | // Copy the replacement in.  |
| 377 | if (replaceLength > 0)  |
| 378 | tStd::tMemcpy(dest: newText+newWritePos, src: replace, numBytes: replaceLength);  |
| 379 | newWritePos += replaceLength;  |
| 380 | }  |
| 381 | else  |
| 382 | {  |
| 383 | // Copy the remainder when nothing found.  |
| 384 | int numRemain = newLength-newWritePos;  |
| 385 | if (numRemain > 0)  |
| 386 | tStd::tMemcpy(dest: newText+newWritePos, src: searchStart, numBytes: numRemain);  |
| 387 | break;  |
| 388 | }  |
| 389 | searchStart = foundString + searchLength;  |
| 390 | }  |
| 391 |   |
| 392 | // Make sure there's enough capacity.  |
| 393 | UpdateCapacity(capNeeded: newLength, preserve: false);  |
| 394 |   |
| 395 | // Copy the scratchpad data over.  |
| 396 | if (newLength > 0)  |
| 397 | tStd::tMemcpy(dest: CodeUnits, src: newText, numBytes: newLength);  |
| 398 | CodeUnits[newLength] = '\0';  |
| 399 | StringLength = newLength;  |
| 400 | delete[] newText;  |
| 401 |   |
| 402 | return replaceCount;  |
| 403 | }  |
| 404 |   |
| 405 |   |
| 406 | int tString::Remove(char rem)  |
| 407 | {  |
| 408 | int destIndex = 0;  |
| 409 | int numRemoved = 0;  |
| 410 |   |
| 411 | // This operation can be done in place.  |
| 412 | for (int i = 0; i < StringLength; i++)  |
| 413 | {  |
| 414 | if (CodeUnits[i] != rem)  |
| 415 | CodeUnits[destIndex++] = CodeUnits[i];  |
| 416 | else  |
| 417 | numRemoved++;  |
| 418 | }  |
| 419 | StringLength -= numRemoved;  |
| 420 | CodeUnits[StringLength] = '\0';  |
| 421 |   |
| 422 | return numRemoved;  |
| 423 | }  |
| 424 |   |
| 425 |   |
| 426 | int tString::RemoveLeading(const char* removeThese)  |
| 427 | {  |
| 428 | if (IsEmpty() || !removeThese || !removeThese[0])  |
| 429 | return 0;  |
| 430 |   |
| 431 | // Since the StringLength can't get bigger, no need to do any memory management. We can do it in one pass.  |
| 432 | int writeIndex = 0;  |
| 433 | bool checkPresence = true;  |
| 434 | int numRemoved = 0;  |
| 435 | for (int readIndex = 0; readIndex < StringLength; readIndex++)  |
| 436 | {  |
| 437 | char8_t readChar = CodeUnits[readIndex];  |
| 438 |   |
| 439 | // Is readChar present in theseChars?  |
| 440 | bool present = false; int j = 0;  |
| 441 | if (checkPresence)  |
| 442 | while (removeThese[j] && !present)  |
| 443 | if (removeThese[j++] == readChar)  |
| 444 | present = true;  |
| 445 |   |
| 446 | if (present && checkPresence)  |
| 447 | {  |
| 448 | numRemoved++;  |
| 449 | continue;  |
| 450 | }  |
| 451 |   |
| 452 | // Stop checking after hit first char not found.  |
| 453 | checkPresence = false;  |
| 454 | CodeUnits[writeIndex++] = readChar;  |
| 455 | }  |
| 456 |   |
| 457 | StringLength -= numRemoved;  |
| 458 | CodeUnits[StringLength] = '\0';  |
| 459 | return numRemoved;  |
| 460 | }  |
| 461 |   |
| 462 |   |
| 463 | int tString::RemoveTrailing(const char* removeThese)  |
| 464 | {  |
| 465 | if (IsEmpty() || !removeThese || !removeThese[0])  |
| 466 | return 0;  |
| 467 |   |
| 468 | // Since the StringLength can't get bigger, no need to do any memory management. We can do it in one pass.  |
| 469 | int writeIndex = StringLength-1;  |
| 470 | bool checkPresence = true;  |
| 471 | int numRemoved = 0;  |
| 472 | for (int readIndex = StringLength-1; readIndex >= 0; readIndex--)  |
| 473 | {  |
| 474 | char8_t readChar = CodeUnits[readIndex];  |
| 475 |   |
| 476 | // Is readChar present in theseChars?  |
| 477 | bool present = false; int j = 0;  |
| 478 | if (checkPresence)  |
| 479 | while (removeThese[j] && !present)  |
| 480 | if (removeThese[j++] == readChar)  |
| 481 | present = true;  |
| 482 |   |
| 483 | if (present && checkPresence)  |
| 484 | {  |
| 485 | numRemoved++;  |
| 486 | continue;  |
| 487 | }  |
| 488 |   |
| 489 | // Stop checking after hit first char (going backwards) not found.  |
| 490 | checkPresence = false;  |
| 491 | CodeUnits[writeIndex--] = readChar;  |
| 492 | }  |
| 493 |   |
| 494 | StringLength -= numRemoved;  |
| 495 |   |
| 496 | // Cuz we went backwards we now need to shift everything left to where Codeunits begins.  |
| 497 | // Important to use memory-move and not memory-copy because they overlap.  |
| 498 | if (numRemoved > 0)  |
| 499 | tStd::tMemmov(dest: CodeUnits, src: CodeUnits+writeIndex+1, numBytes: StringLength);  |
| 500 | CodeUnits[StringLength] = '\0';  |
| 501 | return numRemoved;  |
| 502 | }  |
| 503 |   |
| 504 | int tString::RemoveFirst()  |
| 505 | {  |
| 506 | if (IsEmpty())  |
| 507 | return 0;  |
| 508 |   |
| 509 | // We don't have a -1 on the StringLength here so we get the internal null terminator for free.  |
| 510 | tStd::tMemmov(dest: CodeUnits, src: CodeUnits+1, numBytes: StringLength);  |
| 511 | StringLength--;  |
| 512 | return 1;  |
| 513 | }  |
| 514 |   |
| 515 |   |
| 516 | int tString::RemoveLast()  |
| 517 | {  |
| 518 | if (IsEmpty())  |
| 519 | return 0;  |
| 520 |   |
| 521 | StringLength--;  |
| 522 | CodeUnits[StringLength] = '\0';  |
| 523 | return 1;  |
| 524 | }  |
| 525 |   |
| 526 |   |
| 527 | int tString::RemoveAny(const char* theseChars)  |
| 528 | {  |
| 529 | if (IsEmpty() || !theseChars || !theseChars[0])  |
| 530 | return 0;  |
| 531 |   |
| 532 | // Since the StringLength can't get bigger, no need to do any memory management. We can do it in one pass.  |
| 533 | int writeIndex = 0;  |
| 534 | int numRemoved = 0;  |
| 535 | for (int readIndex = 0; readIndex < StringLength; readIndex++)  |
| 536 | {  |
| 537 | char8_t readChar = CodeUnits[readIndex];  |
| 538 |   |
| 539 | // Is readChar present in theseChars?  |
| 540 | bool removed = false;  |
| 541 | int j = 0;  |
| 542 | while (theseChars[j] && !removed)  |
| 543 | if (theseChars[j++] == readChar)  |
| 544 | removed = true;  |
| 545 |   |
| 546 | if (removed)  |
| 547 | {  |
| 548 | numRemoved++;  |
| 549 | continue;  |
| 550 | }  |
| 551 |   |
| 552 | CodeUnits[writeIndex++] = readChar;  |
| 553 | }  |
| 554 |   |
| 555 | StringLength -= numRemoved;  |
| 556 | CodeUnits[StringLength] = '\0';  |
| 557 | return numRemoved;  |
| 558 | }  |
| 559 |   |
| 560 |   |
| 561 | int tString::RemoveAnyNot(const char* theseChars)  |
| 562 | {  |
| 563 | if (IsEmpty())  |
| 564 | return 0;  |
| 565 |   |
| 566 | if (!theseChars || !theseChars[0])  |
| 567 | {  |
| 568 | int numChars = Length();  |
| 569 | Clear();  |
| 570 | return numChars;  |
| 571 | }  |
| 572 |   |
| 573 | // Since the StringLength can't get bigger, no need to do any memory management. We can do it in one pass.  |
| 574 | int writeIndex = 0;  |
| 575 | int numRemoved = 0;  |
| 576 | for (int readIndex = 0; readIndex < StringLength; readIndex++)  |
| 577 | {  |
| 578 | char8_t readChar = CodeUnits[readIndex];  |
| 579 |   |
| 580 | // Is readChar present in theseChars?  |
| 581 | bool removed = true;  |
| 582 | int j = 0;  |
| 583 | while (theseChars[j] && removed)  |
| 584 | if (theseChars[j++] == readChar)  |
| 585 | removed = false;  |
| 586 |   |
| 587 | if (removed)  |
| 588 | {  |
| 589 | numRemoved++;  |
| 590 | continue;  |
| 591 | }  |
| 592 |   |
| 593 | CodeUnits[writeIndex++] = readChar;  |
| 594 | }  |
| 595 |   |
| 596 | StringLength -= numRemoved;  |
| 597 | CodeUnits[StringLength] = '\0';  |
| 598 | return numRemoved;  |
| 599 | }  |
| 600 |   |
| 601 |   |
| 602 | int tString::GetUTF16(char16_t* dst, bool incNullTerminator) const  |
| 603 | {  |
| 604 | if (IsEmpty())  |
| 605 | return 0;  |
| 606 |   |
| 607 | if (!dst)  |
| 608 | return tStd::tUTF16(dst: nullptr, src: CodeUnits, srcLen: StringLength) + (incNullTerminator ? 1 : 0);  |
| 609 |   |
| 610 | int numUnitsWritten = tStd::tUTF16(dst, src: CodeUnits, srcLen: StringLength);  |
| 611 | if (incNullTerminator)  |
| 612 | {  |
| 613 | dst[numUnitsWritten] = 0;  |
| 614 | numUnitsWritten++;  |
| 615 | }  |
| 616 |   |
| 617 | return numUnitsWritten;  |
| 618 | }  |
| 619 |   |
| 620 |   |
| 621 | int tString::GetUTF32(char32_t* dst, bool incNullTerminator) const  |
| 622 | {  |
| 623 | if (IsEmpty())  |
| 624 | return 0;  |
| 625 |   |
| 626 | if (!dst)  |
| 627 | return tStd::tUTF32(dst: nullptr, src: CodeUnits, srcLen: StringLength) + (incNullTerminator ? 1 : 0);  |
| 628 |   |
| 629 | int numUnitsWritten = tStd::tUTF32(dst, src: CodeUnits, srcLen: StringLength);  |
| 630 | if (incNullTerminator)  |
| 631 | {  |
| 632 | dst[numUnitsWritten] = 0;  |
| 633 | numUnitsWritten++;  |
| 634 | }  |
| 635 |   |
| 636 | return numUnitsWritten;  |
| 637 | }  |
| 638 |   |
| 639 |   |
| 640 | int tString::SetUTF16(const char16_t* src, int srcLen)  |
| 641 | {  |
| 642 | if (!src || (srcLen == 0))  |
| 643 | {  |
| 644 | Clear();  |
| 645 | return 0;  |
| 646 | }  |
| 647 |   |
| 648 | // If srcLen < 0 it means ignore srcLen and assume src is null-terminated.  |
| 649 | if (srcLen < 0)  |
| 650 | {  |
| 651 | int len = tStd::tUTF8s(dst: nullptr, src);  |
| 652 | UpdateCapacity(capNeeded: len, preserve: false);  |
| 653 | StringLength = tStd::tUTF8s(dst: CodeUnits, src);  |
| 654 | }  |
| 655 | else  |
| 656 | {  |
| 657 | int len = tStd::tUTF8(dst: nullptr, src, srcLen);  |
| 658 | UpdateCapacity(capNeeded: len, preserve: false);  |
| 659 | tStd::tUTF8(dst: CodeUnits, src, srcLen);  |
| 660 | CodeUnits[len] = '\0';  |
| 661 | StringLength = len;  |
| 662 | }  |
| 663 |   |
| 664 | return StringLength;  |
| 665 | }  |
| 666 |   |
| 667 |   |
| 668 | int tString::SetUTF32(const char32_t* src, int srcLen)  |
| 669 | {  |
| 670 | if (!src || (srcLen == 0))  |
| 671 | {  |
| 672 | Clear();  |
| 673 | return 0;  |
| 674 | }  |
| 675 |   |
| 676 | // If srcLen < 0 it means ignore srcLen and assume src is null-terminated.  |
| 677 | if (srcLen < 0)  |
| 678 | {  |
| 679 | int len = tStd::tUTF8s(dst: nullptr, src);  |
| 680 | UpdateCapacity(capNeeded: len, preserve: false);  |
| 681 | StringLength = tStd::tUTF8s(dst: CodeUnits, src);  |
| 682 | }  |
| 683 | else  |
| 684 | {  |
| 685 | int len = tStd::tUTF8(dst: nullptr, src, srcLen);  |
| 686 | UpdateCapacity(capNeeded: len, preserve: false);  |
| 687 | tStd::tUTF8(dst: CodeUnits, src, srcLen);  |
| 688 | CodeUnits[len] = '\0';  |
| 689 | StringLength = len;  |
| 690 | }  |
| 691 |   |
| 692 | return StringLength;  |
| 693 | }  |
| 694 |   |
| 695 |   |
| 696 | void tString::UpdateCapacity(int capNeeded, bool preserve)  |
| 697 | {  |
| 698 | int grow = 0;  |
| 699 | if (capNeeded > 0)  |
| 700 | grow = (GrowParam >= 0) ? GrowParam : capNeeded*(-GrowParam);  |
| 701 |   |
| 702 | capNeeded += grow;  |
| 703 | if (capNeeded < MinCapacity)  |
| 704 | capNeeded = MinCapacity;  |
| 705 |   |
| 706 | if (CurrCapacity >= capNeeded)  |
| 707 | {  |
| 708 | if (!preserve)  |
| 709 | {  |
| 710 | StringLength = 0;  |
| 711 | CodeUnits[0] = '\0';  |
| 712 | }  |
| 713 | return;  |
| 714 | }  |
| 715 |   |
| 716 | char8_t* newUnits = new char8_t[capNeeded+1];  |
| 717 | if (preserve)  |
| 718 | {  |
| 719 | tAssert(capNeeded >= StringLength);  |
| 720 | if (StringLength > 0)  |
| 721 | tStd::tMemcpy(dest: newUnits, src: CodeUnits, numBytes: StringLength);  |
| 722 | }  |
| 723 | else  |
| 724 | {  |
| 725 | StringLength = 0;  |
| 726 | }  |
| 727 | newUnits[StringLength] = '\0';  |
| 728 |   |
| 729 | // CodeUnits mey be nullptr the first time.  |
| 730 | delete[] CodeUnits;  |
| 731 | CodeUnits = newUnits;  |
| 732 | CurrCapacity = capNeeded;  |
| 733 | }  |
| 734 |   |
| 735 |   |
| 736 | int tStd::tExplode(tList<tStringItem>& components, const tString& src, char divider)  |
| 737 | {  |
| 738 | tString source = src;  |
| 739 | int startCount = components.GetNumItems();  |
| 740 | while (source.FindChar(c: divider) != -1)  |
| 741 | {  |
| 742 | tString component = source.ExtractLeft(divider);  |
| 743 | components.Append(item: new tStringItem(component));  |
| 744 | }  |
| 745 |   |
| 746 | // If there's anything left in source we need to add it.  |
| 747 | if (!source.IsEmpty())  |
| 748 | components.Append(item: new tStringItem(source));  |
| 749 |   |
| 750 | return components.GetNumItems() - startCount;  |
| 751 | }  |
| 752 |   |
| 753 |   |
| 754 | int tStd::tExplode(tList<tStringItem>& components, const tString& src, const tString& divider)  |
| 755 | {  |
| 756 | // Well, this is a bit of a cheezy way of doing this. We just assume that ASCII character 31,  |
| 757 | // the "unit separator", is meant for this kind of thing and not otherwise present in the src string.  |
| 758 | tString source = src;  |
| 759 | char8_t sep[2];  |
| 760 | sep[0] = 31;  |
| 761 | sep[1] = 0;  |
| 762 | source.Replace(search: divider, replace: sep);  |
| 763 | return tExplode(components, src: source, divider: 31);  |
| 764 | }  |
| 765 | |