| 1 | // tName.h  |
| 2 | //  |
| 3 | // tName is similar to a tString but much simpler. It supports (currently) no string manipulation functions but it  |
| 4 | // is much faster for other operations -- in particular comparisons. Internally it stores both a UTF-8 code-unit array,  |
| 5 | // and a 64 bit hash. The hash allows the tName to be treated like an ID and gives it fast checks on equality operators.  |
| 6 | // A hash table is NOT used by this class -- with a 64 bit hash and a universe of 1000000 strings, the probability of a  |
| 7 | // collision is minuscule at around 2.7e-8 (assuming the hash function is good).  |
| 8 | //  |
| 9 | // The text in a tName is considered to be UTF-8 encoded. With UTF-8 encoding each character (code-point) may be encoded  |
| 10 | // by 1 or more code-units (a code-unit is 8 bits). The char8_t is used to represent a code-unit (as the C++ standard  |
| 11 | // encourages).  |
| 12 | //  |
| 13 | // Unlike tString, tName does NOT maintain a buffer with higher capacity than the string length or manage growing or  |
| 14 | // shrinking the buffer. What it does is allocate a buffer for the precise size required (number of code-units plus one  |
| 15 | // for the terminating null).  |
| 16 | //  |
| 17 | // @todo If the string is very small, it could be modified to use what would be reserved for the code-unit array  |
| 18 | // pointer as the data itself and avoids a heap allocation. When compiling as x64, you would get up to 7 code-units in  |
| 19 | // length (the eighth byte is for the null). When compiling for x86 or any target with 4 byte pointers this feature  |
| 20 | // would be disabled. It would make tName well-suited to store FourCCs.  |
| 21 | //  |
| 22 | // tName also maintains the length as a separate 4 byte integer. In total:  |
| 23 | // * 8 bytes of hash.  |
| 24 | // * 8 bytes (either pointer-to-data or data).  |
| 25 | // * 4 bytes length.  |
| 26 | //  |
| 27 | // A tName is _always_ null-terminated internally however you may store a string with more than one null ('\0') in it.  |
| 28 | //  |
| 29 | // Copyright (c) 2025 Tristan Grimmer.  |
| 30 | // Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby  |
| 31 | // granted, provided that the above copyright notice and this permission notice appear in all copies.  |
| 32 | //  |
| 33 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL  |
| 34 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,  |
| 35 | // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  |
| 36 | // AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR  |
| 37 | // PERFORMANCE OF THIS SOFTWARE.  |
| 38 |   |
| 39 | #pragma once  |
| 40 | #include "Foundation/tStandard.h"  |
| 41 | #include "Foundation/tString.h"  |
| 42 | #include "Foundation/tHash.h"  |
| 43 |   |
| 44 |   |
| 45 | struct tName  |
| 46 | {  |
| 47 | // Constructs an initially invalid tName. Invalid is considered different to the empty string.  |
| 48 | tName() { }  |
| 49 |   |
| 50 | // Copy cons.  |
| 51 | tName(const tName& src) { Set(src); }  |
| 52 |   |
| 53 | // Construct from tString. tString has a different concept of IsValid (it is the empty string). For this reason  |
| 54 | // after this constructor the tName will always be valid (IsSet true). An empty tString generates a valid tName set  |
| 55 | // to the empty string.  |
| 56 | tName(const tString& src) { Set(src); }  |
| 57 |   |
| 58 | // Creates a tName with a single character, Note the char type here. A char8_t can't be guaranteed to store a  |
| 59 | // unicode codepoint if the codepoint requires continuations in the UTF-8 encoding. So, here we support char only  |
| 60 | // which we use for ASCII characters since ASCII chars are guaranteed to _not_ need continuation units in UFT-8.  |
| 61 | tName(char c) { Set(c); }  |
| 62 |   |
| 63 | // These constructors expect the string pointers to be null-terminated. You can create a UTF-8 tName from an ASCII  |
| 64 | // string (char*) since all ASCII strings are valid UTF-8. Constructors taking char8_t, char16_t, or chat32_t  |
| 65 | // pointers assume the src is UTF encoded. If src is nullptr, an invalid tName is created.  |
| 66 | tName(const char* src) { Set(src); }  |
| 67 | tName(const char8_t* src) { Set(src); }  |
| 68 | tName(const char16_t* src) { Set(src); }  |
| 69 | tName(const char32_t* src) { Set(src); }  |
| 70 |   |
| 71 | // These constructors, that specify the input length, allow you to have more than one null be present in the tName.  |
| 72 | // The internal hash is computed on the number of code-units stored minus the internal terminating null.  |
| 73 | // For example, with 0 meaning '\0'.  |
| 74 | //  |
| 75 | // AB0CD0 with srcLen = 5 will be stored in the tName as AB0CD0  |
| 76 | // AB0CD0 with srcLen = 6 will be stored in the tName as AB0CD00  |
| 77 | // AB0CD with srcLen = 5 will be stored in the tName as AB0CD0  |
| 78 | // 0 with srcLen = 0 will be stored in the tName as 0. This is the empty string.  |
| 79 | // AB0CD0 with srcLen = 2 will be stored in the tName as AB0  |
| 80 | // AB0CD0 with srcLen = 3 will be stored in the tName as AB00  |
| 81 | //  |
| 82 | // In all cases the length returned by Length() will match the supplied srcLen. If src is nullptr or srcLen < 0, the  |
| 83 | // resulting tName is invalid.  |
| 84 | tName(const char* src, int srcLen) { Set(src, srcLen); }  |
| 85 | tName(const char8_t* src, int srcLen) { Set(src, srcLen); }  |
| 86 | tName(const char16_t* src, int srcLen) { Set(src, srcLen); }  |
| 87 | tName(const char32_t* src, int srcLen) { Set(src, srcLen); }  |
| 88 |   |
| 89 | // The tStringUTF constructors allow the src strings to have multiple nulls in them.  |
| 90 | tName(const tStringUTF16& src) { Set(src); }  |
| 91 | tName(const tStringUTF32& src) { Set(src); }  |
| 92 | virtual ~tName() { Clear(); }  |
| 93 |   |
| 94 | // The set functions always clear the current string and set it to the supplied src.  |
| 95 | void Set(const tName& src);  |
| 96 | void Set(const tString& src);  |
| 97 | void Set(char);  |
| 98 |   |
| 99 | // For Set functions that take in a pointer, if the src pointer is nullptr, the resulting tName is invalid.  |
| 100 | void Set(const char* src) { Set((const char8_t*)src); }  |
| 101 | void Set(const char8_t* src);  |
| 102 | void Set(const char16_t* src) { SetUTF16(src); }  |
| 103 | void Set(const char32_t* src) { SetUTF32(src); }  |
| 104 |   |
| 105 | // For Set functions that take in a pointer and a length (allowing multiple nulls), if src is nullptr or srcLen < 0,  |
| 106 | // the resulting tName is invalid. In all cases the length returned by Length() will match the supplied srcLen.  |
| 107 | void Set(const char* src, int srcLen) { Set(src: (const char8_t*)src, srcLen); }  |
| 108 | void Set(const char8_t* src, int srcLen);  |
| 109 | void Set(const char16_t* src, int srcLen);  |
| 110 | void Set(const char32_t* src, int srcLen);  |
| 111 |   |
| 112 | void Set(const tStringUTF16& src);  |
| 113 | void Set(const tStringUTF32& src);  |
| 114 |   |
| 115 | void Clear() /* Makes the string invalid. Frees any heap memory used. */ { CodeUnitsSize = 0; delete[] CodeUnits; CodeUnits = nullptr; Hash = 0; }  |
| 116 | void Empty() /* Makes the string a valid empty string. */ { Clear(); CodeUnitsSize = 1; CodeUnits = new char8_t[CodeUnitsSize]; CodeUnits[0] = '\0'; Hash = ComputeHash(); }  |
| 117 |   |
| 118 | // The length in char8_t's (code-units), not the display length (which is not that useful). Returns -1 if the tName  |
| 119 | // is not set (invalid). The tName may have multiple nulls in it. This is fine, it does not stop at the first one.  |
| 120 | int Length() const { return CodeUnitsSize - 1; }  |
| 121 |   |
| 122 | uint64 GetHash() const { return Hash; }  |
| 123 | uint64 GetID() const { return GetHash(); }  |
| 124 | uint64 ID() const { return GetHash(); }  |
| 125 | uint64 AsID() const { return GetHash(); }  |
| 126 |   |
| 127 | bool IsEmpty() const /* Returns true for the empty name (length 0). This is a valid name. */ { return (CodeUnitsSize == 1); } // The 1 accounts for the internal null terminator.  |
| 128 | bool IsValid() const /* Returns true for empty name "" (length 0) or any string with length >= 1 */ { return (CodeUnitsSize != 0); }  |
| 129 | bool IsInvalid() const /* Returns true for an invalid tName (length -1). */ { return (CodeUnitsSize == 0); }  |
| 130 |   |
| 131 | bool IsSet() const /* Synonym for IsValid. Also returns true for the empty name. */ { return IsValid(); }  |
| 132 | bool IsNotSet() const /* Synonym for IsInvalid. Only returns true for tNames that haven't been set. */ { return IsInvalid(); }  |
| 133 |   |
| 134 | // The IsEqual variants taking (only) pointers assume null-terminated inputs. Two empty names are considered equal.  |
| 135 | // If the input is nullptr (for functions taking pointers) it is not considered equal to an empty name. A nullptr is  |
| 136 | // treated as an unset/invalid name and is not equal to anything (including another invalid name). For variants  |
| 137 | // taking pointers and a length, all characters are checked (multiple null chars supported). If strLen < 0 the input  |
| 138 | // is treated as invalid and equality is guaranteed false. For variants taking no length, the name is considered  |
| 139 | // equal if the characters match up to the first null in the tName (even if there are more of them internally). For  |
| 140 | // the IsEqual that takes in another tName, the comparisons are very fast as only the hash is compared.  |
| 141 | bool IsEqual(const tName& nam) const /* Fast. Compares hashes. */ { if (IsInvalid() || nam.IsInvalid()) return false; return (Hash == nam.Hash); }  |
| 142 | bool IsEqual(const char* str) const /* A nullptr str is treated as an invalid string. */ { return IsEqual(str, strLen: str ? tStd::tStrlen(s: str) : -1); }  |
| 143 | bool IsEqual(const char8_t* str) const { return IsEqual(str, strLen: str ? tStd::tStrlen(s: str) : -1); }  |
| 144 | bool IsEqual(const char* str, int strLen) const /* strLen = 0 and non-null str is the empty string. */ { return IsEqual(str: (const char8_t*)str, strLen); }  |
| 145 | bool IsEqual(const char8_t* str, int strLen) const; /* Defined inline below. */  |
| 146 |   |
| 147 | // Appends supplied suffix name to this name. Handles the full length of suffix -- including multiple nulls if there  |
| 148 | // are any.  |
| 149 | tName& Append(const tName& suffix);  |
| 150 | tName& operator=(const tName& nam) { Set(nam); return *this; }  |
| 151 |   |
| 152 | // These are not particulary fast, but they are useful if you just want to construct a concatenated tName and not  |
| 153 | // modify it afterwards.  |
| 154 | friend tName operator+(const tName& prefix, const tName& suffix);  |
| 155 | tName& operator+=(const tName& suffix) { return Append(suffix); }  |
| 156 |   |
| 157 | // These allow for implicit conversion to a UTF-8 code-unit pointer. By not including implicit casts to const char*  |
| 158 | // we are encouraging further proper use of char8_t. You can either make the function you are calling take the  |
| 159 | // proper UTF-* type, or explicitly call Chr() or Txt() to get an old char-based pointer.  |
| 160 | operator const char8_t*() { return CodeUnits; }  |
| 161 | operator const char8_t*() const { return CodeUnits; }  |
| 162 |   |
| 163 | // The array index operator may be somewhat meaningless if there is a continuation at the index. It is assumed you  |
| 164 | // know what you're doing. The returned type of char is meant to emphasize that the returned value should be  |
| 165 | // interpreted as an ASCII char since char8_t are what is used in UTF-8 continuations. This also allows the result  |
| 166 | // to be used with the char-constructor of another string if desired.  |
| 167 | char& operator[](int i) { return ((char*)CodeUnits)[i]; }  |
| 168 |   |
| 169 | // These return the fast 32 bit hash of the string data (code units). They take into account the full  |
| 170 | // represented string -- not just up to the first null. That is, they use StringLength as the data-set size.  |
| 171 | explicit operator uint32();  |
| 172 | explicit operator uint32() const;  |
| 173 |   |
| 174 | // Similar to above but return the 64 bit hash (not a fast version).  |
| 175 | explicit operator uint64() { return GetHash(); }  |
| 176 | explicit operator uint64() const { return GetHash(); }  |
| 177 |   |
| 178 | // Accesses the raw UTF-8 codeunits represented by the 'official' unsigned UTF-8 character datatype char8_t. An  |
| 179 | // unset (invalid) tName will return nullptr. Charz, additionally, will return nullptr for tNames that are the empty  |
| 180 | // string "".  |
| 181 | char8_t* Text() { return CodeUnits; }  |
| 182 | const char8_t* Chars() const { return CodeUnits; }  |
| 183 | const char8_t* Charz() const /* Like Chars() but returns nullptr if the name is empty, not a pointer to "". */ { return IsEmpty() ? nullptr : CodeUnits; }  |
| 184 | char8_t* Units() const /* Unicode naming. Code 'units'. */ { return CodeUnits; }  |
| 185 |   |
| 186 | // Many other functions and libraries that are UTF-8 compliant do not yet (and may never) use the proper char8_t  |
| 187 | // type and use char* and const char*. These functions allow you to retrieve the tName using the char type. You can  |
| 188 | // also use these with tPrintf and %s. These are synonyms of the above 4 calls.  |
| 189 | char* Txt() { return (char*)CodeUnits; }  |
| 190 | const char* Chr() const { return (const char*)CodeUnits; }  |
| 191 | const char* Chz() const /* Like Chr() but returns nullptr if the string is empty, not a pointer to "". */ { return IsEmpty() ? nullptr : (const char*)CodeUnits; }  |
| 192 | char8_t* Pod() const /* Plain Old Data */ { return CodeUnits; }  |
| 193 |   |
| 194 | // The GetAs functions consider the contents of the current tName up to the first null encountered. See comment  |
| 195 | // for tStrtoiT in tStandard.h for format requirements. The summary is that if base is -1, the function looks one of  |
| 196 | // the following prefixes in the string, defaulting to base 10 if none found. For invalid names 0 is returned.  |
| 197 | //  |
| 198 | // Base 16 prefixes: x X 0x 0X #  |
| 199 | // Base 10 prefixes: d D 0d 0D  |
| 200 | // Base 8 prefixes: o O 0o 0O @  |
| 201 | // Base 2 prefixes: b B 0b 0B  |
| 202 | int GetAsInt(int base = -1) const { return GetAsInt32(base); }  |
| 203 | int32 GetAsInt32(int base = -1) const { return tStd::tStrtoi32(s: CodeUnits, base); }  |
| 204 | int64 GetAsInt64(int base = -1) const { return tStd::tStrtoi64(s: CodeUnits, base); }  |
| 205 | uint GetAsUInt(int base = -1) const { return GetAsUInt32(base); }  |
| 206 | uint32 GetAsUInt32(int base = -1) const { return tStd::tStrtoui32(s: CodeUnits, base); }  |
| 207 | uint64 GetAsUInt64(int base = -1) const { return tStd::tStrtoui64(s: CodeUnits, base); }  |
| 208 |   |
| 209 | // Case insensitive. Interprets "true", "t", "yes", "y", "on", "enable", "enabled", "1", "+", and strings that  |
| 210 | // represent non-zero integers as boolean true. Otherwise false.  |
| 211 | bool GetAsBool() const { return tStd::tStrtob(s: CodeUnits); }  |
| 212 |   |
| 213 | // Base 10 interpretation only.  |
| 214 | float GetAsFloat() const { return tStd::tStrtof(s: CodeUnits); }  |
| 215 | double GetAsDouble() const { return tStd::tStrtod(s: CodeUnits); }  |
| 216 |   |
| 217 | // Shorter synonyms.  |
| 218 | int AsInt(int base = -1) const { return GetAsInt(base); }  |
| 219 | int AsInt32(int base = -1) const { return GetAsInt32(base); }  |
| 220 | int64 AsInt64(int base = -1) const { return GetAsInt64(base); }  |
| 221 | uint AsUInt(int base = -1) const { return GetAsUInt(base); }  |
| 222 | uint AsUInt32(int base = -1) const { return GetAsUInt32(base); }  |
| 223 | uint64 AsUInt64(int base = -1) const { return GetAsUInt64(base); }  |
| 224 | bool AsBool() const { return GetAsBool(); }  |
| 225 | float AsFloat() const { return GetAsFloat(); }  |
| 226 | double AsDouble() const { return GetAsDouble(); }  |
| 227 |   |
| 228 | // Same as above but return false on any parse error instead of just returning 0. These return false if the tName is  |
| 229 | // invalid. @todo Float and double versions.  |
| 230 | bool ToInt(int& v, int base = -1) const { return ToInt32(v, base); }  |
| 231 | bool ToInt32(int32& v, int base = -1) const { return tStd::tStrtoi32(v, s: CodeUnits, base); }  |
| 232 | bool ToInt64(int64& v, int base = -1) const { return tStd::tStrtoi64(v, s: CodeUnits, base); }  |
| 233 | bool ToUInt(uint& v, int base = -1) const { return ToUInt32(v, base); }  |
| 234 | bool ToUInt32(uint32& v, int base = -1) const { return tStd::tStrtoui32(v, s: CodeUnits, base); }  |
| 235 | bool ToUInt64(uint64& v, int base = -1) const { return tStd::tStrtoui64(v, s: CodeUnits, base); }  |
| 236 |   |
| 237 | // tName UTF encoding/decoding functions. tName is encoded in UTF-8. These functions allow you to convert from tName  |
| 238 | // to UTF-16/32 arrays. If dst is nullptr returns the number of charN codeunits needed. If incNullTerminator is  |
| 239 | // false the number needed will be one fewer. If dst is valid, writes the codeunits to dst and returns number of  |
| 240 | // charNN codeunits written. If tName is invalid OR empty, 0 is returned and dst (if provided) is not modified.  |
| 241 | int GetUTF16(char16_t* dst, bool incNullTerminator = true) const;  |
| 242 | int GetUTF32(char32_t* dst, bool incNullTerminator = true) const;  |
| 243 |   |
| 244 | // Sets the tName from a UTF codeunit array. If srcLen is -1 assumes supplied array is null-terminated, otherwise  |
| 245 | // specify how long it is. Returns new length (not including null terminator) of the tName. If either src is  |
| 246 | // nullptr or srcLen is 0, the result is an invalid tName and 0 is returned.  |
| 247 | int SetUTF16(const char16_t* src, int srcLen = -1);  |
| 248 | int SetUTF32(const char32_t* src, int srcLen = -1);  |
| 249 |   |
| 250 | protected:  |
| 251 | // Assumes CodeUnits and CodeUnitsSize are set appropriately. If CodeUnitsSize is 0 (implying CodeUnits is nullptr)  |
| 252 | // the tName is invalid and returns 0 for the hash. Note that the empty string does NOT get a 0 hash.  |
| 253 | uint64 ComputeHash() const;  |
| 254 |   |
| 255 | // By using the char8_t we are indicating the data is stored with UTF-8 encoding. Note that unlike char, a char8_t  |
| 256 | // is guaranteed to be unsigned, as well as a distinct type. In unicode spec for UTFn, these are called code-units.  |
| 257 | // With tNames the CodeUnits pointer (8 bytes) is directly used to store names of up to 7 code-units. The size  |
| 258 | // depends on whether you are compiling for 32 or 64 bit -- the pointer sizes are either 4 or 8 bytes.  |
| 259 | char8_t* CodeUnits = nullptr;  |
| 260 |   |
| 261 | uint64 Hash = 0;  |
| 262 |   |
| 263 | // The length of the CodeUnits array (not including the null terminator). For an invalid tName the Length is -1.  |
| 264 | int32 CodeUnitsSize = 0;  |
| 265 | };  |
| 266 |   |
| 267 |   |
| 268 | // Binary operator overloads should be outside the class so we can do things like if ("a" == b) where b is a tName.  |
| 269 | // Operators below that take char or char8_t pointers assume they are null-terminated.  |
| 270 | inline bool operator==(const tName& a, const tName& b) { return a.IsEqual(nam: b); }  |
| 271 | inline bool operator!=(const tName& a, const tName& b) { return !a.IsEqual(nam: b); }  |
| 272 | inline bool operator==(const tName& a, const char8_t* b) { return a.IsEqual(str: b); }  |
| 273 | inline bool operator!=(const tName& a, const char8_t* b) { return !a.IsEqual(str: b); }  |
| 274 | inline bool operator==(const char8_t* a, const tName& b) { return b.IsEqual(str: a); }  |
| 275 | inline bool operator!=(const char8_t* a, const tName& b) { return !b.IsEqual(str: a); }  |
| 276 | inline bool operator==(const char* a, const tName& b) { return b.IsEqual(str: a); }  |
| 277 | inline bool operator!=(const char* a, const tName& b) { return !b.IsEqual(str: a); }  |
| 278 |   |
| 279 |   |
| 280 | // The tNameItem class is just the tName class except they can be placed on tLists.  |
| 281 | struct tNameItem : public tLink<tNameItem>, public tName  |
| 282 | {  |
| 283 | public:  |
| 284 | tNameItem() : tName() { }  |
| 285 |   |
| 286 | // The tNameItem copy cons is missing because as an intrusive list-item it can only be on one list at a time.  |
| 287 | tNameItem(const tName& s) : tName(s) { }  |
| 288 | tNameItem(const tStringUTF16& s) : tName(s) { }  |
| 289 | tNameItem(const tStringUTF32& s) : tName(s) { }  |
| 290 | tNameItem(const char8_t* c) : tName(c) { }  |
| 291 | tNameItem(char c) : tName(c) { }  |
| 292 |   |
| 293 | // This call does NOT change the list that the tNameItem is on. The link remains unmodified.  |
| 294 | tNameItem& operator=(const tNameItem&);  |
| 295 | };  |
| 296 |   |
| 297 |   |
| 298 | // Implementation below this line.  |
| 299 |   |
| 300 |   |
| 301 | inline uint64 tName::ComputeHash() const  |
| 302 | {  |
| 303 | if (IsInvalid())  |
| 304 | return 0;  |
| 305 |   |
| 306 | // This call deals with Length=0 gracefully (empty string). It does not deref the data pointer in this case.  |
| 307 | uint64 hash = tHash::tHashData64(data: (const uint8*)CodeUnits, length: Length());  |
| 308 | if (hash == 0)  |
| 309 | hash = 0xFFFFFFFFFFFFFFFF;  |
| 310 |   |
| 311 | return hash;  |
| 312 | }  |
| 313 |   |
| 314 |   |
| 315 | inline tName::operator uint32()  |
| 316 | {  |
| 317 | if (IsInvalid())  |
| 318 | return 0;  |
| 319 |   |
| 320 | // This call deals with Length=0 gracefully (empty string). It does not deref the data pointer in this case.  |
| 321 | return tHash::tHashDataFast32(data: (const uint8*)CodeUnits, length: Length());  |
| 322 | }  |
| 323 |   |
| 324 |   |
| 325 | inline tName::operator uint32() const  |
| 326 | {  |
| 327 | if (IsInvalid())  |
| 328 | return 0;  |
| 329 |   |
| 330 | // This function deals with a Length of zero gracefully. It does not deref the data pointer in this case.  |
| 331 | return tHash::tHashDataFast32(data: (const uint8*)CodeUnits, length: Length());  |
| 332 | }  |
| 333 |   |
| 334 |   |
| 335 | inline void tName::Set(const tName& src)  |
| 336 | {  |
| 337 | if (this == &src)  |
| 338 | return;  |
| 339 |   |
| 340 | Clear();  |
| 341 | if (src.IsInvalid())  |
| 342 | return;  |
| 343 |   |
| 344 | CodeUnitsSize = src.CodeUnitsSize;  |
| 345 | CodeUnits = new char8_t[CodeUnitsSize];  |
| 346 | tStd::tMemcpy(dest: CodeUnits, src: src.CodeUnits, numBytes: CodeUnitsSize);  |
| 347 | Hash = src.Hash;  |
| 348 | }  |
| 349 |   |
| 350 |   |
| 351 | inline void tName::Set(const tString& src)  |
| 352 | {  |
| 353 | // We happen to know that tString internally always has a trailing null so we can leverage that and add 1 to the  |
| 354 | // length.  |
| 355 | if (src.IsEmpty())  |
| 356 | {  |
| 357 | Empty();  |
| 358 | return;  |
| 359 | }  |
| 360 |   |
| 361 | Clear();  |
| 362 | CodeUnitsSize = src.Length() + 1;  |
| 363 | CodeUnits = new char8_t[CodeUnitsSize];  |
| 364 | tStd::tMemcpy(dest: CodeUnits, src: src.Units(), numBytes: CodeUnitsSize);  |
| 365 | Hash = ComputeHash();  |
| 366 | }  |
| 367 |   |
| 368 |   |
| 369 | inline void tName::Set(char c)  |
| 370 | {  |
| 371 | Clear();  |
| 372 | CodeUnitsSize = 2;  |
| 373 | CodeUnits = new char8_t[CodeUnitsSize];  |
| 374 | CodeUnits[0] = c;  |
| 375 | CodeUnits[1] = '\0';  |
| 376 | Hash = ComputeHash();  |
| 377 | }  |
| 378 |   |
| 379 |   |
| 380 | inline void tName::Set(const char8_t* src)  |
| 381 | {  |
| 382 | Clear();  |
| 383 | if (!src)  |
| 384 | return;  |
| 385 |   |
| 386 | CodeUnitsSize = tStd::tStrlen(s: src) + 1;  |
| 387 | CodeUnits = new char8_t[CodeUnitsSize];  |
| 388 | tStd::tMemcpy(dest: CodeUnits, src, numBytes: CodeUnitsSize); // Includes the terminating null.  |
| 389 | Hash = ComputeHash();  |
| 390 | }  |
| 391 |   |
| 392 |   |
| 393 | inline void tName::Set(const char8_t* src, int srcLen)  |
| 394 | {  |
| 395 | Clear();  |
| 396 | if (!src || (srcLen < 0))  |
| 397 | return;  |
| 398 |   |
| 399 | CodeUnitsSize = srcLen + 1;  |
| 400 | CodeUnits = new char8_t[CodeUnitsSize];  |
| 401 | if (srcLen > 0)  |
| 402 | tStd::tMemcpy(dest: CodeUnits, src, numBytes: srcLen);  |
| 403 | CodeUnits[srcLen] = '\0';  |
| 404 | Hash = ComputeHash();  |
| 405 | }  |
| 406 |   |
| 407 |   |
| 408 | inline void tName::Set(const char16_t* src, int srcLen)  |
| 409 | {  |
| 410 | Clear();  |
| 411 | if (!src || (srcLen < 0))  |
| 412 | return;  |
| 413 | SetUTF16(src, srcLen);  |
| 414 | }  |
| 415 |   |
| 416 |   |
| 417 | inline void tName::Set(const char32_t* src, int srcLen)  |
| 418 | {  |
| 419 | Clear();  |
| 420 | if (!src || (srcLen < 0))  |
| 421 | return;  |
| 422 | SetUTF32(src, srcLen);  |
| 423 | }  |
| 424 |   |
| 425 |   |
| 426 | inline void tName::Set(const tStringUTF16& src)  |
| 427 | {  |
| 428 | SetUTF16(src: src.Units(), srcLen: src.Length());  |
| 429 | }  |
| 430 |   |
| 431 |   |
| 432 | inline void tName::Set(const tStringUTF32& src)  |
| 433 | {  |
| 434 | SetUTF32(src: src.Units(), srcLen: src.Length());  |
| 435 | }  |
| 436 |   |
| 437 |   |
| 438 | inline bool tName::IsEqual(const char8_t* str, int strLen) const  |
| 439 | {  |
| 440 | if (IsInvalid() || !str || (strLen < 0) || (Length() != strLen))  |
| 441 | return false;  |
| 442 |   |
| 443 | if ((strLen == 0) && IsEmpty())  |
| 444 | return true;  |
| 445 |   |
| 446 | return !tStd::tMemcmp(a: CodeUnits, b: str, numBytes: strLen);  |
| 447 | }  |
| 448 |   |
| 449 |   |
| 450 | inline tName& tName::Append(const tName& suffix)  |
| 451 | {  |
| 452 | // Empty is guaranteed to have CodeUnitsSize == 1, a single null character.  |
| 453 | if (suffix.IsInvalid() || suffix.IsEmpty())  |
| 454 | return *this;  |
| 455 |   |
| 456 | if (IsInvalid() || IsEmpty())  |
| 457 | {  |
| 458 | Set(suffix);  |
| 459 | return *this;  |
| 460 | }  |
| 461 |   |
| 462 | int thisSize = Length();  |
| 463 | int thatSize = suffix.Length();  |
| 464 | int newSize = thisSize + thatSize + 1;  |
| 465 |   |
| 466 | char8_t* newUnits = new char8_t[newSize];  |
| 467 | tStd::tMemcpy(dest: newUnits, src: CodeUnits, numBytes: thisSize);  |
| 468 |   |
| 469 | // The plus one is so we get the terminating null with the memcpy.  |
| 470 | tStd::tMemcpy(dest: newUnits+thisSize, src: suffix.CodeUnits, numBytes: thatSize+1);  |
| 471 | Clear();  |
| 472 |   |
| 473 | CodeUnits = newUnits;  |
| 474 | CodeUnitsSize = newSize;  |
| 475 | Hash = ComputeHash();  |
| 476 | return *this;  |
| 477 | }  |
| 478 |   |
| 479 |   |
| 480 | inline tName operator+(const tName& prefix, const tName& suffix)  |
| 481 | {  |
| 482 | tName concatenated(prefix);  |
| 483 | return concatenated.Append(suffix);  |
| 484 | }  |
| 485 |   |
| 486 |   |
| 487 | inline int tName::GetUTF16(char16_t* dst, bool incNullTerminator) const  |
| 488 | {  |
| 489 | if (IsInvalid() || IsEmpty())  |
| 490 | return 0;  |
| 491 |   |
| 492 | if (!dst)  |
| 493 | return tStd::tUTF16(dst: nullptr, src: CodeUnits, srcLen: Length()) + (incNullTerminator ? 1 : 0);  |
| 494 |   |
| 495 | int numUnitsWritten = tStd::tUTF16(dst, src: CodeUnits, srcLen: Length());  |
| 496 | if (incNullTerminator)  |
| 497 | {  |
| 498 | dst[numUnitsWritten] = 0;  |
| 499 | numUnitsWritten++;  |
| 500 | }  |
| 501 |   |
| 502 | return numUnitsWritten;  |
| 503 | }  |
| 504 |   |
| 505 |   |
| 506 | inline int tName::GetUTF32(char32_t* dst, bool incNullTerminator) const  |
| 507 | {  |
| 508 | if (IsInvalid() || IsEmpty())  |
| 509 | return 0;  |
| 510 |   |
| 511 | if (!dst)  |
| 512 | return tStd::tUTF32(dst: nullptr, src: CodeUnits, srcLen: Length()) + (incNullTerminator ? 1 : 0);  |
| 513 |   |
| 514 | int numUnitsWritten = tStd::tUTF32(dst, src: CodeUnits, srcLen: Length());  |
| 515 | if (incNullTerminator)  |
| 516 | {  |
| 517 | dst[numUnitsWritten] = 0;  |
| 518 | numUnitsWritten++;  |
| 519 | }  |
| 520 |   |
| 521 | return numUnitsWritten;  |
| 522 | }  |
| 523 |   |
| 524 |   |
| 525 | inline int tName::SetUTF16(const char16_t* src, int srcLen)  |
| 526 | {  |
| 527 | Clear();  |
| 528 | if (!src || (srcLen == 0))  |
| 529 | return 0;  |
| 530 |   |
| 531 | // If srcLen < 0 it means ignore srcLen and assume src is null-terminated.  |
| 532 | if (srcLen < 0)  |
| 533 | {  |
| 534 | CodeUnitsSize = tStd::tUTF8s(dst: nullptr, src) + 1; // +1 for the internal null termination.  |
| 535 | CodeUnits = new char8_t[CodeUnitsSize];  |
| 536 | tStd::tUTF8s(dst: CodeUnits, src); // Writes the null terminator.  |
| 537 | }  |
| 538 | else  |
| 539 | {  |
| 540 | int len = tStd::tUTF8(dst: nullptr, src, srcLen);  |
| 541 | CodeUnitsSize = len + 1; // +1 for the internal null termination.  |
| 542 | CodeUnits = new char8_t[CodeUnitsSize];  |
| 543 | tStd::tUTF8(dst: CodeUnits, src, srcLen);  |
| 544 | CodeUnits[len] = '\0';  |
| 545 | }  |
| 546 |   |
| 547 | Hash = ComputeHash();  |
| 548 | return Length();  |
| 549 | }  |
| 550 |   |
| 551 |   |
| 552 | inline int tName::SetUTF32(const char32_t* src, int srcLen)  |
| 553 | {  |
| 554 | Clear();  |
| 555 | if (!src || (srcLen == 0))  |
| 556 | return 0;  |
| 557 |   |
| 558 | // If srcLen < 0 it means ignore srcLen and assume src is null-terminated.  |
| 559 | if (srcLen < 0)  |
| 560 | {  |
| 561 | CodeUnitsSize = tStd::tUTF8s(dst: nullptr, src) + 1; // +1 for the internal null termination.  |
| 562 | CodeUnits = new char8_t[CodeUnitsSize];  |
| 563 | tStd::tUTF8s(dst: CodeUnits, src);  |
| 564 | }  |
| 565 | else  |
| 566 | {  |
| 567 | int len = tStd::tUTF8(dst: nullptr, src, srcLen);  |
| 568 | CodeUnitsSize = len + 1; // +1 for the internal null termination.  |
| 569 | tStd::tUTF8(dst: CodeUnits, src, srcLen);  |
| 570 | CodeUnits[len] = '\0';  |
| 571 | }  |
| 572 |   |
| 573 | Hash = ComputeHash();  |
| 574 | return Length();  |
| 575 | }  |
| 576 |   |
| 577 |   |
| 578 | inline tNameItem& tNameItem::operator=(const tNameItem& src)  |
| 579 | {  |
| 580 | if (this == &src)  |
| 581 | return *this;  |
| 582 |   |
| 583 | Clear();  |
| 584 | if (src.IsInvalid())  |
| 585 | return *this;  |
| 586 |   |
| 587 | CodeUnitsSize = src.CodeUnitsSize;  |
| 588 | CodeUnits = new char8_t[CodeUnitsSize];  |
| 589 | tStd::tMemcpy(dest: CodeUnits, src: src.CodeUnits, numBytes: CodeUnitsSize);  |
| 590 | Hash = src.Hash;  |
| 591 |   |
| 592 | return *this;  |
| 593 | }  |
| 594 | |