| 1 | // tHash.h  |
| 2 | //  |
| 3 | // Hash functions for various kinds of data. Use the 64 or 256 bit versions if you want to avoid collisions. There are two  |
| 4 | // 32 bit hash functions. A fast version used for most string hashes, and a slower but better version. All functions  |
| 5 | // return the supplied initialization vector(iv) if there was no data to hash. To compute a single hash from multiple  |
| 6 | // data sources like strings, binary data, or files, you do NOT need to consolidate all the source data into one buffer  |
| 7 | // first. Just set the initialization vector to the hash computed from the previous step.  |
| 8 | //  |
| 9 | // Copyright (c) 2004-2006, 2015, 2017, 2019, 2021, 2023 Tristan Grimmer.  |
| 10 | // Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby  |
| 11 | // granted, provided that the above copyright notice and this permission notice appear in all copies.  |
| 12 | //  |
| 13 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL  |
| 14 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,  |
| 15 | // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  |
| 16 | // AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR  |
| 17 | // PERFORMANCE OF THIS SOFTWARE.  |
| 18 | //  |
| 19 | // The SHA-256 implementation is taken from https://github.com/amosnier/sha-2. All functions and constants in the  |
| 20 | // tHash_SHA256 namespace should be considered unencumbered as per Alain Mosnier's licence file:  |
| 21 | //  |
| 22 | // This is free and unencumbered software released into the public domain.  |
| 23 | //  |
| 24 | // Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form  |
| 25 | // or as a compiled binary, for any purpose, commercial or non-commercial, and by any means.  |
| 26 | //  |
| 27 | // In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright  |
| 28 | // interest in the software to the public domain. We make this dedication for the benefit of the public at large and to  |
| 29 | // the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in  |
| 30 | // perpetuity of all present and future rights to this software under copyright law.  |
| 31 | //  |
| 32 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE  |
| 33 | // WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE  |
| 34 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  |
| 35 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  |
| 36 | //  |
| 37 | // For more information, please refer to <http://unlicense.org>  |
| 38 |   |
| 39 | #pragma once  |
| 40 | #include <Foundation/tStandard.h>  |
| 41 | #include <Foundation/tString.h>  |
| 42 | #include <Foundation/tFixInt.h>  |
| 43 | namespace tHash  |
| 44 | {  |
| 45 |   |
| 46 |   |
// Names the hash algorithms declared in this header. Enumerator order determines the underlying values.
enum class tHashAlgorithm
{
	Fast32,			// Fast 32 bit hash (the tHash*Fast32 functions).
	Jenkins32,		// Robert J. Jenkins Jr. 32 bit hash (the tHash*32 functions).
	Jenkins64,		// Robert J. Jenkins Jr. 64 bit hash (the tHash*64 functions).
	MD5,			// MD5 is 128 bit. Do not use MD5 for cryptographic purposes.
	Jenkins256,		// Robert J. Jenkins Jr. 256 bit hash (the tHash*256 functions).
	SHA256			// SHA-256 (the tHash*SHA256 functions).
};
| 56 |   |
| 57 |   |
| 58 | // These initialization vectors should not be modified unless you want to break a lot of code. The zero 32bit one is  |
| 59 | // responsible for things like stringhash returning zero on empty strings. The SHA256 function uses its own default IV.  |
| 60 | const uint32 HashIV32 = 0;  |
| 61 | const uint64 HashIV64 = 0;  |
| 62 | const tuint128 HashIV128 = 0;  |
| 63 | const tuint256 HashIV256 = 0;  |
| 64 |   |
| 65 | // Normally the initial iv should not be modified for these fast hash functions. The fast hash functions are the only  |
| 66 | // ones that store the entire state in the hash. Allows you to concatenate hashes of separate strings/data-sequences  |
| 67 | // together by passing the hash of the previous call into the function again. This way you don't need to create a  |
| 68 | // concatenated string/data-set to get its hash, you simply chain multiple calls together. The fast hash functions  |
| 69 | // are the only ones that guarantee the same hash value whether computed in parts or as a single data-set.  |
| 70 | uint32 tHashDataFast32(const uint8* data, int length, uint32 iv = HashIV32);  |
| 71 | uint32 tHashStringFast32(const char*, uint32 iv = HashIV32);  |
| 72 | uint32 tHashStringFast32(const char8_t*, uint32 iv = HashIV32);  |
| 73 | uint32 tHashStringFast32(const tString&, uint32 iv = HashIV32);  |
| 74 | uint32 tHashString(const char*);  |
| 75 | uint32 tHashString(const char8_t*);  |
| 76 |   |
| 77 | // The CT (Compile Time) variant uses the fast-hash algorithm. It is super handy for use in the 'case' part of switch  |
| 78 | // statements or any time you know the string literal explicitly. In these cases the compiler can do all the work.  |
| 79 | constexpr uint32 tHashCT(const char*, uint32 iv = HashIV32);  |
| 80 | constexpr uint32 tHashCT(const char8_t*, uint32 iv = HashIV32);  |
| 81 |   |
| 82 | // The HashData32/64/128/256 and variants do _not_ guarantee the same hash value if they are chained together compared  |
| 83 | // to the hash of the same data computed as a single block. This is because the entire state is not stored in the hash  |
| 84 | // itself since these are much better hash functions than the Fast32 versions. Chaining is still useful as uniqueness is  |
| 85 | // still guaranteed and if any data changes in any of the sources the end result will vary. Chaining is performed in the  |
| 86 | // same manner as HashDataFast32. Algorithms in use for below functions:  |
| 87 | //  |
| 88 | // tHash*32: Robert J. Jenkins Jr., 1997. See http://burtleburtle.net/bob/hash/evahash.html  |
| 89 | // tHash*64: Robert J. Jenkins Jr., 1997. See http://burtleburtle.net/bob/hash/evahash.html  |
| 90 | // tHash*128: MD5. Not cryptographically secure any more.  |
| 91 | // tHash*256: Robert J. Jenkins Jr., 1997. See http://burtleburtle.net/bob/hash/evahash.html  |
| 92 | //  |
| 93 | // If you want SHA-256 call it directly.  |
| 94 | // If you want MD5 call it directly with the default default MD5 initialization vector.  |
| 95 | uint32 tHashData32(const uint8* data, int length, uint32 iv = HashIV32);  |
| 96 | uint32 tHashString32(const char*, uint32 iv = HashIV32);  |
| 97 | uint32 tHashString32(const tString&, uint32 iv = HashIV32);  |
| 98 |   |
| 99 | uint64 tHashData64(const uint8* data, int length, uint64 iv = HashIV64);  |
| 100 | uint64 tHashString64(const char*, uint64 iv = HashIV64);  |
| 101 | uint64 tHashString64(const tString&, uint64 iv = HashIV64);  |
| 102 |   |
| 103 | tuint128 tHashData128(const uint8* data, int length, tuint128 iv = HashIV128);  |
| 104 | tuint128 tHashString128(const char*, tuint128 iv = HashIV128);  |
| 105 | tuint128 tHashString128(const tString&, tuint128 iv = HashIV128);  |
| 106 |   |
| 107 | tuint256 tHashData256(const uint8* data, int length, tuint256 iv = HashIV256);  |
| 108 | tuint256 tHashString256(const char*, tuint256 iv = HashIV256);  |
| 109 | tuint256 tHashString256(const tString&, tuint256 iv = HashIV256);  |
| 110 |   |
| 111 | // MD5 is _not_ to be used for cryptographic purposes. The MD5 functions are used by the HashData128 functions but with  |
| 112 | // a non-standard default iv. The direct MD5 hash functions below use a very specific default initialization vector.   |
| 113 | // We allow it to be specified so you can still esily chain using the previous hash as the next iv.  |
| 114 | const tuint128 HashIVMD5("67452301" "efcdab89" "98badcfe" "10325476" , 16);  |
| 115 | tuint128 tHashDataMD5(const uint8* data, int length, tuint128 iv = HashIVMD5);  |
| 116 | tuint128 tHashStringMD5(const char*, tuint128 iv = HashIVMD5);  |
| 117 | tuint128 tHashStringMD5(const tString&, tuint128 iv = HashIVMD5);  |
| 118 |   |
| 119 | // For the SHA256 functions there is a very specific initialization vector supplied. This is defined in  |
| 120 | // https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf  |
| 121 | // We allow it to be specified so you can still chain using the previous hash.  |
| 122 | const tuint256 HashIVSHA256("6a09e667" "bb67ae85" "3c6ef372" "a54ff53a" "510e527f" "9b05688c" "1f83d9ab" "5be0cd19" , 16);  |
| 123 | tuint256 tHashDataSHA256(const uint8* data, int length, tuint256 iv = HashIVSHA256);  |
| 124 | tuint256 tHashStringSHA256(const char*, tuint256 iv = HashIVSHA256);  |
| 125 | tuint256 tHashStringSHA256(const tString&, tuint256 iv = HashIVSHA256);  |
| 126 |   |
| 127 |   |
| 128 | // Implementation below this line.  |
| 129 |   |
| 130 |   |
| 131 | inline uint32 tHashStringFast32(const char* string, uint32 iv)  |
| 132 | {  |
| 133 | if (!string)  |
| 134 | return 0;  |
| 135 | return tHashDataFast32(data: (uint8*)string, length: tStd::tStrlen(s: string), iv);  |
| 136 | }  |
| 137 |   |
| 138 |   |
| 139 | inline uint32 tHashStringFast32(const char8_t* string, uint32 iv)  |
| 140 | {  |
| 141 | return tHashStringFast32(string: (const char*)string, iv);  |
| 142 | }  |
| 143 |   |
| 144 |   |
| 145 | // This (compile-time) constant expression relies on the odometer-style looping of unsigned ints to compute the hash.  |
| 146 | // Since it's inline, you may need to pragma warning(disable:4307), which warns of const integral overflow.  |
| 147 | // For the below functions, char* is for ASCII strings, char8_t is for UTF-8. The UTF-8 versions fallback to the regular  |
| 148 | // versions as these work fine for UTF-8 strings.  |
| 149 | inline constexpr uint32 tHashCT(const char* s, uint32 hash) { return *s ? tHashCT(s: s + 1, hash: hash + (hash << 5) + uint8(*s)) : hash; }  |
| 150 | inline constexpr uint32 tHashCT(const char8_t* s, uint32 hash) { return *s ? tHashCT(s: s + 1, hash: hash + (hash << 5) + uint8(*s)) : hash; }  |
| 151 | inline uint32 tHashStringFast32(const tString& s, uint32 iv) { return tHashStringFast32(string: s.Chars(), iv); }  |
| 152 | inline uint32 tHashString(const char* s) { return tHashStringFast32(string: s); }  |
| 153 | inline uint32 tHashString(const char8_t* s) { return tHashStringFast32(string: s); }  |
| 154 | inline uint32 tHashString32(const char* string, uint32 iv) { return tHashData32(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); }  |
| 155 | inline uint32 tHashString32(const char8_t* string, uint32 iv) { return tHashData32(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); }  |
| 156 | inline uint32 tHashString32(const tString& s, uint32 iv) { return tHashString32(string: s.Chars(), iv); }  |
| 157 | inline uint64 tHashString64(const char* string, uint64 iv) { return tHashData64(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); }  |
| 158 | inline uint64 tHashString64(const char8_t* string, uint64 iv) { return tHashData64(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); }  |
| 159 | inline uint64 tHashString64(const tString& s, uint64 iv) { return tHashString64(string: s.Chars(), iv); }  |
| 160 | inline tuint128 tHashStringMD5(const char* string, tuint128 iv) { return tHashDataMD5(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); }  |
| 161 | inline tuint128 tHashStringMD5(const char8_t* string, tuint128 iv) { return tHashDataMD5(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); }  |
| 162 | inline tuint128 tHashStringMD5(const tString& s, tuint128 iv) { return tHashStringMD5(string: s.Chars(), iv); }  |
| 163 | inline tuint128 tHashData128(const uint8* data, int length, tuint128 iv) { return tHashDataMD5(data, length, iv); }  |
| 164 | inline tuint128 tHashString128(const char* string, tuint128 iv) { return tHashDataMD5(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); }  |
| 165 | inline tuint128 tHashString128(const char8_t* string, tuint128 iv) { return tHashDataMD5(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); }  |
| 166 | inline tuint128 tHashString128(const tString& s, tuint128 iv) { return tHashStringMD5(string: s.Chars(), iv); }  |
| 167 | inline tuint256 tHashString256(const char* string, tuint256 iv) { return tHashData256(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); }  |
| 168 | inline tuint256 tHashString256(const char8_t* string, tuint256 iv) { return tHashData256(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); }  |
| 169 | inline tuint256 tHashString256(const tString& s, tuint256 iv) { return tHashString256(string: s.Chars(), iv); }  |
| 170 | inline tuint256 tHashStringSHA256(const char* string, tuint256 iv) { return tHashDataSHA256(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); }  |
| 171 | inline tuint256 tHashStringSHA256(const char8_t* string, tuint256 iv) { return tHashDataSHA256(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); }  |
| 172 | inline tuint256 tHashStringSHA256(const tString& s, tuint256 iv) { return tHashStringSHA256(string: s.Chars(), iv); }  |
| 173 |   |
| 174 |   |
| 175 | }  |
| 176 | |