1// tHash.h 
2// 
// Hash functions for various kinds of data. Use 64 or 256 bit versions if you want to avoid collisions. There are two
4// 32 bit hash functions. A fast version used for most string hashes, and a slower but better version. All functions 
5// return the supplied initialization vector(iv) if there was no data to hash. To compute a single hash from multiple 
6// data sources like strings, binary data, or files, you do NOT need to consolidate all the source data into one buffer 
7// first. Just set the initialization vector to the hash computed from the previous step. 
8// 
9// Copyright (c) 2004-2006, 2015, 2017, 2019, 2021, 2023 Tristan Grimmer. 
10// Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby 
11// granted, provided that the above copyright notice and this permission notice appear in all copies. 
12// 
13// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL 
14// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 
15// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 
16// AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 
17// PERFORMANCE OF THIS SOFTWARE. 
18// 
19// The SHA-256 implementation is taken from https://github.com/amosnier/sha-2. All functions and constants in the 
20// tHash_SHA256 namespace should be considered unencumbered as per Alain Mosnier's licence file: 
21// 
22// This is free and unencumbered software released into the public domain. 
23// 
24// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form 
25// or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. 
26// 
27// In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright 
28// interest in the software to the public domain. We make this dedication for the benefit of the public at large and to 
29// the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in 
30// perpetuity of all present and future rights to this software under copyright law. 
31// 
32// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 
33// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE 
34// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
35// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
36// 
37// For more information, please refer to <http://unlicense.org> 
38 
39#pragma once 
40#include <Foundation/tStandard.h> 
41#include <Foundation/tString.h> 
42#include <Foundation/tFixInt.h> 
43namespace tHash 
44
45 
46 
47enum class tHashAlgorithm 
48
49 Fast32
50 Jenkins32
51 Jenkins64
52 MD5, // MD5 is 128 bit. For cryptographic purposes, no MD5. 
53 Jenkins256
54 SHA256 
55}; 
56 
57 
58// These initialization vectors should not be modified unless you want to break a lot of code. The zero 32bit one is 
59// responsible for things like stringhash returning zero on empty strings. The SHA256 function uses its own default IV. 
60const uint32 HashIV32 = 0
61const uint64 HashIV64 = 0
62const tuint128 HashIV128 = 0
63const tuint256 HashIV256 = 0
64 
65// Normally the initial iv should not be modified for these fast hash functions. The fast hash functions are the only 
66// ones that store the entire state in the hash. Allows you to concatenate hashes of separate strings/data-sequences 
67// together by passing the hash of the previous call into the function again. This way you don't need to create a 
68// concatenated string/data-set to get its hash, you simply chain multiple calls together. The fast hash functions 
69// are the only ones that guarantee the same hash value whether computed in parts or as a single data-set. 
70uint32 tHashDataFast32(const uint8* data, int length, uint32 iv = HashIV32); 
71uint32 tHashStringFast32(const char*, uint32 iv = HashIV32); 
72uint32 tHashStringFast32(const char8_t*, uint32 iv = HashIV32); 
73uint32 tHashStringFast32(const tString&, uint32 iv = HashIV32); 
74uint32 tHashString(const char*); 
75uint32 tHashString(const char8_t*); 
76 
77// The CT (Compile Time) variant uses the fast-hash algorithm. It is super handy for use in the 'case' part of switch 
78// statements or any time you know the string literal explicitly. In these cases the compiler can do all the work. 
79constexpr uint32 tHashCT(const char*, uint32 iv = HashIV32); 
80constexpr uint32 tHashCT(const char8_t*, uint32 iv = HashIV32); 
81 
82// The HashData32/64/128/256 and variants do _not_ guarantee the same hash value if they are chained together compared 
83// to the hash of the same data computed as a single block. This is because the entire state is not stored in the hash 
84// itself since these are much better hash functions than the Fast32 versions. Chaining is still useful as uniqueness is 
85// still guaranteed and if any data changes in any of the sources the end result will vary. Chaining is performed in the 
86// same manner as HashDataFast32. Algorithms in use for below functions: 
87// 
88// tHash*32: Robert J. Jenkins Jr., 1997. See http://burtleburtle.net/bob/hash/evahash.html 
89// tHash*64: Robert J. Jenkins Jr., 1997. See http://burtleburtle.net/bob/hash/evahash.html 
90// tHash*128: MD5. Not cryptographically secure any more. 
91// tHash*256: Robert J. Jenkins Jr., 1997. See http://burtleburtle.net/bob/hash/evahash.html 
92// 
93// If you want SHA-256 call it directly. 
94// If you want MD5 call it directly with the default default MD5 initialization vector. 
95uint32 tHashData32(const uint8* data, int length, uint32 iv = HashIV32); 
96uint32 tHashString32(const char*, uint32 iv = HashIV32); 
97uint32 tHashString32(const tString&, uint32 iv = HashIV32); 
98 
99uint64 tHashData64(const uint8* data, int length, uint64 iv = HashIV64); 
100uint64 tHashString64(const char*, uint64 iv = HashIV64); 
101uint64 tHashString64(const tString&, uint64 iv = HashIV64); 
102 
103tuint128 tHashData128(const uint8* data, int length, tuint128 iv = HashIV128); 
104tuint128 tHashString128(const char*, tuint128 iv = HashIV128); 
105tuint128 tHashString128(const tString&, tuint128 iv = HashIV128); 
106 
107tuint256 tHashData256(const uint8* data, int length, tuint256 iv = HashIV256); 
108tuint256 tHashString256(const char*, tuint256 iv = HashIV256); 
109tuint256 tHashString256(const tString&, tuint256 iv = HashIV256); 
110 
111// MD5 is _not_ to be used for cryptographic purposes. The MD5 functions are used by the HashData128 functions but with 
112// a non-standard default iv. The direct MD5 hash functions below use a very specific default initialization vector.  
113// We allow it to be specified so you can still esily chain using the previous hash as the next iv. 
114const tuint128 HashIVMD5("67452301" "efcdab89" "98badcfe" "10325476", 16); 
115tuint128 tHashDataMD5(const uint8* data, int length, tuint128 iv = HashIVMD5); 
116tuint128 tHashStringMD5(const char*, tuint128 iv = HashIVMD5); 
117tuint128 tHashStringMD5(const tString&, tuint128 iv = HashIVMD5); 
118 
119// For the SHA256 functions there is a very specific initialization vector supplied. This is defined in 
120// https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf 
121// We allow it to be specified so you can still chain using the previous hash. 
122const tuint256 HashIVSHA256("6a09e667" "bb67ae85" "3c6ef372" "a54ff53a" "510e527f" "9b05688c" "1f83d9ab" "5be0cd19", 16); 
123tuint256 tHashDataSHA256(const uint8* data, int length, tuint256 iv = HashIVSHA256); 
124tuint256 tHashStringSHA256(const char*, tuint256 iv = HashIVSHA256); 
125tuint256 tHashStringSHA256(const tString&, tuint256 iv = HashIVSHA256); 
126 
127 
128// Implementation below this line. 
129 
130 
131inline uint32 tHashStringFast32(const char* string, uint32 iv
132
133 if (!string
134 return 0
135 return tHashDataFast32(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); 
136
137 
138 
139inline uint32 tHashStringFast32(const char8_t* string, uint32 iv
140
141 return tHashStringFast32(string: (const char*)string, iv); 
142
143 
144 
145// This (compile-time) constant expression relies on the odometer-style looping of unsigned ints to compute the hash. 
146// Since it's inline, you may need to pragma warning(disable:4307), which warns of const integral overflow. 
147// For the below functions, char* is for ASCII strings, char8_t is for UTF-8. The UTF-8 versions fallback to the regular 
148// versions as these work fine for UTF-8 strings. 
149inline constexpr uint32 tHashCT(const char* s, uint32 hash) { return *s ? tHashCT(s: s + 1, hash: hash + (hash << 5) + uint8(*s)) : hash; } 
150inline constexpr uint32 tHashCT(const char8_t* s, uint32 hash) { return *s ? tHashCT(s: s + 1, hash: hash + (hash << 5) + uint8(*s)) : hash; } 
151inline uint32 tHashStringFast32(const tString& s, uint32 iv) { return tHashStringFast32(string: s.Chars(), iv); } 
152inline uint32 tHashString(const char* s) { return tHashStringFast32(string: s); } 
153inline uint32 tHashString(const char8_t* s) { return tHashStringFast32(string: s); } 
154inline uint32 tHashString32(const char* string, uint32 iv) { return tHashData32(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); } 
155inline uint32 tHashString32(const char8_t* string, uint32 iv) { return tHashData32(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); } 
156inline uint32 tHashString32(const tString& s, uint32 iv) { return tHashString32(string: s.Chars(), iv); } 
157inline uint64 tHashString64(const char* string, uint64 iv) { return tHashData64(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); } 
158inline uint64 tHashString64(const char8_t* string, uint64 iv) { return tHashData64(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); } 
159inline uint64 tHashString64(const tString& s, uint64 iv) { return tHashString64(string: s.Chars(), iv); } 
160inline tuint128 tHashStringMD5(const char* string, tuint128 iv) { return tHashDataMD5(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); } 
161inline tuint128 tHashStringMD5(const char8_t* string, tuint128 iv) { return tHashDataMD5(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); } 
162inline tuint128 tHashStringMD5(const tString& s, tuint128 iv) { return tHashStringMD5(string: s.Chars(), iv); } 
163inline tuint128 tHashData128(const uint8* data, int length, tuint128 iv) { return tHashDataMD5(data, length, iv); } 
164inline tuint128 tHashString128(const char* string, tuint128 iv) { return tHashDataMD5(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); } 
165inline tuint128 tHashString128(const char8_t* string, tuint128 iv) { return tHashDataMD5(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); } 
166inline tuint128 tHashString128(const tString& s, tuint128 iv) { return tHashStringMD5(string: s.Chars(), iv); } 
167inline tuint256 tHashString256(const char* string, tuint256 iv) { return tHashData256(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); } 
168inline tuint256 tHashString256(const char8_t* string, tuint256 iv) { return tHashData256(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); } 
169inline tuint256 tHashString256(const tString& s, tuint256 iv) { return tHashString256(string: s.Chars(), iv); } 
170inline tuint256 tHashStringSHA256(const char* string, tuint256 iv) { return tHashDataSHA256(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); } 
171inline tuint256 tHashStringSHA256(const char8_t* string, tuint256 iv) { return tHashDataSHA256(data: (uint8*)string, length: tStd::tStrlen(s: string), iv); } 
172inline tuint256 tHashStringSHA256(const tString& s, tuint256 iv) { return tHashStringSHA256(string: s.Chars(), iv); } 
173 
174 
175
176