/* bcdec.h - v0.97
   provides functions to decompress blocks of BC compressed images
   written by Sergii "iOrange" Kudlai in 2022

   This library does not allocate memory and tries to use as little stack as possible

   The library was never optimized specifically for speed but for the overall size;
   it has zero external dependencies and does not use any runtime functions

   Supported BC formats:
   BC1 (also known as DXT1) + its "binary alpha" variant BC1A (DXT1A)
   BC2 (also known as DXT3)
   BC3 (also known as DXT5)
   BC4 (also known as ATI1N)
   BC5 (also known as ATI2N)
   BC6H (HDR format)
   BC7

   BC1/BC2/BC3/BC7 are expected to decompress into 4*4 RGBA blocks 8bit per component (32bit pixel)
   BC4/BC5 are expected to decompress into 4*4 R/RG blocks 8bit per component (8bit and 16bit pixel)
   BC6H is expected to decompress into 4*4 RGB blocks of either 32bit float or 16bit "half" per
   component (96bit or 48bit pixel)

   For more info, issues and suggestions please visit https://github.com/iOrange/bcdec

   CREDITS:
      Aras Pranckevicius (@aras-p)      - BC1/BC3 decoders optimizations (up to 3x the speed)
                                        - BC6H/BC7 bits pulling routines optimizations
                                        - optimized BC6H by moving unquantize out of the loop
                                        - Split BC6H decompression function into 'half' and
                                          'float' variants

      Michael Schmidt (@RunDevelopment) - Found better "magic" coefficients for integer interpolation
                                          of reference colors in BC1 color block, that match with
                                          the floating point interpolation. This also made it faster
                                          than integer division by 3!

   bugfixes:
      @linkmauve

   LICENSE: See end of file for license information.
*/
43 
#ifndef BCDEC_HEADER_INCLUDED
#define BCDEC_HEADER_INCLUDED

#define BCDEC_VERSION_MAJOR 0
#define BCDEC_VERSION_MINOR 97

/* Linkage of the public functions.
   Define BCDEC_STATIC to make them 'static' (private to the including translation unit).
   If BCDEC_STATIC causes problems, try defining BCDECDEF to 'inline' or 'static inline'. */
#ifndef BCDECDEF
#ifdef BCDEC_STATIC
#define BCDECDEF static
#else
#ifdef __cplusplus
#define BCDECDEF extern "C"
#else
#define BCDECDEF extern
#endif
#endif
#endif

/*  Used information sources:
    https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression
    https://docs.microsoft.com/en-us/windows/win32/direct3d11/bc6h-format
    https://docs.microsoft.com/en-us/windows/win32/direct3d11/bc7-format
    https://docs.microsoft.com/en-us/windows/win32/direct3d11/bc7-format-mode-reference

    ! WARNING ! Khronos's BPTC partitions tables contain mistakes, do not use them!
    https://www.khronos.org/registry/DataFormat/specs/1.1/dataformat.1.1.html#BPTC

    ! Use tables from here instead !
    https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_bptc.txt

    Leaving it here as it's a nice read
    https://fgiesen.wordpress.com/2021/10/04/gpu-bcn-decoding/

    Fast half to float function from here
    https://gist.github.com/rygorous/2144712
*/

/* Size in bytes of one compressed 4x4 block, per format. */
#define BCDEC_BC1_BLOCK_SIZE    8
#define BCDEC_BC2_BLOCK_SIZE    16
#define BCDEC_BC3_BLOCK_SIZE    16
#define BCDEC_BC4_BLOCK_SIZE    8
#define BCDEC_BC5_BLOCK_SIZE    16
#define BCDEC_BC6H_BLOCK_SIZE   16
#define BCDEC_BC7_BLOCK_SIZE    16

/* Size in bytes of a compressed image of w*h pixels.
   Both dimensions are rounded UP to whole 4x4 blocks, so images whose size is not
   a multiple of 4 (for example 2x2 or 1x1 mip levels) still account for the partially
   covered blocks. For dimensions that are multiples of 4 the result is simply
   (w/4)*(h/4)*block_size. */
#define BCDEC_BC1_COMPRESSED_SIZE(w, h)     (((((w) + 3) >> 2) * (((h) + 3) >> 2)) * BCDEC_BC1_BLOCK_SIZE)
#define BCDEC_BC2_COMPRESSED_SIZE(w, h)     (((((w) + 3) >> 2) * (((h) + 3) >> 2)) * BCDEC_BC2_BLOCK_SIZE)
#define BCDEC_BC3_COMPRESSED_SIZE(w, h)     (((((w) + 3) >> 2) * (((h) + 3) >> 2)) * BCDEC_BC3_BLOCK_SIZE)
#define BCDEC_BC4_COMPRESSED_SIZE(w, h)     (((((w) + 3) >> 2) * (((h) + 3) >> 2)) * BCDEC_BC4_BLOCK_SIZE)
#define BCDEC_BC5_COMPRESSED_SIZE(w, h)     (((((w) + 3) >> 2) * (((h) + 3) >> 2)) * BCDEC_BC5_BLOCK_SIZE)
#define BCDEC_BC6H_COMPRESSED_SIZE(w, h)    (((((w) + 3) >> 2) * (((h) + 3) >> 2)) * BCDEC_BC6H_BLOCK_SIZE)
#define BCDEC_BC7_COMPRESSED_SIZE(w, h)     (((((w) + 3) >> 2) * (((h) + 3) >> 2)) * BCDEC_BC7_BLOCK_SIZE)

/* Each function decompresses ONE 4x4 block.
     compressedBlock   - pointer to the compressed block (BCDEC_BCx_BLOCK_SIZE bytes)
     decompressedBlock - pointer to the top-left destination pixel of the block
     destinationPitch  - distance between two consecutive destination rows; the BC1-BC5
                         decoders advance a byte pointer by this value after each row
                         (NOTE(review): confirm the unit used by the BC6H/BC7 decoders
                         against their implementations)
     isSigned          - BC6H only: non-zero selects the signed variant of the format */
BCDECDEF void bcdec_bc1(const void* compressedBlock, void* decompressedBlock, int destinationPitch);
BCDECDEF void bcdec_bc2(const void* compressedBlock, void* decompressedBlock, int destinationPitch);
BCDECDEF void bcdec_bc3(const void* compressedBlock, void* decompressedBlock, int destinationPitch);
BCDECDEF void bcdec_bc4(const void* compressedBlock, void* decompressedBlock, int destinationPitch);
BCDECDEF void bcdec_bc5(const void* compressedBlock, void* decompressedBlock, int destinationPitch);
BCDECDEF void bcdec_bc6h_float(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int isSigned);
BCDECDEF void bcdec_bc6h_half(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int isSigned);
BCDECDEF void bcdec_bc7(const void* compressedBlock, void* decompressedBlock, int destinationPitch);

#endif /* BCDEC_HEADER_INCLUDED */
108 
109#ifdef BCDEC_IMPLEMENTATION 
110 
/* Decodes the 8-byte color part shared by BC1/BC2/BC3 into a 4x4 block of RGBA8 pixels.
     compressedBlock   - 8 bytes: two little-endian RGB565 reference colors followed by
                         sixteen 2-bit palette indices (32 bits, little-endian)
     decompressedBlock - destination of the top-left pixel; 4 bytes (R, G, B, A) are
                         written for each of the 4 pixels in a row
     destinationPitch  - number of bytes between two consecutive destination rows
     onlyOpaqueMode    - non-zero forces the 4-color opaque palette regardless of the
                         order of the two reference colors (used for BC2/BC3)
   All memory is accessed byte by byte, so neither pointer needs any particular
   alignment and the result does not depend on the host byte order. */
static void bcdec__color_block(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int onlyOpaqueMode) {
    const unsigned char* src = (const unsigned char*)compressedBlock;
    unsigned short c0, c1;
    unsigned int refColors[4];    /* 0xAABBGGRR */
    unsigned char* dstColors;
    unsigned int colorIndices, color;
    int i, j, idx;
    unsigned int r0, g0, b0, r1, g1, b1, r, g, b;

    c0 = (unsigned short)(src[0] | (src[1] << 8));
    c1 = (unsigned short)(src[2] | (src[3] << 8));

    /* Unpack 565 ref colors */
    r0 = (c0 >> 11) & 0x1F;
    g0 = (c0 >> 5)  & 0x3F;
    b0 =  c0        & 0x1F;

    r1 = (c1 >> 11) & 0x1F;
    g1 = (c1 >> 5)  & 0x3F;
    b1 =  c1        & 0x1F;

    /* Expand 565 ref colors to 888 */
    r = (r0 * 527 + 23) >> 6;
    g = (g0 * 259 + 33) >> 6;
    b = (b0 * 527 + 23) >> 6;
    refColors[0] = 0xFF000000 | (b << 16) | (g << 8) | r;

    r = (r1 * 527 + 23) >> 6;
    g = (g1 * 259 + 33) >> 6;
    b = (b1 * 527 + 23) >> 6;
    refColors[1] = 0xFF000000 | (b << 16) | (g << 8) | r;

    if (c0 > c1 || onlyOpaqueMode) {    /* Standard BC1 mode (also BC3 color block uses ONLY this mode) */
        /* color_2 = 2/3*color_0 + 1/3*color_1
           color_3 = 1/3*color_0 + 2/3*color_1 */
        r = ((2 * r0 + r1) *  351 +   61) >>  7;
        g = ((2 * g0 + g1) * 2763 + 1039) >> 11;
        b = ((2 * b0 + b1) *  351 +   61) >>  7;
        refColors[2] = 0xFF000000 | (b << 16) | (g << 8) | r;

        r = ((r0 + r1 * 2) *  351 +   61) >>  7;
        g = ((g0 + g1 * 2) * 2763 + 1039) >> 11;
        b = ((b0 + b1 * 2) *  351 +   61) >>  7;
        refColors[3] = 0xFF000000 | (b << 16) | (g << 8) | r;
    } else {                            /* Quite rare BC1A mode */
        /* color_2 = 1/2*color_0 + 1/2*color_1;
           color_3 = 0;                         */
        r = ((r0 + r1) * 1053 +  125) >>  8;
        g = ((g0 + g1) * 4145 + 1019) >> 11;
        b = ((b0 + b1) * 1053 +  125) >>  8;
        refColors[2] = 0xFF000000 | (b << 16) | (g << 8) | r;

        refColors[3] = 0x00000000;
    }

    colorIndices = (unsigned int)src[4]
                 | ((unsigned int)src[5] << 8)
                 | ((unsigned int)src[6] << 16)
                 | ((unsigned int)src[7] << 24);

    /* Fill out the decompressed color block */
    dstColors = (unsigned char*)decompressedBlock;
    for (i = 0; i < 4; ++i) {
        for (j = 0; j < 4; ++j) {
            idx = colorIndices & 0x03;
            color = refColors[idx];
            /* store 0xAABBGGRR as the byte sequence R, G, B, A */
            dstColors[j * 4 + 0] = (unsigned char)( color        & 0xFF);
            dstColors[j * 4 + 1] = (unsigned char)((color >> 8)  & 0xFF);
            dstColors[j * 4 + 2] = (unsigned char)((color >> 16) & 0xFF);
            dstColors[j * 4 + 3] = (unsigned char)((color >> 24) & 0xFF);
            colorIndices >>= 2;
        }

        dstColors += destinationPitch;
    }
}
179 
/* Decodes the explicit 4-bit alpha part of a BC2 block.
     compressedBlock   - 8 bytes: four little-endian 16-bit rows, each holding four
                         4-bit alpha values (lowest nibble = leftmost pixel)
     decompressedBlock - address of the first byte to write; one byte is written every
                         4 bytes, i.e. one channel of a 32-bit pixel
     destinationPitch  - number of bytes between two consecutive destination rows
   The source is read byte by byte, so it needs no particular alignment and the
   result does not depend on the host byte order. */
static void bcdec__sharp_alpha_block(const void* compressedBlock, void* decompressedBlock, int destinationPitch) {
    const unsigned char* src = (const unsigned char*)compressedBlock;
    unsigned char* decompressed = (unsigned char*)decompressedBlock;
    unsigned int row;
    int i, j;

    for (i = 0; i < 4; ++i) {
        row = (unsigned int)src[i * 2] | ((unsigned int)src[i * 2 + 1] << 8);
        for (j = 0; j < 4; ++j) {
            /* multiplying by 17 expands the 4-bit value 0..15 to 0..255 */
            decompressed[j * 4] = (unsigned char)(((row >> (4 * j)) & 0x0F) * 17);
        }

        decompressed += destinationPitch;
    }
}
196 
/* Decodes an interpolated single-channel block (BC3 alpha, BC4, each half of BC5).
     compressedBlock   - 8 bytes: two 8-bit reference values followed by sixteen
                         3-bit palette indices (48 bits, little-endian)
     decompressedBlock - address of the first byte to write
     destinationPitch  - number of bytes between two consecutive destination rows
     pixelSize         - number of bytes between two consecutive pixels in a row
   The source is read byte by byte, so it needs no particular alignment and the
   result does not depend on the host byte order. */
static void bcdec__smooth_alpha_block(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int pixelSize) {
    const unsigned char* src = (const unsigned char*)compressedBlock;
    unsigned char* decompressed;
    unsigned char alpha[8];
    int i, j;
    unsigned long long block, indices;

    /* assemble the 64-bit little-endian block, most significant byte first */
    block = 0;
    for (i = 7; i >= 0; --i) {
        block = (block << 8) | src[i];
    }
    decompressed = (unsigned char*)decompressedBlock;

    alpha[0] = block & 0xFF;
    alpha[1] = (block >> 8) & 0xFF;

    if (alpha[0] > alpha[1]) {
        /* 6 interpolated alpha values. */
        alpha[2] = (6 * alpha[0] +     alpha[1] + 1) / 7;   /* 6/7*alpha_0 + 1/7*alpha_1 */
        alpha[3] = (5 * alpha[0] + 2 * alpha[1] + 1) / 7;   /* 5/7*alpha_0 + 2/7*alpha_1 */
        alpha[4] = (4 * alpha[0] + 3 * alpha[1] + 1) / 7;   /* 4/7*alpha_0 + 3/7*alpha_1 */
        alpha[5] = (3 * alpha[0] + 4 * alpha[1] + 1) / 7;   /* 3/7*alpha_0 + 4/7*alpha_1 */
        alpha[6] = (2 * alpha[0] + 5 * alpha[1] + 1) / 7;   /* 2/7*alpha_0 + 5/7*alpha_1 */
        alpha[7] = (    alpha[0] + 6 * alpha[1] + 1) / 7;   /* 1/7*alpha_0 + 6/7*alpha_1 */
    } else {
        /* 4 interpolated alpha values. */
        alpha[2] = (4 * alpha[0] +     alpha[1] + 1) / 5;   /* 4/5*alpha_0 + 1/5*alpha_1 */
        alpha[3] = (3 * alpha[0] + 2 * alpha[1] + 1) / 5;   /* 3/5*alpha_0 + 2/5*alpha_1 */
        alpha[4] = (2 * alpha[0] + 3 * alpha[1] + 1) / 5;   /* 2/5*alpha_0 + 3/5*alpha_1 */
        alpha[5] = (    alpha[0] + 4 * alpha[1] + 1) / 5;   /* 1/5*alpha_0 + 4/5*alpha_1 */
        alpha[6] = 0x00;
        alpha[7] = 0xFF;
    }

    indices = block >> 16;
    for (i = 0; i < 4; ++i) {
        for (j = 0; j < 4; ++j) {
            decompressed[j * pixelSize] = alpha[indices & 0x07];
            indices >>= 3;
        }

        decompressed += destinationPitch;
    }
}
238 
/* 128-bit bit stream used by the BC6H/BC7 decoders.
   Bits are consumed starting from the least significant bit of 'low';
   'high' refills 'low' from the top as bits are shifted out. */
typedef struct bcdec__bitstream {
    unsigned long long low;
    unsigned long long high;
} bcdec__bitstream_t;

/* Removes the next numBits bits from the stream and returns them, first bit read
   in the least significant position.
   The result is returned as 'int', so numBits is expected to be in the range 1..31;
   a request for 0 (or fewer) bits returns 0 and leaves the stream untouched. */
static int bcdec__bitstream_read_bits(bcdec__bitstream_t* bstream, int numBits) {
    unsigned int mask, bits;

    if (numBits <= 0) {
        /* nothing to read; also avoids the shift by 64 below, which is undefined */
        return 0;
    }

    /* the mask is built in unsigned arithmetic: (1 << 31) is not representable in int */
    mask = (numBits < 32) ? ((1u << numBits) - 1u) : 0xFFFFFFFFu;
    /* Read the low N bits */
    bits = (unsigned int)(bstream->low & mask);

    bstream->low >>= numBits;
    /* Put the low N bits of "high" into the high 64-N bits of "low". */
    bstream->low |= (bstream->high & mask) << (sizeof(bstream->high) * 8 - numBits);
    bstream->high >>= numBits;

    return (int)bits;
}

/* Removes and returns a single bit (0 or 1) from the stream. */
static int bcdec__bitstream_read_bit(bcdec__bitstream_t* bstream) {
    return bcdec__bitstream_read_bits(bstream, 1);
}

/* reversed bits pulling, used in BC6H decoding
   why ?? just why ???
   Removes numBits bits from the stream and returns them in reversed order:
   the first bit read ends up in the most significant position of the result. */
static int bcdec__bitstream_read_bits_r(bcdec__bitstream_t* bstream, int numBits) {
    int bits = bcdec__bitstream_read_bits(bstream, numBits);
    /* Reverse the bits. */
    int result = 0;
    while (numBits--) {
        result <<= 1;
        result |= (bits & 1);
        bits >>= 1;
    }
    return result;
}
274 
275 
276 
277BCDECDEF void bcdec_bc1(const void* compressedBlock, void* decompressedBlock, int destinationPitch) { 
278 bcdec__color_block(compressedBlock, decompressedBlock, destinationPitch, 0); 
279
280 
281BCDECDEF void bcdec_bc2(const void* compressedBlock, void* decompressedBlock, int destinationPitch) { 
282 bcdec__color_block(((char*)compressedBlock) + 8, decompressedBlock, destinationPitch, 1); 
283 bcdec__sharp_alpha_block(compressedBlock, ((char*)decompressedBlock) + 3, destinationPitch); 
284
285 
286BCDECDEF void bcdec_bc3(const void* compressedBlock, void* decompressedBlock, int destinationPitch) { 
287 bcdec__color_block(((char*)compressedBlock) + 8, decompressedBlock, destinationPitch, 1); 
288 bcdec__smooth_alpha_block(compressedBlock, ((char*)decompressedBlock) + 3, destinationPitch, 4); 
289
290 
291BCDECDEF void bcdec_bc4(const void* compressedBlock, void* decompressedBlock, int destinationPitch) { 
292 bcdec__smooth_alpha_block(compressedBlock, decompressedBlock, destinationPitch, 1); 
293
294 
295BCDECDEF void bcdec_bc5(const void* compressedBlock, void* decompressedBlock, int destinationPitch) { 
296 bcdec__smooth_alpha_block(compressedBlock, decompressedBlock, destinationPitch, 2); 
297 bcdec__smooth_alpha_block(((char*)compressedBlock) + 8, ((char*)decompressedBlock) + 1, destinationPitch, 2); 
298
299 
/* http://graphics.stanford.edu/~seander/bithacks.html#VariableSignExtend
   Interprets the low 'bits' bits of val as a two's complement number and returns it
   as an int; any higher bits of val are ignored.
   'bits' is expected to be in the range 1..31. The value is computed as
   (bits below the sign bit) - (sign bit), using unsigned masking only, so no signed
   value is ever shifted. */
static int bcdec__extend_sign(int val, int bits) {
    const unsigned int signBit = 1u << (bits - 1);
    const unsigned int raw = (unsigned int)val;

    return (int)(raw & (signBit - 1u)) - (int)(raw & signBit);
}
304 
/* Reverses the BC6H endpoint delta transform: adds the base endpoint a0 to the stored
   value and wraps the sum to 'bits' bits. When isSigned is non-zero the wrapped
   result is additionally sign-extended from 'bits' bits. */
static int bcdec__transform_inverse(int val, int a0, int bits, int isSigned) {
    /* If the precision of A0 is "p" bits, then the transform algorithm is:
       B0 = (B0 + A0) & ((1 << p) - 1) */
    val = (val + a0) & ((1 << bits) - 1);
    if (isSigned) {
        val = bcdec__extend_sign(val, bits);
    }
    return val;
}
314 
/* pretty much copy-paste from documentation
   Expands a 'bits'-wide BC6H endpoint value to the full working range.
     unsigned: values of 15 or more bits pass through unchanged; otherwise 0 stays 0,
               the all-ones value becomes 0xFFFF and anything else is rescaled to
               16 bits with rounding
     signed:   values of 16 or more bits pass through unchanged; otherwise the
               magnitude is rescaled to 15 bits (saturating at 0x7FFF) and the sign
               is re-applied afterwards */
static int bcdec__unquantize(int val, int bits, int isSigned) {
    int unq, s = 0;

    if (!isSigned) {
        if (bits >= 15) {
            unq = val;
        } else if (!val) {
            unq = 0;
        } else if (val == ((1 << bits) - 1)) {
            unq = 0xFFFF;
        } else {
            unq = ((val << 16) + 0x8000) >> bits;
        }
    } else {
        if (bits >= 16) {
            unq = val;
        } else {
            /* work on the magnitude, remember the sign */
            if (val < 0) {
                s = 1;
                val = -val;
            }

            if (val == 0) {
                unq = 0;
            } else if (val >= ((1 << (bits - 1)) - 1)) {
                unq = 0x7FFF;
            } else {
                unq = ((val << 15) + 0x4000) >> (bits - 1);
            }

            if (s) {
                unq = -unq;
            }
        }
    }
    return unq;
}
353 
/* Blends a and b with a 6-bit fixed point weight taken from weights[index]:
   a weight of 0 yields a, a weight of 64 yields b. The result is rounded by
   adding 32 before the final shift by 6. */
static int bcdec__interpolate(int a, int b, int* weights, int index) {
    return (a * (64 - weights[index]) + b * weights[index] + 32) >> 6;
}
357 
/* Final BC6H scaling step: converts an interpolated value to the 16-bit pattern
   that is returned as the "half" result.
     unsigned: the value is scaled by 31/64
     signed:   the magnitude is scaled by 31/32 and the sign is stored in bit 15
               (sign + magnitude form) */
static unsigned short bcdec__finish_unquantize(int val, int isSigned) {
    int s;

    if (!isSigned) {
        return (unsigned short)((val * 31) >> 6);                   /* scale the magnitude by 31 / 64 */
    } else {
        val = (val < 0) ? -(((-val) * 31) >> 5) : (val * 31) >> 5;  /* scale the magnitude by 31 / 32 */
        s = 0;
        if (val < 0) {
            s = 0x8000;
            val = -val;
        }
        return (unsigned short)(s | val);
    }
}
373 
/* modified half_to_float_fast4 from https://gist.github.com/rygorous/2144712
   Converts a 16-bit "half" bit pattern to a 32-bit float, handling zero, denormals
   and Inf/NaN. The conversion works on the bit pattern through a union and assumes
   that 'float' and 'unsigned int' are both 32 bits wide. */
static float bcdec__half_to_float_quick(unsigned short half) {
    typedef union {
        unsigned int u;
        float f;
    } FP32;

    static const FP32 magic = { 113 << 23 };
    static const unsigned int shifted_exp = 0x7c00 << 13;   /* exponent mask after shift */
    FP32 o;
    unsigned int exp;

    o.u = (half & 0x7fff) << 13;                            /* exponent/mantissa bits */
    exp = shifted_exp & o.u;                                /* just the exponent */
    o.u += (127 - 15) << 23;                                /* exponent adjust */

    /* handle exponent special cases */
    if (exp == shifted_exp) {                               /* Inf/NaN? */
        o.u += (128 - 16) << 23;                            /* extra exp adjust */
    } else if (exp == 0) {                                  /* Zero/Denormal? */
        o.u += 1 << 23;                                     /* extra exp adjust */
        o.f -= magic.f;                                     /* renormalize */
    }

    /* the sign bit is moved to bit 31 in unsigned arithmetic:
       'half' promotes to int, and 0x8000 << 16 does not fit into an int */
    o.u |= ((unsigned int)half & 0x8000u) << 16;            /* sign bit */
    return o.f;
}
401 
402BCDECDEF void bcdec_bc6h_half(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int isSigned) { 
403 static char actual_bits_count[4][14] = { 
404 { 10, 7, 11, 11, 11, 9, 8, 8, 8, 6, 10, 11, 12, 16 }, /* W */ 
405 { 5, 6, 5, 4, 4, 5, 6, 5, 5, 6, 10, 9, 8, 4 }, /* dR */ 
406 { 5, 6, 4, 5, 4, 5, 5, 6, 5, 6, 10, 9, 8, 4 }, /* dG */ 
407 { 5, 6, 4, 4, 5, 5, 5, 5, 6, 6, 10, 9, 8, 4 } /* dB */ 
408 }; 
409 
410 /* There are 32 possible partition sets for a two-region tile. 
411 Each 4x4 block represents a single shape. 
412 Here also every fix-up index has MSB bit set. */ 
413 static unsigned char partition_sets[32][4][4] = { 
414 { {128, 0, 1, 1}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {0, 0, 1, 129} }, /* 0 */ 
415 { {128, 0, 0, 1}, {0, 0, 0, 1}, { 0, 0, 0, 1}, {0, 0, 0, 129} }, /* 1 */ 
416 { {128, 1, 1, 1}, {0, 1, 1, 1}, { 0, 1, 1, 1}, {0, 1, 1, 129} }, /* 2 */ 
417 { {128, 0, 0, 1}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {0, 1, 1, 129} }, /* 3 */ 
418 { {128, 0, 0, 0}, {0, 0, 0, 1}, { 0, 0, 0, 1}, {0, 0, 1, 129} }, /* 4 */ 
419 { {128, 0, 1, 1}, {0, 1, 1, 1}, { 0, 1, 1, 1}, {1, 1, 1, 129} }, /* 5 */ 
420 { {128, 0, 0, 1}, {0, 0, 1, 1}, { 0, 1, 1, 1}, {1, 1, 1, 129} }, /* 6 */ 
421 { {128, 0, 0, 0}, {0, 0, 0, 1}, { 0, 0, 1, 1}, {0, 1, 1, 129} }, /* 7 */ 
422 { {128, 0, 0, 0}, {0, 0, 0, 0}, { 0, 0, 0, 1}, {0, 0, 1, 129} }, /* 8 */ 
423 { {128, 0, 1, 1}, {0, 1, 1, 1}, { 1, 1, 1, 1}, {1, 1, 1, 129} }, /* 9 */ 
424 { {128, 0, 0, 0}, {0, 0, 0, 1}, { 0, 1, 1, 1}, {1, 1, 1, 129} }, /* 10 */ 
425 { {128, 0, 0, 0}, {0, 0, 0, 0}, { 0, 0, 0, 1}, {0, 1, 1, 129} }, /* 11 */ 
426 { {128, 0, 0, 1}, {0, 1, 1, 1}, { 1, 1, 1, 1}, {1, 1, 1, 129} }, /* 12 */ 
427 { {128, 0, 0, 0}, {0, 0, 0, 0}, { 1, 1, 1, 1}, {1, 1, 1, 129} }, /* 13 */ 
428 { {128, 0, 0, 0}, {1, 1, 1, 1}, { 1, 1, 1, 1}, {1, 1, 1, 129} }, /* 14 */ 
429 { {128, 0, 0, 0}, {0, 0, 0, 0}, { 0, 0, 0, 0}, {1, 1, 1, 129} }, /* 15 */ 
430 { {128, 0, 0, 0}, {1, 0, 0, 0}, { 1, 1, 1, 0}, {1, 1, 1, 129} }, /* 16 */ 
431 { {128, 1, 129, 1}, {0, 0, 0, 1}, { 0, 0, 0, 0}, {0, 0, 0, 0} }, /* 17 */ 
432 { {128, 0, 0, 0}, {0, 0, 0, 0}, {129, 0, 0, 0}, {1, 1, 1, 0} }, /* 18 */ 
433 { {128, 1, 129, 1}, {0, 0, 1, 1}, { 0, 0, 0, 1}, {0, 0, 0, 0} }, /* 19 */ 
434 { {128, 0, 129, 1}, {0, 0, 0, 1}, { 0, 0, 0, 0}, {0, 0, 0, 0} }, /* 20 */ 
435 { {128, 0, 0, 0}, {1, 0, 0, 0}, {129, 1, 0, 0}, {1, 1, 1, 0} }, /* 21 */ 
436 { {128, 0, 0, 0}, {0, 0, 0, 0}, {129, 0, 0, 0}, {1, 1, 0, 0} }, /* 22 */ 
437 { {128, 1, 1, 1}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {0, 0, 0, 129} }, /* 23 */ 
438 { {128, 0, 129, 1}, {0, 0, 0, 1}, { 0, 0, 0, 1}, {0, 0, 0, 0} }, /* 24 */ 
439 { {128, 0, 0, 0}, {1, 0, 0, 0}, {129, 0, 0, 0}, {1, 1, 0, 0} }, /* 25 */ 
440 { {128, 1, 129, 0}, {0, 1, 1, 0}, { 0, 1, 1, 0}, {0, 1, 1, 0} }, /* 26 */ 
441 { {128, 0, 129, 1}, {0, 1, 1, 0}, { 0, 1, 1, 0}, {1, 1, 0, 0} }, /* 27 */ 
442 { {128, 0, 0, 1}, {0, 1, 1, 1}, {129, 1, 1, 0}, {1, 0, 0, 0} }, /* 28 */ 
443 { {128, 0, 0, 0}, {1, 1, 1, 1}, {129, 1, 1, 1}, {0, 0, 0, 0} }, /* 29 */ 
444 { {128, 1, 129, 1}, {0, 0, 0, 1}, { 1, 0, 0, 0}, {1, 1, 1, 0} }, /* 30 */ 
445 { {128, 0, 129, 1}, {1, 0, 0, 1}, { 1, 0, 0, 1}, {1, 1, 0, 0} } /* 31 */ 
446 }; 
447 
448 static int aWeight3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; 
449 static int aWeight4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; 
450 
451 bcdec__bitstream_t bstream; 
452 int mode, partition, numPartitions, i, j, partitionSet, indexBits, index, ep_i, actualBits0Mode; 
453 int r[4], g[4], b[4]; /* wxyz */ 
454 unsigned short* decompressed; 
455 int* weights; 
456 
457 decompressed = (unsigned short*)decompressedBlock; 
458 
459 bstream.low = ((unsigned long long*)compressedBlock)[0]; 
460 bstream.high = ((unsigned long long*)compressedBlock)[1]; 
461 
462 r[0] = r[1] = r[2] = r[3] = 0
463 g[0] = g[1] = g[2] = g[3] = 0
464 b[0] = b[1] = b[2] = b[3] = 0
465 
466 mode = bcdec__bitstream_read_bits(&bstream, 2); 
467 if (mode > 1) { 
468 mode |= (bcdec__bitstream_read_bits(&bstream, 3) << 2); 
469
470 
471 /* modes >= 11 (10 in my code) are using 0 one, others will read it from the bitstream */ 
472 partition = 0
473 
474 switch (mode) { 
475 /* mode 1 */ 
476 case 0b00: { 
477 /* Partitition indices: 46 bits 
478 Partition: 5 bits 
479 Color Endpoints: 75 bits (10.555, 10.555, 10.555) */ 
480 g[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gy[4] */ 
481 b[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* by[4] */ 
482 b[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* bz[4] */ 
483 r[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* rw[9:0] */ 
484 g[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* gw[9:0] */ 
485 b[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* bw[9:0] */ 
486 r[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* rx[4:0] */ 
487 g[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gz[4] */ 
488 g[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* gy[3:0] */ 
489 g[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* gx[4:0] */ 
490 b[3] |= bcdec__bitstream_read_bit(&bstream); /* bz[0] */ 
491 g[3] |= bcdec__bitstream_read_bits(&bstream, 4); /* gz[3:0] */ 
492 b[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* bx[4:0] */ 
493 b[3] |= bcdec__bitstream_read_bit(&bstream) << 1; /* bz[1] */ 
494 b[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* by[3:0] */ 
495 r[2] |= bcdec__bitstream_read_bits(&bstream, 5); /* ry[4:0] */ 
496 b[3] |= bcdec__bitstream_read_bit(&bstream) << 2; /* bz[2] */ 
497 r[3] |= bcdec__bitstream_read_bits(&bstream, 5); /* rz[4:0] */ 
498 b[3] |= bcdec__bitstream_read_bit(&bstream) << 3; /* bz[3] */ 
499 partition = bcdec__bitstream_read_bits(&bstream, 5); /* d[4:0] */ 
500 mode = 0
501 } break
502 
503 /* mode 2 */ 
504 case 0b01: { 
505 /* Partitition indices: 46 bits 
506 Partition: 5 bits 
507 Color Endpoints: 75 bits (7666, 7666, 7666) */ 
508 g[2] |= bcdec__bitstream_read_bit(&bstream) << 5; /* gy[5] */ 
509 g[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gz[4] */ 
510 g[3] |= bcdec__bitstream_read_bit(&bstream) << 5; /* gz[5] */ 
511 r[0] |= bcdec__bitstream_read_bits(&bstream, 7); /* rw[6:0] */ 
512 b[3] |= bcdec__bitstream_read_bit(&bstream); /* bz[0] */ 
513 b[3] |= bcdec__bitstream_read_bit(&bstream) << 1; /* bz[1] */ 
514 b[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* by[4] */ 
515 g[0] |= bcdec__bitstream_read_bits(&bstream, 7); /* gw[6:0] */ 
516 b[2] |= bcdec__bitstream_read_bit(&bstream) << 5; /* by[5] */ 
517 b[3] |= bcdec__bitstream_read_bit(&bstream) << 2; /* bz[2] */ 
518 g[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gy[4] */ 
519 b[0] |= bcdec__bitstream_read_bits(&bstream, 7); /* bw[6:0] */ 
520 b[3] |= bcdec__bitstream_read_bit(&bstream) << 3; /* bz[3] */ 
521 b[3] |= bcdec__bitstream_read_bit(&bstream) << 5; /* bz[5] */ 
522 b[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* bz[4] */ 
523 r[1] |= bcdec__bitstream_read_bits(&bstream, 6); /* rx[5:0] */ 
524 g[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* gy[3:0] */ 
525 g[1] |= bcdec__bitstream_read_bits(&bstream, 6); /* gx[5:0] */ 
526 g[3] |= bcdec__bitstream_read_bits(&bstream, 4); /* gz[3:0] */ 
527 b[1] |= bcdec__bitstream_read_bits(&bstream, 6); /* bx[5:0] */ 
528 b[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* by[3:0] */ 
529 r[2] |= bcdec__bitstream_read_bits(&bstream, 6); /* ry[5:0] */ 
530 r[3] |= bcdec__bitstream_read_bits(&bstream, 6); /* rz[5:0] */ 
531 partition = bcdec__bitstream_read_bits(&bstream, 5); /* d[4:0] */ 
532 mode = 1
533 } break
534 
535 /* mode 3 */ 
536 case 0b00010: { 
537 /* Partitition indices: 46 bits 
538 Partition: 5 bits 
539 Color Endpoints: 72 bits (11.555, 11.444, 11.444) */ 
540 r[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* rw[9:0] */ 
541 g[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* gw[9:0] */ 
542 b[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* bw[9:0] */ 
543 r[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* rx[4:0] */ 
544 r[0] |= bcdec__bitstream_read_bit(&bstream) << 10; /* rw[10] */ 
545 g[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* gy[3:0] */ 
546 g[1] |= bcdec__bitstream_read_bits(&bstream, 4); /* gx[3:0] */ 
547 g[0] |= bcdec__bitstream_read_bit(&bstream) << 10; /* gw[10] */ 
548 b[3] |= bcdec__bitstream_read_bit(&bstream); /* bz[0] */ 
549 g[3] |= bcdec__bitstream_read_bits(&bstream, 4); /* gz[3:0] */ 
550 b[1] |= bcdec__bitstream_read_bits(&bstream, 4); /* bx[3:0] */ 
551 b[0] |= bcdec__bitstream_read_bit(&bstream) << 10; /* bw[10] */ 
552 b[3] |= bcdec__bitstream_read_bit(&bstream) << 1; /* bz[1] */ 
553 b[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* by[3:0] */ 
554 r[2] |= bcdec__bitstream_read_bits(&bstream, 5); /* ry[4:0] */ 
555 b[3] |= bcdec__bitstream_read_bit(&bstream) << 2; /* bz[2] */ 
556 r[3] |= bcdec__bitstream_read_bits(&bstream, 5); /* rz[4:0] */ 
557 b[3] |= bcdec__bitstream_read_bit(&bstream) << 3; /* bz[3] */ 
558 partition = bcdec__bitstream_read_bits(&bstream, 5); /* d[4:0] */ 
559 mode = 2
560 } break
561 
562 /* mode 4 */ 
563 case 0b00110: { 
564 /* Partitition indices: 46 bits 
565 Partition: 5 bits 
566 Color Endpoints: 72 bits (11.444, 11.555, 11.444) */ 
567 r[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* rw[9:0] */ 
568 g[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* gw[9:0] */ 
569 b[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* bw[9:0] */ 
570 r[1] |= bcdec__bitstream_read_bits(&bstream, 4); /* rx[3:0] */ 
571 r[0] |= bcdec__bitstream_read_bit(&bstream) << 10; /* rw[10] */ 
572 g[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gz[4] */ 
573 g[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* gy[3:0] */ 
574 g[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* gx[4:0] */ 
575 g[0] |= bcdec__bitstream_read_bit(&bstream) << 10; /* gw[10] */ 
576 g[3] |= bcdec__bitstream_read_bits(&bstream, 4); /* gz[3:0] */ 
577 b[1] |= bcdec__bitstream_read_bits(&bstream, 4); /* bx[3:0] */ 
578 b[0] |= bcdec__bitstream_read_bit(&bstream) << 10; /* bw[10] */ 
579 b[3] |= bcdec__bitstream_read_bit(&bstream) << 1; /* bz[1] */ 
580 b[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* by[3:0] */ 
581 r[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* ry[3:0] */ 
582 b[3] |= bcdec__bitstream_read_bit(&bstream); /* bz[0] */ 
583 b[3] |= bcdec__bitstream_read_bit(&bstream) << 2; /* bz[2] */ 
584 r[3] |= bcdec__bitstream_read_bits(&bstream, 4); /* rz[3:0] */ 
585 g[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gy[4] */ 
586 b[3] |= bcdec__bitstream_read_bit(&bstream) << 3; /* bz[3] */ 
587 partition = bcdec__bitstream_read_bits(&bstream, 5); /* d[4:0] */ 
588 mode = 3
589 } break
590 
591 /* mode 5 */ 
592 case 0b01010: { 
593 /* Partitition indices: 46 bits 
594 Partition: 5 bits 
595 Color Endpoints: 72 bits (11.444, 11.444, 11.555) */ 
596 r[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* rw[9:0] */ 
597 g[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* gw[9:0] */ 
598 b[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* bw[9:0] */ 
599 r[1] |= bcdec__bitstream_read_bits(&bstream, 4); /* rx[3:0] */ 
600 r[0] |= bcdec__bitstream_read_bit(&bstream) << 10; /* rw[10] */ 
601 b[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* by[4] */ 
602 g[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* gy[3:0] */ 
603 g[1] |= bcdec__bitstream_read_bits(&bstream, 4); /* gx[3:0] */ 
604 g[0] |= bcdec__bitstream_read_bit(&bstream) << 10; /* gw[10] */ 
605 b[3] |= bcdec__bitstream_read_bit(&bstream); /* bz[0] */ 
606 g[3] |= bcdec__bitstream_read_bits(&bstream, 4); /* gz[3:0] */ 
607 b[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* bx[4:0] */ 
608 b[0] |= bcdec__bitstream_read_bit(&bstream) << 10; /* bw[10] */ 
609 b[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* by[3:0] */ 
610 r[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* ry[3:0] */ 
611 b[3] |= bcdec__bitstream_read_bit(&bstream) << 1; /* bz[1] */ 
612 b[3] |= bcdec__bitstream_read_bit(&bstream) << 2; /* bz[2] */ 
613 r[3] |= bcdec__bitstream_read_bits(&bstream, 4); /* rz[3:0] */ 
614 b[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* bz[4] */  
615 b[3] |= bcdec__bitstream_read_bit(&bstream) << 3; /* bz[3] */ 
616 partition = bcdec__bitstream_read_bits(&bstream, 5); /* d[4:0] */ 
617 mode = 4
618 } break
619 
620 /* mode 6 */ 
621 case 0b01110: { 
622 /* Partitition indices: 46 bits 
623 Partition: 5 bits 
624 Color Endpoints: 72 bits (9555, 9555, 9555) */ 
625 r[0] |= bcdec__bitstream_read_bits(&bstream, 9); /* rw[8:0] */ 
626 b[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* by[4] */ 
627 g[0] |= bcdec__bitstream_read_bits(&bstream, 9); /* gw[8:0] */ 
628 g[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gy[4] */ 
629 b[0] |= bcdec__bitstream_read_bits(&bstream, 9); /* bw[8:0] */ 
630 b[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* bz[4] */ 
631 r[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* rx[4:0] */ 
632 g[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gz[4] */ 
633 g[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* gy[3:0] */ 
634 g[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* gx[4:0] */ 
635 b[3] |= bcdec__bitstream_read_bit(&bstream); /* bz[0] */ 
636 g[3] |= bcdec__bitstream_read_bits(&bstream, 4); /* gx[3:0] */ 
637 b[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* bx[4:0] */ 
638 b[3] |= bcdec__bitstream_read_bit(&bstream) << 1; /* bz[1] */ 
639 b[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* by[3:0] */ 
640 r[2] |= bcdec__bitstream_read_bits(&bstream, 5); /* ry[4:0] */ 
641 b[3] |= bcdec__bitstream_read_bit(&bstream) << 2; /* bz[2] */ 
642 r[3] |= bcdec__bitstream_read_bits(&bstream, 5); /* rz[4:0] */ 
643 b[3] |= bcdec__bitstream_read_bit(&bstream) << 3; /* bz[3] */ 
644 partition = bcdec__bitstream_read_bits(&bstream, 5); /* d[4:0] */ 
645 mode = 5
646 } break
647 
648 /* mode 7 */ 
649 case 0b10010: { 
650 /* Partitition indices: 46 bits 
651 Partition: 5 bits 
652 Color Endpoints: 72 bits (8666, 8555, 8555) */ 
653 r[0] |= bcdec__bitstream_read_bits(&bstream, 8); /* rw[7:0] */ 
654 g[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gz[4] */ 
655 b[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* by[4] */ 
656 g[0] |= bcdec__bitstream_read_bits(&bstream, 8); /* gw[7:0] */ 
657 b[3] |= bcdec__bitstream_read_bit(&bstream) << 2; /* bz[2] */ 
658 g[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gy[4] */ 
659 b[0] |= bcdec__bitstream_read_bits(&bstream, 8); /* bw[7:0] */ 
660 b[3] |= bcdec__bitstream_read_bit(&bstream) << 3; /* bz[3] */ 
661 b[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* bz[4] */ 
662 r[1] |= bcdec__bitstream_read_bits(&bstream, 6); /* rx[5:0] */ 
663 g[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* gy[3:0] */ 
664 g[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* gx[4:0] */ 
665 b[3] |= bcdec__bitstream_read_bit(&bstream); /* bz[0] */ 
666 g[3] |= bcdec__bitstream_read_bits(&bstream, 4); /* gz[3:0] */ 
667 b[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* bx[4:0] */ 
668 b[3] |= bcdec__bitstream_read_bit(&bstream) << 1; /* bz[1] */ 
669 b[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* by[3:0] */ 
670 r[2] |= bcdec__bitstream_read_bits(&bstream, 6); /* ry[5:0] */ 
671 r[3] |= bcdec__bitstream_read_bits(&bstream, 6); /* rz[5:0] */ 
672 partition = bcdec__bitstream_read_bits(&bstream, 5); /* d[4:0] */ 
673 mode = 6
674 } break
675 
676 /* mode 8 */ 
677 case 0b10110: { 
678 /* Partitition indices: 46 bits 
679 Partition: 5 bits 
680 Color Endpoints: 72 bits (8555, 8666, 8555) */ 
681 r[0] |= bcdec__bitstream_read_bits(&bstream, 8); /* rw[7:0] */ 
682 b[3] |= bcdec__bitstream_read_bit(&bstream); /* bz[0] */ 
683 b[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* by[4] */ 
684 g[0] |= bcdec__bitstream_read_bits(&bstream, 8); /* gw[7:0] */ 
685 g[2] |= bcdec__bitstream_read_bit(&bstream) << 5; /* gy[5] */ 
686 g[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gy[4] */ 
687 b[0] |= bcdec__bitstream_read_bits(&bstream, 8); /* bw[7:0] */ 
688 g[3] |= bcdec__bitstream_read_bit(&bstream) << 5; /* gz[5] */ 
689 b[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* bz[4] */ 
690 r[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* rx[4:0] */ 
691 g[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gz[4] */ 
692 g[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* gy[3:0] */ 
693 g[1] |= bcdec__bitstream_read_bits(&bstream, 6); /* gx[5:0] */ 
694 g[3] |= bcdec__bitstream_read_bits(&bstream, 4); /* zx[3:0] */ 
695 b[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* bx[4:0] */ 
696 b[3] |= bcdec__bitstream_read_bit(&bstream) << 1; /* bz[1] */ 
697 b[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* by[3:0] */ 
698 r[2] |= bcdec__bitstream_read_bits(&bstream, 5); /* ry[4:0] */ 
699 b[3] |= bcdec__bitstream_read_bit(&bstream) << 2; /* bz[2] */ 
700 r[3] |= bcdec__bitstream_read_bits(&bstream, 5); /* rz[4:0] */ 
701 b[3] |= bcdec__bitstream_read_bit(&bstream) << 3; /* bz[3] */ 
702 partition = bcdec__bitstream_read_bits(&bstream, 5); /* d[4:0] */ 
703 mode = 7
704 } break
705 
706 /* mode 9 */ 
707 case 0b11010: { 
708 /* Partitition indices: 46 bits 
709 Partition: 5 bits 
710 Color Endpoints: 72 bits (8555, 8555, 8666) */ 
711 r[0] |= bcdec__bitstream_read_bits(&bstream, 8); /* rw[7:0] */ 
712 b[3] |= bcdec__bitstream_read_bit(&bstream) << 1; /* bz[1] */ 
713 b[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* by[4] */ 
714 g[0] |= bcdec__bitstream_read_bits(&bstream, 8); /* gw[7:0] */ 
715 b[2] |= bcdec__bitstream_read_bit(&bstream) << 5; /* by[5] */ 
716 g[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gy[4] */ 
717 b[0] |= bcdec__bitstream_read_bits(&bstream, 8); /* bw[7:0] */ 
718 b[3] |= bcdec__bitstream_read_bit(&bstream) << 5; /* bz[5] */ 
719 b[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* bz[4] */ 
720 r[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* bw[4:0] */ 
721 g[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gz[4] */ 
722 g[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* gy[3:0] */ 
723 g[1] |= bcdec__bitstream_read_bits(&bstream, 5); /* gx[4:0] */ 
724 b[3] |= bcdec__bitstream_read_bit(&bstream); /* bz[0] */ 
725 g[3] |= bcdec__bitstream_read_bits(&bstream, 4); /* gz[3:0] */ 
726 b[1] |= bcdec__bitstream_read_bits(&bstream, 6); /* bx[5:0] */ 
727 b[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* by[3:0] */ 
728 r[2] |= bcdec__bitstream_read_bits(&bstream, 5); /* ry[4:0] */ 
729 b[3] |= bcdec__bitstream_read_bit(&bstream) << 2; /* bz[2] */ 
730 r[3] |= bcdec__bitstream_read_bits(&bstream, 5); /* rz[4:0] */ 
731 b[3] |= bcdec__bitstream_read_bit(&bstream) << 3; /* bz[3] */ 
732 partition = bcdec__bitstream_read_bits(&bstream, 5); /* d[4:0] */ 
733 mode = 8
734 } break
735 
736 /* mode 10 */ 
737 case 0b11110: { 
738 /* Partitition indices: 46 bits 
739 Partition: 5 bits 
740 Color Endpoints: 72 bits (6666, 6666, 6666) */ 
741 r[0] |= bcdec__bitstream_read_bits(&bstream, 6); /* rw[5:0] */ 
742 g[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gz[4] */ 
743 b[3] |= bcdec__bitstream_read_bit(&bstream); /* bz[0] */ 
744 b[3] |= bcdec__bitstream_read_bit(&bstream) << 1; /* bz[1] */ 
745 b[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* by[4] */ 
746 g[0] |= bcdec__bitstream_read_bits(&bstream, 6); /* gw[5:0] */ 
747 g[2] |= bcdec__bitstream_read_bit(&bstream) << 5; /* gy[5] */ 
748 b[2] |= bcdec__bitstream_read_bit(&bstream) << 5; /* by[5] */ 
749 b[3] |= bcdec__bitstream_read_bit(&bstream) << 2; /* bz[2] */ 
750 g[2] |= bcdec__bitstream_read_bit(&bstream) << 4; /* gy[4] */ 
751 b[0] |= bcdec__bitstream_read_bits(&bstream, 6); /* bw[5:0] */ 
752 g[3] |= bcdec__bitstream_read_bit(&bstream) << 5; /* gz[5] */ 
753 b[3] |= bcdec__bitstream_read_bit(&bstream) << 3; /* bz[3] */ 
754 b[3] |= bcdec__bitstream_read_bit(&bstream) << 5; /* bz[5] */ 
755 b[3] |= bcdec__bitstream_read_bit(&bstream) << 4; /* bz[4] */ 
756 r[1] |= bcdec__bitstream_read_bits(&bstream, 6); /* rx[5:0] */ 
757 g[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* gy[3:0] */ 
758 g[1] |= bcdec__bitstream_read_bits(&bstream, 6); /* gx[5:0] */ 
759 g[3] |= bcdec__bitstream_read_bits(&bstream, 4); /* gz[3:0] */ 
760 b[1] |= bcdec__bitstream_read_bits(&bstream, 6); /* bx[5:0] */ 
761 b[2] |= bcdec__bitstream_read_bits(&bstream, 4); /* by[3:0] */ 
762 r[2] |= bcdec__bitstream_read_bits(&bstream, 6); /* ry[5:0] */ 
763 r[3] |= bcdec__bitstream_read_bits(&bstream, 6); /* rz[5:0] */ 
764 partition = bcdec__bitstream_read_bits(&bstream, 5); /* d[4:0] */ 
765 mode = 9
766 } break
767 
768 /* mode 11 */ 
769 case 0b00011: { 
770 /* Partitition indices: 63 bits 
771 Partition: 0 bits 
772 Color Endpoints: 60 bits (10.10, 10.10, 10.10) */ 
773 r[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* rw[9:0] */ 
774 g[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* gw[9:0] */ 
775 b[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* bw[9:0] */ 
776 r[1] |= bcdec__bitstream_read_bits(&bstream, 10); /* rx[9:0] */ 
777 g[1] |= bcdec__bitstream_read_bits(&bstream, 10); /* gx[9:0] */ 
778 b[1] |= bcdec__bitstream_read_bits(&bstream, 10); /* bx[9:0] */ 
779 mode = 10
780 } break
781 
782 /* mode 12 */ 
783 case 0b00111: { 
784 /* Partitition indices: 63 bits 
785 Partition: 0 bits 
786 Color Endpoints: 60 bits (11.9, 11.9, 11.9) */ 
787 r[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* rw[9:0] */ 
788 g[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* gw[9:0] */ 
789 b[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* bw[9:0] */ 
790 r[1] |= bcdec__bitstream_read_bits(&bstream, 9); /* rx[8:0] */ 
791 r[0] |= bcdec__bitstream_read_bit(&bstream) << 10; /* rw[10] */ 
792 g[1] |= bcdec__bitstream_read_bits(&bstream, 9); /* gx[8:0] */ 
793 g[0] |= bcdec__bitstream_read_bit(&bstream) << 10; /* gw[10] */ 
794 b[1] |= bcdec__bitstream_read_bits(&bstream, 9); /* bx[8:0] */ 
795 b[0] |= bcdec__bitstream_read_bit(&bstream) << 10; /* bw[10] */ 
796 mode = 11
797 } break
798 
799 /* mode 13 */ 
800 case 0b01011: { 
801 /* Partitition indices: 63 bits 
802 Partition: 0 bits 
803 Color Endpoints: 60 bits (12.8, 12.8, 12.8) */ 
804 r[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* rw[9:0] */ 
805 g[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* gw[9:0] */ 
806 b[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* bw[9:0] */ 
807 r[1] |= bcdec__bitstream_read_bits(&bstream, 8); /* rx[7:0] */ 
808 r[0] |= bcdec__bitstream_read_bits_r(&bstream, 2) << 10;/* rx[10:11] */ 
809 g[1] |= bcdec__bitstream_read_bits(&bstream, 8); /* gx[7:0] */ 
810 g[0] |= bcdec__bitstream_read_bits_r(&bstream, 2) << 10;/* gx[10:11] */ 
811 b[1] |= bcdec__bitstream_read_bits(&bstream, 8); /* bx[7:0] */ 
812 b[0] |= bcdec__bitstream_read_bits_r(&bstream, 2) << 10;/* bx[10:11] */ 
813 mode = 12
814 } break
815 
816 /* mode 14 */ 
817 case 0b01111: { 
818 /* Partitition indices: 63 bits 
819 Partition: 0 bits 
820 Color Endpoints: 60 bits (16.4, 16.4, 16.4) */ 
821 r[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* rw[9:0] */ 
822 g[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* gw[9:0] */ 
823 b[0] |= bcdec__bitstream_read_bits(&bstream, 10); /* bw[9:0] */ 
824 r[1] |= bcdec__bitstream_read_bits(&bstream, 4); /* rx[3:0] */ 
825 r[0] |= bcdec__bitstream_read_bits_r(&bstream, 6) << 10;/* rw[10:15] */ 
826 g[1] |= bcdec__bitstream_read_bits(&bstream, 4); /* gx[3:0] */ 
827 g[0] |= bcdec__bitstream_read_bits_r(&bstream, 6) << 10;/* gw[10:15] */ 
828 b[1] |= bcdec__bitstream_read_bits(&bstream, 4); /* bx[3:0] */ 
829 b[0] |= bcdec__bitstream_read_bits_r(&bstream, 6) << 10;/* bw[10:15] */ 
830 mode = 13
831 } break
832 
833 default: { 
834 /* Modes 10011, 10111, 11011, and 11111 (not shown) are reserved. 
835 Do not use these in your encoder. If the hardware is passed blocks 
836 with one of these modes specified, the resulting decompressed block 
837 must contain all zeroes in all channels except for the alpha channel. */ 
838 for (i = 0; i < 4; ++i) { 
839 for (j = 0; j < 4; ++j) { 
840 decompressed[j * 3 + 0] = 0
841 decompressed[j * 3 + 1] = 0
842 decompressed[j * 3 + 2] = 0
843
844 decompressed += destinationPitch; 
845
846 
847 return
848
849
850 
851 numPartitions = (mode >= 10) ? 0 : 1
852 
853 actualBits0Mode = actual_bits_count[0][mode]; 
854 if (isSigned) { 
855 r[0] = bcdec__extend_sign(r[0], actualBits0Mode); 
856 g[0] = bcdec__extend_sign(g[0], actualBits0Mode); 
857 b[0] = bcdec__extend_sign(b[0], actualBits0Mode); 
858
859 
860 /* Mode 11 (like Mode 10) does not use delta compression, 
861 and instead stores both color endpoints explicitly. */ 
862 if ((mode != 9 && mode != 10) || isSigned) { 
863 for (i = 1; i < (numPartitions + 1) * 2; ++i) { 
864 r[i] = bcdec__extend_sign(r[i], actual_bits_count[1][mode]); 
865 g[i] = bcdec__extend_sign(g[i], actual_bits_count[2][mode]); 
866 b[i] = bcdec__extend_sign(b[i], actual_bits_count[3][mode]); 
867
868
869 
870 if (mode != 9 && mode != 10) { 
871 for (i = 1; i < (numPartitions + 1) * 2; ++i) { 
872 r[i] = bcdec__transform_inverse(r[i], r[0], actualBits0Mode, isSigned); 
873 g[i] = bcdec__transform_inverse(g[i], g[0], actualBits0Mode, isSigned); 
874 b[i] = bcdec__transform_inverse(b[i], b[0], actualBits0Mode, isSigned); 
875
876
877 
878 for (i = 0; i < (numPartitions + 1) * 2; ++i) { 
879 r[i] = bcdec__unquantize(r[i], actualBits0Mode, isSigned); 
880 g[i] = bcdec__unquantize(g[i], actualBits0Mode, isSigned); 
881 b[i] = bcdec__unquantize(b[i], actualBits0Mode, isSigned); 
882
883 
884 weights = (mode >= 10) ? aWeight4 : aWeight3; 
885 for (i = 0; i < 4; ++i) { 
886 for (j = 0; j < 4; ++j) { 
887 partitionSet = (mode >= 10) ? ((i|j) ? 0 : 128) : partition_sets[partition][i][j]; 
888 
889 indexBits = (mode >= 10) ? 4 : 3
890 /* fix-up index is specified with one less bit */ 
891 /* The fix-up index for subset 0 is always index 0 */ 
892 if (partitionSet & 0x80) { 
893 indexBits--; 
894
895 partitionSet &= 0x01
896 
897 index = bcdec__bitstream_read_bits(&bstream, indexBits); 
898 
899 ep_i = partitionSet * 2
900 decompressed[j * 3 + 0] = bcdec__finish_unquantize( 
901 bcdec__interpolate(r[ep_i], r[ep_i+1], weights, index), isSigned); 
902 decompressed[j * 3 + 1] = bcdec__finish_unquantize( 
903 bcdec__interpolate(g[ep_i], g[ep_i+1], weights, index), isSigned); 
904 decompressed[j * 3 + 2] = bcdec__finish_unquantize( 
905 bcdec__interpolate(b[ep_i], b[ep_i+1], weights, index), isSigned); 
906
907 
908 decompressed += destinationPitch; 
909
910
911 
912BCDECDEF void bcdec_bc6h_float(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int isSigned) { 
913 unsigned short block[16*3]; 
914 float* decompressed; 
915 const unsigned short* b; 
916 int i, j; 
917 
918 bcdec_bc6h_half(compressedBlock, block, 4*3, isSigned); 
919 b = block; 
920 decompressed = (float*)decompressedBlock; 
921 for (i = 0; i < 4; ++i) { 
922 for (j = 0; j < 4; ++j) { 
923 decompressed[j * 3 + 0] = bcdec__half_to_float_quick(*b++); 
924 decompressed[j * 3 + 1] = bcdec__half_to_float_quick(*b++); 
925 decompressed[j * 3 + 2] = bcdec__half_to_float_quick(*b++); 
926
927 decompressed += destinationPitch; 
928
929
930 
/* Exchanges the two integers pointed to by a and b.

   A temporary is used instead of the XOR trick: XOR-swapping an object with
   itself (a == b) would zero it, while this version leaves it unchanged. */
static void bcdec__swap_values(int* a, int* b) {
    int tmp = a[0];
    a[0] = b[0];
    b[0] = tmp;
}
934 
935BCDECDEF void bcdec_bc7(const void* compressedBlock, void* decompressedBlock, int destinationPitch) { 
936 static char actual_bits_count[2][8] = { 
937 { 4, 6, 5, 7, 5, 7, 7, 5 }, /* RGBA */ 
938 { 0, 0, 0, 0, 6, 8, 7, 5 }, /* Alpha */ 
939 }; 
940 
941 /* There are 64 possible partition sets for a two-region tile. 
942 Each 4x4 block represents a single shape. 
943 Here also every fix-up index has MSB bit set. */ 
944 static unsigned char partition_sets[2][64][4][4] = { 
945 { /* Partition table for 2-subset BPTC */ 
946 { {128, 0, 1, 1}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {0, 0, 1, 129} }, /* 0 */ 
947 { {128, 0, 0, 1}, {0, 0, 0, 1}, { 0, 0, 0, 1}, {0, 0, 0, 129} }, /* 1 */ 
948 { {128, 1, 1, 1}, {0, 1, 1, 1}, { 0, 1, 1, 1}, {0, 1, 1, 129} }, /* 2 */ 
949 { {128, 0, 0, 1}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {0, 1, 1, 129} }, /* 3 */ 
950 { {128, 0, 0, 0}, {0, 0, 0, 1}, { 0, 0, 0, 1}, {0, 0, 1, 129} }, /* 4 */ 
951 { {128, 0, 1, 1}, {0, 1, 1, 1}, { 0, 1, 1, 1}, {1, 1, 1, 129} }, /* 5 */ 
952 { {128, 0, 0, 1}, {0, 0, 1, 1}, { 0, 1, 1, 1}, {1, 1, 1, 129} }, /* 6 */ 
953 { {128, 0, 0, 0}, {0, 0, 0, 1}, { 0, 0, 1, 1}, {0, 1, 1, 129} }, /* 7 */ 
954 { {128, 0, 0, 0}, {0, 0, 0, 0}, { 0, 0, 0, 1}, {0, 0, 1, 129} }, /* 8 */ 
955 { {128, 0, 1, 1}, {0, 1, 1, 1}, { 1, 1, 1, 1}, {1, 1, 1, 129} }, /* 9 */ 
956 { {128, 0, 0, 0}, {0, 0, 0, 1}, { 0, 1, 1, 1}, {1, 1, 1, 129} }, /* 10 */ 
957 { {128, 0, 0, 0}, {0, 0, 0, 0}, { 0, 0, 0, 1}, {0, 1, 1, 129} }, /* 11 */ 
958 { {128, 0, 0, 1}, {0, 1, 1, 1}, { 1, 1, 1, 1}, {1, 1, 1, 129} }, /* 12 */ 
959 { {128, 0, 0, 0}, {0, 0, 0, 0}, { 1, 1, 1, 1}, {1, 1, 1, 129} }, /* 13 */ 
960 { {128, 0, 0, 0}, {1, 1, 1, 1}, { 1, 1, 1, 1}, {1, 1, 1, 129} }, /* 14 */ 
961 { {128, 0, 0, 0}, {0, 0, 0, 0}, { 0, 0, 0, 0}, {1, 1, 1, 129} }, /* 15 */ 
962 { {128, 0, 0, 0}, {1, 0, 0, 0}, { 1, 1, 1, 0}, {1, 1, 1, 129} }, /* 16 */ 
963 { {128, 1, 129, 1}, {0, 0, 0, 1}, { 0, 0, 0, 0}, {0, 0, 0, 0} }, /* 17 */ 
964 { {128, 0, 0, 0}, {0, 0, 0, 0}, {129, 0, 0, 0}, {1, 1, 1, 0} }, /* 18 */ 
965 { {128, 1, 129, 1}, {0, 0, 1, 1}, { 0, 0, 0, 1}, {0, 0, 0, 0} }, /* 19 */ 
966 { {128, 0, 129, 1}, {0, 0, 0, 1}, { 0, 0, 0, 0}, {0, 0, 0, 0} }, /* 20 */ 
967 { {128, 0, 0, 0}, {1, 0, 0, 0}, {129, 1, 0, 0}, {1, 1, 1, 0} }, /* 21 */ 
968 { {128, 0, 0, 0}, {0, 0, 0, 0}, {129, 0, 0, 0}, {1, 1, 0, 0} }, /* 22 */ 
969 { {128, 1, 1, 1}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {0, 0, 0, 129} }, /* 23 */ 
970 { {128, 0, 129, 1}, {0, 0, 0, 1}, { 0, 0, 0, 1}, {0, 0, 0, 0} }, /* 24 */ 
971 { {128, 0, 0, 0}, {1, 0, 0, 0}, {129, 0, 0, 0}, {1, 1, 0, 0} }, /* 25 */ 
972 { {128, 1, 129, 0}, {0, 1, 1, 0}, { 0, 1, 1, 0}, {0, 1, 1, 0} }, /* 26 */ 
973 { {128, 0, 129, 1}, {0, 1, 1, 0}, { 0, 1, 1, 0}, {1, 1, 0, 0} }, /* 27 */ 
974 { {128, 0, 0, 1}, {0, 1, 1, 1}, {129, 1, 1, 0}, {1, 0, 0, 0} }, /* 28 */ 
975 { {128, 0, 0, 0}, {1, 1, 1, 1}, {129, 1, 1, 1}, {0, 0, 0, 0} }, /* 29 */ 
976 { {128, 1, 129, 1}, {0, 0, 0, 1}, { 1, 0, 0, 0}, {1, 1, 1, 0} }, /* 30 */ 
977 { {128, 0, 129, 1}, {1, 0, 0, 1}, { 1, 0, 0, 1}, {1, 1, 0, 0} }, /* 31 */ 
978 { {128, 1, 0, 1}, {0, 1, 0, 1}, { 0, 1, 0, 1}, {0, 1, 0, 129} }, /* 32 */ 
979 { {128, 0, 0, 0}, {1, 1, 1, 1}, { 0, 0, 0, 0}, {1, 1, 1, 129} }, /* 33 */ 
980 { {128, 1, 0, 1}, {1, 0, 129, 0}, { 0, 1, 0, 1}, {1, 0, 1, 0} }, /* 34 */ 
981 { {128, 0, 1, 1}, {0, 0, 1, 1}, {129, 1, 0, 0}, {1, 1, 0, 0} }, /* 35 */ 
982 { {128, 0, 129, 1}, {1, 1, 0, 0}, { 0, 0, 1, 1}, {1, 1, 0, 0} }, /* 36 */ 
983 { {128, 1, 0, 1}, {0, 1, 0, 1}, {129, 0, 1, 0}, {1, 0, 1, 0} }, /* 37 */ 
984 { {128, 1, 1, 0}, {1, 0, 0, 1}, { 0, 1, 1, 0}, {1, 0, 0, 129} }, /* 38 */ 
985 { {128, 1, 0, 1}, {1, 0, 1, 0}, { 1, 0, 1, 0}, {0, 1, 0, 129} }, /* 39 */ 
986 { {128, 1, 129, 1}, {0, 0, 1, 1}, { 1, 1, 0, 0}, {1, 1, 1, 0} }, /* 40 */ 
987 { {128, 0, 0, 1}, {0, 0, 1, 1}, {129, 1, 0, 0}, {1, 0, 0, 0} }, /* 41 */ 
988 { {128, 0, 129, 1}, {0, 0, 1, 0}, { 0, 1, 0, 0}, {1, 1, 0, 0} }, /* 42 */ 
989 { {128, 0, 129, 1}, {1, 0, 1, 1}, { 1, 1, 0, 1}, {1, 1, 0, 0} }, /* 43 */ 
990 { {128, 1, 129, 0}, {1, 0, 0, 1}, { 1, 0, 0, 1}, {0, 1, 1, 0} }, /* 44 */ 
991 { {128, 0, 1, 1}, {1, 1, 0, 0}, { 1, 1, 0, 0}, {0, 0, 1, 129} }, /* 45 */ 
992 { {128, 1, 1, 0}, {0, 1, 1, 0}, { 1, 0, 0, 1}, {1, 0, 0, 129} }, /* 46 */ 
993 { {128, 0, 0, 0}, {0, 1, 129, 0}, { 0, 1, 1, 0}, {0, 0, 0, 0} }, /* 47 */ 
994 { {128, 1, 0, 0}, {1, 1, 129, 0}, { 0, 1, 0, 0}, {0, 0, 0, 0} }, /* 48 */ 
995 { {128, 0, 129, 0}, {0, 1, 1, 1}, { 0, 0, 1, 0}, {0, 0, 0, 0} }, /* 49 */ 
996 { {128, 0, 0, 0}, {0, 0, 129, 0}, { 0, 1, 1, 1}, {0, 0, 1, 0} }, /* 50 */ 
997 { {128, 0, 0, 0}, {0, 1, 0, 0}, {129, 1, 1, 0}, {0, 1, 0, 0} }, /* 51 */ 
998 { {128, 1, 1, 0}, {1, 1, 0, 0}, { 1, 0, 0, 1}, {0, 0, 1, 129} }, /* 52 */ 
999 { {128, 0, 1, 1}, {0, 1, 1, 0}, { 1, 1, 0, 0}, {1, 0, 0, 129} }, /* 53 */ 
1000 { {128, 1, 129, 0}, {0, 0, 1, 1}, { 1, 0, 0, 1}, {1, 1, 0, 0} }, /* 54 */ 
1001 { {128, 0, 129, 1}, {1, 0, 0, 1}, { 1, 1, 0, 0}, {0, 1, 1, 0} }, /* 55 */ 
1002 { {128, 1, 1, 0}, {1, 1, 0, 0}, { 1, 1, 0, 0}, {1, 0, 0, 129} }, /* 56 */ 
1003 { {128, 1, 1, 0}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {1, 0, 0, 129} }, /* 57 */ 
1004 { {128, 1, 1, 1}, {1, 1, 1, 0}, { 1, 0, 0, 0}, {0, 0, 0, 129} }, /* 58 */ 
1005 { {128, 0, 0, 1}, {1, 0, 0, 0}, { 1, 1, 1, 0}, {0, 1, 1, 129} }, /* 59 */ 
1006 { {128, 0, 0, 0}, {1, 1, 1, 1}, { 0, 0, 1, 1}, {0, 0, 1, 129} }, /* 60 */ 
1007 { {128, 0, 129, 1}, {0, 0, 1, 1}, { 1, 1, 1, 1}, {0, 0, 0, 0} }, /* 61 */ 
1008 { {128, 0, 129, 0}, {0, 0, 1, 0}, { 1, 1, 1, 0}, {1, 1, 1, 0} }, /* 62 */ 
1009 { {128, 1, 0, 0}, {0, 1, 0, 0}, { 0, 1, 1, 1}, {0, 1, 1, 129} } /* 63 */ 
1010 }, 
1011 { /* Partition table for 3-subset BPTC */ 
1012 { {128, 0, 1, 129}, {0, 0, 1, 1}, { 0, 2, 2, 1}, { 2, 2, 2, 130} }, /* 0 */ 
1013 { {128, 0, 0, 129}, {0, 0, 1, 1}, {130, 2, 1, 1}, { 2, 2, 2, 1} }, /* 1 */ 
1014 { {128, 0, 0, 0}, {2, 0, 0, 1}, {130, 2, 1, 1}, { 2, 2, 1, 129} }, /* 2 */ 
1015 { {128, 2, 2, 130}, {0, 0, 2, 2}, { 0, 0, 1, 1}, { 0, 1, 1, 129} }, /* 3 */ 
1016 { {128, 0, 0, 0}, {0, 0, 0, 0}, {129, 1, 2, 2}, { 1, 1, 2, 130} }, /* 4 */ 
1017 { {128, 0, 1, 129}, {0, 0, 1, 1}, { 0, 0, 2, 2}, { 0, 0, 2, 130} }, /* 5 */ 
1018 { {128, 0, 2, 130}, {0, 0, 2, 2}, { 1, 1, 1, 1}, { 1, 1, 1, 129} }, /* 6 */ 
1019 { {128, 0, 1, 1}, {0, 0, 1, 1}, {130, 2, 1, 1}, { 2, 2, 1, 129} }, /* 7 */ 
1020 { {128, 0, 0, 0}, {0, 0, 0, 0}, {129, 1, 1, 1}, { 2, 2, 2, 130} }, /* 8 */ 
1021 { {128, 0, 0, 0}, {1, 1, 1, 1}, {129, 1, 1, 1}, { 2, 2, 2, 130} }, /* 9 */ 
1022 { {128, 0, 0, 0}, {1, 1, 129, 1}, { 2, 2, 2, 2}, { 2, 2, 2, 130} }, /* 10 */ 
1023 { {128, 0, 1, 2}, {0, 0, 129, 2}, { 0, 0, 1, 2}, { 0, 0, 1, 130} }, /* 11 */ 
1024 { {128, 1, 1, 2}, {0, 1, 129, 2}, { 0, 1, 1, 2}, { 0, 1, 1, 130} }, /* 12 */ 
1025 { {128, 1, 2, 2}, {0, 129, 2, 2}, { 0, 1, 2, 2}, { 0, 1, 2, 130} }, /* 13 */ 
1026 { {128, 0, 1, 129}, {0, 1, 1, 2}, { 1, 1, 2, 2}, { 1, 2, 2, 130} }, /* 14 */ 
1027 { {128, 0, 1, 129}, {2, 0, 0, 1}, {130, 2, 0, 0}, { 2, 2, 2, 0} }, /* 15 */ 
1028 { {128, 0, 0, 129}, {0, 0, 1, 1}, { 0, 1, 1, 2}, { 1, 1, 2, 130} }, /* 16 */ 
1029 { {128, 1, 1, 129}, {0, 0, 1, 1}, {130, 0, 0, 1}, { 2, 2, 0, 0} }, /* 17 */ 
1030 { {128, 0, 0, 0}, {1, 1, 2, 2}, {129, 1, 2, 2}, { 1, 1, 2, 130} }, /* 18 */ 
1031 { {128, 0, 2, 130}, {0, 0, 2, 2}, { 0, 0, 2, 2}, { 1, 1, 1, 129} }, /* 19 */ 
1032 { {128, 1, 1, 129}, {0, 1, 1, 1}, { 0, 2, 2, 2}, { 0, 2, 2, 130} }, /* 20 */ 
1033 { {128, 0, 0, 129}, {0, 0, 0, 1}, {130, 2, 2, 1}, { 2, 2, 2, 1} }, /* 21 */ 
1034 { {128, 0, 0, 0}, {0, 0, 129, 1}, { 0, 1, 2, 2}, { 0, 1, 2, 130} }, /* 22 */ 
1035 { {128, 0, 0, 0}, {1, 1, 0, 0}, {130, 2, 129, 0}, { 2, 2, 1, 0} }, /* 23 */ 
1036 { {128, 1, 2, 130}, {0, 129, 2, 2}, { 0, 0, 1, 1}, { 0, 0, 0, 0} }, /* 24 */ 
1037 { {128, 0, 1, 2}, {0, 0, 1, 2}, {129, 1, 2, 2}, { 2, 2, 2, 130} }, /* 25 */ 
1038 { {128, 1, 1, 0}, {1, 2, 130, 1}, {129, 2, 2, 1}, { 0, 1, 1, 0} }, /* 26 */ 
1039 { {128, 0, 0, 0}, {0, 1, 129, 0}, { 1, 2, 130, 1}, { 1, 2, 2, 1} }, /* 27 */ 
1040 { {128, 0, 2, 2}, {1, 1, 0, 2}, {129, 1, 0, 2}, { 0, 0, 2, 130} }, /* 28 */ 
1041 { {128, 1, 1, 0}, {0, 129, 1, 0}, { 2, 0, 0, 2}, { 2, 2, 2, 130} }, /* 29 */ 
1042 { {128, 0, 1, 1}, {0, 1, 2, 2}, { 0, 1, 130, 2}, { 0, 0, 1, 129} }, /* 30 */ 
1043 { {128, 0, 0, 0}, {2, 0, 0, 0}, {130, 2, 1, 1}, { 2, 2, 2, 129} }, /* 31 */ 
1044 { {128, 0, 0, 0}, {0, 0, 0, 2}, {129, 1, 2, 2}, { 1, 2, 2, 130} }, /* 32 */ 
1045 { {128, 2, 2, 130}, {0, 0, 2, 2}, { 0, 0, 1, 2}, { 0, 0, 1, 129} }, /* 33 */ 
1046 { {128, 0, 1, 129}, {0, 0, 1, 2}, { 0, 0, 2, 2}, { 0, 2, 2, 130} }, /* 34 */ 
1047 { {128, 1, 2, 0}, {0, 129, 2, 0}, { 0, 1, 130, 0}, { 0, 1, 2, 0} }, /* 35 */ 
1048 { {128, 0, 0, 0}, {1, 1, 129, 1}, { 2, 2, 130, 2}, { 0, 0, 0, 0} }, /* 36 */ 
1049 { {128, 1, 2, 0}, {1, 2, 0, 1}, {130, 0, 129, 2}, { 0, 1, 2, 0} }, /* 37 */ 
1050 { {128, 1, 2, 0}, {2, 0, 1, 2}, {129, 130, 0, 1}, { 0, 1, 2, 0} }, /* 38 */ 
1051 { {128, 0, 1, 1}, {2, 2, 0, 0}, { 1, 1, 130, 2}, { 0, 0, 1, 129} }, /* 39 */ 
1052 { {128, 0, 1, 1}, {1, 1, 130, 2}, { 2, 2, 0, 0}, { 0, 0, 1, 129} }, /* 40 */ 
1053 { {128, 1, 0, 129}, {0, 1, 0, 1}, { 2, 2, 2, 2}, { 2, 2, 2, 130} }, /* 41 */ 
1054 { {128, 0, 0, 0}, {0, 0, 0, 0}, {130, 1, 2, 1}, { 2, 1, 2, 129} }, /* 42 */ 
1055 { {128, 0, 2, 2}, {1, 129, 2, 2}, { 0, 0, 2, 2}, { 1, 1, 2, 130} }, /* 43 */ 
1056 { {128, 0, 2, 130}, {0, 0, 1, 1}, { 0, 0, 2, 2}, { 0, 0, 1, 129} }, /* 44 */ 
1057 { {128, 2, 2, 0}, {1, 2, 130, 1}, { 0, 2, 2, 0}, { 1, 2, 2, 129} }, /* 45 */ 
1058 { {128, 1, 0, 1}, {2, 2, 130, 2}, { 2, 2, 2, 2}, { 0, 1, 0, 129} }, /* 46 */ 
1059 { {128, 0, 0, 0}, {2, 1, 2, 1}, {130, 1, 2, 1}, { 2, 1, 2, 129} }, /* 47 */ 
1060 { {128, 1, 0, 129}, {0, 1, 0, 1}, { 0, 1, 0, 1}, { 2, 2, 2, 130} }, /* 48 */ 
1061 { {128, 2, 2, 130}, {0, 1, 1, 1}, { 0, 2, 2, 2}, { 0, 1, 1, 129} }, /* 49 */ 
1062 { {128, 0, 0, 2}, {1, 129, 1, 2}, { 0, 0, 0, 2}, { 1, 1, 1, 130} }, /* 50 */ 
1063 { {128, 0, 0, 0}, {2, 129, 1, 2}, { 2, 1, 1, 2}, { 2, 1, 1, 130} }, /* 51 */ 
1064 { {128, 2, 2, 2}, {0, 129, 1, 1}, { 0, 1, 1, 1}, { 0, 2, 2, 130} }, /* 52 */ 
1065 { {128, 0, 0, 2}, {1, 1, 1, 2}, {129, 1, 1, 2}, { 0, 0, 0, 130} }, /* 53 */ 
1066 { {128, 1, 1, 0}, {0, 129, 1, 0}, { 0, 1, 1, 0}, { 2, 2, 2, 130} }, /* 54 */ 
1067 { {128, 0, 0, 0}, {0, 0, 0, 0}, { 2, 1, 129, 2}, { 2, 1, 1, 130} }, /* 55 */ 
1068 { {128, 1, 1, 0}, {0, 129, 1, 0}, { 2, 2, 2, 2}, { 2, 2, 2, 130} }, /* 56 */ 
1069 { {128, 0, 2, 2}, {0, 0, 1, 1}, { 0, 0, 129, 1}, { 0, 0, 2, 130} }, /* 57 */ 
1070 { {128, 0, 2, 2}, {1, 1, 2, 2}, {129, 1, 2, 2}, { 0, 0, 2, 130} }, /* 58 */ 
1071 { {128, 0, 0, 0}, {0, 0, 0, 0}, { 0, 0, 0, 0}, { 2, 129, 1, 130} }, /* 59 */ 
1072 { {128, 0, 0, 130}, {0, 0, 0, 1}, { 0, 0, 0, 2}, { 0, 0, 0, 129} }, /* 60 */ 
1073 { {128, 2, 2, 2}, {1, 2, 2, 2}, { 0, 2, 2, 2}, {129, 2, 2, 130} }, /* 61 */ 
1074 { {128, 1, 0, 129}, {2, 2, 2, 2}, { 2, 2, 2, 2}, { 2, 2, 2, 130} }, /* 62 */ 
1075 { {128, 1, 1, 129}, {2, 0, 1, 1}, {130, 2, 0, 1}, { 2, 2, 2, 0} } /* 63 */ 
1076
1077 }; 
1078 
1079 static int aWeight2[] = { 0, 21, 43, 64 }; 
1080 static int aWeight3[] = { 0, 9, 18, 27, 37, 46, 55, 64 }; 
1081 static int aWeight4[] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; 
1082 
1083 static unsigned char sModeHasPBits = 0b11001011
1084 
1085 bcdec__bitstream_t bstream; 
1086 int mode, partition, numPartitions, numEndpoints, i, j, k, rotation, partitionSet; 
1087 int indexSelectionBit, indexBits, indexBits2, index, index2; 
1088 int endpoints[6][4]; 
1089 char indices[4][4]; 
1090 int r, g, b, a; 
1091 int* weights, * weights2; 
1092 unsigned char* decompressed; 
1093 
1094 decompressed = (unsigned char*)decompressedBlock; 
1095 
1096 bstream.low = ((unsigned long long*)compressedBlock)[0]; 
1097 bstream.high = ((unsigned long long*)compressedBlock)[1]; 
1098 
1099 for (mode = 0; mode < 8 && (0 == bcdec__bitstream_read_bit(&bstream)); ++mode); 
1100 
1101 /* unexpected mode, clear the block (transparent black) */ 
1102 if (mode >= 8) { 
1103 for (i = 0; i < 4; ++i) { 
1104 for (j = 0; j < 4; ++j) { 
1105 decompressed[j * 4 + 0] = 0
1106 decompressed[j * 4 + 1] = 0
1107 decompressed[j * 4 + 2] = 0
1108 decompressed[j * 4 + 3] = 0
1109
1110 decompressed += destinationPitch; 
1111
1112 
1113 return
1114
1115 
1116 partition = 0
1117 numPartitions = 1
1118 rotation = 0
1119 indexSelectionBit = 0
1120 
1121 if (mode == 0 || mode == 1 || mode == 2 || mode == 3 || mode == 7) { 
1122 numPartitions = (mode == 0 || mode == 2) ? 3 : 2
1123 partition = bcdec__bitstream_read_bits(&bstream, (mode == 0) ? 4 : 6); 
1124
1125 
1126 numEndpoints = numPartitions * 2
1127 
1128 if (mode == 4 || mode == 5) { 
1129 rotation = bcdec__bitstream_read_bits(&bstream, 2); 
1130 
1131 if (mode == 4) { 
1132 indexSelectionBit = bcdec__bitstream_read_bit(&bstream); 
1133
1134
1135 
1136 /* Extract endpoints */ 
1137 /* RGB */ 
1138 for (i = 0; i < 3; ++i) { 
1139 for (j = 0; j < numEndpoints; ++j) { 
1140 endpoints[j][i] = bcdec__bitstream_read_bits(&bstream, actual_bits_count[0][mode]); 
1141
1142
1143 /* Alpha (if any) */ 
1144 if (actual_bits_count[1][mode] > 0) { 
1145 for (j = 0; j < numEndpoints; ++j) { 
1146 endpoints[j][3] = bcdec__bitstream_read_bits(&bstream, actual_bits_count[1][mode]); 
1147
1148
1149 
1150 /* Fully decode endpoints */ 
1151 /* First handle modes that have P-bits */ 
1152 if (mode == 0 || mode == 1 || mode == 3 || mode == 6 || mode == 7) { 
1153 for (i = 0; i < numEndpoints; ++i) { 
1154 /* component-wise left-shift */ 
1155 for (j = 0; j < 4; ++j) { 
1156 endpoints[i][j] <<= 1
1157
1158
1159 
1160 /* if P-bit is shared */ 
1161 if (mode == 1) { 
1162 i = bcdec__bitstream_read_bit(&bstream); 
1163 j = bcdec__bitstream_read_bit(&bstream); 
1164 
1165 /* rgb component-wise insert pbits */ 
1166 for (k = 0; k < 3; ++k) { 
1167 endpoints[0][k] |= i; 
1168 endpoints[1][k] |= i; 
1169 endpoints[2][k] |= j; 
1170 endpoints[3][k] |= j; 
1171
1172 } else if (sModeHasPBits & (1 << mode)) { 
1173 /* unique P-bit per endpoint */ 
1174 for (i = 0; i < numEndpoints; ++i) { 
1175 j = bcdec__bitstream_read_bit(&bstream); 
1176 for (k = 0; k < 4; ++k) { 
1177 endpoints[i][k] |= j; 
1178
1179
1180
1181
1182 
1183 for (i = 0; i < numEndpoints; ++i) { 
1184 /* get color components precision including pbit */ 
1185 j = actual_bits_count[0][mode] + ((sModeHasPBits >> mode) & 1); 
1186 
1187 for (k = 0; k < 3; ++k) { 
1188 /* left shift endpoint components so that their MSB lies in bit 7 */ 
1189 endpoints[i][k] = endpoints[i][k] << (8 - j); 
1190 /* Replicate each component's MSB into the LSBs revealed by the left-shift operation above */ 
1191 endpoints[i][k] = endpoints[i][k] | (endpoints[i][k] >> j); 
1192
1193 
1194 /* get alpha component precision including pbit */ 
1195 j = actual_bits_count[1][mode] + ((sModeHasPBits >> mode) & 1); 
1196 
1197 /* left shift endpoint components so that their MSB lies in bit 7 */ 
1198 endpoints[i][3] = endpoints[i][3] << (8 - j); 
1199 /* Replicate each component's MSB into the LSBs revealed by the left-shift operation above */ 
1200 endpoints[i][3] = endpoints[i][3] | (endpoints[i][3] >> j); 
1201
1202 
1203 /* If this mode does not explicitly define the alpha component */ 
1204 /* set alpha equal to 1.0 */ 
1205 if (!actual_bits_count[1][mode]) { 
1206 for (j = 0; j < numEndpoints; ++j) { 
1207 endpoints[j][3] = 0xFF
1208
1209
1210 
1211 /* Determine weights tables */ 
1212 indexBits = (mode == 0 || mode == 1) ? 3 : ((mode == 6) ? 4 : 2); 
1213 indexBits2 = (mode == 4) ? 3 : ((mode == 5) ? 2 : 0); 
1214 weights = (indexBits == 2) ? aWeight2 : ((indexBits == 3) ? aWeight3 : aWeight4); 
1215 weights2 = (indexBits2 == 2) ? aWeight2 : aWeight3; 
1216 
1217 /* Quite inconvenient that indices aren't interleaved so we have to make 2 passes here */ 
1218 /* Pass #1: collecting color indices */ 
1219 for (i = 0; i < 4; ++i) { 
1220 for (j = 0; j < 4; ++j) { 
1221 partitionSet = (numPartitions == 1) ? ((i | j) ? 0 : 128) : partition_sets[numPartitions - 2][partition][i][j]; 
1222 
1223 indexBits = (mode == 0 || mode == 1) ? 3 : ((mode == 6) ? 4 : 2); 
1224 /* fix-up index is specified with one less bit */ 
1225 /* The fix-up index for subset 0 is always index 0 */ 
1226 if (partitionSet & 0x80) { 
1227 indexBits--; 
1228
1229 
1230 indices[i][j] = bcdec__bitstream_read_bits(&bstream, indexBits); 
1231
1232
1233 
1234 /* Pass #2: reading alpha indices (if any) and interpolating & rotating */ 
1235 for (i = 0; i < 4; ++i) { 
1236 for (j = 0; j < 4; ++j) { 
1237 partitionSet = (numPartitions == 1) ? ((i|j) ? 0 : 128) : partition_sets[numPartitions - 2][partition][i][j]; 
1238 partitionSet &= 0x03
1239 
1240 index = indices[i][j]; 
1241 
1242 if (!indexBits2) { 
1243 r = bcdec__interpolate(endpoints[partitionSet * 2][0], endpoints[partitionSet * 2 + 1][0], weights, index); 
1244 g = bcdec__interpolate(endpoints[partitionSet * 2][1], endpoints[partitionSet * 2 + 1][1], weights, index); 
1245 b = bcdec__interpolate(endpoints[partitionSet * 2][2], endpoints[partitionSet * 2 + 1][2], weights, index); 
1246 a = bcdec__interpolate(endpoints[partitionSet * 2][3], endpoints[partitionSet * 2 + 1][3], weights, index); 
1247 } else
1248 index2 = bcdec__bitstream_read_bits(&bstream, (i|j) ? indexBits2 : (indexBits2 - 1)); 
1249 /* The index value for interpolating color comes from the secondary index bits for the texel 
1250 if the mode has an index selection bit and its value is one, and from the primary index bits otherwise. 
1251 The alpha index comes from the secondary index bits if the block has a secondary index and 
1252 the block either doesn’t have an index selection bit or that bit is zero, and from the primary index bits otherwise. */ 
1253 if (!indexSelectionBit) { 
1254 r = bcdec__interpolate(endpoints[partitionSet * 2][0], endpoints[partitionSet * 2 + 1][0], weights, index); 
1255 g = bcdec__interpolate(endpoints[partitionSet * 2][1], endpoints[partitionSet * 2 + 1][1], weights, index); 
1256 b = bcdec__interpolate(endpoints[partitionSet * 2][2], endpoints[partitionSet * 2 + 1][2], weights, index); 
1257 a = bcdec__interpolate(endpoints[partitionSet * 2][3], endpoints[partitionSet * 2 + 1][3], weights2, index2); 
1258 } else
1259 r = bcdec__interpolate(endpoints[partitionSet * 2][0], endpoints[partitionSet * 2 + 1][0], weights2, index2); 
1260 g = bcdec__interpolate(endpoints[partitionSet * 2][1], endpoints[partitionSet * 2 + 1][1], weights2, index2); 
1261 b = bcdec__interpolate(endpoints[partitionSet * 2][2], endpoints[partitionSet * 2 + 1][2], weights2, index2); 
1262 a = bcdec__interpolate(endpoints[partitionSet * 2][3], endpoints[partitionSet * 2 + 1][3], weights, index); 
1263
1264
1265 
1266 switch (rotation) { 
1267 case 1: { /* 01 – Block format is Scalar(R) Vector(AGB) - swap A and R */ 
1268 bcdec__swap_values(&a, &r); 
1269 } break
1270 case 2: { /* 10 – Block format is Scalar(G) Vector(RAB) - swap A and G */ 
1271 bcdec__swap_values(&a, &g); 
1272 } break
1273 case 3: { /* 11 - Block format is Scalar(B) Vector(RGA) - swap A and B */ 
1274 bcdec__swap_values(&a, &b); 
1275 } break
1276
1277 
1278 decompressed[j * 4 + 0] = r; 
1279 decompressed[j * 4 + 1] = g; 
1280 decompressed[j * 4 + 2] = b; 
1281 decompressed[j * 4 + 3] = a; 
1282
1283 
1284 decompressed += destinationPitch; 
1285
1286
1287 
1288#endif /* BCDEC_IMPLEMENTATION */ 
1289 
1290/* LICENSE: 
1291 
1292This software is available under 2 licenses -- choose whichever you prefer. 
1293 
1294------------------------------------------------------------------------------ 
1295ALTERNATIVE A - MIT License 
1296 
1297Copyright (c) 2022 Sergii Kudlai 
1298 
1299Permission is hereby granted, free of charge, to any person obtaining a copy of 
1300this software and associated documentation files (the "Software"), to deal in 
1301the Software without restriction, including without limitation the rights to 
1302use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
1303of the Software, and to permit persons to whom the Software is furnished to do 
1304so, subject to the following conditions: 
1305 
1306The above copyright notice and this permission notice shall be included in all 
1307copies or substantial portions of the Software. 
1308 
1309THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
1310IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
1311FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
1312AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
1313LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
1314OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
1315SOFTWARE. 
1316 
1317------------------------------------------------------------------------------ 
1318ALTERNATIVE B - The Unlicense 
1319 
1320This is free and unencumbered software released into the public domain. 
1321 
1322Anyone is free to copy, modify, publish, use, compile, sell, or 
1323distribute this software, either in source code form or as a compiled 
1324binary, for any purpose, commercial or non-commercial, and by any 
1325means. 
1326 
1327In jurisdictions that recognize copyright laws, the author or authors 
1328of this software dedicate any and all copyright interest in the 
1329software to the public domain. We make this dedication for the benefit 
1330of the public at large and to the detriment of our heirs and 
1331successors. We intend this dedication to be an overt act of 
1332relinquishment in perpetuity of all present and future rights to this 
1333software under copyright law. 
1334 
1335THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
1336EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
1337MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
1338IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
1339OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
1340ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
1341OTHER DEALINGS IN THE SOFTWARE. 
1342 
1343For more information, please refer to <https://unlicense.org> 
1344 
1345*/ 
1346