/*
 * NOTE(review): the header names on the three #include lines were unreadable
 * in the source as received. <stdint.h> is required for the fixed-width types
 * used below; the other two are presumed -- confirm against the build.
 */
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

/* Turns every printf() call in this file into a no-op statement. */
#define printf(...) (void)0;

/*
 * Defined in another translation unit. sap_hash() passes its five working
 * buffers in this order; what garble() does with them is not visible here.
 */
void garble(unsigned char* buffer0, unsigned char* buffer1,
            unsigned char* buffer2, unsigned char* buffer3,
            unsigned char* buffer4);

/*
 * Rotate the 8-bit value `input` left by `count` bits and return the result
 * truncated to 8 bits.
 */
unsigned char rol8(unsigned char input, int count)
{
    return ((input << count) & 0xff) | (input & 0xff) >> (8 - count);
}

/*
 * Same shift/or combination as rol8(), but the left-shifted part is NOT
 * masked to 8 bits, so bits shifted past bit 7 are kept in the 32-bit result.
 */
uint32_t rol8x(unsigned char input, int count)
{
    return ((input << count)) | (input) >> (8 - count);
}

/*
 * Reduce a possibly negative offset to an array index using unsigned 32-bit
 * arithmetic: the offset is first wrapped to 32 bits, then reduced modulo
 * `modulus`. When `modulus` does not divide 2^32 (e.g. 210) a negative offset
 * lands on a different slot than a mathematical modulo would give; the
 * scrambling below relies on exactly this behaviour.
 */
static uint32_t sap_wrap_index(int offset, uint32_t modulus)
{
    return (uint32_t)offset % modulus;
}

/*
 * Derive a 16-byte key from an input block.
 *
 * blockIn: input block; the first 64 bytes are read.
 * keyOut:  output buffer; exactly 16 bytes are written.
 *
 * The function expands the input into a 210-byte state, scrambles it, hands
 * the state plus four further buffers to garble(), then folds all buffers into
 * the 16 output bytes and runs a final mixing pass over them.
 */
void sap_hash(unsigned char* blockIn, unsigned char* keyOut)
{
    enum {
        BLOCK_LEN = 64,     /* bytes consumed from blockIn */
        STATE_LEN = 210,    /* size of the scrambled state (buffer1) */
        KEY_LEN = 16        /* bytes produced in keyOut */
    };

    /*
     * The initialised buffers are deliberately re-created on every call:
     * they are passed to garble() through non-const pointers.
     */
    unsigned char buffer0[20] = {0x96, 0x5F, 0xC6, 0x53, 0xF8, 0x46, 0xCC, 0x18,
                                 0xDF, 0xBE, 0xB2, 0xF8, 0x38, 0xD7, 0xEC, 0x22,
                                 0x03, 0xD1, 0x20, 0x8F};
    unsigned char buffer1[STATE_LEN];
    unsigned char buffer2[35] = {0x43, 0x54, 0x62, 0x7A, 0x18, 0xC3, 0xD6, 0xB3,
                                 0x9A, 0x56, 0xF6, 0x1C, 0x14, 0x3F, 0x0C, 0x1D,
                                 0x3B, 0x36, 0x83, 0xB1, 0x39, 0x51, 0x4A, 0xAA,
                                 0x09, 0x3E, 0xFE, 0x44, 0xAF, 0xDE, 0xC3, 0x20,
                                 0x9D, 0x42, 0x3A};
    /*
     * Presumably filled in by garble(); zeroed first so the reads below are
     * never of indeterminate values.
     */
    unsigned char buffer3[132] = {0};
    unsigned char buffer4[21] = {0xED, 0x25, 0xD1, 0xBB, 0xBC, 0x27, 0x9F, 0x02,
                                 0xA2, 0xA9, 0x11, 0x00, 0x0C, 0xB3, 0x52, 0xC0,
                                 0xBD, 0xE3, 0x1B, 0x49, 0xC7};
    /* Which 4-byte slots of buffer3 feed output bytes 0..10. */
    static const int i0_index[11] = {18, 22, 23, 0, 5, 19, 32, 31, 10, 21, 30};
    uint8_t w, x, y, z;
    int i, j;

    /*
     * Load the input into the state, repeating the 64-byte block and reversing
     * the byte order inside every group of four bytes (index ^ 3 maps
     * 0,1,2,3 -> 3,2,1,0). This is what the previous implementation produced
     * on little-endian hosts by loading 32-bit words and extracting bytes
     * most-significant first, but without casting blockIn to uint32_t*, which
     * violated strict aliasing and could perform a misaligned load.
     * NOTE(review): on a big-endian host the old code yielded the bytes in
     * plain order instead; the little-endian result is assumed to be the
     * intended one -- confirm.
     */
    for (i = 0; i < STATE_LEN; i++) {
        buffer1[i] = blockIn[(i % BLOCK_LEN) ^ 3];
    }

    /* Next a scrambling: four in-place passes over the state. */
    for (i = 0; i < 4 * STATE_LEN; i++) {
        /* Indices must use unsigned 32-bit modulo, or we get the wrong ones. */
        x = buffer1[sap_wrap_index(i - 155, STATE_LEN)];
        y = buffer1[sap_wrap_index(i - 57, STATE_LEN)];
        z = buffer1[sap_wrap_index(i - 13, STATE_LEN)];
        w = buffer1[sap_wrap_index(i, STATE_LEN)];
        buffer1[i % STATE_LEN] =
            (rol8(y, 5) + (rol8(z, 3) ^ w) - rol8(x, 7)) & 0xff;
    }

    printf("Garbling...\n");
    /* It is not known (yet) what this does, but it gives the right output. */
    garble(buffer0, buffer1, buffer2, buffer3, buffer4);

    /* Fill the output with 0xE1. */
    for (i = 0; i < KEY_LEN; i++) {
        keyOut[i] = 0xE1;
    }

    /*
     * Now use all the buffers to grind out the output. First buffer3.
     * Note that this is addition (mod 256) and not XOR, that only certain
     * indices are used, and that index 3 is hard-coded to be 0x3d.
     */
    for (i = 0; i < 11; i++) {
        if (i == 3) {
            keyOut[i] = 0x3d;
        } else {
            keyOut[i] = ((keyOut[i] + buffer3[i0_index[i] * 4]) & 0xff);
        }
    }

    /* Then buffer0. */
    for (i = 0; i < 20; i++) {
        keyOut[i % KEY_LEN] ^= buffer0[i];
    }

    /* Then buffer2. */
    for (i = 0; i < 35; i++) {
        keyOut[i % KEY_LEN] ^= buffer2[i];
    }

    /* Then buffer1. */
    for (i = 0; i < STATE_LEN; i++) {
        keyOut[i % KEY_LEN] ^= buffer1[i];
    }

    /*
     * Finally a kind of reverse-scramble: 16 in-place passes over the key.
     * Each byte is updated in order, so later bytes see earlier updates.
     */
    for (j = 0; j < 16; j++) {
        for (i = 0; i < KEY_LEN; i++) {
            x = keyOut[sap_wrap_index(i - 7, KEY_LEN)];
            y = keyOut[i % KEY_LEN];
            z = keyOut[sap_wrap_index(i - 37, KEY_LEN)];
            w = keyOut[sap_wrap_index(i - 177, KEY_LEN)];
            keyOut[i] = rol8(x, 1) ^ y ^ rol8(z, 6) ^ rol8(w, 5);
        }
    }
}