// sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c

// Steve Reid implemented SHA-1. Wei Dai implemented SHA-2.
// Both are in the public domain.

// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code

#include "pch.h"

#ifndef CRYPTOPP_IMPORTS
#ifndef CRYPTOPP_GENERATE_X64_MASM

#include "sha.h"
#include "misc.h"
#include "cpu.h"

NAMESPACE_BEGIN(CryptoPP)

// start of Steve Reid's code

#define blk0(i) (W[i] = data[i])
#define blk1(i) (W[i&15] = rotlFixed(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15],1))
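// The SHA-1 message schedule is W[t] = ROTL1(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]);
// blk1 keeps only a 16-word circular buffer, so the (i+13)&15, (i+8)&15 and
// (i+2)&15 indices address W[t-3], W[t-8] and W[t-14] mod 16.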

void SHA1::InitState(HashWordType *state)
{
	state[0] = 0x67452301L;
	state[1] = 0xEFCDAB89L;
	state[2] = 0x98BADCFEL;
	state[3] = 0x10325476L;
	state[4] = 0xC3D2E1F0L;
}

#define f1(x,y,z) (z^(x&(y^z)))
#define f2(x,y,z) (x^y^z)
#define f3(x,y,z) ((x&y)|(z&(x|y)))
#define f4(x,y,z) (x^y^z)

/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
#define R0(v,w,x,y,z,i) z+=f1(w,x,y)+blk0(i)+0x5A827999+rotlFixed(v,5);w=rotlFixed(w,30);
#define R1(v,w,x,y,z,i) z+=f1(w,x,y)+blk1(i)+0x5A827999+rotlFixed(v,5);w=rotlFixed(w,30);
#define R2(v,w,x,y,z,i) z+=f2(w,x,y)+blk1(i)+0x6ED9EBA1+rotlFixed(v,5);w=rotlFixed(w,30);
#define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlFixed(v,5);w=rotlFixed(w,30);
#define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlFixed(v,5);w=rotlFixed(w,30);

void SHA1::Transform(word32 *state, const word32 *data)
{
	word32 W[16];
	/* Copy context->state[] to working vars */
	word32 a = state[0];
	word32 b = state[1];
	word32 c = state[2];
	word32 d = state[3];
	word32 e = state[4];
	/* 4 rounds of 20 operations each. Loop unrolled. */
	R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
	R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
	R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
	R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
	R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
	R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
	R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
	R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
	R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
	R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
	R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
	R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
	R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
	R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
	R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
	R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
	R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
	R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
	R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
	R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
	/* Add the working vars back into context.state[] */
	state[0] += a;
	state[1] += b;
	state[2] += c;
	state[3] += d;
	state[4] += e;
}

// end of Steve Reid's code
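
// Illustrative sketch (not part of the original file): how a caller computes a
// SHA-1 digest in one shot through the public HashTransformation interface.
// Disabled with #if 0 so it cannot affect the build; names are hypothetical.
#if 0
void SHA1_UsageSketch(const byte *message, size_t length)
{
	byte digest[SHA1::DIGESTSIZE];	// 20 bytes
	SHA1().CalculateDigest(digest, message, length);
}
#endif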

// *************************************************************

void SHA224::InitState(HashWordType *state)
{
	static const word32 s[8] = {0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4};
	memcpy(state, s, sizeof(s));
}

void SHA256::InitState(HashWordType *state)
{
	static const word32 s[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
	memcpy(state, s, sizeof(s));
}

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
CRYPTOPP_ALIGN_DATA(16) extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = {
#else
extern const word32 SHA256_K[64] = {
#endif
	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
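
// Per FIPS 180-4, SHA256_K holds the first 32 bits of the fractional parts of
// the cube roots of the first 64 primes; the SHA256 initial values above come
// from the square roots of the first eight primes.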

#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM

#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM)

#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code

static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len
#if defined(_MSC_VER) && (_MSC_VER == 1200)
	, ... // VC60 workaround: prevent VC 6 from inlining this function
#endif
	)
{
#if defined(_MSC_VER) && (_MSC_VER == 1200)
	AS2(mov ecx, [state])
	AS2(mov edx, [data])
#endif

	#define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
	#define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
	#define G(i) H(i+1)
	#define F(i) H(i+2)
	#define E(i) H(i+3)
	#define D(i) H(i+4)
	#define C(i) H(i+5)
	#define B(i) H(i+6)
	#define A(i) H(i+7)
	#define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4
	#define Wt_2(i) Wt((i)-2)
	#define Wt_15(i) Wt((i)-15)
	#define Wt_7(i) Wt((i)-7)
	#define K_END [BASE+8*4+16*4+0*WORD_SZ]
	#define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ]
	#define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ]
	#define DATA_END [BASE+8*4+16*4+3*WORD_SZ]
	#define Kt(i) WORD_REG(si)+(i)*4
#if CRYPTOPP_BOOL_X86
	#define BASE esp+4
#elif defined(__GNUC__)
	#define BASE r8
#else
	#define BASE rsp
#endif
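
// H(i)..A(i) map the eight working variables onto a circular stack buffer:
// ASM_MOD rotates the slot index with the round number i, so each round's
// outputs land one slot over instead of being shuffled through eight
// registers. Wt(i) applies the same trick to the 16-entry message schedule.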

#define RA0(i, edx, edi) \
	AS2( add edx, [Kt(i)] )\
	AS2( add edx, [Wt(i)] )\
	AS2( add edx, H(i) )\

#define RA1(i, edx, edi)

#define RB0(i, edx, edi)

#define RB1(i, edx, edi) \
	AS2( mov AS_REG_7d, [Wt_2(i)] )\
	AS2( mov edi, [Wt_15(i)])\
	AS2( mov ebx, AS_REG_7d )\
	AS2( shr AS_REG_7d, 10 )\
	AS2( ror ebx, 17 )\
	AS2( xor AS_REG_7d, ebx )\
	AS2( ror ebx, 2 )\
	AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\
	AS2( add ebx, [Wt_7(i)])\
	AS2( mov AS_REG_7d, edi )\
	AS2( shr AS_REG_7d, 3 )\
	AS2( ror edi, 7 )\
	AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
	AS2( xor AS_REG_7d, edi )\
	AS2( add edx, [Kt(i)])\
	AS2( ror edi, 11 )\
	AS2( add edx, H(i) )\
	AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\
	AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) + W_t-16 */\
	AS2( mov [Wt(i)], AS_REG_7d)\
	AS2( add edx, AS_REG_7d )\

#define ROUND(i, r, eax, ecx, edi, edx)\
	/* in: edi = E */\
	/* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
	AS2( mov edx, F(i) )\
	AS2( xor edx, G(i) )\
	AS2( and edx, edi )\
	AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\
	AS2( mov AS_REG_7d, edi )\
	AS2( ror edi, 6 )\
	AS2( ror AS_REG_7d, 25 )\
	RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
	AS2( xor AS_REG_7d, edi )\
	AS2( ror edi, 5 )\
	AS2( xor AS_REG_7d, edi )/* S1(E) */\
	AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
	RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
	/* in: ecx = A, eax = B^C, edx = T1 */\
	/* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
	AS2( mov ebx, ecx )\
	AS2( xor ecx, B(i) )/* A^B */\
	AS2( and eax, ecx )\
	AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C)) */\
	AS2( mov AS_REG_7d, ebx )\
	AS2( ror ebx, 2 )\
	AS2( add eax, edx )/* T1 + Maj(A,B,C) */\
	AS2( add edx, D(i) )\
	AS2( mov D(i), edx )\
	AS2( ror AS_REG_7d, 22 )\
	AS2( xor AS_REG_7d, ebx )\
	AS2( ror ebx, 11 )\
	AS2( xor AS_REG_7d, ebx )\
	AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\
	AS2( mov H(i), eax )\

#define SWAP_COPY(i) \
	AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
	AS1( bswap WORD_REG(bx))\
	AS2( mov [Wt(i*(1+CRYPTOPP_BOOL_X64)+CRYPTOPP_BOOL_X64)], WORD_REG(bx))

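// On x64, WORD_REG(bx) is rbx, so each SWAP_COPY moves two 32-bit words per
// 8-byte load; the 64-bit bswap both byte-swaps each word and exchanges the
// pair, which the Wt index arithmetic above (i*2+1 on x64) appears intended
// to compensate for.
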
#if defined(__GNUC__)
	#if CRYPTOPP_BOOL_X64
		FixedSizeAlignedSecBlock<byte, LOCALS_SIZE> workspace;
	#endif
	__asm__ __volatile__
	(
	#if CRYPTOPP_BOOL_X64
		"lea %4, %%r8;"
	#endif
	".intel_syntax noprefix;"
#elif defined(CRYPTOPP_GENERATE_X64_MASM)
	ALIGN 8
	X86_SHA256_HashBlocks PROC FRAME
	rex_push_reg rsi
	push_reg rdi
	push_reg rbx
	push_reg rbp
	alloc_stack(LOCALS_SIZE+8)
	.endprolog
	mov rdi, r8
	lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
#endif

#if CRYPTOPP_BOOL_X86
	#ifndef __GNUC__
	AS2( mov edi, [len])
	AS2( lea WORD_REG(si), [SHA256_K+48*4])
	#endif
	#if !defined(_MSC_VER) || (_MSC_VER < 1400)
	AS_PUSH_IF86(bx)
	#endif

	AS_PUSH_IF86(bp)
	AS2( mov ebx, esp)
	AS2( and esp, -16)
	AS2( sub WORD_REG(sp), LOCALS_SIZE)
	AS_PUSH_IF86(bx)
#endif
	AS2( mov STATE_SAVE, WORD_REG(cx))
	AS2( mov DATA_SAVE, WORD_REG(dx))
	AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
	AS2( mov DATA_END, WORD_REG(ax))
	AS2( mov K_END, WORD_REG(si))

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#if CRYPTOPP_BOOL_X86
	AS2( test edi, 1)
	ASJ( jnz, 2, f)
	AS1( dec DWORD PTR K_END)
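	// The caller encodes "no SSE2" in the low bit of len; the SSE2 path makes
	// K_END odd here so the loop epilogue below can tell the two paths apart.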
#endif
	AS2( movdqa xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
	AS2( movdqa xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
#endif

#if CRYPTOPP_BOOL_X86
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	ASJ( jmp, 0, f)
#endif
	ASL(2) // non-SSE2
	AS2( mov esi, ecx)
	AS2( lea edi, A(0))
	AS2( mov ecx, 8)
	AS1( rep movsd)
	AS2( mov esi, K_END)
	ASJ( jmp, 3, f)
#endif

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	ASL(0)
	AS2( movdqa E(0), xmm1)
	AS2( movdqa A(0), xmm0)
#endif
#if CRYPTOPP_BOOL_X86
	ASL(3)
#endif
	AS2( sub WORD_REG(si), 48*4)
	SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3)
	SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7)
#if CRYPTOPP_BOOL_X86
	SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11)
	SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15)
#endif
	AS2( mov edi, E(0)) // E
	AS2( mov eax, B(0)) // B
	AS2( xor eax, C(0)) // B^C
	AS2( mov ecx, A(0)) // A

	ROUND(0, 0, eax, ecx, edi, edx)
	ROUND(1, 0, ecx, eax, edx, edi)
	ROUND(2, 0, eax, ecx, edi, edx)
	ROUND(3, 0, ecx, eax, edx, edi)
	ROUND(4, 0, eax, ecx, edi, edx)
	ROUND(5, 0, ecx, eax, edx, edi)
	ROUND(6, 0, eax, ecx, edi, edx)
	ROUND(7, 0, ecx, eax, edx, edi)
	ROUND(8, 0, eax, ecx, edi, edx)
	ROUND(9, 0, ecx, eax, edx, edi)
	ROUND(10, 0, eax, ecx, edi, edx)
	ROUND(11, 0, ecx, eax, edx, edi)
	ROUND(12, 0, eax, ecx, edi, edx)
	ROUND(13, 0, ecx, eax, edx, edi)
	ROUND(14, 0, eax, ecx, edi, edx)
	ROUND(15, 0, ecx, eax, edx, edi)

	ASL(1)
	AS2(add WORD_REG(si), 4*16)
	ROUND(0, 1, eax, ecx, edi, edx)
	ROUND(1, 1, ecx, eax, edx, edi)
	ROUND(2, 1, eax, ecx, edi, edx)
	ROUND(3, 1, ecx, eax, edx, edi)
	ROUND(4, 1, eax, ecx, edi, edx)
	ROUND(5, 1, ecx, eax, edx, edi)
	ROUND(6, 1, eax, ecx, edi, edx)
	ROUND(7, 1, ecx, eax, edx, edi)
	ROUND(8, 1, eax, ecx, edi, edx)
	ROUND(9, 1, ecx, eax, edx, edi)
	ROUND(10, 1, eax, ecx, edi, edx)
	ROUND(11, 1, ecx, eax, edx, edi)
	ROUND(12, 1, eax, ecx, edi, edx)
	ROUND(13, 1, ecx, eax, edx, edi)
	ROUND(14, 1, eax, ecx, edi, edx)
	ROUND(15, 1, ecx, eax, edx, edi)
	AS2( cmp WORD_REG(si), K_END)
	ASJ( jb, 1, b)

	AS2( mov WORD_REG(dx), DATA_SAVE)
	AS2( add WORD_REG(dx), 64)
	AS2( mov AS_REG_7, STATE_SAVE)
	AS2( mov DATA_SAVE, WORD_REG(dx))

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#if CRYPTOPP_BOOL_X86
	AS2( test DWORD PTR K_END, 1)
	ASJ( jz, 4, f)
#endif
	AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_7+1*16])
	AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_7+0*16])
	AS2( paddd xmm1, E(0))
	AS2( paddd xmm0, A(0))
	AS2( movdqa [AS_REG_7+1*16], xmm1)
	AS2( movdqa [AS_REG_7+0*16], xmm0)
	AS2( cmp WORD_REG(dx), DATA_END)
	ASJ( jb, 0, b)
#endif

#if CRYPTOPP_BOOL_X86
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	ASJ( jmp, 5, f)
	ASL(4) // non-SSE2
#endif
	AS2( add [AS_REG_7+0*4], ecx) // A
	AS2( add [AS_REG_7+4*4], edi) // E
	AS2( mov eax, B(0))
	AS2( mov ebx, C(0))
	AS2( mov ecx, D(0))
	AS2( add [AS_REG_7+1*4], eax)
	AS2( add [AS_REG_7+2*4], ebx)
	AS2( add [AS_REG_7+3*4], ecx)
	AS2( mov eax, F(0))
	AS2( mov ebx, G(0))
	AS2( mov ecx, H(0))
	AS2( add [AS_REG_7+5*4], eax)
	AS2( add [AS_REG_7+6*4], ebx)
	AS2( add [AS_REG_7+7*4], ecx)
	AS2( mov ecx, AS_REG_7d)
	AS2( cmp WORD_REG(dx), DATA_END)
	ASJ( jb, 2, b)
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	ASL(5)
#endif
#endif

	AS_POP_IF86(sp)
	AS_POP_IF86(bp)
	#if !defined(_MSC_VER) || (_MSC_VER < 1400)
	AS_POP_IF86(bx)
	#endif

#ifdef CRYPTOPP_GENERATE_X64_MASM
	add rsp, LOCALS_SIZE+8
	pop rbp
	pop rbx
	pop rdi
	pop rsi
	ret
	X86_SHA256_HashBlocks ENDP
#endif

#ifdef __GNUC__
	".att_syntax prefix;"
	:
	: "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
	#if CRYPTOPP_BOOL_X64
		, "m" (workspace[0])
	#endif
	: "memory", "cc", "%eax"
	#if CRYPTOPP_BOOL_X64
		, "%rbx", "%r8", "%r10"
	#endif
	);
#endif
}

#endif // #if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM)

#ifndef CRYPTOPP_GENERATE_X64_MASM

#ifdef CRYPTOPP_X64_MASM_AVAILABLE
extern "C" {
void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len);
}
#endif

#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)

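// length & (size_t(0)-BLOCKSIZE) rounds length down to a whole number of
// 64-byte blocks; subtracting !HasSSE2() then sets the low bit of len, which
// the assembly above reads as the "use the non-SSE2 path" flag.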
size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
{
	X86_SHA256_HashBlocks(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
	return length % BLOCKSIZE;
}

size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
{
	X86_SHA256_HashBlocks(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
	return length % BLOCKSIZE;
}

#endif

#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))

#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) (y^((x^y)&(y^z)))

#define a(i) T[(0-i)&7]
#define b(i) T[(1-i)&7]
#define c(i) T[(2-i)&7]
#define d(i) T[(3-i)&7]
#define e(i) T[(4-i)&7]
#define f(i) T[(5-i)&7]
#define g(i) T[(6-i)&7]
#define h(i) T[(7-i)&7]
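
// a(i)..h(i) rotate the mapping between round number and T[] slot: the slot
// written as h(i) in round i is read back as a(i+1) in the next round, so the
// eight working variables rotate by index renaming instead of by copying.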

#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
	d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))

// for SHA256
#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))

void SHA256::Transform(word32 *state, const word32 *data)
{
	word32 W[16];
#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)
	// this byte reverse is a waste of time, but this function is only called by MDC
	ByteReverse(W, data, BLOCKSIZE);
	X86_SHA256_HashBlocks(state, W, BLOCKSIZE - !HasSSE2());
#else
	word32 T[8];
	/* Copy context->state[] to working vars */
	memcpy(T, state, sizeof(T));
	/* 64 operations, partially loop unrolled */
	for (unsigned int j=0; j<64; j+=16)
	{
		R( 0); R( 1); R( 2); R( 3);
		R( 4); R( 5); R( 6); R( 7);
		R( 8); R( 9); R(10); R(11);
		R(12); R(13); R(14); R(15);
	}
	/* Add the working vars back into context.state[] */
	state[0] += a(0);
	state[1] += b(0);
	state[2] += c(0);
	state[3] += d(0);
	state[4] += e(0);
	state[5] += f(0);
	state[6] += g(0);
	state[7] += h(0);
#endif
}
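
// Illustrative sketch (not part of the original file): the same transform
// driven incrementally through Update()/Final(), the usual calling pattern
// for streaming input. Disabled with #if 0 so it cannot affect the build;
// names are hypothetical.
#if 0
void SHA256_UsageSketch(const byte *part1, size_t len1, const byte *part2, size_t len2)
{
	byte digest[SHA256::DIGESTSIZE];	// 32 bytes
	SHA256 hash;
	hash.Update(part1, len1);	// buffers input, calling Transform per 64-byte block
	hash.Update(part2, len2);
	hash.Final(digest);		// pads, processes the final block, writes the digest
}
#endif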

/*
// smaller but slower
void SHA256::Transform(word32 *state, const word32 *data)
{
	word32 T[20];
	word32 W[32];
	unsigned int i = 0, j = 0;
	word32 *t = T+8;

	memcpy(t, state, 8*4);
	word32 e = t[4], a = t[0];

	do
	{
		word32 w = data[j];
		W[j] = w;
		w += SHA256_K[j];
		w += t[7];
		w += S1(e);
		w += Ch(e, t[5], t[6]);
		e = t[3] + w;
		t[3] = t[3+8] = e;
		w += S0(t[0]);
		a = w + Maj(a, t[1], t[2]);
		t[-1] = t[7] = a;
		--t;
		++j;
		if (j%8 == 0)
			t += 8;
	} while (j<16);

	do
	{
		i = j&0xf;
		word32 w = s1(W[i+16-2]) + s0(W[i+16-15]) + W[i] + W[i+16-7];
		W[i+16] = W[i] = w;
		w += SHA256_K[j];
		w += t[7];
		w += S1(e);
		w += Ch(e, t[5], t[6]);
		e = t[3] + w;
		t[3] = t[3+8] = e;
		w += S0(t[0]);
		a = w + Maj(a, t[1], t[2]);
		t[-1] = t[7] = a;

		w = s1(W[(i+1)+16-2]) + s0(W[(i+1)+16-15]) + W[(i+1)] + W[(i+1)+16-7];
		W[(i+1)+16] = W[(i+1)] = w;
		w += SHA256_K[j+1];
		w += (t-1)[7];
		w += S1(e);
		w += Ch(e, (t-1)[5], (t-1)[6]);
		e = (t-1)[3] + w;
		(t-1)[3] = (t-1)[3+8] = e;
		w += S0((t-1)[0]);
		a = w + Maj(a, (t-1)[1], (t-1)[2]);
		(t-1)[-1] = (t-1)[7] = a;

		t-=2;
		j+=2;
		if (j%8 == 0)
			t += 8;
	} while (j<64);

	state[0] += a;
	state[1] += t[1];
	state[2] += t[2];
	state[3] += t[3];
	state[4] += e;
	state[5] += t[5];
	state[6] += t[6];
	state[7] += t[7];
}
*/

#undef S0
#undef S1
#undef s0
#undef s1
#undef R

// *************************************************************

void SHA384::InitState(HashWordType *state)
{
	static const word64 s[8] = {
		W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507),
		W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939),
		W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511),
		W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)};
	memcpy(state, s, sizeof(s));
}

void SHA512::InitState(HashWordType *state)
{
	static const word64 s[8] = {
		W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b),
		W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1),
		W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f),
		W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)};
	memcpy(state, s, sizeof(s));
}
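
// Per FIPS 180-4, these initial values are the first 64 bits of the fractional
// parts of the square roots of the first eight primes (SHA-512) and of the
// ninth through sixteenth primes (SHA-384).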

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
CRYPTOPP_ALIGN_DATA(16) static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = {
#else
static const word64 SHA512_K[80] = {
#endif
	W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
	W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
	W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
	W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
	W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
	W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
	W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
	W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
	W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
	W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
	W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
	W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
	W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
	W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
	W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
	W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
	W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
	W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
	W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
	W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
	W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
	W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
	W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
	W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
	W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
	W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
	W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
	W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
	W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
	W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
	W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
	W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
	W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
	W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
	W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
	W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
	W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
	W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
	W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
	W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
};

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
// put assembly version in separate function, otherwise MSVC 2005 SP1 doesn't generate correct code for the non-assembly version
CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state, const word64 *data)
{
#ifdef __GNUC__
	__asm__ __volatile__
	(
	".intel_syntax noprefix;"
	AS1( push ebx)
	AS2( mov ebx, eax)
#else
	AS1( push ebx)
	AS1( push esi)
	AS1( push edi)
	AS2( lea ebx, SHA512_K)
#endif

	AS2( mov eax, esp)
	AS2( and esp, 0xfffffff0)
	AS2( sub esp, 27*16) // 17*16 for expanded data, 20*8 for state
	AS1( push eax)
	AS2( xor eax, eax)
	AS2( lea edi, [esp+4+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
	AS2( lea esi, [esp+4+20*8+8]) // 16-byte alignment, then add 8

	AS2( movdqa xmm0, [ecx+0*16])
	AS2( movdq2q mm4, xmm0)
	AS2( movdqa [edi+0*16], xmm0)
	AS2( movdqa xmm0, [ecx+1*16])
	AS2( movdqa [edi+1*16], xmm0)
	AS2( movdqa xmm0, [ecx+2*16])
	AS2( movdq2q mm5, xmm0)
	AS2( movdqa [edi+2*16], xmm0)
	AS2( movdqa xmm0, [ecx+3*16])
	AS2( movdqa [edi+3*16], xmm0)
	ASJ( jmp, 0, f)

#define SSE2_S0_S1(r, a, b, c) \
	AS2( movq mm6, r)\
	AS2( psrlq r, a)\
	AS2( movq mm7, r)\
	AS2( psllq mm6, 64-c)\
	AS2( pxor mm7, mm6)\
	AS2( psrlq r, b-a)\
	AS2( pxor mm7, r)\
	AS2( psllq mm6, c-b)\
	AS2( pxor mm7, mm6)\
	AS2( psrlq r, c-b)\
	AS2( pxor r, mm7)\
	AS2( psllq mm6, b-a)\
	AS2( pxor r, mm6)
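
// SSE2 has no 64-bit rotate instruction, so SSE2_S0_S1 builds each of the
// three rotations of a SHA-512 Sigma function as (x>>n)|(x<<(64-n)), sharing
// the intermediate shifted values across the a, b and c rotation amounts.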

#define SSE2_s0(r, a, b, c) \
	AS2( movdqa xmm6, r)\
	AS2( psrlq r, a)\
	AS2( movdqa xmm7, r)\
	AS2( psllq xmm6, 64-c)\
	AS2( pxor xmm7, xmm6)\
	AS2( psrlq r, b-a)\
	AS2( pxor xmm7, r)\
	AS2( psrlq r, c-b)\
	AS2( pxor r, xmm7)\
	AS2( psllq xmm6, c-a)\
	AS2( pxor r, xmm6)

#define SSE2_s1(r, a, b, c) \
	AS2( movdqa xmm6, r)\
	AS2( psrlq r, a)\
	AS2( movdqa xmm7, r)\
	AS2( psllq xmm6, 64-c)\
	AS2( pxor xmm7, xmm6)\
	AS2( psrlq r, b-a)\
	AS2( pxor xmm7, r)\
	AS2( psllq xmm6, c-b)\
	AS2( pxor xmm7, xmm6)\
	AS2( psrlq r, c-b)\
	AS2( pxor r, xmm7)

	ASL(SHA512_Round)
	// k + w is in mm0, a is in mm4, e is in mm5
	AS2( paddq mm0, [edi+7*8]) // h
	AS2( movq mm2, [edi+5*8]) // f
	AS2( movq mm3, [edi+6*8]) // g
	AS2( pxor mm2, mm3)
	AS2( pand mm2, mm5)
	SSE2_S0_S1(mm5,14,18,41)
	AS2( pxor mm2, mm3)
	AS2( paddq mm0, mm2) // h += Ch(e,f,g)
	AS2( paddq mm5, mm0) // h += S1(e)
	AS2( movq mm2, [edi+1*8]) // b
	AS2( movq mm1, mm2)
	AS2( por mm2, mm4)
	AS2( pand mm2, [edi+2*8]) // c
	AS2( pand mm1, mm4)
	AS2( por mm1, mm2)
	AS2( paddq mm1, mm5) // temp = h + Maj(a,b,c)
	AS2( paddq mm5, [edi+3*8]) // e = d + h
	AS2( movq [edi+3*8], mm5)
	AS2( movq [edi+11*8], mm5)
	SSE2_S0_S1(mm4,28,34,39) // S0(a)
	AS2( paddq mm4, mm1) // a = temp + S0(a)
	AS2( movq [edi-8], mm4)
	AS2( movq [edi+7*8], mm4)
	AS1( ret)

	// first 16 rounds
	ASL(0)
	AS2( movq mm0, [edx+eax*8])
	AS2( movq [esi+eax*8], mm0)
	AS2( movq [esi+eax*8+16*8], mm0)
	AS2( paddq mm0, [ebx+eax*8])
	ASC( call, SHA512_Round)
	AS1( inc eax)
	AS2( sub edi, 8)
	AS2( test eax, 7)
	ASJ( jnz, 0, b)
	AS2( add edi, 8*8)
	AS2( cmp eax, 16)
	ASJ( jne, 0, b)

	// rest of the rounds
	AS2( movdqu xmm0, [esi+(16-2)*8])
	ASL(1)
	// data expansion, W[i-2] already in xmm0
	AS2( movdqu xmm3, [esi])
	AS2( paddq xmm3, [esi+(16-7)*8])
	AS2( movdqa xmm2, [esi+(16-15)*8])
	SSE2_s1(xmm0, 6, 19, 61)
	AS2( paddq xmm0, xmm3)
	SSE2_s0(xmm2, 1, 7, 8)
	AS2( paddq xmm0, xmm2)
	AS2( movdq2q mm0, xmm0)
	AS2( movhlps xmm1, xmm0)
	AS2( paddq mm0, [ebx+eax*8])
	AS2( movlps [esi], xmm0)
	AS2( movlps [esi+8], xmm1)
	AS2( movlps [esi+8*16], xmm0)
	AS2( movlps [esi+8*17], xmm1)
	// 2 rounds
	ASC( call, SHA512_Round)
	AS2( sub edi, 8)
	AS2( movdq2q mm0, xmm1)
	AS2( paddq mm0, [ebx+eax*8+8])
	ASC( call, SHA512_Round)
	// update indices and loop
	AS2( add esi, 16)
	AS2( add eax, 2)
	AS2( sub edi, 8)
	AS2( test eax, 7)
	ASJ( jnz, 1, b)
	// do housekeeping every 8 rounds
	AS2( mov esi, 0xf)
	AS2( and esi, eax)
	AS2( lea esi, [esp+4+20*8+8+esi*8])
	AS2( add edi, 8*8)
	AS2( cmp eax, 80)
	ASJ( jne, 1, b)

#define SSE2_CombineState(i) \
	AS2( movdqa xmm0, [edi+i*16])\
	AS2( paddq xmm0, [ecx+i*16])\
	AS2( movdqa [ecx+i*16], xmm0)

	SSE2_CombineState(0)
	SSE2_CombineState(1)
	SSE2_CombineState(2)
	SSE2_CombineState(3)

	AS1( pop esp)
	AS1( emms)

#if defined(__GNUC__)
	AS1( pop ebx)
	".att_syntax prefix;"
	:
	: "a" (SHA512_K), "c" (state), "d" (data)
	: "%esi", "%edi", "memory", "cc"
	);
#else
	AS1( pop edi)
	AS1( pop esi)
	AS1( pop ebx)
	AS1( ret)
#endif
}
#endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE

void SHA512::Transform(word64 *state, const word64 *data)
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
	if (HasSSE2())
	{
		SHA512_SSE2_Transform(state, data);
		return;
	}
#endif

#define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39))
#define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41))
#define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^(x>>7))
#define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6))

#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+(j?blk2(i):blk0(i));\
	d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))

	word64 W[16];
	word64 T[8];
	/* Copy context->state[] to working vars */
	memcpy(T, state, sizeof(T));
	/* 80 operations, partially loop unrolled */
	for (unsigned int j=0; j<80; j+=16)
	{
		R( 0); R( 1); R( 2); R( 3);
		R( 4); R( 5); R( 6); R( 7);
		R( 8); R( 9); R(10); R(11);
		R(12); R(13); R(14); R(15);
	}
	/* Add the working vars back into context.state[] */
	state[0] += a(0);
	state[1] += b(0);
	state[2] += c(0);
	state[3] += d(0);
	state[4] += e(0);
	state[5] += f(0);
	state[6] += g(0);
	state[7] += h(0);
}

NAMESPACE_END

#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
#endif // #ifndef CRYPTOPP_IMPORTS