#ifndef CRYPTOPP_GENERATE_X64_MASM

#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4702 4740)
#endif

#if defined(CRYPTOPP_DISABLE_SALSA_ASM)
# undef CRYPTOPP_X86_ASM_AVAILABLE
# undef CRYPTOPP_X32_ASM_AVAILABLE
# undef CRYPTOPP_X64_ASM_AVAILABLE
# undef CRYPTOPP_SSE2_ASM_AVAILABLE
# undef CRYPTOPP_SSSE3_ASM_AVAILABLE
#endif

#if defined(CRYPTOPP_DEBUG) && !defined(CRYPTOPP_DOXYGEN_PROCESSING)
void Salsa20_TestInstantiations()
{
    Salsa20::Encryption x;
}
#endif
void Salsa20_Core(word32 *data, unsigned int rounds)
{
    CRYPTOPP_ALIGN_DATA(16) word32 x[16];
    for (size_t i = 0; i < 16; ++i)
        x[i] = data[i];
    for (size_t i = 0; i < rounds; i += 2)
    {
        // Operate on the columns
        x[ 4] ^= rotlConstant< 7>(x[ 0]+x[12]);
        x[ 8] ^= rotlConstant< 9>(x[ 4]+x[ 0]);
        x[12] ^= rotlConstant<13>(x[ 8]+x[ 4]);
        x[ 0] ^= rotlConstant<18>(x[12]+x[ 8]);

        x[ 9] ^= rotlConstant< 7>(x[ 5]+x[ 1]);
        x[13] ^= rotlConstant< 9>(x[ 9]+x[ 5]);
        x[ 1] ^= rotlConstant<13>(x[13]+x[ 9]);
        x[ 5] ^= rotlConstant<18>(x[ 1]+x[13]);

        x[14] ^= rotlConstant< 7>(x[10]+x[ 6]);
        x[ 2] ^= rotlConstant< 9>(x[14]+x[10]);
        x[ 6] ^= rotlConstant<13>(x[ 2]+x[14]);
        x[10] ^= rotlConstant<18>(x[ 6]+x[ 2]);

        x[ 3] ^= rotlConstant< 7>(x[15]+x[11]);
        x[ 7] ^= rotlConstant< 9>(x[ 3]+x[15]);
        x[11] ^= rotlConstant<13>(x[ 7]+x[ 3]);
        x[15] ^= rotlConstant<18>(x[11]+x[ 7]);

        // Operate on the rows
        x[ 1] ^= rotlConstant< 7>(x[ 0]+x[ 3]);
        x[ 2] ^= rotlConstant< 9>(x[ 1]+x[ 0]);
        x[ 3] ^= rotlConstant<13>(x[ 2]+x[ 1]);
        x[ 0] ^= rotlConstant<18>(x[ 3]+x[ 2]);

        x[ 6] ^= rotlConstant< 7>(x[ 5]+x[ 4]);
        x[ 7] ^= rotlConstant< 9>(x[ 6]+x[ 5]);
        x[ 4] ^= rotlConstant<13>(x[ 7]+x[ 6]);
        x[ 5] ^= rotlConstant<18>(x[ 4]+x[ 7]);

        x[11] ^= rotlConstant< 7>(x[10]+x[ 9]);
        x[ 8] ^= rotlConstant< 9>(x[11]+x[10]);
        x[ 9] ^= rotlConstant<13>(x[ 8]+x[11]);
        x[10] ^= rotlConstant<18>(x[ 9]+x[ 8]);

        x[12] ^= rotlConstant< 7>(x[15]+x[14]);
        x[13] ^= rotlConstant< 9>(x[12]+x[15]);
        x[14] ^= rotlConstant<13>(x[13]+x[12]);
        x[15] ^= rotlConstant<18>(x[14]+x[13]);
    }
    for (size_t i = 0; i < 16; ++i)
        data[i] += x[i];
}
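// Each group of four statements above is one Salsa20 quarter-round; the first
// four groups act on the columns of the 4x4 state and the last four on the
// rows. Below is a standalone sketch of a single quarter-round for reference
// only -- it is not part of this file and the helper names are hypothetical.
#if 0
#include <cstdint>

inline uint32_t rotl32(uint32_t v, unsigned s)
{
    return (v << s) | (v >> (32u - s));  // valid for s in 1..31, as used here
}

// One Salsa20 quarter-round; (y0, y1, y2, y3) play the roles of
// (x[a], x[b], x[c], x[d]) in the loop above.
inline void salsa20_quarter_round(uint32_t &y0, uint32_t &y1, uint32_t &y2, uint32_t &y3)
{
    y1 ^= rotl32(y0 + y3,  7);
    y2 ^= rotl32(y1 + y0,  9);
    y3 ^= rotl32(y2 + y1, 13);
    y0 ^= rotl32(y3 + y2, 18);
}
#endif // illustrative sketch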
void Salsa20_Policy::CipherSetKey(const NameValuePairs &params, const byte *key, size_t length)
{
    if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
        throw InvalidRounds(Salsa20::StaticAlgorithmName(), m_rounds);
    // m_state is stored in a permuted order to simplify the SSE2 implementation
    GetBlock<word32, LittleEndian> get1(key);
    get1(m_state[13])(m_state[10])(m_state[7])(m_state[4]);
    GetBlock<word32, LittleEndian> get2(key + length - 16);
    get2(m_state[15])(m_state[12])(m_state[9])(m_state[6]);
    // "expand 16-byte k" (tau) or "expand 32-byte k" (sigma), as little-endian words
    m_state[0] = 0x61707865;
    m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e;
    m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32;
    m_state[3] = 0x6b206574;
}
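// The four diagonal constants above are the ASCII strings "expand 16-byte k"
// (128-bit keys) and "expand 32-byte k" (256-bit keys) read as little-endian
// 32-bit words. A small illustrative check, not part of this file:
#if 0
#include <cassert>
#include <cstdint>

static uint32_t le32(const char *p)
{
    // Load four ASCII bytes as a little-endian 32-bit word
    return  (uint32_t)(unsigned char)p[0]       |
            (uint32_t)(unsigned char)p[1] <<  8 |
            (uint32_t)(unsigned char)p[2] << 16 |
            (uint32_t)(unsigned char)p[3] << 24;
}

int main()
{
    const char sigma[] = "expand 32-byte k";   // 256-bit keys
    const char tau[]   = "expand 16-byte k";   // 128-bit keys
    assert(le32(sigma + 0)  == 0x61707865 && le32(tau + 0)  == 0x61707865);
    assert(le32(sigma + 4)  == 0x3320646e && le32(tau + 4)  == 0x3120646e);
    assert(le32(sigma + 8)  == 0x79622d32 && le32(tau + 8)  == 0x79622d36);
    assert(le32(sigma + 12) == 0x6b206574 && le32(tau + 12) == 0x6b206574);
    return 0;
}
#endif // illustrative sketch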
void Salsa20_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length)
{
    CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length);
    GetBlock<word32, LittleEndian> get(IV);
    get(m_state[14])(m_state[11]);

    // Reset the 64-bit block counter (low word in m_state[8], high word in m_state[5])
    m_state[8] = m_state[5] = 0;
}
void Salsa20_Policy::SeekToIteration(lword iterationCount)
{
    m_state[8] = (word32)iterationCount;
    m_state[5] = (word32)SafeRightShift<32>(iterationCount);
}
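// SeekToIteration splits the 64-bit block index into two 32-bit words: the low
// half lands in m_state[8] and the high half in m_state[5] (the state is stored
// permuted, so these are not the canonical counter positions 8 and 9). A minimal
// sketch of the same split, assuming only <cstdint>; not part of this file:
#if 0
#include <cstdint>

struct CounterWords { uint32_t low, high; };

// Split a 64-bit block index the same way SeekToIteration does
inline CounterWords split_counter(uint64_t iteration)
{
    CounterWords c;
    c.low  = (uint32_t)iteration;           // stored in m_state[8]
    c.high = (uint32_t)(iteration >> 32);   // stored in m_state[5]
    return c;
}
#endif // illustrative sketch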
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)

unsigned int Salsa20_Policy::GetAlignment() const
{
#if CRYPTOPP_SSE2_ASM_AVAILABLE
    if (HasSSE2())
        return 16;
    else
#endif
        return GetAlignmentOf<word32>();
}
unsigned int Salsa20_Policy::GetOptimalBlockSize() const
{
#if CRYPTOPP_SSE2_ASM_AVAILABLE
    if (HasSSE2())
        return 4*BYTES_PER_ITERATION;
    else
#endif
        return BYTES_PER_ITERATION;
}
#endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
extern "C" {
void Salsa20_OperateKeystream(byte *output, const byte *input, size_t iterationCount, int rounds, void *state);
}
#endif
#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
#endif

void Salsa20_Policy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
{
#endif // #ifdef CRYPTOPP_GENERATE_X64_MASM

#ifdef CRYPTOPP_X64_MASM_AVAILABLE
    Salsa20_OperateKeystream(output, input, iterationCount, m_rounds, m_state.data());
    return;
#endif
#if CRYPTOPP_SSE2_ASM_AVAILABLE
#ifdef CRYPTOPP_GENERATE_X64_MASM
Salsa20_OperateKeystream PROC FRAME
    mov r10, [rsp + 5*8] ; state
    alloc_stack(10*16 + 32*16 + 8)
    save_xmm128 xmm6, 0200h
    save_xmm128 xmm7, 0210h
    save_xmm128 xmm8, 0220h
    save_xmm128 xmm9, 0230h
    save_xmm128 xmm10, 0240h
    save_xmm128 xmm11, 0250h
    save_xmm128 xmm12, 0260h
    save_xmm128 xmm13, 0270h
    save_xmm128 xmm14, 0280h
    save_xmm128 xmm15, 0290h
    #define REG_output          rcx
    #define REG_input           rdx
    #define REG_iterationCount  r8
    #define REG_state           r10
    #define REG_rounds          r9d
    #define REG_roundsLeft      eax
    #define REG_temp32          r11d
    #define SSE2_WORKSPACE      rsp
#else
    #if CRYPTOPP_BOOL_X64
        #define REG_output          %1
        #define REG_iterationCount  %2
        #define REG_rounds          %3
        #define REG_roundsLeft      eax
        #define REG_temp32          edx
        #define SSE2_WORKSPACE      %5

        CRYPTOPP_ALIGN_DATA(16) byte workspace[16*32];
    #else
        #define REG_output          edi
        #define REG_input           eax
        #define REG_iterationCount  ecx
        #define REG_state           esi
        #define REG_rounds          edx
        #define REG_roundsLeft      ebx
        #define REG_temp32          ebp
        #define SSE2_WORKSPACE      esp + WORD_SZ
    #endif

    int r = m_rounds;
    void *s = m_state.data();
    AS2( mov REG_iterationCount, iterationCount)
    AS2( mov REG_input, input)
    AS2( mov REG_output, output)
    AS2( mov REG_state, s)
    AS2( mov REG_rounds, r)
#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM

    AS2( cmp REG_iterationCount, 4)
#if CRYPTOPP_BOOL_X86
    #define SSE2_EXPAND_S(i, j) \
        ASS( pshufd xmm4, xmm##i, j, j, j, j) \
        AS2( movdqa [SSE2_WORKSPACE + (i*4+j)*16 + 256], xmm4)

    AS2( movdqa xmm0, [REG_state + 0*16])
    AS2( movdqa xmm1, [REG_state + 1*16])
    AS2( movdqa xmm2, [REG_state + 2*16])
    AS2( movdqa xmm3, [REG_state + 3*16])
    #define SSE2_EXPAND_S85(i) \
        AS2( mov dword ptr [SSE2_WORKSPACE + 8*16 + i*4 + 256], REG_roundsLeft) \
        AS2( mov dword ptr [SSE2_WORKSPACE + 5*16 + i*4 + 256], REG_temp32) \
        AS2( add REG_roundsLeft, 1) \
        AS2( adc REG_temp32, 0)

    AS2( mov REG_roundsLeft, dword ptr [REG_state + 8*4])
    AS2( mov REG_temp32, dword ptr [REG_state + 5*4])
    AS2( mov dword ptr [REG_state + 8*4], REG_roundsLeft)
    AS2( mov dword ptr [REG_state + 5*4], REG_temp32)
#define SSE2_QUARTER_ROUND(a, b, d, i) \
    AS2( movdqa xmm4, xmm##d) \
    AS2( paddd xmm4, xmm##a) \
    AS2( movdqa xmm5, xmm4) \
    AS2( pslld xmm4, i) \
    AS2( psrld xmm5, 32-i) \
    AS2( pxor xmm##b, xmm4) \
    AS2( pxor xmm##b, xmm5)

#define L01(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##A, [SSE2_WORKSPACE + d*16 + i*256])
#define L02(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##C, [SSE2_WORKSPACE + a*16 + i*256])
#define L03(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C)
#define L04(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A)
#define L05(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 7)
#define L06(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-7)
#define L07(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + b*16 + i*256])
#define L08(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B)
#define L09(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + b*16], xmm##A)
#define L10(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A)
#define L11(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C)
#define L12(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A)
#define L13(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 9)
#define L14(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-9)
#define L15(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + c*16 + i*256])
#define L16(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D)
#define L17(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + c*16], xmm##A)
#define L18(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A)
#define L19(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##B)
#define L20(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A)
#define L21(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 13)
#define L22(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-13)
#define L23(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + d*16 + i*256])
#define L24(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B)
#define L25(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + d*16], xmm##A)
#define L26(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##D)
#define L27(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A)
#define L28(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 18)
#define L29(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-18)
#define L30(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##C)
#define L31(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D)
#define L32(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + a*16], xmm##A)

#define SSE2_QUARTER_ROUND_X8(i, a, b, c, d, e, f, g, h) \
    L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) \
    L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) \
    L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) \
    L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) \
    L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) \
    L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) \
    L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) \
    L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) \
    L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) \
    L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) \
    L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) \
    L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) \
    L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) \
    L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) \
    L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) \
    L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) \
    L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) \
    L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) \
    L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) \
    L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) \
    L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) \
    L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) \
    L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) \
    L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) \
    L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) \
    L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) \
    L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) \
    L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) \
    L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) \
    L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) \
    L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) \
    L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i)

#define SSE2_QUARTER_ROUND_X16(i, a, b, c, d, e, f, g, h, A, B, C, D, E, F, G, H) \
    L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) L01(8,9,10,11, A,B,C,D, i) L01(12,13,14,15, E,F,G,H, i) \
    L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) L02(8,9,10,11, A,B,C,D, i) L02(12,13,14,15, E,F,G,H, i) \
    L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) L03(8,9,10,11, A,B,C,D, i) L03(12,13,14,15, E,F,G,H, i) \
    L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) L04(8,9,10,11, A,B,C,D, i) L04(12,13,14,15, E,F,G,H, i) \
    L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) L05(8,9,10,11, A,B,C,D, i) L05(12,13,14,15, E,F,G,H, i) \
    L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) L06(8,9,10,11, A,B,C,D, i) L06(12,13,14,15, E,F,G,H, i) \
    L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) L07(8,9,10,11, A,B,C,D, i) L07(12,13,14,15, E,F,G,H, i) \
    L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) L08(8,9,10,11, A,B,C,D, i) L08(12,13,14,15, E,F,G,H, i) \
    L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) L09(8,9,10,11, A,B,C,D, i) L09(12,13,14,15, E,F,G,H, i) \
    L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) L10(8,9,10,11, A,B,C,D, i) L10(12,13,14,15, E,F,G,H, i) \
    L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) L11(8,9,10,11, A,B,C,D, i) L11(12,13,14,15, E,F,G,H, i) \
    L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) L12(8,9,10,11, A,B,C,D, i) L12(12,13,14,15, E,F,G,H, i) \
    L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) L13(8,9,10,11, A,B,C,D, i) L13(12,13,14,15, E,F,G,H, i) \
    L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) L14(8,9,10,11, A,B,C,D, i) L14(12,13,14,15, E,F,G,H, i) \
    L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) L15(8,9,10,11, A,B,C,D, i) L15(12,13,14,15, E,F,G,H, i) \
    L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) L16(8,9,10,11, A,B,C,D, i) L16(12,13,14,15, E,F,G,H, i) \
    L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) L17(8,9,10,11, A,B,C,D, i) L17(12,13,14,15, E,F,G,H, i) \
    L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) L18(8,9,10,11, A,B,C,D, i) L18(12,13,14,15, E,F,G,H, i) \
    L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) L19(8,9,10,11, A,B,C,D, i) L19(12,13,14,15, E,F,G,H, i) \
    L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) L20(8,9,10,11, A,B,C,D, i) L20(12,13,14,15, E,F,G,H, i) \
    L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) L21(8,9,10,11, A,B,C,D, i) L21(12,13,14,15, E,F,G,H, i) \
    L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) L22(8,9,10,11, A,B,C,D, i) L22(12,13,14,15, E,F,G,H, i) \
    L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) L23(8,9,10,11, A,B,C,D, i) L23(12,13,14,15, E,F,G,H, i) \
    L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) L24(8,9,10,11, A,B,C,D, i) L24(12,13,14,15, E,F,G,H, i) \
    L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) L25(8,9,10,11, A,B,C,D, i) L25(12,13,14,15, E,F,G,H, i) \
    L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) L26(8,9,10,11, A,B,C,D, i) L26(12,13,14,15, E,F,G,H, i) \
    L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) L27(8,9,10,11, A,B,C,D, i) L27(12,13,14,15, E,F,G,H, i) \
    L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) L28(8,9,10,11, A,B,C,D, i) L28(12,13,14,15, E,F,G,H, i) \
    L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) L29(8,9,10,11, A,B,C,D, i) L29(12,13,14,15, E,F,G,H, i) \
    L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) L30(8,9,10,11, A,B,C,D, i) L30(12,13,14,15, E,F,G,H, i) \
    L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) L31(8,9,10,11, A,B,C,D, i) L31(12,13,14,15, E,F,G,H, i) \
    L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) L32(8,9,10,11, A,B,C,D, i) L32(12,13,14,15, E,F,G,H, i)

#if CRYPTOPP_BOOL_X64
    SSE2_QUARTER_ROUND_X16(1, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)
#else
    SSE2_QUARTER_ROUND_X8(1, 2, 6, 10, 14, 3, 7, 11, 15)
    SSE2_QUARTER_ROUND_X8(1, 0, 4, 8, 12, 1, 5, 9, 13)
#endif
    AS2( mov REG_roundsLeft, REG_rounds)

    ASL(SSE2_Salsa_Output)
    AS2( movdqa xmm0, xmm4)
    AS2( punpckldq xmm4, xmm5)
    AS2( movdqa xmm1, xmm6)
    AS2( punpckldq xmm6, xmm7)
    AS2( movdqa xmm2, xmm4)
    AS2( punpcklqdq xmm4, xmm6)
    AS2( punpckhqdq xmm2, xmm6)
    AS2( punpckhdq xmm0, xmm5)
    AS2( punpckhdq xmm1, xmm7)
    AS2( movdqa xmm6, xmm0)
    AS2( punpcklqdq xmm0, xmm1)
    AS2( punpckhqdq xmm6, xmm1)
    AS_XMM_OUTPUT4(SSE2_Salsa_Output_A, REG_input, REG_output, 4, 2, 0, 6, 1, 0, 4, 8, 12, 1)
#if CRYPTOPP_BOOL_X64
    SSE2_QUARTER_ROUND_X16(0, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)

    SSE2_QUARTER_ROUND_X16(0, 0, 13, 10, 7, 1, 14, 11, 4, 2, 15, 8, 5, 3, 12, 9, 6)
#else
    SSE2_QUARTER_ROUND_X8(0, 2, 6, 10, 14, 3, 7, 11, 15)
    SSE2_QUARTER_ROUND_X8(0, 0, 4, 8, 12, 1, 5, 9, 13)

    SSE2_QUARTER_ROUND_X8(0, 2, 15, 8, 5, 3, 12, 9, 6)
    SSE2_QUARTER_ROUND_X8(0, 0, 13, 10, 7, 1, 14, 11, 4)
#endif
    AS2( sub REG_roundsLeft, 2)
#define SSE2_OUTPUT_4(a, b, c, d) \
    AS2( movdqa xmm4, [SSE2_WORKSPACE + a*16 + 256])\
    AS2( paddd xmm4, [SSE2_WORKSPACE + a*16])\
    AS2( movdqa xmm5, [SSE2_WORKSPACE + b*16 + 256])\
    AS2( paddd xmm5, [SSE2_WORKSPACE + b*16])\
    AS2( movdqa xmm6, [SSE2_WORKSPACE + c*16 + 256])\
    AS2( paddd xmm6, [SSE2_WORKSPACE + c*16])\
    AS2( movdqa xmm7, [SSE2_WORKSPACE + d*16 + 256])\
    AS2( paddd xmm7, [SSE2_WORKSPACE + d*16])\
    ASC( call, SSE2_Salsa_Output)

    SSE2_OUTPUT_4(0, 13, 10, 7)
    SSE2_OUTPUT_4(4, 1, 14, 11)
    SSE2_OUTPUT_4(8, 5, 2, 15)
    SSE2_OUTPUT_4(12, 9, 6, 3)
    AS2( test REG_input, REG_input)
    AS2( add REG_input, 12*16)
    AS2( add REG_output, 12*16)
    AS2( sub REG_iterationCount, 4)
    AS2( cmp REG_iterationCount, 4)

    // Remaining iterations: one 64-byte block at a time
    AS2( sub REG_iterationCount, 1)
    AS2( movdqa xmm0, [REG_state + 0*16])
    AS2( movdqa xmm1, [REG_state + 1*16])
    AS2( movdqa xmm2, [REG_state + 2*16])
    AS2( movdqa xmm3, [REG_state + 3*16])
    AS2( mov REG_roundsLeft, REG_rounds)

    SSE2_QUARTER_ROUND(0, 1, 3, 7)
    SSE2_QUARTER_ROUND(1, 2, 0, 9)
    SSE2_QUARTER_ROUND(2, 3, 1, 13)
    SSE2_QUARTER_ROUND(3, 0, 2, 18)
    ASS( pshufd xmm1, xmm1, 2, 1, 0, 3)
    ASS( pshufd xmm2, xmm2, 1, 0, 3, 2)
    ASS( pshufd xmm3, xmm3, 0, 3, 2, 1)
    SSE2_QUARTER_ROUND(0, 3, 1, 7)
    SSE2_QUARTER_ROUND(3, 2, 0, 9)
    SSE2_QUARTER_ROUND(2, 1, 3, 13)
    SSE2_QUARTER_ROUND(1, 0, 2, 18)
    ASS( pshufd xmm1, xmm1, 0, 3, 2, 1)
    ASS( pshufd xmm2, xmm2, 1, 0, 3, 2)
    ASS( pshufd xmm3, xmm3, 2, 1, 0, 3)
    AS2( sub REG_roundsLeft, 2)

    // Add the original state to form the keystream block
    AS2( paddd xmm0, [REG_state + 0*16])
    AS2( paddd xmm1, [REG_state + 1*16])
    AS2( paddd xmm2, [REG_state + 2*16])
    AS2( paddd xmm3, [REG_state + 3*16])

    // Increment the 64-bit block counter held in state words 8 (low) and 5 (high)
    AS2( add dword ptr [REG_state + 8*4], 1)
    AS2( adc dword ptr [REG_state + 5*4], 0)

    AS2( pcmpeqb xmm6, xmm6)
    ASS( pshufd xmm7, xmm6, 0, 1, 2, 3)
    AS2( movdqa xmm4, xmm0)
    AS2( movdqa xmm5, xmm3)
    AS2( pand xmm0, xmm7)
    AS2( pand xmm4, xmm6)
    AS2( pand xmm3, xmm6)
    AS2( pand xmm5, xmm7)
    AS2( movdqa xmm5, xmm1)
    AS2( pand xmm1, xmm7)
    AS2( pand xmm5, xmm6)
    AS2( pand xmm6, xmm2)
    AS2( pand xmm2, xmm7)
    AS2( movdqa xmm5, xmm4)
    AS2( movdqa xmm6, xmm0)
    AS3( shufpd xmm4, xmm1, 2)
    AS3( shufpd xmm0, xmm2, 2)
    AS3( shufpd xmm1, xmm5, 2)
    AS3( shufpd xmm2, xmm6, 2)
    AS_XMM_OUTPUT4(SSE2_Salsa_Output_B, REG_input, REG_output, 4, 0, 1, 2, 3, 0, 1, 2, 3, 4)
#if CRYPTOPP_BOOL_X64
        : "+r" (input), "+r" (output), "+r" (iterationCount)
        : "r" (m_rounds), "r" (m_state.begin()), "r" (workspace)
        : "%eax", "%rdx", "memory", "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3",
          "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10",
          "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15"
#else
        : "+a" (input), "+D" (output), "+c" (iterationCount)
        : "d" (m_rounds), "S" (m_state.begin())
#endif
#ifdef CRYPTOPP_GENERATE_X64_MASM
    movdqa xmm6, [rsp + 0200h]
    movdqa xmm7, [rsp + 0210h]
    movdqa xmm8, [rsp + 0220h]
    movdqa xmm9, [rsp + 0230h]
    movdqa xmm10, [rsp + 0240h]
    movdqa xmm11, [rsp + 0250h]
    movdqa xmm12, [rsp + 0260h]
    movdqa xmm13, [rsp + 0270h]
    movdqa xmm14, [rsp + 0280h]
    movdqa xmm15, [rsp + 0290h]
    add rsp, 10*16 + 32*16 + 8
    ret
Salsa20_OperateKeystream ENDP
#ifndef CRYPTOPP_GENERATE_X64_MASM
    // Portable implementation
    word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
    while (iterationCount--)
    {
        x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
        x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7];
        x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11];
        x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15];
        for (int i=m_rounds; i>0; i-=2)
        {
#define QUARTER_ROUND(a, b, c, d) \
    b = b ^ rotlConstant<7>(a + d); \
    c = c ^ rotlConstant<9>(b + a); \
    d = d ^ rotlConstant<13>(c + b); \
    a = a ^ rotlConstant<18>(d + c);

            QUARTER_ROUND(x0, x4, x8, x12)
            QUARTER_ROUND(x1, x5, x9, x13)
            QUARTER_ROUND(x2, x6, x10, x14)
            QUARTER_ROUND(x3, x7, x11, x15)

            QUARTER_ROUND(x0, x13, x10, x7)
            QUARTER_ROUND(x1, x14, x11, x4)
            QUARTER_ROUND(x2, x15, x8, x5)
            QUARTER_ROUND(x3, x12, x9, x6)
        }
#define SALSA_OUTPUT(x) {\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x13 + m_state[13]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x10 + m_state[10]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x7 + m_state[7]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x1 + m_state[1]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x14 + m_state[14]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x11 + m_state[11]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x5 + m_state[5]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x2 + m_state[2]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x15 + m_state[15]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x9 + m_state[9]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x6 + m_state[6]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x3 + m_state[3]);}

        // m_state is stored permuted; SALSA_OUTPUT writes the keystream in canonical Salsa20 word order
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
        CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SALSA_OUTPUT, BYTES_PER_ITERATION);
#endif

        if (++m_state[8] == 0)
            ++m_state[5];
    }
}
void XSalsa20_Policy::CipherSetKey(const NameValuePairs &params, const byte *key, size_t length)
{
    if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
        throw InvalidRounds(XSalsa20::StaticAlgorithmName(), m_rounds);
    m_state[0] = 0x61707865;
    m_state[1] = 0x3320646e;
    m_state[2] = 0x79622d32;
    m_state[3] = 0x6b206574;
}
void XSalsa20_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length)
{
    CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length);
    word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;

    // The first 16 bytes of the 24-byte IV feed the HSalsa20 rounds below;
    // the last 8 bytes become the regular Salsa20 IV
    GetBlock<word32, LittleEndian> get(IV);
    get(x14)(x11)(x8)(x5)(m_state[14])(m_state[11]);

    x13 = m_key[0]; x10 = m_key[1]; x7 = m_key[2]; x4 = m_key[3];
    x15 = m_key[4]; x12 = m_key[5]; x9 = m_key[6]; x6 = m_key[7];
    x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
    for (int i=m_rounds; i>0; i-=2)
    {
        QUARTER_ROUND(x0, x4, x8, x12)
        QUARTER_ROUND(x1, x5, x9, x13)
        QUARTER_ROUND(x2, x6, x10, x14)
        QUARTER_ROUND(x3, x7, x11, x15)

        QUARTER_ROUND(x0, x13, x10, x7)
        QUARTER_ROUND(x1, x14, x11, x4)
        QUARTER_ROUND(x2, x15, x8, x5)
        QUARTER_ROUND(x3, x12, x9, x6)
    }
    // The HSalsa20 output becomes the Salsa20 key for this message
    m_state[13] = x0; m_state[10] = x1; m_state[7] = x2; m_state[4] = x3;
    m_state[15] = x14; m_state[12] = x11; m_state[9] = x8; m_state[6] = x5;

    // Reset the block counter
    m_state[8] = m_state[5] = 0;
}
#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
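// Illustrative usage sketch, not part of this file: driving Salsa20/XSalsa20
// through the Crypto++ SymmetricCipher interface. The key, IV, and buffer names
// here are hypothetical and error handling is omitted.
#if 0
#include "salsa.h"
#include "osrng.h"
#include "secblock.h"

#include <vector>

int main()
{
    using namespace CryptoPP;

    AutoSeededRandomPool prng;

    // XSalsa20 takes a 32-byte key and a 24-byte IV;
    // Salsa20 takes a 16- or 32-byte key and an 8-byte IV.
    SecByteBlock key(32), iv(24);
    prng.GenerateBlock(key, key.size());
    prng.GenerateBlock(iv, iv.size());

    std::vector<byte> buffer(1024, 0x00);

    XSalsa20::Encryption enc;
    enc.SetKeyWithIV(key, key.size(), iv, iv.size());
    enc.ProcessData(&buffer[0], &buffer[0], buffer.size());   // encrypt in place

    XSalsa20::Decryption dec;
    dec.SetKeyWithIV(key, key.size(), iv, iv.size());
    dec.ProcessData(&buffer[0], &buffer[0], buffer.size());   // decrypt in place

    return 0;
}
#endif // illustrative sketch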