Crypto++  7.0
Free C++ class library of cryptographic schemes
aria.cpp
1 // aria.cpp - written and placed in the public domain by Jeffrey Walton
2 
3 #include "pch.h"
4 #include "config.h"
5 
6 #include "aria.h"
7 #include "misc.h"
8 #include "cpu.h"
9 
// Opt in to an ARIA SSE2 code path when the build configuration reports that
// SSE2 intrinsics are available.
// NOTE(review): this macro is not referenced in the visible part of this file.
10 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE
11 # define CRYPTOPP_ENABLE_ARIA_SSE2_INTRINSICS 1
12 #endif
13 
// Opt in to the ARIA SSSE3 code path. ProcessAndXorBlock tests this macro
// before dispatching to ARIA_ProcessAndXorBlock_Xor_SSSE3.
14 #if CRYPTOPP_SSSE3_AVAILABLE
15 # define CRYPTOPP_ENABLE_ARIA_SSSE3_INTRINSICS 1
16 #endif
17 
// Reinterprets a pointer as word32* by first converting it to void*.
18 // GCC cast warning. Note: this is used on round key table,
19 // which is word32 and naturally aligned.
// The macro itself performs no alignment check; callers must only pass
// pointers that are suitably aligned for word32 access.
20 #define UINT32_CAST(x) ((word32 *)(void *)(x))
21 
// Lookup tables used by the ARIA round macros and key schedule. They are only
// declared here (extern); the definitions are not in this file.
//   S1, S2, X1, X2 : 256 word32 entries each, indexed by a single byte.
//   KRK            : 3 rows of 4 word32, indexed by the row selector q in
//                    UncheckedSetKey.
22 NAMESPACE_BEGIN(CryptoPP)
23 NAMESPACE_BEGIN(ARIATab)
24 
25 extern const word32 S1[256];
26 extern const word32 S2[256];
27 extern const word32 X1[256];
28 extern const word32 X2[256];
29 extern const word32 KRK[3][4];
30 
31 NAMESPACE_END
32 NAMESPACE_END
33 
34 NAMESPACE_BEGIN(CryptoPP)
35 
// Pull the ARIATab tables into CryptoPP so the macros below can refer to
// them by their short names.
36 using CryptoPP::ARIATab::S1;
37 using CryptoPP::ARIATab::S2;
38 using CryptoPP::ARIATab::X1;
39 using CryptoPP::ARIATab::X2;
40 using CryptoPP::ARIATab::KRK;
41 
42 inline byte ARIA_BRF(const word32 x, const int y) {
43  return GETBYTE(x, y);
44 }
45 
46 // Key XOR Layer
// Expands in a scope that must provide `rk` (pointer to the current round key
// bytes) and `t` (the four-word working state). It passes rk and t to
// BlockGetAndPut<...>::Put and feeds it t[0]..t[3]; presumably this XORs the
// 16 round-key bytes into t in place -- confirm against BlockGetAndPut.
// The macro does not advance rk; call sites do that themselves.
47 #define ARIA_KXL { \
48  typedef BlockGetAndPut<word32, NativeByteOrder, true, true> NativeBlock; \
49  NativeBlock::Put(rk, t)(t[0])(t[1])(t[2])(t[3]); \
50  }
51 
52 // S-Box Layer 1 + M
// Replaces each of the four words, in place, with the XOR of four table
// lookups, one per byte of the old value:
//   byte 3 -> S1, byte 2 -> S2, byte 1 -> X1, byte 0 -> X2.
// Each argument is read four times and then assigned, so it must be an
// lvalue without side effects.
53 #define SBL1_M(T0,T1,T2,T3) { \
54  T0=S1[ARIA_BRF(T0,3)]^S2[ARIA_BRF(T0,2)]^X1[ARIA_BRF(T0,1)]^X2[ARIA_BRF(T0,0)]; \
55  T1=S1[ARIA_BRF(T1,3)]^S2[ARIA_BRF(T1,2)]^X1[ARIA_BRF(T1,1)]^X2[ARIA_BRF(T1,0)]; \
56  T2=S1[ARIA_BRF(T2,3)]^S2[ARIA_BRF(T2,2)]^X1[ARIA_BRF(T2,1)]^X2[ARIA_BRF(T2,0)]; \
57  T3=S1[ARIA_BRF(T3,3)]^S2[ARIA_BRF(T3,2)]^X1[ARIA_BRF(T3,1)]^X2[ARIA_BRF(T3,0)]; \
58  }
59 
60 // S-Box Layer 2 + M
// Same shape as SBL1_M but with the table assignment rotated:
//   byte 3 -> X1, byte 2 -> X2, byte 1 -> S1, byte 0 -> S2.
// Each argument is read four times and then assigned, so it must be an
// lvalue without side effects.
61 #define SBL2_M(T0,T1,T2,T3) { \
62  T0=X1[ARIA_BRF(T0,3)]^X2[ARIA_BRF(T0,2)]^S1[ARIA_BRF(T0,1)]^S2[ARIA_BRF(T0,0)]; \
63  T1=X1[ARIA_BRF(T1,3)]^X2[ARIA_BRF(T1,2)]^S1[ARIA_BRF(T1,1)]^S2[ARIA_BRF(T1,0)]; \
64  T2=X1[ARIA_BRF(T2,3)]^X2[ARIA_BRF(T2,2)]^S1[ARIA_BRF(T2,1)]^S2[ARIA_BRF(T2,0)]; \
65  T3=X1[ARIA_BRF(T3,3)]^X2[ARIA_BRF(T3,2)]^S1[ARIA_BRF(T3,1)]^S2[ARIA_BRF(T3,0)]; \
66  }
67 
// Per-word byte rearrangement, applied in place. T0 is accepted but never
// touched.
//   T1: the two bytes inside each 16-bit half are swapped.
//   T2: passed through rotrConstant<16>.
//   T3: passed through ByteReverse.
// ARIA_FE calls this with its arguments rotated, so which state word plays
// the role of T0..T3 depends on the call site.
68 #define ARIA_P(T0,T1,T2,T3) { \
69  (T1) = (((T1)<< 8)&0xff00ff00) ^ (((T1)>> 8)&0x00ff00ff); \
70  (T2) = rotrConstant<16>(T2); \
71  (T3) = ByteReverse((T3)); \
72  }
73 
// Stores into Y the XOR of X shifted left and right by 8, 16 and 24 bits.
// Shifts bind tighter than ^, so the expression is six shifted copies XORed
// together. X is evaluated six times; Y is not parenthesised and must be a
// plain lvalue.
74 #define ARIA_M(X,Y) { \
75  Y=(X)<<8 ^ (X)>>8 ^ (X)<<16 ^ (X)>>16 ^ (X)<<24 ^ (X)>>24; \
76  }
77 
// Mixes four words in place with a fixed sequence of six XOR-assignments.
// The statement order is significant: later XORs consume the results of
// earlier ones (for example T1 is updated first and then folded into T0
// and T3).
78 #define ARIA_MM(T0,T1,T2,T3) { \
79  (T1)^=(T2); (T2)^=(T3); (T0)^=(T1); \
80  (T3)^=(T1); (T2)^=(T0); (T1)^=(T2); \
81  }
82 
// Round functions over the four-word state `t`, which must be in scope at
// the expansion site.
//   ARIA_FO: SBL1_M, ARIA_MM, ARIA_P, ARIA_MM on t[0..3].
//   ARIA_FE: SBL2_M, ARIA_MM, ARIA_P with its arguments rotated to
//            (t[2],t[3],t[0],t[1]), then ARIA_MM.
// Neither macro touches the round key; call sites pair them with ARIA_KXL.
83 #define ARIA_FO {SBL1_M(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3]) ARIA_P(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3])}
84 #define ARIA_FE {SBL2_M(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3]) ARIA_P(t[2],t[3],t[0],t[1]) ARIA_MM(t[0],t[1],t[2],t[3])}
85 
// NEON helpers, declared only when CRYPTOPP_ARM_NEON_AVAILABLE is set. They
// are defined outside this file. UncheckedSetKey and ProcessAndXorBlock call
// them only after a runtime HasNEON() check.
86 #if (CRYPTOPP_ARM_NEON_AVAILABLE)
87 extern void ARIA_UncheckedSetKey_Schedule_NEON(byte* rk, word32* ws, unsigned int keylen);
88 extern void ARIA_ProcessAndXorBlock_Xor_NEON(const byte* xorBlock, byte* outblock);
89 #endif
90 
// SSSE3 helper, declared only when CRYPTOPP_SSSE3_AVAILABLE is set and
// defined outside this file. ProcessAndXorBlock hands it the final stage of
// a block (xorBlock, outBlock, the current round key pointer and the state)
// after a runtime HasSSSE3() check.
91 #if (CRYPTOPP_SSSE3_AVAILABLE)
92 extern void ARIA_ProcessAndXorBlock_Xor_SSSE3(const byte* xorBlock, byte* outBlock, const byte *rk, word32 *t);
93 #endif
94 
95 // n-bit right shift of Y XORed to X
96 template <unsigned int N>
97 inline void ARIA_GSRK(const word32 X[4], const word32 Y[4], byte RK[16])
98 {
99  // MSVC is not generating a "rotate immediate". Constify to help it along.
100  static const unsigned int Q = 4-(N/32);
101  static const unsigned int R = N % 32;
102  UINT32_CAST(RK)[0] = (X[0]) ^ ((Y[(Q )%4])>>R) ^ ((Y[(Q+3)%4])<<(32-R));
103  UINT32_CAST(RK)[1] = (X[1]) ^ ((Y[(Q+1)%4])>>R) ^ ((Y[(Q )%4])<<(32-R));
104  UINT32_CAST(RK)[2] = (X[2]) ^ ((Y[(Q+2)%4])>>R) ^ ((Y[(Q+1)%4])<<(32-R));
105  UINT32_CAST(RK)[3] = (X[3]) ^ ((Y[(Q+3)%4])>>R) ^ ((Y[(Q+2)%4])<<(32-R));
106 }
107 
// Builds the round-key table for the given user key.
//
//   key    - user key bytes; keylen bytes are expected to be readable.
//   keylen - 16, 24 or 32. Any other value zeroes the round count and trips
//            CRYPTOPP_ASSERT(0).
//   params - unused.
//
// Side effects: (re)allocates m_rk (16*17 bytes) and m_w (4*7 word32), sets
// m_rounds, and fills m_rk. When IsForwardTransformation() is false the
// table is additionally rewritten in place for decryption.
//
// NOTE(review): listing lines 141, 151 and 156 are absent from this copy of
// the file. The object named `block` used below is presumably declared on
// those lines (reading from mk) -- confirm against the upstream source.
108 void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const NameValuePairs &params)
109 {
110  CRYPTOPP_UNUSED(params);
111 
112  m_rk.New(16*17); // round keys: 17 slots of 16 bytes
113  m_w.New(4*7); // w0, w1, w2, w3, t and u
114 
115  const byte *mk = key;
116  byte *rk = m_rk.data();
 // Q/R keep the initial values of q/r; q and r are the working copies.
117  int Q, q, R, r;
118 
 // Map key length to round count and to the starting row of KRK.
119  switch (keylen)
120  {
121  case 16:
122  R = r = m_rounds = 12;
123  Q = q = 0;
124  break;
125  case 32:
126  R = r = m_rounds = 16;
127  Q = q = 2;
128  break;
129  case 24:
130  R = r = m_rounds = 14;
131  Q = q = 1;
132  break;
133  default:
 // Unsupported length: everything is zeroed and the debug assertion fires.
 // NOTE(review): nothing here returns, so if the assertion does not stop
 // execution the code below runs with m_rounds == 0 -- confirm intent.
134  Q = q = R = r = m_rounds = 0;
135  CRYPTOPP_ASSERT(0);
136  }
137 
138  // w0 has room for 32 bytes. w1-w3 each has room for 16 bytes. t and u are 16 byte temp areas.
 // Word offsets inside m_w: w0 = 0, w1 = 8, w2 = 12, w3 = 16, t = 20.
 // The decryption branch below uses offset 24 as a second scratch area.
139  word32 *w0 = m_w.data(), *w1 = m_w.data()+8, *w2 = m_w.data()+12, *w3 = m_w.data()+16, *t = m_w.data()+20;
140 
 // Presumably loads the first four key words into w0 (see NOTE above).
142  block(w0[0])(w0[1])(w0[2])(w0[3]);
143 
 // t = w0 ^ KRK[q], then the odd round function.
144  t[0]=w0[0]^KRK[q][0]; t[1]=w0[1]^KRK[q][1];
145  t[2]=w0[2]^KRK[q][2]; t[3]=w0[3]^KRK[q][3];
146 
147  ARIA_FO;
148 
 // w1 starts from the remaining key material: four words for a 32-byte
 // key, two words plus zero padding for 24 bytes, all zero for 16 bytes.
149  if (keylen == 32)
150  {
152  block(w1[0])(w1[1])(w1[2])(w1[3]);
153  }
154  else if (keylen == 24)
155  {
157  block(w1[0])(w1[1]); w1[2] = w1[3] = 0;
158  }
159  else
160  {
161  w1[0]=w1[1]=w1[2]=w1[3]=0;
162  }
163 
 // w1 ^= t, then t becomes a copy of w1.
164  w1[0]^=t[0]; w1[1]^=t[1]; w1[2]^=t[2]; w1[3]^=t[3];
165  ::memcpy(t, w1, 16);
166 
 // Advance q cyclically through 0,1,2 to select the next KRK row.
167  q = (q==2) ? 0 : (q+1);
168  t[0]^=KRK[q][0]; t[1]^=KRK[q][1]; t[2]^=KRK[q][2]; t[3]^=KRK[q][3];
169 
170  ARIA_FE;
171 
 // w2 = FE(w1 ^ KRK[q]) ^ w0
172  t[0]^=w0[0]; t[1]^=w0[1]; t[2]^=w0[2]; t[3]^=w0[3];
173  ::memcpy(w2, t, 16);
174 
175  q = (q==2) ? 0 : (q+1);
176  t[0]^=KRK[q][0]; t[1]^=KRK[q][1]; t[2]^=KRK[q][2]; t[3]^=KRK[q][3];
177 
178  ARIA_FO;
179 
 // w3 = FO(w2 ^ KRK[q]) ^ w1
180  w3[0]=t[0]^w1[0]; w3[1]=t[1]^w1[1]; w3[2]=t[2]^w1[2]; w3[3]=t[3]^w1[3];
181 
 // Expand w0..w3 into round keys, either through the NEON helper or with
 // the portable ARIA_GSRK calls below.
182 #if CRYPTOPP_ARM_NEON_AVAILABLE
183  if (HasNEON())
184  {
185  ARIA_UncheckedSetKey_Schedule_NEON(rk, m_w, keylen);
186  }
187  else
188 #endif // CRYPTOPP_ARM_NEON_AVAILABLE
189  {
 // Each call writes one 16-byte round key at the given byte offset.
 // Thirteen keys (offsets 0..192) are always produced.
190  ARIA_GSRK<19>(w0, w1, rk + 0);
191  ARIA_GSRK<19>(w1, w2, rk + 16);
192  ARIA_GSRK<19>(w2, w3, rk + 32);
193  ARIA_GSRK<19>(w3, w0, rk + 48);
194  ARIA_GSRK<31>(w0, w1, rk + 64);
195  ARIA_GSRK<31>(w1, w2, rk + 80);
196  ARIA_GSRK<31>(w2, w3, rk + 96);
197  ARIA_GSRK<31>(w3, w0, rk + 112);
198  ARIA_GSRK<67>(w0, w1, rk + 128);
199  ARIA_GSRK<67>(w1, w2, rk + 144);
200  ARIA_GSRK<67>(w2, w3, rk + 160);
201  ARIA_GSRK<67>(w3, w0, rk + 176);
202  ARIA_GSRK<97>(w0, w1, rk + 192);
203 
 // Two more keys for keylen > 16, and a further two for keylen > 24.
204  if (keylen > 16)
205  {
206  ARIA_GSRK<97>(w1, w2, rk + 208);
207  ARIA_GSRK<97>(w2, w3, rk + 224);
208 
209  if (keylen > 24)
210  {
211  ARIA_GSRK< 97>(w3, w0, rk + 240);
212  ARIA_GSRK<109>(w0, w1, rk + 256);
213  }
214  }
215  }
216 
217  // Decryption operation
 // Rewrites the table in place: the round keys are reversed end-to-end and
 // every key except the outermost two is passed through ARIA_M, ARIA_MM,
 // ARIA_P, ARIA_MM on the way.
218  if (!IsForwardTransformation())
219  {
220  word32 *a, *z, *s;
221  rk = m_rk.data();
222  r = R; q = Q;
223 
 // a -> first round key, z -> round key number r (4 words per key),
 // s -> scratch at word offset 24 of m_w.
224  a=UINT32_CAST(rk); s=m_w.data()+24; z=a+r*4;
 // Swap the first and last round keys unchanged.
225  ::memcpy(t, a, 16); ::memcpy(a, z, 16); ::memcpy(z, t, 16);
226 
227  a+=4; z-=4;
 // Walk inwards from both ends, transforming each pair and swapping it.
228  for (; a<z; a+=4, z-=4)
229  {
 // Transform the key at a and park the result in s.
230  ARIA_M(a[0],t[0]); ARIA_M(a[1],t[1]); ARIA_M(a[2],t[2]); ARIA_M(a[3],t[3]);
231  ARIA_MM(t[0],t[1],t[2],t[3]); ARIA_P(t[0],t[1],t[2],t[3]); ARIA_MM(t[0],t[1],t[2],t[3]);
232  ::memcpy(s, t, 16);
233 
 // Transform the key at z, store it at a, and store the parked one at z.
234  ARIA_M(z[0],t[0]); ARIA_M(z[1],t[1]); ARIA_M(z[2],t[2]); ARIA_M(z[3],t[3]);
235  ARIA_MM(t[0],t[1],t[2],t[3]); ARIA_P(t[0],t[1],t[2],t[3]); ARIA_MM(t[0],t[1],t[2],t[3]);
236  ::memcpy(a, t, 16); ::memcpy(z, s, 16);
237  }
238 
 // For r = 12, 14 or 16 the pointers meet here (a == z), so this
 // transforms the middle round key in place.
239  ARIA_M(a[0],t[0]); ARIA_M(a[1],t[1]); ARIA_M(a[2],t[2]); ARIA_M(a[3],t[3]);
240  ARIA_MM(t[0],t[1],t[2],t[3]); ARIA_P(t[0],t[1],t[2],t[3]); ARIA_MM(t[0],t[1],t[2],t[3]);
241  ::memcpy(z, t, 16);
242  }
243 
244  // Silence warnings
245  CRYPTOPP_UNUSED(Q); CRYPTOPP_UNUSED(R);
246  CRYPTOPP_UNUSED(q); CRYPTOPP_UNUSED(r);
247 }
248 
// Processes one block with the round keys currently stored in m_rk.
//
//   inBlock  - input block; read as four big-endian word32 values.
//   xorBlock - optional; when not NULLPTR it is XORed into the result.
//   outBlock - receives ARIA::BLOCKSIZE output bytes.
//
// The same code serves both directions; the direction is determined by how
// UncheckedSetKey laid out m_rk.
//
// NOTE(review): although the method is const, it uses part of the member
// buffer m_w as its working state (via const_cast). Calls on the same object
// therefore share that scratch area -- confirm the intended threading rules.
249 void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
250 {
251  const byte *rk = reinterpret_cast<const byte*>(m_rk.data());
 // Working state: four words at word offset 20 of m_w.
252  word32 *t = const_cast<word32*>(m_w.data()+20);
253 
254  // Timing attack countermeasure. See comments in Rijndael for more details.
255  // We used Yun's 32-bit implementation, so we use words rather than bytes.
 // The loop reads one S1 entry per cache-line-sized stride and ORs it into
 // u. u starts from a volatile zero and is folded into t[0] afterwards,
 // presumably so the reads cannot be optimised away.
 // NOTE(review): only S1 is walked here; S2, X1 and X2 are not.
256  const int cacheLineSize = GetCacheLineSize();
257  unsigned int i;
258  volatile word32 _u = 0;
259  word32 u = _u;
260 
261  for (i=0; i<COUNTOF(S1); i+=cacheLineSize/(sizeof(S1[0])))
262  u |= *(S1+i);
263  t[0] |= u;
264 
 // Load the input block into t[0..3]; this presumably overwrites the value
 // just ORed into t[0].
265  GetBlock<word32, BigEndian>block(inBlock);
266  block(t[0])(t[1])(t[2])(t[3]);
267 
 // Each "ARIA_KXL; rk+= 16; ARIA_Fx" group is one round: key XOR, step to
 // the next 16-byte round key, round function. Two extra rounds run when
 // m_rounds > 12 and two more when m_rounds > 14.
268  if (m_rounds > 12) {
269  ARIA_KXL; rk+= 16; ARIA_FO;
270  ARIA_KXL; rk+= 16; ARIA_FE;
271  }
272 
273  if (m_rounds > 14) {
274  ARIA_KXL; rk+= 16; ARIA_FO;
275  ARIA_KXL; rk+= 16; ARIA_FE;
276  }
277 
 // Eleven unconditional rounds followed by one more key XOR. After the
 // last line rk points at the round key consumed by the output stage.
278  ARIA_KXL; rk+= 16; ARIA_FO; ARIA_KXL; rk+= 16; ARIA_FE;
279  ARIA_KXL; rk+= 16; ARIA_FO; ARIA_KXL; rk+= 16; ARIA_FE;
280  ARIA_KXL; rk+= 16; ARIA_FO; ARIA_KXL; rk+= 16; ARIA_FE;
281  ARIA_KXL; rk+= 16; ARIA_FO; ARIA_KXL; rk+= 16; ARIA_FE;
282  ARIA_KXL; rk+= 16; ARIA_FO; ARIA_KXL; rk+= 16; ARIA_FE;
283  ARIA_KXL; rk+= 16; ARIA_FO; ARIA_KXL; rk+= 16;
284 
 // With SSSE3 the whole output stage, including the optional xorBlock, is
 // delegated to the helper and the function returns. The trailing `else`
 // binds to whichever brace block the endianness #ifdef selects below.
285 #if CRYPTOPP_ENABLE_ARIA_SSSE3_INTRINSICS
286  if (HasSSSE3())
287  {
288  ARIA_ProcessAndXorBlock_Xor_SSSE3(xorBlock, outBlock, rk, t);
289  return;
290  }
291  else
292 #endif // CRYPTOPP_ENABLE_ARIA_SSSE3_INTRINSICS
293 
 // Output stage: each output byte is one table lookup on a state byte,
 // narrowed to byte, XORed with a round-key byte. X2 results are shifted
 // right by 8 before narrowing. On little-endian builds the round-key
 // bytes are taken in reverse order within each 4-byte word.
294 #ifdef CRYPTOPP_LITTLE_ENDIAN
295  {
296  outBlock[ 0] = (byte)(X1[ARIA_BRF(t[0],3)] ) ^ rk[ 3];
297  outBlock[ 1] = (byte)(X2[ARIA_BRF(t[0],2)]>>8) ^ rk[ 2];
298  outBlock[ 2] = (byte)(S1[ARIA_BRF(t[0],1)] ) ^ rk[ 1];
299  outBlock[ 3] = (byte)(S2[ARIA_BRF(t[0],0)] ) ^ rk[ 0];
300  outBlock[ 4] = (byte)(X1[ARIA_BRF(t[1],3)] ) ^ rk[ 7];
301  outBlock[ 5] = (byte)(X2[ARIA_BRF(t[1],2)]>>8) ^ rk[ 6];
302  outBlock[ 6] = (byte)(S1[ARIA_BRF(t[1],1)] ) ^ rk[ 5];
303  outBlock[ 7] = (byte)(S2[ARIA_BRF(t[1],0)] ) ^ rk[ 4];
304  outBlock[ 8] = (byte)(X1[ARIA_BRF(t[2],3)] ) ^ rk[11];
305  outBlock[ 9] = (byte)(X2[ARIA_BRF(t[2],2)]>>8) ^ rk[10];
306  outBlock[10] = (byte)(S1[ARIA_BRF(t[2],1)] ) ^ rk[ 9];
307  outBlock[11] = (byte)(S2[ARIA_BRF(t[2],0)] ) ^ rk[ 8];
308  outBlock[12] = (byte)(X1[ARIA_BRF(t[3],3)] ) ^ rk[15];
309  outBlock[13] = (byte)(X2[ARIA_BRF(t[3],2)]>>8) ^ rk[14];
310  outBlock[14] = (byte)(S1[ARIA_BRF(t[3],1)] ) ^ rk[13];
311  outBlock[15] = (byte)(S2[ARIA_BRF(t[3],0)] ) ^ rk[12];
312  }
313 #else
 // Big-endian build: same lookups, round-key bytes used in natural order.
314  {
315  outBlock[ 0] = (byte)(X1[ARIA_BRF(t[0],3)] ) ^ rk[ 0];
316  outBlock[ 1] = (byte)(X2[ARIA_BRF(t[0],2)]>>8) ^ rk[ 1];
317  outBlock[ 2] = (byte)(S1[ARIA_BRF(t[0],1)] ) ^ rk[ 2];
318  outBlock[ 3] = (byte)(S2[ARIA_BRF(t[0],0)] ) ^ rk[ 3];
319  outBlock[ 4] = (byte)(X1[ARIA_BRF(t[1],3)] ) ^ rk[ 4];
320  outBlock[ 5] = (byte)(X2[ARIA_BRF(t[1],2)]>>8) ^ rk[ 5];
321  outBlock[ 6] = (byte)(S1[ARIA_BRF(t[1],1)] ) ^ rk[ 6];
322  outBlock[ 7] = (byte)(S2[ARIA_BRF(t[1],0)] ) ^ rk[ 7];
323  outBlock[ 8] = (byte)(X1[ARIA_BRF(t[2],3)] ) ^ rk[ 8];
324  outBlock[ 9] = (byte)(X2[ARIA_BRF(t[2],2)]>>8) ^ rk[ 9];
325  outBlock[10] = (byte)(S1[ARIA_BRF(t[2],1)] ) ^ rk[10];
326  outBlock[11] = (byte)(S2[ARIA_BRF(t[2],0)] ) ^ rk[11];
327  outBlock[12] = (byte)(X1[ARIA_BRF(t[3],3)] ) ^ rk[12];
328  outBlock[13] = (byte)(X2[ARIA_BRF(t[3],2)]>>8) ^ rk[13];
329  outBlock[14] = (byte)(S1[ARIA_BRF(t[3],1)] ) ^ rk[14];
330  outBlock[15] = (byte)(S2[ARIA_BRF(t[3],0)] ) ^ rk[15];
331  }
332 #endif // CRYPTOPP_LITTLE_ENDIAN
333 
 // Optional final XOR with xorBlock: NEON helper when available at run
 // time, otherwise a plain byte loop over the block.
334 #if CRYPTOPP_ARM_NEON_AVAILABLE
335  if (HasNEON())
336  {
337  if (xorBlock != NULLPTR)
338  ARIA_ProcessAndXorBlock_Xor_NEON(xorBlock, outBlock);
339  }
340  else
341 #endif // CRYPTOPP_ARM_NEON_AVAILABLE
342  {
343  if (xorBlock != NULLPTR)
344  for (unsigned int n=0; n<ARIA::BLOCKSIZE; ++n)
345  outBlock[n] ^= xorBlock[n];
346  }
347 }
348 
349 NAMESPACE_END
Utility functions for the Crypto++ library.
bool HasSSSE3()
Determines SSSE3 availability.
Definition: cpu.h:129
Library configuration file.
int GetCacheLineSize()
Provides the cache line size.
Definition: cpu.h:298
void New(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:729
A::pointer data()
Provides a pointer to the first element in the memory block.
Definition: secblock.h:553
Precompiled header file.
#define COUNTOF(arr)
Counts elements in an array.
Definition: misc.h:181
Classes for the ARIA block cipher.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:60
Functions for CPU features and intrinsics.
Access a block of memory.
Definition: misc.h:2324
Crypto++ library namespace.
static const int BLOCKSIZE
The block size of the algorithm provided as a constant.
Definition: seckey.h:44
bool HasNEON()
Determine if an ARM processor has Advanced SIMD available.
Definition: cpu.h:329
Interface for retrieving values given their names.
Definition: cryptlib.h:290