Crypto++  7.0
Free C++ class library of cryptographic schemes
integer.cpp
1 // integer.cpp - originally written and placed in the public domain by Wei Dai
2 // contains public domain code contributed by Alister Lee and Leonard Janke
3 
4 // Notes by JW: The Integer class needs to do two things. First, it needs to set function
5 // pointers on some platforms, like X86 and X64. The function pointers select a fast multiply
6 // and addition based on the cpu. Second, it wants to create Integer::Zero(), Integer::One()
7 // and Integer::Two().
8 // The function pointers are initialized in the InitializeInteger class by calling
9 // SetFunctionPointers(). The call to SetFunctionPointers() is guarded to run once. If C++11
10 // dynamic initialization is available, then a standard run_once is used. Otherwise, and simple
11 // flag is used. The flag suffers a race, but the worse case is the same function pointers
12 // get written twice without leaking memory.
13 // For Integer::Zero(), Integer::One() and Integer::Two(), we use one of two strategies. First,
14 // if C++11 dynamic initialization is available, then we use a static variable. Second, if
15 // C++11 dynamic initialization is not available, then we fall back to Wei's original code of
16 // a Singleton.
17 // Wei's original code was much simpler. It simply used the Singleton pattern, but it always
18 // produced memory findings on some platforms. The Singleton generates memory findings because
19 // it uses a Create On First Use pattern (a dumb Nifty Counter) and the compiler had to be smart
20 // enough to fold them to return the same object. Unix and Linux compilers do a good job of folding
21 // objects, but Microsoft compilers do a rather poor job for some versions of the compilers.
22 // Another problem with the Singleton is resource destruction requires running resource acquisition
23 // in reverse. For resources provided through the Singletons, there is no way to express the
24 // dependency order to safely destroy resources. (That's one of the problems C++11 dynamic
25 // intitialization with concurrent execution is supposed to solve).
26 
27 #include "pch.h"
28 #include "config.h"
29 
30 #if CRYPTOPP_MSC_VERSION
31 # pragma warning(disable: 4100)
32 #endif
33 
34 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
35 # pragma GCC diagnostic ignored "-Wunused"
36 #if !defined(__clang__)
37 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
38 #endif
39 #endif
40 
41 // Issue 340
42 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
43 # pragma GCC diagnostic ignored "-Wconversion"
44 # pragma GCC diagnostic ignored "-Wsign-conversion"
45 #endif
46 
47 // Define this to statically initialize Integer Zero(), One()
48 // and Two() using Microsoft init_seg(). This is useful for
49 // testing Integer code for leaks when the MSC compiler
50 // does not fold use of the Singletons.
51 // #define USE_MSC_INIT_PRIORITY 1
52 
53 #ifndef CRYPTOPP_IMPORTS
54 
55 #include "integer.h"
56 #include "secblock.h"
57 #include "modarith.h"
58 #include "nbtheory.h"
59 #include "smartptr.h"
60 #include "algparam.h"
61 #include "filters.h"
62 #include "stdcpp.h"
63 #include "asn.h"
64 #include "oids.h"
65 #include "words.h"
66 #include "pubkey.h" // for P1363_KDF2
67 #include "sha.h"
68 #include "cpu.h"
69 #include "misc.h"
70 
71 #include <iostream>
72 
73 #if (_MSC_VER >= 1400) && !defined(_M_ARM)
74  #include <intrin.h>
75 #endif
76 
77 #ifdef __DECCXX
78  #include <c_asm.h>
79 #endif
80 
81 // "Error: The operand ___LKDB cannot be assigned to", http://github.com/weidai11/cryptopp/issues/188
82 #if (__SUNPRO_CC >= 0x5130)
83 # define MAYBE_CONST
84 # define MAYBE_UNCONST_CAST(x) const_cast<word*>(x)
85 #else
86 # define MAYBE_CONST const
87 # define MAYBE_UNCONST_CAST(x) x
88 #endif
89 
90 // "Inline assembly operands don't work with .intel_syntax",
91 // http://llvm.org/bugs/show_bug.cgi?id=24232
92 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
93 # undef CRYPTOPP_X86_ASM_AVAILABLE
94 # undef CRYPTOPP_X32_ASM_AVAILABLE
95 # undef CRYPTOPP_X64_ASM_AVAILABLE
96 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
97 # undef CRYPTOPP_SSSE3_ASM_AVAILABLE
98 #else
99 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86))
100 #endif
101 
102 // ***************** C++ Static Initialization ********************
103 
104 NAMESPACE_BEGIN(CryptoPP)
105 static void SetFunctionPointers();
106 InitializeInteger::InitializeInteger()
107 {
108 #if !(HAVE_GCC_INIT_PRIORITY || HAVE_MSC_INIT_PRIORITY)
109 #if defined(CRYPTOPP_CXX11_SYNCHRONIZATION) && defined(CRYPTOPP_CXX11_DYNAMIC_INIT)
110  static std::once_flag s_flag;
111  std::call_once(s_flag, []() {
112  SetFunctionPointers();
113  });
114 #else
115  static bool s_flag;
116  MEMORY_BARRIER();
117  if (s_flag == false)
118  {
119  SetFunctionPointers();
120  s_flag = true;
121  MEMORY_BARRIER();
122  }
123 #endif // C++11 or C++03 flag
124 #endif // not GCC and MSC init priorities
125 }
126 
127 template <long i>
129 {
130  Integer * operator()() const
131  {
132  return new Integer(i);
133  }
134 };
135 
136 // ***************** Library code ********************
137 
138 inline static int Compare(const word *A, const word *B, size_t N)
139 {
140  while (N--)
141  if (A[N] > B[N])
142  return 1;
143  else if (A[N] < B[N])
144  return -1;
145 
146  return 0;
147 }
148 
149 inline static int Increment(word *A, size_t N, word B=1)
150 {
151  CRYPTOPP_ASSERT(N);
152  word t = A[0];
153  A[0] = t+B;
154  if (A[0] >= t)
155  return 0;
156  for (unsigned i=1; i<N; i++)
157  if (++A[i])
158  return 0;
159  return 1;
160 }
161 
162 inline static int Decrement(word *A, size_t N, word B=1)
163 {
164  CRYPTOPP_ASSERT(N);
165  word t = A[0];
166  A[0] = t-B;
167  if (A[0] <= t)
168  return 0;
169  for (unsigned i=1; i<N; i++)
170  if (A[i]--)
171  return 0;
172  return 1;
173 }
174 
175 static void TwosComplement(word *A, size_t N)
176 {
177  Decrement(A, N);
178  for (unsigned i=0; i<N; i++)
179  A[i] = ~A[i];
180 }
181 
182 static word AtomicInverseModPower2(word A)
183 {
184  CRYPTOPP_ASSERT(A%2==1);
185 
186  word R=A%8;
187 
188  for (unsigned i=3; i<WORD_BITS; i*=2)
189  R = R*(2-R*A);
190 
191  CRYPTOPP_ASSERT(R*A==1);
192  return R;
193 }
194 
195 // ********************************************************
196 
197 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
198  #define Declare2Words(x) word x##0, x##1;
199  #define AssignWord(a, b) a##0 = b; a##1 = 0;
200  #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
201  #define LowWord(a) a##0
202  #define HighWord(a) a##1
203  #ifdef _MSC_VER
204  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
205  #ifndef __INTEL_COMPILER
206  #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
207  #endif
208  #elif defined(__DECCXX)
209  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
210  #elif defined(__x86_64__)
211  #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
212  // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
213  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
214  #else
215  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
216  #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
217  #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
218  #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
219  #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
220  #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
221  #endif
222  #endif
223  #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
224  #ifndef Double3Words
225  #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
226  #endif
227  #ifndef Acc2WordsBy2
228  #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
229  #endif
230  #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
231  #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
232  #define GetCarry(u) u##1
233  #define GetBorrow(u) u##1
234 #else
235  #define Declare2Words(x) dword x;
236  #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
237  #define MultiplyWords(p, a, b) p = __emulu(a, b);
238  #else
239  #define MultiplyWords(p, a, b) p = (dword)a*b;
240  #endif
241  #define AssignWord(a, b) a = b;
242  #define Add2WordsBy1(a, b, c) a = b + c;
243  #define Acc2WordsBy2(a, b) a += b;
244  #define LowWord(a) word(a)
245  #define HighWord(a) word(a>>WORD_BITS)
246  #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
247  #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
248  #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
249  #define GetCarry(u) HighWord(u)
250  #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
251 #endif
252 #ifndef MulAcc
253  #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
254 #endif
255 #ifndef Acc2WordsBy1
256  #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
257 #endif
258 #ifndef Acc3WordsBy2
259  #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
260 #endif
261 
262 class DWord
263 {
264 public:
265 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
266  DWord() {std::memset(&m_whole, 0x00, sizeof(m_whole));}
267 #else
268  DWord() {std::memset(&m_halfs, 0x00, sizeof(m_halfs));}
269 #endif
270 
271 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
272  explicit DWord(word low) : m_whole(low) { }
273 #else
274  explicit DWord(word low)
275  {
276  m_halfs.high = 0;
277  m_halfs.low = low;
278  }
279 #endif
280 
281 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
282  DWord(word low, word high) : m_whole()
283 #else
284  DWord(word low, word high) : m_halfs()
285 #endif
286  {
287 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
288 # if defined(CRYPTOPP_LITTLE_ENDIAN)
289  const word t[2] = {low,high};
290  memcpy(&m_whole, t, sizeof(m_whole));
291 # else
292  const word t[2] = {high,low};
293  memcpy(&m_whole, t, sizeof(m_whole));
294 # endif
295 #else
296  m_halfs.low = low;
297  m_halfs.high = high;
298 #endif
299  }
300 
301  static DWord Multiply(word a, word b)
302  {
303  DWord r;
304  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
305  r.m_whole = (dword)a * b;
306  #elif defined(MultiplyWordsLoHi)
307  MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
308  #else
309  CRYPTOPP_ASSERT(0);
310  #endif
311  return r;
312  }
313 
314  static DWord MultiplyAndAdd(word a, word b, word c)
315  {
316  DWord r = Multiply(a, b);
317  return r += c;
318  }
319 
320  DWord & operator+=(word a)
321  {
322  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
323  m_whole = m_whole + a;
324  #else
325  m_halfs.low += a;
326  m_halfs.high += (m_halfs.low < a);
327  #endif
328  return *this;
329  }
330 
331  DWord operator+(word a)
332  {
333  DWord r;
334  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
335  r.m_whole = m_whole + a;
336  #else
337  r.m_halfs.low = m_halfs.low + a;
338  r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
339  #endif
340  return r;
341  }
342 
343  DWord operator-(DWord a)
344  {
345  DWord r;
346  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
347  r.m_whole = m_whole - a.m_whole;
348  #else
349  r.m_halfs.low = m_halfs.low - a.m_halfs.low;
350  r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
351  #endif
352  return r;
353  }
354 
355  DWord operator-(word a)
356  {
357  DWord r;
358  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
359  r.m_whole = m_whole - a;
360  #else
361  r.m_halfs.low = m_halfs.low - a;
362  r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
363  #endif
364  return r;
365  }
366 
367  // returns quotient, which must fit in a word
368  word operator/(word divisor);
369 
370  word operator%(word a);
371 
372  bool operator!() const
373  {
374  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
375  return !m_whole;
376  #else
377  return !m_halfs.high && !m_halfs.low;
378  #endif
379  }
380 
381  // TODO: When NATIVE_DWORD is in effect, we access high and low, which are inactive
382  // union members, and that's UB. Also see http://stackoverflow.com/q/11373203.
383  word GetLowHalf() const {return m_halfs.low;}
384  word GetHighHalf() const {return m_halfs.high;}
385  word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
386 
387 private:
388  // Issue 274, "Types cannot be declared in anonymous union",
389  // http://github.com/weidai11/cryptopp/issues/274
390  // Thanks to Martin Bonner at http://stackoverflow.com/a/39507183
391  struct half_words
392  {
393  #ifdef CRYPTOPP_LITTLE_ENDIAN
394  word low;
395  word high;
396  #else
397  word high;
398  word low;
399  #endif
400  };
401  union
402  {
403  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
404  dword m_whole;
405  #endif
406  half_words m_halfs;
407  };
408 };
409 
410 class Word
411 {
412 public:
413  Word() : m_whole(0) {}
414  Word(word value) : m_whole(value) {}
415  Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
416 
417  static Word Multiply(hword a, hword b)
418  {
419  Word r;
420  r.m_whole = (word)a * b;
421  return r;
422  }
423 
424  Word operator-(Word a)
425  {
426  Word r;
427  r.m_whole = m_whole - a.m_whole;
428  return r;
429  }
430 
431  Word operator-(hword a)
432  {
433  Word r;
434  r.m_whole = m_whole - a;
435  return r;
436  }
437 
438  // returns quotient, which must fit in a word
439  hword operator/(hword divisor)
440  {
441  return hword(m_whole / divisor);
442  }
443 
444  bool operator!() const
445  {
446  return !m_whole;
447  }
448 
449  word GetWhole() const {return m_whole;}
450  hword GetLowHalf() const {return hword(m_whole);}
451  hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
452  hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
453 
454 private:
455  word m_whole;
456 };
457 
458 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
459 template <class S, class D>
460 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULLPTR)
461 {
462  CRYPTOPP_UNUSED(dummy);
463 
464  // Assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
465  CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0));
466 
467  // estimate the quotient: do a 2 S by 1 S divide.
468  // Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
469  // The code change occurred at Commit dc99266599a0e72d.
470 
471  S Q; bool pre = (S(B1+1) == 0);
472  if (B1 > 0 && !pre)
473  Q = D(A[1], A[2]) / S(B1+1);
474  else if (pre)
475  Q = A[2];
476  else
477  Q = D(A[0], A[1]) / B0;
478 
479  // now subtract Q*B from A
480  D p = D::Multiply(B0, Q);
481  D u = (D) A[0] - p.GetLowHalf();
482  A[0] = u.GetLowHalf();
483  u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
484  A[1] = u.GetLowHalf();
485  A[2] += u.GetHighHalf();
486 
487  // Q <= actual quotient, so fix it
488  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
489  {
490  u = (D) A[0] - B0;
491  A[0] = u.GetLowHalf();
492  u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
493  A[1] = u.GetLowHalf();
494  A[2] += u.GetHighHalf();
495  Q++;
496  CRYPTOPP_ASSERT(Q); // shouldn't overflow
497  }
498 
499  return Q;
500 }
501 
502 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
503 template <class S, class D>
504 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
505 {
506  // Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
507  // The code change occurred at Commit dc99266599a0e72d.
508 
509  if (!!B)
510  {
511  S Q[2];
512  T[0] = Al.GetLowHalf();
513  T[1] = Al.GetHighHalf();
514  T[2] = Ah.GetLowHalf();
515  T[3] = Ah.GetHighHalf();
516  Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
517  Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
518  return D(Q[0], Q[1]);
519  }
520  else // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
521  {
522  return D(Ah.GetLowHalf(), Ah.GetHighHalf());
523  }
524 }
525 
526 // returns quotient, which must fit in a word
527 inline word DWord::operator/(word a)
528 {
529  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
530  return word(m_whole / a);
531  #else
532  hword r[4];
533  return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
534  #endif
535 }
536 
537 inline word DWord::operator%(word a)
538 {
539  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
540  return word(m_whole % a);
541  #else
542  if (a < (word(1) << (WORD_BITS/2)))
543  {
544  hword h = hword(a);
545  word r = m_halfs.high % h;
546  r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
547  return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
548  }
549  else
550  {
551  hword r[4];
552  DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
553  return Word(r[0], r[1]).GetWhole();
554  }
555  #endif
556 }
557 
558 // ********************************************************
559 
560 // Use some tricks to share assembly code between MSVC, GCC, Clang and Sun CC.
561 #if defined(__GNUC__)
562  #define AddPrologue \
563  int result; \
564  __asm__ __volatile__ \
565  ( \
566  INTEL_NOPREFIX
567  #define AddEpilogue \
568  ATT_PREFIX \
569  : "=a" (result)\
570  : "d" (C), "a" (A), "D" (B), "c" (N) \
571  : "%esi", "memory", "cc" \
572  );\
573  return result;
574  #define MulPrologue \
575  __asm__ __volatile__ \
576  ( \
577  INTEL_NOPREFIX \
578  AS1( push ebx) \
579  AS2( mov ebx, edx)
580  #define MulEpilogue \
581  AS1( pop ebx) \
582  ATT_PREFIX \
583  : \
584  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
585  : "%esi", "memory", "cc" \
586  );
587  #define SquPrologue MulPrologue
588  #define SquEpilogue \
589  AS1( pop ebx) \
590  ATT_PREFIX \
591  : \
592  : "d" (s_maskLow16), "c" (C), "a" (A) \
593  : "%esi", "%edi", "memory", "cc" \
594  );
595  #define TopPrologue MulPrologue
596  #define TopEpilogue \
597  AS1( pop ebx) \
598  ATT_PREFIX \
599  : \
600  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
601  : "memory", "cc" \
602  );
603 #else
604  #define AddPrologue \
605  __asm push edi \
606  __asm push esi \
607  __asm mov eax, [esp+12] \
608  __asm mov edi, [esp+16]
609  #define AddEpilogue \
610  __asm pop esi \
611  __asm pop edi \
612  __asm ret 8
613  #define SaveEBX
614  #define RestoreEBX
615  #define SquPrologue \
616  AS2( mov eax, A) \
617  AS2( mov ecx, C) \
618  SaveEBX \
619  AS2( lea ebx, s_maskLow16)
620  #define MulPrologue \
621  AS2( mov eax, A) \
622  AS2( mov edi, B) \
623  AS2( mov ecx, C) \
624  SaveEBX \
625  AS2( lea ebx, s_maskLow16)
626  #define TopPrologue \
627  AS2( mov eax, A) \
628  AS2( mov edi, B) \
629  AS2( mov ecx, C) \
630  AS2( mov esi, L) \
631  SaveEBX \
632  AS2( lea ebx, s_maskLow16)
633  #define SquEpilogue RestoreEBX
634  #define MulEpilogue RestoreEBX
635  #define TopEpilogue RestoreEBX
636 #endif
637 
638 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
639 extern "C" {
640 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
641 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
642 }
643 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
644 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
645 {
646  word result;
647  __asm__ __volatile__
648  (
649  INTEL_NOPREFIX
650  AS1( neg %1)
651  ASJ( jz, 1, f)
652  AS2( mov %0,[%3+8*%1])
653  AS2( add %0,[%4+8*%1])
654  AS2( mov [%2+8*%1],%0)
655  ASL(0)
656  AS2( mov %0,[%3+8*%1+8])
657  AS2( adc %0,[%4+8*%1+8])
658  AS2( mov [%2+8*%1+8],%0)
659  AS2( lea %1,[%1+2])
660  ASJ( jrcxz, 1, f)
661  AS2( mov %0,[%3+8*%1])
662  AS2( adc %0,[%4+8*%1])
663  AS2( mov [%2+8*%1],%0)
664  ASJ( jmp, 0, b)
665  ASL(1)
666  AS2( mov %0, 0)
667  AS2( adc %0, %0)
668  ATT_NOPREFIX
669  : "=&r" (result), "+c" (N)
670  : "r" (C+N), "r" (A+N), "r" (B+N)
671  : "memory", "cc"
672  );
673  return (int)result;
674 }
675 
676 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
677 {
678  word result;
679  __asm__ __volatile__
680  (
681  INTEL_NOPREFIX
682  AS1( neg %1)
683  ASJ( jz, 1, f)
684  AS2( mov %0,[%3+8*%1])
685  AS2( sub %0,[%4+8*%1])
686  AS2( mov [%2+8*%1],%0)
687  ASL(0)
688  AS2( mov %0,[%3+8*%1+8])
689  AS2( sbb %0,[%4+8*%1+8])
690  AS2( mov [%2+8*%1+8],%0)
691  AS2( lea %1,[%1+2])
692  ASJ( jrcxz, 1, f)
693  AS2( mov %0,[%3+8*%1])
694  AS2( sbb %0,[%4+8*%1])
695  AS2( mov [%2+8*%1],%0)
696  ASJ( jmp, 0, b)
697  ASL(1)
698  AS2( mov %0, 0)
699  AS2( adc %0, %0)
700  ATT_NOPREFIX
701  : "=&r" (result), "+c" (N)
702  : "r" (C+N), "r" (A+N), "r" (B+N)
703  : "memory", "cc"
704  );
705  return (int)result;
706 }
707 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
708 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
709 {
710  AddPrologue
711 
712  // now: eax = A, edi = B, edx = C, ecx = N
713  AS2( lea eax, [eax+4*ecx])
714  AS2( lea edi, [edi+4*ecx])
715  AS2( lea edx, [edx+4*ecx])
716 
717  AS1( neg ecx) // ecx is negative index
718  AS2( test ecx, 2) // this clears carry flag
719  ASJ( jz, 0, f)
720  AS2( sub ecx, 2)
721  ASJ( jmp, 1, f)
722 
723  ASL(0)
724  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
725  AS2( mov esi,[eax+4*ecx])
726  AS2( adc esi,[edi+4*ecx])
727  AS2( mov [edx+4*ecx],esi)
728  AS2( mov esi,[eax+4*ecx+4])
729  AS2( adc esi,[edi+4*ecx+4])
730  AS2( mov [edx+4*ecx+4],esi)
731  ASL(1)
732  AS2( mov esi,[eax+4*ecx+8])
733  AS2( adc esi,[edi+4*ecx+8])
734  AS2( mov [edx+4*ecx+8],esi)
735  AS2( mov esi,[eax+4*ecx+12])
736  AS2( adc esi,[edi+4*ecx+12])
737  AS2( mov [edx+4*ecx+12],esi)
738 
739  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
740  ASJ( jmp, 0, b)
741 
742  ASL(2)
743  AS2( mov eax, 0)
744  AS1( setc al) // store carry into eax (return result register)
745 
746  AddEpilogue
747 }
748 
749 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
750 {
751  AddPrologue
752 
753  // now: eax = A, edi = B, edx = C, ecx = N
754  AS2( lea eax, [eax+4*ecx])
755  AS2( lea edi, [edi+4*ecx])
756  AS2( lea edx, [edx+4*ecx])
757 
758  AS1( neg ecx) // ecx is negative index
759  AS2( test ecx, 2) // this clears carry flag
760  ASJ( jz, 0, f)
761  AS2( sub ecx, 2)
762  ASJ( jmp, 1, f)
763 
764  ASL(0)
765  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
766  AS2( mov esi,[eax+4*ecx])
767  AS2( sbb esi,[edi+4*ecx])
768  AS2( mov [edx+4*ecx],esi)
769  AS2( mov esi,[eax+4*ecx+4])
770  AS2( sbb esi,[edi+4*ecx+4])
771  AS2( mov [edx+4*ecx+4],esi)
772  ASL(1)
773  AS2( mov esi,[eax+4*ecx+8])
774  AS2( sbb esi,[edi+4*ecx+8])
775  AS2( mov [edx+4*ecx+8],esi)
776  AS2( mov esi,[eax+4*ecx+12])
777  AS2( sbb esi,[edi+4*ecx+12])
778  AS2( mov [edx+4*ecx+12],esi)
779 
780  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
781  ASJ( jmp, 0, b)
782 
783  ASL(2)
784  AS2( mov eax, 0)
785  AS1( setc al) // store carry into eax (return result register)
786 
787  AddEpilogue
788 }
789 
790 #if CRYPTOPP_INTEGER_SSE2
791 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
792 {
793  AddPrologue
794 
795  // now: eax = A, edi = B, edx = C, ecx = N
796  AS2( lea eax, [eax+4*ecx])
797  AS2( lea edi, [edi+4*ecx])
798  AS2( lea edx, [edx+4*ecx])
799 
800  AS1( neg ecx) // ecx is negative index
801  AS2( pxor mm2, mm2)
802  ASJ( jz, 2, f)
803  AS2( test ecx, 2) // this clears carry flag
804  ASJ( jz, 0, f)
805  AS2( sub ecx, 2)
806  ASJ( jmp, 1, f)
807 
808  ASL(0)
809  AS2( movd mm0, DWORD PTR [eax+4*ecx])
810  AS2( movd mm1, DWORD PTR [edi+4*ecx])
811  AS2( paddq mm0, mm1)
812  AS2( paddq mm2, mm0)
813  AS2( movd DWORD PTR [edx+4*ecx], mm2)
814  AS2( psrlq mm2, 32)
815 
816  AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
817  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
818  AS2( paddq mm0, mm1)
819  AS2( paddq mm2, mm0)
820  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
821  AS2( psrlq mm2, 32)
822 
823  ASL(1)
824  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
825  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
826  AS2( paddq mm0, mm1)
827  AS2( paddq mm2, mm0)
828  AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
829  AS2( psrlq mm2, 32)
830 
831  AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
832  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
833  AS2( paddq mm0, mm1)
834  AS2( paddq mm2, mm0)
835  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
836  AS2( psrlq mm2, 32)
837 
838  AS2( add ecx, 4)
839  ASJ( jnz, 0, b)
840 
841  ASL(2)
842  AS2( movd eax, mm2)
843  AS1( emms)
844 
845  AddEpilogue
846 }
847 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
848 {
849  AddPrologue
850 
851  // now: eax = A, edi = B, edx = C, ecx = N
852  AS2( lea eax, [eax+4*ecx])
853  AS2( lea edi, [edi+4*ecx])
854  AS2( lea edx, [edx+4*ecx])
855 
856  AS1( neg ecx) // ecx is negative index
857  AS2( pxor mm2, mm2)
858  ASJ( jz, 2, f)
859  AS2( test ecx, 2) // this clears carry flag
860  ASJ( jz, 0, f)
861  AS2( sub ecx, 2)
862  ASJ( jmp, 1, f)
863 
864  ASL(0)
865  AS2( movd mm0, DWORD PTR [eax+4*ecx])
866  AS2( movd mm1, DWORD PTR [edi+4*ecx])
867  AS2( psubq mm0, mm1)
868  AS2( psubq mm0, mm2)
869  AS2( movd DWORD PTR [edx+4*ecx], mm0)
870  AS2( psrlq mm0, 63)
871 
872  AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
873  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
874  AS2( psubq mm2, mm1)
875  AS2( psubq mm2, mm0)
876  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
877  AS2( psrlq mm2, 63)
878 
879  ASL(1)
880  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
881  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
882  AS2( psubq mm0, mm1)
883  AS2( psubq mm0, mm2)
884  AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
885  AS2( psrlq mm0, 63)
886 
887  AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
888  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
889  AS2( psubq mm2, mm1)
890  AS2( psubq mm2, mm0)
891  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
892  AS2( psrlq mm2, 63)
893 
894  AS2( add ecx, 4)
895  ASJ( jnz, 0, b)
896 
897  ASL(2)
898  AS2( movd eax, mm2)
899  AS1( emms)
900 
901  AddEpilogue
902 }
903 #endif // CRYPTOPP_INTEGER_SSE2
904 #else // CRYPTOPP_SSE2_ASM_AVAILABLE
905 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
906 {
907  CRYPTOPP_ASSERT (N%2 == 0);
908 
909  Declare2Words(u);
910  AssignWord(u, 0);
911  for (size_t i=0; i<N; i+=2)
912  {
913  AddWithCarry(u, A[i], B[i]);
914  C[i] = LowWord(u);
915  AddWithCarry(u, A[i+1], B[i+1]);
916  C[i+1] = LowWord(u);
917  }
918  return int(GetCarry(u));
919 }
920 
921 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
922 {
923  CRYPTOPP_ASSERT (N%2 == 0);
924 
925  Declare2Words(u);
926  AssignWord(u, 0);
927  for (size_t i=0; i<N; i+=2)
928  {
929  SubtractWithBorrow(u, A[i], B[i]);
930  C[i] = LowWord(u);
931  SubtractWithBorrow(u, A[i+1], B[i+1]);
932  C[i+1] = LowWord(u);
933  }
934  return int(GetBorrow(u));
935 }
936 #endif
937 
938 static word LinearMultiply(word *C, const word *AA, word B, size_t N)
939 {
940  // http://github.com/weidai11/cryptopp/issues/188
941  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
942 
943  word carry=0;
944  for(unsigned i=0; i<N; i++)
945  {
946  Declare2Words(p);
947  MultiplyWords(p, A[i], B);
948  Acc2WordsBy1(p, carry);
949  C[i] = LowWord(p);
950  carry = HighWord(p);
951  }
952  return carry;
953 }
954 
955 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
956 
957 #define Mul_2 \
958  Mul_Begin(2) \
959  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
960  Mul_End(1, 1)
961 
962 #define Mul_4 \
963  Mul_Begin(4) \
964  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
965  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
966  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
967  Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
968  Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
969  Mul_End(5, 3)
970 
971 #define Mul_8 \
972  Mul_Begin(8) \
973  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
974  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
975  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
976  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
977  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
978  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
979  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
980  Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
981  Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
982  Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
983  Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
984  Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
985  Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
986  Mul_End(13, 7)
987 
988 #define Mul_16 \
989  Mul_Begin(16) \
990  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
991  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
992  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
993  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
994  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
995  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
996  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
997  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
998  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
999  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1000  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1001  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1002  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1003  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1004  Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1005  Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1006  Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1007  Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1008  Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1009  Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1010  Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1011  Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1012  Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1013  Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1014  Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1015  Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1016  Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1017  Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1018  Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
1019  Mul_End(29, 15)
1020 
1021 #define Squ_2 \
1022  Squ_Begin(2) \
1023  Squ_End(2)
1024 
1025 #define Squ_4 \
1026  Squ_Begin(4) \
1027  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1028  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1029  Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
1030  Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
1031  Squ_End(4)
1032 
1033 #define Squ_8 \
1034  Squ_Begin(8) \
1035  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1036  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1037  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
1038  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1039  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1040  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1041  Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1042  Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1043  Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1044  Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1045  Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
1046  Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
1047  Squ_End(8)
1048 
1049 #define Squ_16 \
1050  Squ_Begin(16) \
1051  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1052  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1053  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
1054  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1055  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1056  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1057  Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1058  Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1059  Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1060  Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1061  Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
1062  Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
1063  Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
1064  Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
1065  Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
1066  Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1067  Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1068  Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1069  Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1070  Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1071  Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1072  Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1073  Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1074  Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1075  Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1076  Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1077  Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1078  Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1079  Squ_End(16)
1080 
1081 #define Bot_2 \
1082  Mul_Begin(2) \
1083  Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1084  Bot_End(2)
1085 
1086 #define Bot_4 \
1087  Mul_Begin(4) \
1088  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1089  Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1090  Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1091  Bot_End(4)
1092 
1093 #define Bot_8 \
1094  Mul_Begin(8) \
1095  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1096  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1097  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1098  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1099  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1100  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1101  Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1102  Bot_End(8)
1103 
1104 #define Bot_16 \
1105  Mul_Begin(16) \
1106  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1107  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1108  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1109  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1110  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1111  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1112  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1113  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1114  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1115  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1116  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1117  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1118  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1119  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1120  Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1121  Bot_End(16)
1122 
1123 #endif
1124 
1125 #if 0
1126 #define Mul_Begin(n) \
1127  Declare2Words(p) \
1128  Declare2Words(c) \
1129  Declare2Words(d) \
1130  MultiplyWords(p, A[0], B[0]) \
1131  AssignWord(c, LowWord(p)) \
1132  AssignWord(d, HighWord(p))
1133 
1134 #define Mul_Acc(i, j) \
1135  MultiplyWords(p, A[i], B[j]) \
1136  Acc2WordsBy1(c, LowWord(p)) \
1137  Acc2WordsBy1(d, HighWord(p))
1138 
1139 #define Mul_SaveAcc(k, i, j) \
1140  R[k] = LowWord(c); \
1141  Add2WordsBy1(c, d, HighWord(c)) \
1142  MultiplyWords(p, A[i], B[j]) \
1143  AssignWord(d, HighWord(p)) \
1144  Acc2WordsBy1(c, LowWord(p))
1145 
1146 #define Mul_End(n) \
1147  R[2*n-3] = LowWord(c); \
1148  Acc2WordsBy1(d, HighWord(c)) \
1149  MultiplyWords(p, A[n-1], B[n-1])\
1150  Acc2WordsBy2(d, p) \
1151  R[2*n-2] = LowWord(d); \
1152  R[2*n-1] = HighWord(d);
1153 
1154 #define Bot_SaveAcc(k, i, j) \
1155  R[k] = LowWord(c); \
1156  word e = LowWord(d) + HighWord(c); \
1157  e += A[i] * B[j];
1158 
1159 #define Bot_Acc(i, j) \
1160  e += A[i] * B[j];
1161 
1162 #define Bot_End(n) \
1163  R[n-1] = e;
1164 #else
1165 #define Mul_Begin(n) \
1166  Declare2Words(p) \
1167  word c; \
1168  Declare2Words(d) \
1169  MultiplyWords(p, A[0], B[0]) \
1170  c = LowWord(p); \
1171  AssignWord(d, HighWord(p))
1172 
1173 #define Mul_Acc(i, j) \
1174  MulAcc(c, d, A[i], B[j])
1175 
1176 #define Mul_SaveAcc(k, i, j) \
1177  R[k] = c; \
1178  c = LowWord(d); \
1179  AssignWord(d, HighWord(d)) \
1180  MulAcc(c, d, A[i], B[j])
1181 
1182 #define Mul_End(k, i) \
1183  R[k] = c; \
1184  MultiplyWords(p, A[i], B[i]) \
1185  Acc2WordsBy2(p, d) \
1186  R[k+1] = LowWord(p); \
1187  R[k+2] = HighWord(p);
1188 
1189 #define Bot_SaveAcc(k, i, j) \
1190  R[k] = c; \
1191  c = LowWord(d); \
1192  c += A[i] * B[j];
1193 
1194 #define Bot_Acc(i, j) \
1195  c += A[i] * B[j];
1196 
1197 #define Bot_End(n) \
1198  R[n-1] = c;
1199 #endif
1200 
1201 #define Squ_Begin(n) \
1202  Declare2Words(p) \
1203  word c; \
1204  Declare2Words(d) \
1205  Declare2Words(e) \
1206  MultiplyWords(p, A[0], A[0]) \
1207  R[0] = LowWord(p); \
1208  AssignWord(e, HighWord(p)) \
1209  MultiplyWords(p, A[0], A[1]) \
1210  c = LowWord(p); \
1211  AssignWord(d, HighWord(p)) \
1212  Squ_NonDiag \
1213 
1214 #define Squ_NonDiag \
1215  Double3Words(c, d)
1216 
1217 #define Squ_SaveAcc(k, i, j) \
1218  Acc3WordsBy2(c, d, e) \
1219  R[k] = c; \
1220  MultiplyWords(p, A[i], A[j]) \
1221  c = LowWord(p); \
1222  AssignWord(d, HighWord(p)) \
1223 
1224 #define Squ_Acc(i, j) \
1225  MulAcc(c, d, A[i], A[j])
1226 
1227 #define Squ_Diag(i) \
1228  Squ_NonDiag \
1229  MulAcc(c, d, A[i], A[i])
1230 
1231 #define Squ_End(n) \
1232  Acc3WordsBy2(c, d, e) \
1233  R[2*n-3] = c; \
1234  MultiplyWords(p, A[n-1], A[n-1])\
1235  Acc2WordsBy2(p, e) \
1236  R[2*n-2] = LowWord(p); \
1237  R[2*n-1] = HighWord(p);
1238 
1239 
1240 void Baseline_Multiply2(word *R, const word *AA, const word *BB)
1241 {
1242  // http://github.com/weidai11/cryptopp/issues/188
1243  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1244  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1245 
1246  Mul_2
1247 }
1248 
1249 void Baseline_Multiply4(word *R, const word *AA, const word *BB)
1250 {
1251  // http://github.com/weidai11/cryptopp/issues/188
1252  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1253  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1254 
1255  Mul_4
1256 }
1257 
1258 void Baseline_Multiply8(word *R, const word *AA, const word *BB)
1259 {
1260  // http://github.com/weidai11/cryptopp/issues/188
1261  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1262  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1263 
1264  Mul_8
1265 }
1266 
1267 void Baseline_Square2(word *R, const word *AA)
1268 {
1269  // http://github.com/weidai11/cryptopp/issues/188
1270  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1271 
1272  Squ_2
1273 }
1274 
1275 void Baseline_Square4(word *R, const word *AA)
1276 {
1277  // http://github.com/weidai11/cryptopp/issues/188
1278  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1279 
1280  Squ_4
1281 }
1282 
1283 void Baseline_Square8(word *R, const word *AA)
1284 {
1285  // http://github.com/weidai11/cryptopp/issues/188
1286  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1287 
1288  Squ_8
1289 }
1290 
1291 void Baseline_MultiplyBottom2(word *R, const word *AA, const word *BB)
1292 {
1293  // http://github.com/weidai11/cryptopp/issues/188
1294  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1295  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1296 
1297  Bot_2
1298 }
1299 
1300 void Baseline_MultiplyBottom4(word *R, const word *AA, const word *BB)
1301 {
1302  // http://github.com/weidai11/cryptopp/issues/188
1303  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1304  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1305 
1306  Bot_4
1307 }
1308 
1309 void Baseline_MultiplyBottom8(word *R, const word *AA, const word *BB)
1310 {
1311  // http://github.com/weidai11/cryptopp/issues/188
1312  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1313  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1314 
1315  Bot_8
1316 }
1317 
1318 #define Top_Begin(n) \
1319  Declare2Words(p) \
1320  word c; \
1321  Declare2Words(d) \
1322  MultiplyWords(p, A[0], B[n-2]);\
1323  AssignWord(d, HighWord(p));
1324 
1325 #define Top_Acc(i, j) \
1326  MultiplyWords(p, A[i], B[j]);\
1327  Acc2WordsBy1(d, HighWord(p));
1328 
1329 #define Top_SaveAcc0(i, j) \
1330  c = LowWord(d); \
1331  AssignWord(d, HighWord(d)) \
1332  MulAcc(c, d, A[i], B[j])
1333 
1334 #define Top_SaveAcc1(i, j) \
1335  c = L<c; \
1336  Acc2WordsBy1(d, c); \
1337  c = LowWord(d); \
1338  AssignWord(d, HighWord(d)) \
1339  MulAcc(c, d, A[i], B[j])
1340 
1341 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
1342 {
1343  CRYPTOPP_UNUSED(L);
1344  word T[4];
1345  Baseline_Multiply2(T, A, B);
1346  R[0] = T[2];
1347  R[1] = T[3];
1348 }
1349 
1350 void Baseline_MultiplyTop4(word *R, const word *AA, const word *BB, word L)
1351 {
1352  // http://github.com/weidai11/cryptopp/issues/188
1353  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1354  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1355 
1356  Top_Begin(4)
1357  Top_Acc(1, 1) Top_Acc(2, 0) \
1358  Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1359  Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1360  Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1361  Mul_End(1, 3)
1362 }
1363 
1364 void Baseline_MultiplyTop8(word *R, const word *AA, const word *BB, word L)
1365 {
1366  // http://github.com/weidai11/cryptopp/issues/188
1367  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1368  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1369 
1370  Top_Begin(8)
1371  Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1372  Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1373  Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1374  Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1375  Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1376  Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1377  Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1378  Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1379  Mul_End(5, 7)
1380 }
1381 
1382 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
1383 void Baseline_Multiply16(word *R, const word *AA, const word *BB)
1384 {
1385  // http://github.com/weidai11/cryptopp/issues/188
1386  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1387  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1388 
1389  Mul_16
1390 }
1391 
1392 void Baseline_Square16(word *R, const word *AA)
1393 {
1394  // http://github.com/weidai11/cryptopp/issues/188
1395  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1396 
1397  Squ_16
1398 }
1399 
1400 void Baseline_MultiplyBottom16(word *R, const word *AA, const word *BB)
1401 {
1402  // http://github.com/weidai11/cryptopp/issues/188
1403  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1404  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1405 
1406  Bot_16
1407 }
1408 
1409 void Baseline_MultiplyTop16(word *R, const word *AA, const word *BB, word L)
1410 {
1411  // http://github.com/weidai11/cryptopp/issues/188
1412  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1413  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1414 
1415  Top_Begin(16)
1416  Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1417  Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1418  Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1419  Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1420  Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1421  Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1422  Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1423  Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1424  Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1425  Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1426  Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1427  Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1428  Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1429  Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1430  Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1431  Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1432  Mul_End(13, 15)
1433 }
1434 #endif
1435 
1436 // ********************************************************
1437 
1438 #if CRYPTOPP_INTEGER_SSE2
1439 
1440 CRYPTOPP_ALIGN_DATA(16)
1441 CRYPTOPP_TABLE
1442 const word32 s_maskLow16[4] = {
1443  0xffff,0xffff,0xffff,0xffff
1444 };
1445 
1446 #undef Mul_Begin
1447 #undef Mul_Acc
1448 #undef Top_Begin
1449 #undef Top_Acc
1450 #undef Squ_Acc
1451 #undef Squ_NonDiag
1452 #undef Squ_Diag
1453 #undef Squ_SaveAcc
1454 #undef Squ_Begin
1455 #undef Mul_SaveAcc
1456 #undef Bot_Acc
1457 #undef Bot_SaveAcc
1458 #undef Bot_End
1459 #undef Squ_End
1460 #undef Mul_End
1461 
1462 #define SSE2_FinalSave(k) \
1463  AS2( psllq xmm5, 16) \
1464  AS2( paddq xmm4, xmm5) \
1465  AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
1466 
1467 #define SSE2_SaveShift(k) \
1468  AS2( movq xmm0, xmm6) \
1469  AS2( punpckhqdq xmm6, xmm0) \
1470  AS2( movq xmm1, xmm7) \
1471  AS2( punpckhqdq xmm7, xmm1) \
1472  AS2( paddd xmm6, xmm0) \
1473  AS2( pslldq xmm6, 4) \
1474  AS2( paddd xmm7, xmm1) \
1475  AS2( paddd xmm4, xmm6) \
1476  AS2( pslldq xmm7, 4) \
1477  AS2( movq xmm6, xmm4) \
1478  AS2( paddd xmm5, xmm7) \
1479  AS2( movq xmm7, xmm5) \
1480  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1481  AS2( psrlq xmm6, 16) \
1482  AS2( paddq xmm6, xmm7) \
1483  AS2( punpckhqdq xmm4, xmm0) \
1484  AS2( punpckhqdq xmm5, xmm0) \
1485  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1486  AS2( psrlq xmm6, 3*16) \
1487  AS2( paddd xmm4, xmm6) \
1488 
1489 #define Squ_SSE2_SaveShift(k) \
1490  AS2( movq xmm0, xmm6) \
1491  AS2( punpckhqdq xmm6, xmm0) \
1492  AS2( movq xmm1, xmm7) \
1493  AS2( punpckhqdq xmm7, xmm1) \
1494  AS2( paddd xmm6, xmm0) \
1495  AS2( pslldq xmm6, 4) \
1496  AS2( paddd xmm7, xmm1) \
1497  AS2( paddd xmm4, xmm6) \
1498  AS2( pslldq xmm7, 4) \
1499  AS2( movhlps xmm6, xmm4) \
1500  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1501  AS2( paddd xmm5, xmm7) \
1502  AS2( movhps QWORD PTR [esp+12], xmm5)\
1503  AS2( psrlq xmm4, 16) \
1504  AS2( paddq xmm4, xmm5) \
1505  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1506  AS2( psrlq xmm4, 3*16) \
1507  AS2( paddd xmm4, xmm6) \
1508  AS2( movq QWORD PTR [esp+4], xmm4)\
1509 
1510 #define SSE2_FirstMultiply(i) \
1511  AS2( movdqa xmm7, [esi+(i)*16])\
1512  AS2( movdqa xmm5, [edi-(i)*16])\
1513  AS2( pmuludq xmm5, xmm7) \
1514  AS2( movdqa xmm4, [ebx])\
1515  AS2( movdqa xmm6, xmm4) \
1516  AS2( pand xmm4, xmm5) \
1517  AS2( psrld xmm5, 16) \
1518  AS2( pmuludq xmm7, [edx-(i)*16])\
1519  AS2( pand xmm6, xmm7) \
1520  AS2( psrld xmm7, 16)
1521 
1522 #define Squ_Begin(n) \
1523  SquPrologue \
1524  AS2( mov esi, esp)\
1525  AS2( and esp, 0xfffffff0)\
1526  AS2( lea edi, [esp-32*n])\
1527  AS2( sub esp, 32*n+16)\
1528  AS1( push esi)\
1529  AS2( mov esi, edi) \
1530  AS2( xor edx, edx) \
1531  ASL(1) \
1532  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1533  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1534  AS2( movdqa [edi+2*edx], xmm0) \
1535  AS2( psrlq xmm0, 32) \
1536  AS2( movdqa [edi+2*edx+16], xmm0) \
1537  AS2( movdqa [edi+16*n+2*edx], xmm1) \
1538  AS2( psrlq xmm1, 32) \
1539  AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
1540  AS2( add edx, 16) \
1541  AS2( cmp edx, 8*(n)) \
1542  ASJ( jne, 1, b) \
1543  AS2( lea edx, [edi+16*n])\
1544  SSE2_FirstMultiply(0) \
1545 
1546 #define Squ_Acc(i) \
1547  ASL(LSqu##i) \
1548  AS2( movdqa xmm1, [esi+(i)*16]) \
1549  AS2( movdqa xmm0, [edi-(i)*16]) \
1550  AS2( movdqa xmm2, [ebx]) \
1551  AS2( pmuludq xmm0, xmm1) \
1552  AS2( pmuludq xmm1, [edx-(i)*16]) \
1553  AS2( movdqa xmm3, xmm2) \
1554  AS2( pand xmm2, xmm0) \
1555  AS2( psrld xmm0, 16) \
1556  AS2( paddd xmm4, xmm2) \
1557  AS2( paddd xmm5, xmm0) \
1558  AS2( pand xmm3, xmm1) \
1559  AS2( psrld xmm1, 16) \
1560  AS2( paddd xmm6, xmm3) \
1561  AS2( paddd xmm7, xmm1) \
1562 
1563 #define Squ_Acc1(i)
1564 #define Squ_Acc2(i) ASC(call, LSqu##i)
1565 #define Squ_Acc3(i) Squ_Acc2(i)
1566 #define Squ_Acc4(i) Squ_Acc2(i)
1567 #define Squ_Acc5(i) Squ_Acc2(i)
1568 #define Squ_Acc6(i) Squ_Acc2(i)
1569 #define Squ_Acc7(i) Squ_Acc2(i)
1570 #define Squ_Acc8(i) Squ_Acc2(i)
1571 
1572 #define SSE2_End(E, n) \
1573  SSE2_SaveShift(2*(n)-3) \
1574  AS2( movdqa xmm7, [esi+16]) \
1575  AS2( movdqa xmm0, [edi]) \
1576  AS2( pmuludq xmm0, xmm7) \
1577  AS2( movdqa xmm2, [ebx]) \
1578  AS2( pmuludq xmm7, [edx]) \
1579  AS2( movdqa xmm6, xmm2) \
1580  AS2( pand xmm2, xmm0) \
1581  AS2( psrld xmm0, 16) \
1582  AS2( paddd xmm4, xmm2) \
1583  AS2( paddd xmm5, xmm0) \
1584  AS2( pand xmm6, xmm7) \
1585  AS2( psrld xmm7, 16) \
1586  SSE2_SaveShift(2*(n)-2) \
1587  SSE2_FinalSave(2*(n)-1) \
1588  AS1( pop esp)\
1589  E
1590 
1591 #define Squ_End(n) SSE2_End(SquEpilogue, n)
1592 #define Mul_End(n) SSE2_End(MulEpilogue, n)
1593 #define Top_End(n) SSE2_End(TopEpilogue, n)
1594 
1595 #define Squ_Column1(k, i) \
1596  Squ_SSE2_SaveShift(k) \
1597  AS2( add esi, 16) \
1598  SSE2_FirstMultiply(1)\
1599  Squ_Acc##i(i) \
1600  AS2( paddd xmm4, xmm4) \
1601  AS2( paddd xmm5, xmm5) \
1602  AS2( movdqa xmm3, [esi]) \
1603  AS2( movq xmm1, QWORD PTR [esi+8]) \
1604  AS2( pmuludq xmm1, xmm3) \
1605  AS2( pmuludq xmm3, xmm3) \
1606  AS2( movdqa xmm0, [ebx])\
1607  AS2( movdqa xmm2, xmm0) \
1608  AS2( pand xmm0, xmm1) \
1609  AS2( psrld xmm1, 16) \
1610  AS2( paddd xmm6, xmm0) \
1611  AS2( paddd xmm7, xmm1) \
1612  AS2( pand xmm2, xmm3) \
1613  AS2( psrld xmm3, 16) \
1614  AS2( paddd xmm6, xmm6) \
1615  AS2( paddd xmm7, xmm7) \
1616  AS2( paddd xmm4, xmm2) \
1617  AS2( paddd xmm5, xmm3) \
1618  AS2( movq xmm0, QWORD PTR [esp+4])\
1619  AS2( movq xmm1, QWORD PTR [esp+12])\
1620  AS2( paddd xmm4, xmm0)\
1621  AS2( paddd xmm5, xmm1)\
1622 
1623 #define Squ_Column0(k, i) \
1624  Squ_SSE2_SaveShift(k) \
1625  AS2( add edi, 16) \
1626  AS2( add edx, 16) \
1627  SSE2_FirstMultiply(1)\
1628  Squ_Acc##i(i) \
1629  AS2( paddd xmm6, xmm6) \
1630  AS2( paddd xmm7, xmm7) \
1631  AS2( paddd xmm4, xmm4) \
1632  AS2( paddd xmm5, xmm5) \
1633  AS2( movq xmm0, QWORD PTR [esp+4])\
1634  AS2( movq xmm1, QWORD PTR [esp+12])\
1635  AS2( paddd xmm4, xmm0)\
1636  AS2( paddd xmm5, xmm1)\
1637 
1638 #define SSE2_MulAdd45 \
1639  AS2( movdqa xmm7, [esi]) \
1640  AS2( movdqa xmm0, [edi]) \
1641  AS2( pmuludq xmm0, xmm7) \
1642  AS2( movdqa xmm2, [ebx]) \
1643  AS2( pmuludq xmm7, [edx]) \
1644  AS2( movdqa xmm6, xmm2) \
1645  AS2( pand xmm2, xmm0) \
1646  AS2( psrld xmm0, 16) \
1647  AS2( paddd xmm4, xmm2) \
1648  AS2( paddd xmm5, xmm0) \
1649  AS2( pand xmm6, xmm7) \
1650  AS2( psrld xmm7, 16)
1651 
1652 #define Mul_Begin(n) \
1653  MulPrologue \
1654  AS2( mov esi, esp)\
1655  AS2( and esp, 0xfffffff0)\
1656  AS2( sub esp, 48*n+16)\
1657  AS1( push esi)\
1658  AS2( xor edx, edx) \
1659  ASL(1) \
1660  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1661  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1662  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1663  AS2( movdqa [esp+20+2*edx], xmm0) \
1664  AS2( psrlq xmm0, 32) \
1665  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1666  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1667  AS2( psrlq xmm1, 32) \
1668  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1669  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1670  AS2( psrlq xmm2, 32) \
1671  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1672  AS2( add edx, 16) \
1673  AS2( cmp edx, 8*(n)) \
1674  ASJ( jne, 1, b) \
1675  AS2( lea edi, [esp+20])\
1676  AS2( lea edx, [esp+20+16*n])\
1677  AS2( lea esi, [esp+20+32*n])\
1678  SSE2_FirstMultiply(0) \
1679 
1680 #define Mul_Acc(i) \
1681  ASL(LMul##i) \
1682  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1683  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1684  AS2( movdqa xmm2, [ebx]) \
1685  AS2( pmuludq xmm0, xmm1) \
1686  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1687  AS2( movdqa xmm3, xmm2) \
1688  AS2( pand xmm2, xmm0) \
1689  AS2( psrld xmm0, 16) \
1690  AS2( paddd xmm4, xmm2) \
1691  AS2( paddd xmm5, xmm0) \
1692  AS2( pand xmm3, xmm1) \
1693  AS2( psrld xmm1, 16) \
1694  AS2( paddd xmm6, xmm3) \
1695  AS2( paddd xmm7, xmm1) \
1696 
1697 #define Mul_Acc1(i)
1698 #define Mul_Acc2(i) ASC(call, LMul##i)
1699 #define Mul_Acc3(i) Mul_Acc2(i)
1700 #define Mul_Acc4(i) Mul_Acc2(i)
1701 #define Mul_Acc5(i) Mul_Acc2(i)
1702 #define Mul_Acc6(i) Mul_Acc2(i)
1703 #define Mul_Acc7(i) Mul_Acc2(i)
1704 #define Mul_Acc8(i) Mul_Acc2(i)
1705 #define Mul_Acc9(i) Mul_Acc2(i)
1706 #define Mul_Acc10(i) Mul_Acc2(i)
1707 #define Mul_Acc11(i) Mul_Acc2(i)
1708 #define Mul_Acc12(i) Mul_Acc2(i)
1709 #define Mul_Acc13(i) Mul_Acc2(i)
1710 #define Mul_Acc14(i) Mul_Acc2(i)
1711 #define Mul_Acc15(i) Mul_Acc2(i)
1712 #define Mul_Acc16(i) Mul_Acc2(i)
1713 
1714 #define Mul_Column1(k, i) \
1715  SSE2_SaveShift(k) \
1716  AS2( add esi, 16) \
1717  SSE2_MulAdd45\
1718  Mul_Acc##i(i) \
1719 
1720 #define Mul_Column0(k, i) \
1721  SSE2_SaveShift(k) \
1722  AS2( add edi, 16) \
1723  AS2( add edx, 16) \
1724  SSE2_MulAdd45\
1725  Mul_Acc##i(i) \
1726 
1727 #define Bot_Acc(i) \
1728  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1729  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1730  AS2( pmuludq xmm0, xmm1) \
1731  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1732  AS2( paddq xmm4, xmm0) \
1733  AS2( paddd xmm6, xmm1)
1734 
1735 #define Bot_SaveAcc(k) \
1736  SSE2_SaveShift(k) \
1737  AS2( add edi, 16) \
1738  AS2( add edx, 16) \
1739  AS2( movdqa xmm6, [esi]) \
1740  AS2( movdqa xmm0, [edi]) \
1741  AS2( pmuludq xmm0, xmm6) \
1742  AS2( paddq xmm4, xmm0) \
1743  AS2( psllq xmm5, 16) \
1744  AS2( paddq xmm4, xmm5) \
1745  AS2( pmuludq xmm6, [edx])
1746 
1747 #define Bot_End(n) \
1748  AS2( movhlps xmm7, xmm6) \
1749  AS2( paddd xmm6, xmm7) \
1750  AS2( psllq xmm6, 32) \
1751  AS2( paddd xmm4, xmm6) \
1752  AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
1753  AS1( pop esp)\
1754  MulEpilogue
1755 
1756 #define Top_Begin(n) \
1757  TopPrologue \
1758  AS2( mov edx, esp)\
1759  AS2( and esp, 0xfffffff0)\
1760  AS2( sub esp, 48*n+16)\
1761  AS1( push edx)\
1762  AS2( xor edx, edx) \
1763  ASL(1) \
1764  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1765  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1766  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1767  AS2( movdqa [esp+20+2*edx], xmm0) \
1768  AS2( psrlq xmm0, 32) \
1769  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1770  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1771  AS2( psrlq xmm1, 32) \
1772  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1773  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1774  AS2( psrlq xmm2, 32) \
1775  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1776  AS2( add edx, 16) \
1777  AS2( cmp edx, 8*(n)) \
1778  ASJ( jne, 1, b) \
1779  AS2( mov eax, esi) \
1780  AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
1781  AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
1782  AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
1783  AS2( pxor xmm4, xmm4)\
1784  AS2( pxor xmm5, xmm5)
1785 
1786 #define Top_Acc(i) \
1787  AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1788  AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1789  AS2( psrlq xmm0, 48) \
1790  AS2( paddd xmm5, xmm0)\
1791 
1792 #define Top_Column0(i) \
1793  AS2( psllq xmm5, 32) \
1794  AS2( add edi, 16) \
1795  AS2( add edx, 16) \
1796  SSE2_MulAdd45\
1797  Mul_Acc##i(i) \
1798 
1799 #define Top_Column1(i) \
1800  SSE2_SaveShift(0) \
1801  AS2( add esi, 16) \
1802  SSE2_MulAdd45\
1803  Mul_Acc##i(i) \
1804  AS2( shr eax, 16) \
1805  AS2( movd xmm0, eax)\
1806  AS2( movd xmm1, [ecx+4])\
1807  AS2( psrld xmm1, 16)\
1808  AS2( pcmpgtd xmm1, xmm0)\
1809  AS2( psrld xmm1, 31)\
1810  AS2( paddd xmm4, xmm1)\
1811 
1812 void SSE2_Square4(word *C, const word *A)
1813 {
1814  Squ_Begin(2)
1815  Squ_Column0(0, 1)
1816  Squ_End(2)
1817 }
1818 
1819 void SSE2_Square8(word *C, const word *A)
1820 {
1821  Squ_Begin(4)
1822 #ifndef __GNUC__
1823  ASJ( jmp, 0, f)
1824  Squ_Acc(2)
1825  AS1( ret) ASL(0)
1826 #endif
1827  Squ_Column0(0, 1)
1828  Squ_Column1(1, 1)
1829  Squ_Column0(2, 2)
1830  Squ_Column1(3, 1)
1831  Squ_Column0(4, 1)
1832  Squ_End(4)
1833 }
1834 
1835 void SSE2_Square16(word *C, const word *A)
1836 {
1837  Squ_Begin(8)
1838 #ifndef __GNUC__
1839  ASJ( jmp, 0, f)
1840  Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1841  AS1( ret) ASL(0)
1842 #endif
1843  Squ_Column0(0, 1)
1844  Squ_Column1(1, 1)
1845  Squ_Column0(2, 2)
1846  Squ_Column1(3, 2)
1847  Squ_Column0(4, 3)
1848  Squ_Column1(5, 3)
1849  Squ_Column0(6, 4)
1850  Squ_Column1(7, 3)
1851  Squ_Column0(8, 3)
1852  Squ_Column1(9, 2)
1853  Squ_Column0(10, 2)
1854  Squ_Column1(11, 1)
1855  Squ_Column0(12, 1)
1856  Squ_End(8)
1857 }
1858 
1859 void SSE2_Square32(word *C, const word *A)
1860 {
1861  Squ_Begin(16)
1862  ASJ( jmp, 0, f)
1863  Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1864  AS1( ret) ASL(0)
1865  Squ_Column0(0, 1)
1866  Squ_Column1(1, 1)
1867  Squ_Column0(2, 2)
1868  Squ_Column1(3, 2)
1869  Squ_Column0(4, 3)
1870  Squ_Column1(5, 3)
1871  Squ_Column0(6, 4)
1872  Squ_Column1(7, 4)
1873  Squ_Column0(8, 5)
1874  Squ_Column1(9, 5)
1875  Squ_Column0(10, 6)
1876  Squ_Column1(11, 6)
1877  Squ_Column0(12, 7)
1878  Squ_Column1(13, 7)
1879  Squ_Column0(14, 8)
1880  Squ_Column1(15, 7)
1881  Squ_Column0(16, 7)
1882  Squ_Column1(17, 6)
1883  Squ_Column0(18, 6)
1884  Squ_Column1(19, 5)
1885  Squ_Column0(20, 5)
1886  Squ_Column1(21, 4)
1887  Squ_Column0(22, 4)
1888  Squ_Column1(23, 3)
1889  Squ_Column0(24, 3)
1890  Squ_Column1(25, 2)
1891  Squ_Column0(26, 2)
1892  Squ_Column1(27, 1)
1893  Squ_Column0(28, 1)
1894  Squ_End(16)
1895 }
1896 
1897 void SSE2_Multiply4(word *C, const word *A, const word *B)
1898 {
1899  Mul_Begin(2)
1900 #ifndef __GNUC__
1901  ASJ( jmp, 0, f)
1902  Mul_Acc(2)
1903  AS1( ret) ASL(0)
1904 #endif
1905  Mul_Column0(0, 2)
1906  Mul_End(2)
1907 }
1908 
1909 void SSE2_Multiply8(word *C, const word *A, const word *B)
1910 {
1911  Mul_Begin(4)
1912 #ifndef __GNUC__
1913  ASJ( jmp, 0, f)
1914  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1915  AS1( ret) ASL(0)
1916 #endif
1917  Mul_Column0(0, 2)
1918  Mul_Column1(1, 3)
1919  Mul_Column0(2, 4)
1920  Mul_Column1(3, 3)
1921  Mul_Column0(4, 2)
1922  Mul_End(4)
1923 }
1924 
1925 void SSE2_Multiply16(word *C, const word *A, const word *B)
1926 {
1927  Mul_Begin(8)
1928 #ifndef __GNUC__
1929  ASJ( jmp, 0, f)
1930  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1931  AS1( ret) ASL(0)
1932 #endif
1933  Mul_Column0(0, 2)
1934  Mul_Column1(1, 3)
1935  Mul_Column0(2, 4)
1936  Mul_Column1(3, 5)
1937  Mul_Column0(4, 6)
1938  Mul_Column1(5, 7)
1939  Mul_Column0(6, 8)
1940  Mul_Column1(7, 7)
1941  Mul_Column0(8, 6)
1942  Mul_Column1(9, 5)
1943  Mul_Column0(10, 4)
1944  Mul_Column1(11, 3)
1945  Mul_Column0(12, 2)
1946  Mul_End(8)
1947 }
1948 
1949 void SSE2_Multiply32(word *C, const word *A, const word *B)
1950 {
1951  Mul_Begin(16)
1952  ASJ( jmp, 0, f)
1953  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1954  AS1( ret) ASL(0)
1955  Mul_Column0(0, 2)
1956  Mul_Column1(1, 3)
1957  Mul_Column0(2, 4)
1958  Mul_Column1(3, 5)
1959  Mul_Column0(4, 6)
1960  Mul_Column1(5, 7)
1961  Mul_Column0(6, 8)
1962  Mul_Column1(7, 9)
1963  Mul_Column0(8, 10)
1964  Mul_Column1(9, 11)
1965  Mul_Column0(10, 12)
1966  Mul_Column1(11, 13)
1967  Mul_Column0(12, 14)
1968  Mul_Column1(13, 15)
1969  Mul_Column0(14, 16)
1970  Mul_Column1(15, 15)
1971  Mul_Column0(16, 14)
1972  Mul_Column1(17, 13)
1973  Mul_Column0(18, 12)
1974  Mul_Column1(19, 11)
1975  Mul_Column0(20, 10)
1976  Mul_Column1(21, 9)
1977  Mul_Column0(22, 8)
1978  Mul_Column1(23, 7)
1979  Mul_Column0(24, 6)
1980  Mul_Column1(25, 5)
1981  Mul_Column0(26, 4)
1982  Mul_Column1(27, 3)
1983  Mul_Column0(28, 2)
1984  Mul_End(16)
1985 }
1986 
1987 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
1988 {
1989  Mul_Begin(2)
1990  Bot_SaveAcc(0) Bot_Acc(2)
1991  Bot_End(2)
1992 }
1993 
1994 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
1995 {
1996  Mul_Begin(4)
1997 #ifndef __GNUC__
1998  ASJ( jmp, 0, f)
1999  Mul_Acc(3) Mul_Acc(2)
2000  AS1( ret) ASL(0)
2001 #endif
2002  Mul_Column0(0, 2)
2003  Mul_Column1(1, 3)
2004  Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2005  Bot_End(4)
2006 }
2007 
2008 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
2009 {
2010  Mul_Begin(8)
2011 #ifndef __GNUC__
2012  ASJ( jmp, 0, f)
2013  Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2014  AS1( ret) ASL(0)
2015 #endif
2016  Mul_Column0(0, 2)
2017  Mul_Column1(1, 3)
2018  Mul_Column0(2, 4)
2019  Mul_Column1(3, 5)
2020  Mul_Column0(4, 6)
2021  Mul_Column1(5, 7)
2022  Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2023  Bot_End(8)
2024 }
2025 
2026 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
2027 {
2028  Mul_Begin(16)
2029 #ifndef __GNUC__
2030  ASJ( jmp, 0, f)
2031  Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2032  AS1( ret) ASL(0)
2033 #endif
2034  Mul_Column0(0, 2)
2035  Mul_Column1(1, 3)
2036  Mul_Column0(2, 4)
2037  Mul_Column1(3, 5)
2038  Mul_Column0(4, 6)
2039  Mul_Column1(5, 7)
2040  Mul_Column0(6, 8)
2041  Mul_Column1(7, 9)
2042  Mul_Column0(8, 10)
2043  Mul_Column1(9, 11)
2044  Mul_Column0(10, 12)
2045  Mul_Column1(11, 13)
2046  Mul_Column0(12, 14)
2047  Mul_Column1(13, 15)
2048  Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2049  Bot_End(16)
2050 }
2051 
2052 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
2053 {
2054  Top_Begin(4)
2055  Top_Acc(3) Top_Acc(2) Top_Acc(1)
2056 #ifndef __GNUC__
2057  ASJ( jmp, 0, f)
2058  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2059  AS1( ret) ASL(0)
2060 #endif
2061  Top_Column0(4)
2062  Top_Column1(3)
2063  Mul_Column0(0, 2)
2064  Top_End(2)
2065 }
2066 
2067 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
2068 {
2069  Top_Begin(8)
2070  Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2071 #ifndef __GNUC__
2072  ASJ( jmp, 0, f)
2073  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2074  AS1( ret) ASL(0)
2075 #endif
2076  Top_Column0(8)
2077  Top_Column1(7)
2078  Mul_Column0(0, 6)
2079  Mul_Column1(1, 5)
2080  Mul_Column0(2, 4)
2081  Mul_Column1(3, 3)
2082  Mul_Column0(4, 2)
2083  Top_End(4)
2084 }
2085 
2086 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
2087 {
2088  Top_Begin(16)
2089  Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2090 #ifndef __GNUC__
2091  ASJ( jmp, 0, f)
2092  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2093  AS1( ret) ASL(0)
2094 #endif
2095  Top_Column0(16)
2096  Top_Column1(15)
2097  Mul_Column0(0, 14)
2098  Mul_Column1(1, 13)
2099  Mul_Column0(2, 12)
2100  Mul_Column1(3, 11)
2101  Mul_Column0(4, 10)
2102  Mul_Column1(5, 9)
2103  Mul_Column0(6, 8)
2104  Mul_Column1(7, 7)
2105  Mul_Column0(8, 6)
2106  Mul_Column1(9, 5)
2107  Mul_Column0(10, 4)
2108  Mul_Column1(11, 3)
2109  Mul_Column0(12, 2)
2110  Top_End(8)
2111 }
2112 
2113 #endif // #if CRYPTOPP_INTEGER_SSE2
2114 
2115 // ********************************************************
2116 
2117 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
2118 typedef void (* PMul)(word *C, const word *A, const word *B);
2119 typedef void (* PSqu)(word *C, const word *A);
2120 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
2121 
2122 #if CRYPTOPP_INTEGER_SSE2
2123 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
2124 static size_t s_recursionLimit = 8;
2125 #else
2126 static const size_t s_recursionLimit = 16;
2127 #endif // CRYPTOPP_INTEGER_SSE2
2128 
2129 static PMul s_pMul[9], s_pBot[9];
2130 static PSqu s_pSqu[9];
2131 static PMulTop s_pTop[9];
2132 
2133 void SetFunctionPointers()
2134 {
2135  s_pMul[0] = &Baseline_Multiply2;
2136  s_pBot[0] = &Baseline_MultiplyBottom2;
2137  s_pSqu[0] = &Baseline_Square2;
2138  s_pTop[0] = &Baseline_MultiplyTop2;
2139  s_pTop[1] = &Baseline_MultiplyTop4;
2140 
2141 #if CRYPTOPP_INTEGER_SSE2
2142  if (HasSSE2())
2143  {
2144  if (IsP4())
2145  {
2146  s_pAdd = &SSE2_Add;
2147  s_pSub = &SSE2_Sub;
2148  }
2149 
2150  s_recursionLimit = 32;
2151 
2152  s_pMul[1] = &SSE2_Multiply4;
2153  s_pMul[2] = &SSE2_Multiply8;
2154  s_pMul[4] = &SSE2_Multiply16;
2155  s_pMul[8] = &SSE2_Multiply32;
2156 
2157  s_pBot[1] = &SSE2_MultiplyBottom4;
2158  s_pBot[2] = &SSE2_MultiplyBottom8;
2159  s_pBot[4] = &SSE2_MultiplyBottom16;
2160  s_pBot[8] = &SSE2_MultiplyBottom32;
2161 
2162  s_pSqu[1] = &SSE2_Square4;
2163  s_pSqu[2] = &SSE2_Square8;
2164  s_pSqu[4] = &SSE2_Square16;
2165  s_pSqu[8] = &SSE2_Square32;
2166 
2167  s_pTop[2] = &SSE2_MultiplyTop8;
2168  s_pTop[4] = &SSE2_MultiplyTop16;
2169  s_pTop[8] = &SSE2_MultiplyTop32;
2170  }
2171  else
2172 #endif // CRYPTOPP_INTEGER_SSE2
2173  {
2174  s_pMul[1] = &Baseline_Multiply4;
2175  s_pMul[2] = &Baseline_Multiply8;
2176 
2177  s_pBot[1] = &Baseline_MultiplyBottom4;
2178  s_pBot[2] = &Baseline_MultiplyBottom8;
2179 
2180  s_pSqu[1] = &Baseline_Square4;
2181  s_pSqu[2] = &Baseline_Square8;
2182 
2183  s_pTop[2] = &Baseline_MultiplyTop8;
2184 
2185 #if !CRYPTOPP_INTEGER_SSE2
2186  s_pMul[4] = &Baseline_Multiply16;
2187  s_pBot[4] = &Baseline_MultiplyBottom16;
2188  s_pSqu[4] = &Baseline_Square16;
2189  s_pTop[4] = &Baseline_MultiplyTop16;
2190 #endif // !CRYPTOPP_INTEGER_SSE2
2191  }
2192 }
2193 
2194 inline int Add(word *C, const word *A, const word *B, size_t N)
2195 {
2196 #if CRYPTOPP_INTEGER_SSE2
2197  return s_pAdd(N, C, A, B);
2198 #else
2199  return Baseline_Add(N, C, A, B);
2200 #endif // CRYPTOPP_INTEGER_SSE2
2201 }
2202 
2203 inline int Subtract(word *C, const word *A, const word *B, size_t N)
2204 {
2205 #if CRYPTOPP_INTEGER_SSE2
2206  return s_pSub(N, C, A, B);
2207 #else
2208  return Baseline_Sub(N, C, A, B);
2209 #endif // CRYPTOPP_INTEGER_SSE2
2210 }
2211 
2212 // ********************************************************
2213 
2214 
2215 #define A0 A
2216 #define A1 (A+N2)
2217 #define B0 B
2218 #define B1 (B+N2)
2219 
2220 #define T0 T
2221 #define T1 (T+N2)
2222 #define T2 (T+N)
2223 #define T3 (T+N+N2)
2224 
2225 #define R0 R
2226 #define R1 (R+N2)
2227 #define R2 (R+N)
2228 #define R3 (R+N+N2)
2229 
2230 // R[2*N] - result = A*B
2231 // T[2*N] - temporary work space
2232 // A[N] --- multiplier
2233 // B[N] --- multiplicant
2234 
2235 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
2236 {
2237  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2238 
2239  if (N <= s_recursionLimit)
2240  s_pMul[N/4](R, A, B);
2241  else
2242  {
2243  const size_t N2 = N/2;
2244 
2245  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2246  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2247 
2248  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2249  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2250 
2251  RecursiveMultiply(R2, T2, A1, B1, N2);
2252  RecursiveMultiply(T0, T2, R0, R1, N2);
2253  RecursiveMultiply(R0, T2, A0, B0, N2);
2254 
2255  // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
2256 
2257  int c2 = Add(R2, R2, R1, N2);
2258  int c3 = c2;
2259  c2 += Add(R1, R2, R0, N2);
2260  c3 += Add(R2, R2, R3, N2);
2261 
2262  if (AN2 == BN2)
2263  c3 -= Subtract(R1, R1, T0, N);
2264  else
2265  c3 += Add(R1, R1, T0, N);
2266 
2267  c3 += Increment(R2, N2, c2);
2268  CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2269  Increment(R3, N2, c3);
2270  }
2271 }
2272 
2273 // R[2*N] - result = A*A
2274 // T[2*N] - temporary work space
2275 // A[N] --- number to be squared
2276 
2277 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
2278 {
2279  CRYPTOPP_ASSERT(N && N%2==0);
2280 
2281  if (N <= s_recursionLimit)
2282  s_pSqu[N/4](R, A);
2283  else
2284  {
2285  const size_t N2 = N/2;
2286 
2287  RecursiveSquare(R0, T2, A0, N2);
2288  RecursiveSquare(R2, T2, A1, N2);
2289  RecursiveMultiply(T0, T2, A0, A1, N2);
2290 
2291  int carry = Add(R1, R1, T0, N);
2292  carry += Add(R1, R1, T0, N);
2293  Increment(R3, N2, carry);
2294  }
2295 }
2296 
2297 // R[N] - bottom half of A*B
2298 // T[3*N/2] - temporary work space
2299 // A[N] - multiplier
2300 // B[N] - multiplicant
2301 
2302 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2303 {
2304  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2305 
2306  if (N <= s_recursionLimit)
2307  s_pBot[N/4](R, A, B);
2308  else
2309  {
2310  const size_t N2 = N/2;
2311 
2312  RecursiveMultiply(R, T, A0, B0, N2);
2313  RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2314  Add(R1, R1, T0, N2);
2315  RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2316  Add(R1, R1, T0, N2);
2317  }
2318 }
2319 
2320 // R[N] --- upper half of A*B
2321 // T[2*N] - temporary work space
2322 // L[N] --- lower half of A*B
2323 // A[N] --- multiplier
2324 // B[N] --- multiplicant
2325 
2326 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
2327 {
2328  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2329 
2330  if (N <= s_recursionLimit)
2331  s_pTop[N/4](R, A, B, L[N-1]);
2332  else
2333  {
2334  const size_t N2 = N/2;
2335 
2336  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2337  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2338 
2339  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2340  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2341 
2342  RecursiveMultiply(T0, T2, R0, R1, N2);
2343  RecursiveMultiply(R0, T2, A1, B1, N2);
2344 
2345  // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
2346 
2347  int t, c3;
2348  int c2 = Subtract(T2, L+N2, L, N2);
2349 
2350  if (AN2 == BN2)
2351  {
2352  c2 -= Add(T2, T2, T0, N2);
2353  t = (Compare(T2, R0, N2) == -1);
2354  c3 = t - Subtract(T2, T2, T1, N2);
2355  }
2356  else
2357  {
2358  c2 += Subtract(T2, T2, T0, N2);
2359  t = (Compare(T2, R0, N2) == -1);
2360  c3 = t + Add(T2, T2, T1, N2);
2361  }
2362 
2363  c2 += t;
2364  if (c2 >= 0)
2365  c3 += Increment(T2, N2, c2);
2366  else
2367  c3 -= Decrement(T2, N2, -c2);
2368  c3 += Add(R0, T2, R1, N2);
2369 
2370  CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2371  Increment(R1, N2, c3);
2372  }
2373 }
2374 
2375 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
2376 {
2377  RecursiveMultiply(R, T, A, B, N);
2378 }
2379 
2380 inline void Square(word *R, word *T, const word *A, size_t N)
2381 {
2382  RecursiveSquare(R, T, A, N);
2383 }
2384 
2385 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2386 {
2387  RecursiveMultiplyBottom(R, T, A, B, N);
2388 }
2389 
2390 // R[NA+NB] - result = A*B
2391 // T[NA+NB] - temporary work space
2392 // A[NA] ---- multiplier
2393 // B[NB] ---- multiplicant
2394 
2395 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
2396 {
2397  if (NA == NB)
2398  {
2399  // Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
2400  // The code change occurred at Commit dc99266599a0e72d.
2401  if (A != B)
2402  Multiply(R, T, A, B, NA);
2403  else
2404  Square(R, T, A, NA);
2405 
2406  return;
2407  }
2408 
2409  if (NA > NB)
2410  {
2411  std::swap(A, B);
2412  std::swap(NA, NB);
2413  }
2414 
2415  CRYPTOPP_ASSERT(NB % NA == 0);
2416 
2417  if (NA==2 && !A[1])
2418  {
2419  // Profiling tells us the original Default case was dominant, so it was promoted to the first Case statement.
2420  // The code change occurred at Commit dc99266599a0e72d.
2421  switch (A[0])
2422  {
2423  default:
2424  R[NB] = LinearMultiply(R, B, A[0], NB);
2425  R[NB+1] = 0;
2426  return;
2427  case 0:
2428  SetWords(R, 0, NB+2);
2429  return;
2430  case 1:
2431  CopyWords(R, B, NB);
2432  R[NB] = R[NB+1] = 0;
2433  return;
2434  }
2435  }
2436 
2437  size_t i;
2438  if ((NB/NA)%2 == 0)
2439  {
2440  Multiply(R, T, A, B, NA);
2441  CopyWords(T+2*NA, R+NA, NA);
2442 
2443  for (i=2*NA; i<NB; i+=2*NA)
2444  Multiply(T+NA+i, T, A, B+i, NA);
2445  for (i=NA; i<NB; i+=2*NA)
2446  Multiply(R+i, T, A, B+i, NA);
2447  }
2448  else
2449  {
2450  for (i=0; i<NB; i+=2*NA)
2451  Multiply(R+i, T, A, B+i, NA);
2452  for (i=NA; i<NB; i+=2*NA)
2453  Multiply(T+NA+i, T, A, B+i, NA);
2454  }
2455 
2456  if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2457  Increment(R+NB, NA);
2458 }
2459 
2460 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
2461 // T[3*N/2] - temporary work space
2462 // A[N] ----- an odd number as input
2463 
2464 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
2465 {
2466  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
2467  // The code change occurred at Commit dc99266599a0e72d.
2468  if (N!=2)
2469  {
2470  const size_t N2 = N/2;
2471  RecursiveInverseModPower2(R0, T0, A0, N2);
2472  T0[0] = 1;
2473  SetWords(T0+1, 0, N2-1);
2474  MultiplyTop(R1, T1, T0, R0, A0, N2);
2475  MultiplyBottom(T0, T1, R0, A1, N2);
2476  Add(T0, R1, T0, N2);
2477  TwosComplement(T0, N2);
2478  MultiplyBottom(R1, T1, R0, T0, N2);
2479  }
2480  else
2481  {
2482  T[0] = AtomicInverseModPower2(A[0]);
2483  T[1] = 0;
2484  s_pBot[0](T+2, T, A);
2485  TwosComplement(T+2, 2);
2486  Increment(T+2, 2, 2);
2487  s_pBot[0](R, T, T+2);
2488  }
2489 }
2490 
2491 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
2492 // T[3*N] - temporary work space
2493 // X[2*N] - number to be reduced
2494 // M[N] --- modulus
2495 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
2496 
2497 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
2498 {
2499 #if 1
2500  MultiplyBottom(R, T, X, U, N);
2501  MultiplyTop(T, T+N, X, R, M, N);
2502  word borrow = Subtract(T, X+N, T, N);
2503  // defend against timing attack by doing this Add even when not needed
2504  word carry = Add(T+N, T, M, N);
2505  CRYPTOPP_ASSERT(carry | !borrow);
2506  CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2507  CopyWords(R, T + ((0-borrow) & N), N);
2508 #elif 0
2509  const word u = 0-U[0];
2510  Declare2Words(p)
2511  for (size_t i=0; i<N; i++)
2512  {
2513  const word t = u * X[i];
2514  word c = 0;
2515  for (size_t j=0; j<N; j+=2)
2516  {
2517  MultiplyWords(p, t, M[j]);
2518  Acc2WordsBy1(p, X[i+j]);
2519  Acc2WordsBy1(p, c);
2520  X[i+j] = LowWord(p);
2521  c = HighWord(p);
2522  MultiplyWords(p, t, M[j+1]);
2523  Acc2WordsBy1(p, X[i+j+1]);
2524  Acc2WordsBy1(p, c);
2525  X[i+j+1] = LowWord(p);
2526  c = HighWord(p);
2527  }
2528 
2529  if (Increment(X+N+i, N-i, c))
2530  while (!Subtract(X+N, X+N, M, N)) {}
2531  }
2532 
2533  memcpy(R, X+N, N*WORD_SIZE);
2534 #else
2535  __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2536  for (size_t i=0; i<N; i++)
2537  {
2538  __m64 t = _mm_cvtsi32_si64(X[i]);
2539  t = _mm_mul_su32(t, u);
2540  __m64 c = _mm_setzero_si64();
2541  for (size_t j=0; j<N; j+=2)
2542  {
2543  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2544  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2545  c = _mm_add_si64(c, p);
2546  X[i+j] = _mm_cvtsi64_si32(c);
2547  c = _mm_srli_si64(c, 32);
2548  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2549  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2550  c = _mm_add_si64(c, p);
2551  X[i+j+1] = _mm_cvtsi64_si32(c);
2552  c = _mm_srli_si64(c, 32);
2553  }
2554 
2555  if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2556  while (!Subtract(X+N, X+N, M, N)) {}
2557  }
2558 
2559  memcpy(R, X+N, N*WORD_SIZE);
2560  _mm_empty();
2561 #endif
2562 }
2563 
2564 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
2565 // T[2*N] - temporary work space
2566 // X[2*N] - number to be reduced
2567 // M[N] --- modulus
2568 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
2569 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
2570 
2571 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
2572 {
2573  CRYPTOPP_ASSERT(N%2==0 && N>=4);
2574 
2575 #define M0 M
2576 #define M1 (M+N2)
2577 #define V0 V
2578 #define V1 (V+N2)
2579 
2580 #define X0 X
2581 #define X1 (X+N2)
2582 #define X2 (X+N)
2583 #define X3 (X+N+N2)
2584 
2585  const size_t N2 = N/2;
2586  Multiply(T0, T2, V0, X3, N2);
2587  int c2 = Add(T0, T0, X0, N);
2588  MultiplyBottom(T3, T2, T0, U, N2);
2589  MultiplyTop(T2, R, T0, T3, M0, N2);
2590  c2 -= Subtract(T2, T1, T2, N2);
2591  Multiply(T0, R, T3, M1, N2);
2592  c2 -= Subtract(T0, T2, T0, N2);
2593  int c3 = -(int)Subtract(T1, X2, T1, N2);
2594  Multiply(R0, T2, V1, X3, N2);
2595  c3 += Add(R, R, T, N);
2596 
2597  if (c2>0)
2598  c3 += Increment(R1, N2);
2599  else if (c2<0)
2600  c3 -= Decrement(R1, N2, -c2);
2601 
2602  CRYPTOPP_ASSERT(c3>=-1 && c3<=1);
2603  if (c3>0)
2604  Subtract(R, R, M, N);
2605  else if (c3<0)
2606  Add(R, R, M, N);
2607 
2608 #undef M0
2609 #undef M1
2610 #undef V0
2611 #undef V1
2612 
2613 #undef X0
2614 #undef X1
2615 #undef X2
2616 #undef X3
2617 }
2618 
2619 #undef A0
2620 #undef A1
2621 #undef B0
2622 #undef B1
2623 
2624 #undef T0
2625 #undef T1
2626 #undef T2
2627 #undef T3
2628 
2629 #undef R0
2630 #undef R1
2631 #undef R2
2632 #undef R3
2633 
2634 /*
2635 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
2636 static word SubatomicDivide(word *A, word B0, word B1)
2637 {
2638  // Assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
2639  CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0));
2640 
2641  // estimate the quotient: do a 2 word by 1 word divide
2642  word Q;
2643  if (B1+1 == 0)
2644  Q = A[2];
2645  else
2646  Q = DWord(A[1], A[2]).DividedBy(B1+1);
2647 
2648  // now subtract Q*B from A
2649  DWord p = DWord::Multiply(B0, Q);
2650  DWord u = (DWord) A[0] - p.GetLowHalf();
2651  A[0] = u.GetLowHalf();
2652  u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
2653  A[1] = u.GetLowHalf();
2654  A[2] += u.GetHighHalf();
2655 
2656  // Q <= actual quotient, so fix it
2657  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
2658  {
2659  u = (DWord) A[0] - B0;
2660  A[0] = u.GetLowHalf();
2661  u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
2662  A[1] = u.GetLowHalf();
2663  A[2] += u.GetHighHalf();
2664  Q++;
2665  CRYPTOPP_ASSERT(Q); // shouldn't overflow
2666  }
2667 
2668  return Q;
2669 }
2670 
2671 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
2672 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2673 {
2674  if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
2675  {
2676  Q[0] = A[2];
2677  Q[1] = A[3];
2678  }
2679  else
2680  {
2681  word T[4];
2682  T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
2683  Q[1] = SubatomicDivide(T+1, B[0], B[1]);
2684  Q[0] = SubatomicDivide(T, B[0], B[1]);
2685 
2686 #if defined(CRYPTOPP_DEBUG)
2687  // multiply quotient and divisor and add remainder, make sure it equals dividend
2688  CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2689  word P[4];
2690  LowLevel::Multiply2(P, Q, B);
2691  Add(P, P, T, 4);
2692  CRYPTOPP_ASSERT(memcmp(P, A, 4*WORD_SIZE)==0);
2693 #endif
2694  }
2695 }
2696 */
2697 
2698 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2699 {
2700  word T[4];
2701  DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
2702  Q[0] = q.GetLowHalf();
2703  Q[1] = q.GetHighHalf();
2704 
2705 #if defined(CRYPTOPP_DEBUG)
2706  if (B[0] || B[1])
2707  {
2708  // multiply quotient and divisor and add remainder, make sure it equals dividend
2709  CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2710  word P[4];
2711  s_pMul[0](P, Q, B);
2712  Add(P, P, T, 4);
2713  CRYPTOPP_ASSERT(memcmp(P, A, 4*WORD_SIZE)==0);
2714  }
2715 #endif
2716 }
2717 
2718 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
2719 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
2720 {
2721  CRYPTOPP_ASSERT(N && N%2==0);
2722 
2723  AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2724 
2725  word borrow = Subtract(R, R, T, N+2);
2726  CRYPTOPP_ASSERT(!borrow && !R[N+1]);
2727  CRYPTOPP_UNUSED(borrow);
2728 
2729  while (R[N] || Compare(R, B, N) >= 0)
2730  {
2731  R[N] -= Subtract(R, R, B, N);
2732  Q[1] += (++Q[0]==0);
2733  CRYPTOPP_ASSERT(Q[0] || Q[1]); // no overflow
2734  }
2735 }
2736 
2737 // R[NB] -------- remainder = A%B
2738 // Q[NA-NB+2] --- quotient = A/B
2739 // T[NA+3*(NB+2)] - temp work space
2740 // A[NA] -------- dividend
2741 // B[NB] -------- divisor
2742 
2743 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
2744 {
2745  CRYPTOPP_ASSERT(NA && NB && NA%2==0 && NB%2==0);
2746  CRYPTOPP_ASSERT(B[NB-1] || B[NB-2]);
2747  CRYPTOPP_ASSERT(NB <= NA);
2748 
2749  // set up temporary work space
2750  word *const TA=T;
2751  word *const TB=T+NA+2;
2752  word *const TP=T+NA+2+NB;
2753 
2754  // copy B into TB and normalize it so that TB has highest bit set to 1
2755  unsigned shiftWords = (B[NB-1]==0);
2756  TB[0] = TB[NB-1] = 0;
2757  CopyWords(TB+shiftWords, B, NB-shiftWords);
2758  unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
2759  CRYPTOPP_ASSERT(shiftBits < WORD_BITS);
2760  ShiftWordsLeftByBits(TB, NB, shiftBits);
2761 
2762  // copy A into TA and normalize it
2763  TA[0] = TA[NA] = TA[NA+1] = 0;
2764  CopyWords(TA+shiftWords, A, NA);
2765  ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2766 
2767  if (TA[NA+1]==0 && TA[NA] <= 1)
2768  {
2769  Q[NA-NB+1] = Q[NA-NB] = 0;
2770  while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2771  {
2772  TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2773  ++Q[NA-NB];
2774  }
2775  }
2776  else
2777  {
2778  NA+=2;
2779  CRYPTOPP_ASSERT(Compare(TA+NA-NB, TB, NB) < 0);
2780  }
2781 
2782  word BT[2];
2783  BT[0] = TB[NB-2] + 1;
2784  BT[1] = TB[NB-1] + (BT[0]==0);
2785 
2786  // start reducing TA mod TB, 2 words at a time
2787  for (size_t i=NA-2; i>=NB; i-=2)
2788  {
2789  AtomicDivide(Q+i-NB, TA+i-2, BT);
2790  CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2791  }
2792 
2793  // copy TA into R, and denormalize it
2794  CopyWords(R, TA+shiftWords, NB);
2795  ShiftWordsRightByBits(R, NB, shiftBits);
2796 }
2797 
2798 static inline size_t EvenWordCount(const word *X, size_t N)
2799 {
2800  while (N && X[N-2]==0 && X[N-1]==0)
2801  N-=2;
2802  return N;
2803 }
2804 
2805 // return k
2806 // R[N] --- result = A^(-1) * 2^k mod M
2807 // T[4*N] - temporary work space
2808 // A[NA] -- number to take inverse of
2809 // M[N] --- modulus
2810 
2811 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
2812 {
2813  CRYPTOPP_ASSERT(NA<=N && N && N%2==0);
2814 
2815  word *b = T;
2816  word *c = T+N;
2817  word *f = T+2*N;
2818  word *g = T+3*N;
2819  size_t bcLen=2, fgLen=EvenWordCount(M, N);
2820  unsigned int k=0;
2821  bool s=false;
2822 
2823  SetWords(T, 0, 3*N);
2824  b[0]=1;
2825  CopyWords(f, A, NA);
2826  CopyWords(g, M, N);
2827 
2828  while (1)
2829  {
2830  word t=f[0];
2831  while (!t)
2832  {
2833  if (EvenWordCount(f, fgLen)==0)
2834  {
2835  SetWords(R, 0, N);
2836  return 0;
2837  }
2838 
2839  ShiftWordsRightByWords(f, fgLen, 1);
2840  bcLen += 2 * (c[bcLen-1] != 0);
2841  CRYPTOPP_ASSERT(bcLen <= N);
2842  ShiftWordsLeftByWords(c, bcLen, 1);
2843  k+=WORD_BITS;
2844  t=f[0];
2845  }
2846 
2847  // t must be non-0; otherwise undefined.
2848  unsigned int i = TrailingZeros(t);
2849  t >>= i;
2850  k += i;
2851 
2852  if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2853  {
2854  if (s)
2855  Subtract(R, M, b, N);
2856  else
2857  CopyWords(R, b, N);
2858  return k;
2859  }
2860 
2861  ShiftWordsRightByBits(f, fgLen, i);
2862  t = ShiftWordsLeftByBits(c, bcLen, i);
2863  c[bcLen] += t;
2864  bcLen += 2 * (t!=0);
2865  CRYPTOPP_ASSERT(bcLen <= N);
2866 
2867  bool swap = Compare(f, g, fgLen)==-1;
2868  ConditionalSwapPointers(swap, f, g);
2869  ConditionalSwapPointers(swap, b, c);
2870  s ^= swap;
2871 
2872  fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2873 
2874  Subtract(f, f, g, fgLen);
2875  t = Add(b, b, c, bcLen);
2876  b[bcLen] += t;
2877  bcLen += 2*t;
2878  CRYPTOPP_ASSERT(bcLen <= N);
2879  }
2880 }
2881 
2882 // R[N] - result = A/(2^k) mod M
2883 // A[N] - input
2884 // M[N] - modulus
2885 
2886 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2887 {
2888  CopyWords(R, A, N);
2889 
2890  while (k--)
2891  {
2892  if (R[0]%2==0)
2893  ShiftWordsRightByBits(R, N, 1);
2894  else
2895  {
2896  word carry = Add(R, R, M, N);
2897  ShiftWordsRightByBits(R, N, 1);
2898  R[N-1] += carry<<(WORD_BITS-1);
2899  }
2900  }
2901 }
2902 
2903 // R[N] - result = A*(2^k) mod M
2904 // A[N] - input
2905 // M[N] - modulus
2906 
2907 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2908 {
2909  CopyWords(R, A, N);
2910 
2911  while (k--)
2912  if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2913  Subtract(R, R, M, N);
2914 }
2915 
2916 // ******************************************************************
2917 
2918 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2919 
2920 static inline size_t RoundupSize(size_t n)
2921 {
2922  if (n<=8)
2923  return RoundupSizeTable[n];
2924  else if (n<=16)
2925  return 16;
2926  else if (n<=32)
2927  return 32;
2928  else if (n<=64)
2929  return 64;
2930  else
2931  return size_t(1) << BitPrecision(n-1);
2932 }
2933 
2935  : reg(2), sign(POSITIVE)
2936 {
2937  reg[0] = reg[1] = 0;
2938 }
2939 
2941  : reg(RoundupSize(t.WordCount())), sign(t.sign)
2942 {
2943  CopyWords(reg, t.reg, reg.size());
2944 }
2945 
2946 Integer::Integer(Sign s, lword value)
2947  : reg(2), sign(s)
2948 {
2949  reg[0] = word(value);
2950  reg[1] = word(SafeRightShift<WORD_BITS>(value));
2951 }
2952 
2953 Integer::Integer(signed long value)
2954  : reg(2)
2955 {
2956  if (value >= 0)
2957  sign = POSITIVE;
2958  else
2959  {
2960  sign = NEGATIVE;
2961  value = -value;
2962  }
2963  reg[0] = word(value);
2964  reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
2965 }
2966 
2967 Integer::Integer(Sign s, word high, word low)
2968  : reg(2), sign(s)
2969 {
2970  reg[0] = low;
2971  reg[1] = high;
2972 }
2973 
2975 {
2976  if (ByteCount() > sizeof(long))
2977  return false;
2978 
2979  unsigned long value = (unsigned long)reg[0];
2980  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2981 
2982  if (sign==POSITIVE)
2983  return (signed long)value >= 0;
2984  else
2985  return -(signed long)value < 0;
2986 }
2987 
2988 signed long Integer::ConvertToLong() const
2989 {
2991 
2992  unsigned long value = (unsigned long)reg[0];
2993  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2994  return sign==POSITIVE ? value : -(signed long)value;
2995 }
2996 
2997 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2998 {
3000 
3001  if (o != LITTLE_ENDIAN_ORDER)
3002  {
3003  Decode(encodedInteger, byteCount, s);
3004  }
3005  else
3006  {
3007  SecByteBlock block(byteCount);
3008  encodedInteger.Get(block, block.size());
3009  std::reverse(block.begin(), block.begin()+block.size());
3010 
3011  Decode(block.begin(), block.size(), s);
3012  }
3013 }
3014 
3015 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
3016 {
3017  CRYPTOPP_ASSERT(encodedInteger && byteCount); // NULL buffer
3019 
3020  if (o != LITTLE_ENDIAN_ORDER)
3021  {
3022  Decode(encodedInteger, byteCount, s);
3023  }
3024  else
3025  {
3026  SecByteBlock block(byteCount);
3027 #if (_MSC_FULL_VER >= 140050727)
3028  std::reverse_copy(encodedInteger, encodedInteger+byteCount,
3029  stdext::make_checked_array_iterator(block.begin(), block.size()));
3030 #else
3031  std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
3032 #endif
3033  Decode(block.begin(), block.size(), s);
3034  return;
3035  }
3036 }
3037 
3039 {
3040  // Make explicit call to avoid virtual-dispatch findings in ctor
3041  Integer::BERDecode(bt);
3042 }
3043 
3045 {
3046  Randomize(rng, bitcount);
3047 }
3048 
3049 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3050 {
3051  if (!Randomize(rng, min, max, rnType, equiv, mod))
3053 }
3054 
3056 {
3057  Integer r((word)0, BitsToWords(e+1));
3058  r.SetBit(e);
3059  return r;
3060 }
3061 
3063 {
3064  return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
3065 }
3066 
3068 {
3069  if (this != &t)
3070  {
3071  if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
3072  reg.New(RoundupSize(t.WordCount()));
3073  CopyWords(reg, t.reg, reg.size());
3074  sign = t.sign;
3075  }
3076  return *this;
3077 }
3078 
3079 bool Integer::GetBit(size_t n) const
3080 {
3081  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
3082  // The code change occurred at Commit dc99266599a0e72d.
3083  if (n/WORD_BITS < reg.size())
3084  return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
3085  else
3086  return 0;
3087 }
3088 
3089 void Integer::SetBit(size_t n, bool value)
3090 {
3091  if (value)
3092  {
3093  reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
3094  reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
3095  }
3096  else
3097  {
3098  if (n/WORD_BITS < reg.size())
3099  reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
3100  }
3101 }
3102 
3103 byte Integer::GetByte(size_t n) const
3104 {
3105  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
3106  // The code change occurred at Commit dc99266599a0e72d.
3107  if (n/WORD_SIZE < reg.size())
3108  return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3109  else
3110  return 0;
3111 }
3112 
3113 void Integer::SetByte(size_t n, byte value)
3114 {
3115  reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
3116  reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3117  reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3118 }
3119 
3120 lword Integer::GetBits(size_t i, size_t n) const
3121 {
3122  lword v = 0;
3123  CRYPTOPP_ASSERT(n <= sizeof(v)*8);
3124  for (unsigned int j=0; j<n; j++)
3125  v |= lword(GetBit(i+j)) << j;
3126  return v;
3127 }
3128 
3130 {
3131  Integer result(*this);
3132  result.Negate();
3133  return result;
3134 }
3135 
3137 {
3138  Integer result(*this);
3139  result.sign = POSITIVE;
3140  return result;
3141 }
3142 
3144 {
3145  reg.swap(a.reg);
3146  std::swap(sign, a.sign);
3147 }
3148 
3149 Integer::Integer(word value, size_t length)
3150  : reg(RoundupSize(length)), sign(POSITIVE)
3151 {
3152  reg[0] = value;
3153  SetWords(reg+1, 0, reg.size()-1);
3154 }
3155 
3156 template <class T>
3157 static Integer StringToInteger(const T *str, ByteOrder order)
3158 {
3159  CRYPTOPP_ASSERT( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER );
3160 
3161  int radix, sign = 1;
3162  // GCC workaround
3163  // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3164  unsigned int length;
3165  for (length = 0; str[length] != 0; length++) {}
3166 
3167  Integer v;
3168 
3169  if (length == 0)
3170  return Integer::Zero();
3171 
3172  switch (str[length-1])
3173  {
3174  case 'h':
3175  case 'H':
3176  radix=16;
3177  break;
3178  case 'o':
3179  case 'O':
3180  radix=8;
3181  break;
3182  case 'b':
3183  case 'B':
3184  radix=2;
3185  break;
3186  default:
3187  radix=10;
3188  }
3189 
3190  // 'str' is of length 1 or more
3191  if (str[0] == '-')
3192  {
3193  sign = -1;
3194  str += 1, length -= 1;
3195  }
3196 
3197  if (length > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
3198  {
3199  radix = 16;
3200  str += 2, length -= 2;
3201  }
3202 
3203  if (order == BIG_ENDIAN_ORDER)
3204  {
3205  for (unsigned int i=0; i<length; i++)
3206  {
3207  int digit, ch = static_cast<int>(str[i]);
3208 
3209  // Profiling showd the second and third Else needed to be swapped
3210  // The code change occurred at Commit dc99266599a0e72d.
3211  if (ch >= '0' && ch <= '9')
3212  digit = ch - '0';
3213  else if (ch >= 'a' && ch <= 'f')
3214  digit = ch - 'a' + 10;
3215  else if (ch >= 'A' && ch <= 'F')
3216  digit = ch - 'A' + 10;
3217  else
3218  digit = radix;
3219 
3220  if (digit < radix)
3221  {
3222  v *= radix;
3223  v += digit;
3224  }
3225  }
3226  }
3227  else if (radix == 16 && order == LITTLE_ENDIAN_ORDER)
3228  {
3229  // Nibble high, low and count
3230  unsigned int nh = 0, nl = 0, nc = 0;
3231  Integer position(Integer::One());
3232 
3233  for (unsigned int i=0; i<length; i++)
3234  {
3235  int digit, ch = static_cast<int>(str[i]);
3236 
3237  if (ch >= '0' && ch <= '9')
3238  digit = ch - '0';
3239  else if (ch >= 'a' && ch <= 'f')
3240  digit = ch - 'a' + 10;
3241  else if (ch >= 'A' && ch <= 'F')
3242  digit = ch - 'A' + 10;
3243  else
3244  digit = radix;
3245 
3246  if (digit < radix)
3247  {
3248  if (nc++ == 0)
3249  nh = digit;
3250  else
3251  nl = digit;
3252 
3253  if (nc == 2)
3254  {
3255  v += position * (nh << 4 | nl);
3256  nc = 0, position <<= 8;
3257  }
3258  }
3259  }
3260 
3261  if (nc == 1)
3262  v += nh * position;
3263  }
3264  else // LITTLE_ENDIAN_ORDER && radix != 16
3265  {
3266  for (int i=static_cast<int>(length)-1; i>=0; i--)
3267  {
3268  int digit, ch = static_cast<int>(str[i]);
3269 
3270  if (ch >= '0' && ch <= '9')
3271  digit = ch - '0';
3272  else if (ch >= 'a' && ch <= 'f')
3273  digit = ch - 'a' + 10;
3274  else if (ch >= 'A' && ch <= 'F')
3275  digit = ch - 'A' + 10;
3276  else
3277  digit = radix;
3278 
3279  if (digit < radix)
3280  {
3281  v *= radix;
3282  v += digit;
3283  }
3284  }
3285  }
3286 
3287  if (sign == -1)
3288  v.Negate();
3289 
3290  return v;
3291 }
3292 
3293 Integer::Integer(const char *str, ByteOrder order)
3294  : reg(2), sign(POSITIVE)
3295 {
3296  *this = StringToInteger(str,order);
3297 }
3298 
3299 Integer::Integer(const wchar_t *str, ByteOrder order)
3300  : reg(2), sign(POSITIVE)
3301 {
3302  *this = StringToInteger(str,order);
3303 }
3304 
3305 unsigned int Integer::WordCount() const
3306 {
3307  return (unsigned int)CountWords(reg, reg.size());
3308 }
3309 
3310 unsigned int Integer::ByteCount() const
3311 {
3312  unsigned wordCount = WordCount();
3313  if (wordCount)
3314  return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
3315  else
3316  return 0;
3317 }
3318 
3319 unsigned int Integer::BitCount() const
3320 {
3321  unsigned wordCount = WordCount();
3322  if (wordCount)
3323  return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
3324  else
3325  return 0;
3326 }
3327 
3328 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
3329 {
3330  CRYPTOPP_ASSERT(input && inputLen); // NULL buffer
3331  StringStore store(input, inputLen);
3332  Decode(store, inputLen, s);
3333 }
3334 
3336 {
3337  CRYPTOPP_ASSERT(bt.MaxRetrievable() >= inputLen);
3338  if (bt.MaxRetrievable() < inputLen)
3339  throw InvalidArgument("Integer: input length is too small");
3340 
3341  byte b;
3342  bt.Peek(b);
3343  sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
3344 
3345  while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
3346  {
3347  bt.Skip(1);
3348  inputLen--;
3349  bt.Peek(b);
3350  }
3351 
3352  reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
3353  for (size_t i=inputLen; i > 0; i--)
3354  {
3355  (void)bt.Get(b);
3356  reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3357  }
3358 
3359  if (sign == NEGATIVE)
3360  {
3361  for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
3362  reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3363  TwosComplement(reg, reg.size());
3364  }
3365 }
3366 
3367 size_t Integer::MinEncodedSize(Signedness signedness) const
3368 {
3369  // Profiling tells us the original second If was dominant, so it was promoted to the first If statement.
3370  // The code change occurred at Commit dc99266599a0e72d.
3371  unsigned int outputLen = STDMAX(1U, ByteCount());
3372  const bool pre = (signedness == UNSIGNED);
3373  if (!pre && NotNegative() && (GetByte(outputLen-1) & 0x80))
3374  outputLen++;
3375  if (pre)
3376  return outputLen;
3377  if (IsNegative() && *this < -Power2(outputLen*8-1))
3378  outputLen++;
3379  return outputLen;
3380 }
3381 
3382 // PKCS12_PBKDF and other classes use undersized buffers
3383 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
3384 {
3385  CRYPTOPP_ASSERT(output && outputLen); // NULL buffer
3386  ArraySink sink(output, outputLen);
3387  Encode(sink, outputLen, signedness);
3388 }
3389 
3390 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
3391 {
3392  if (signedness == UNSIGNED || NotNegative())
3393  {
3394  for (size_t i=outputLen; i > 0; i--)
3395  bt.Put(GetByte(i-1));
3396  }
3397  else
3398  {
3399  // take two's complement of *this
3400  Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
3401  temp.Encode(bt, outputLen, UNSIGNED);
3402  }
3403 }
3404 
3406 {
3407  DERGeneralEncoder enc(bt, INTEGER);
3409  enc.MessageEnd();
3410 }
3411 
3412 void Integer::BERDecode(const byte *input, size_t len)
3413 {
3414  CRYPTOPP_ASSERT(input && len); // NULL buffer
3415  StringStore store(input, len);
3416  BERDecode(store);
3417 }
3418 
3420 {
3421  BERGeneralDecoder dec(bt, INTEGER);
3422  if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
3423  BERDecodeError();
3424  Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
3425  dec.MessageEnd();
3426 }
3427 
3429 {
3430  DERGeneralEncoder enc(bt, OCTET_STRING);
3431  Encode(enc, length);
3432  enc.MessageEnd();
3433 }
3434 
3436 {
3437  BERGeneralDecoder dec(bt, OCTET_STRING);
3438  if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3439  BERDecodeError();
3440  Decode(dec, length);
3441  dec.MessageEnd();
3442 }
3443 
3444 size_t Integer::OpenPGPEncode(byte *output, size_t bufferSize) const
3445 {
3446  CRYPTOPP_ASSERT(output && bufferSize); // NULL buffer
3447  CRYPTOPP_ASSERT(bufferSize >= MinEncodedSize()); // Undersized buffer
3448  ArraySink sink(output, bufferSize);
3449  return OpenPGPEncode(sink);
3450 }
3451 
3453 {
3454  word16 bitCount = word16(BitCount());
3455  bt.PutWord16(bitCount);
3456  size_t byteCount = BitsToBytes(bitCount);
3457  Encode(bt, byteCount);
3458  return 2 + byteCount;
3459 }
3460 
3461 void Integer::OpenPGPDecode(const byte *input, size_t len)
3462 {
3463  CRYPTOPP_ASSERT(input && len); // NULL buffer
3464  StringStore store(input, len);
3465  OpenPGPDecode(store);
3466 }
3467 
3469 {
3470  word16 bitCount;
3471  if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
3472  throw OpenPGPDecodeErr();
3473  Decode(bt, BitsToBytes(bitCount));
3474 }
3475 
3477 {
3478  const size_t nbytes = nbits/8 + 1;
3479  SecByteBlock buf(nbytes);
3480  rng.GenerateBlock(buf, nbytes);
3481  if (nbytes)
3482  buf[0] = (byte)Crop(buf[0], nbits % 8);
3483  Decode(buf, nbytes, UNSIGNED);
3484 }
3485 
3486 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
3487 {
3488  if (min > max)
3489  throw InvalidArgument("Integer: Min must be no greater than Max");
3490 
3491  Integer range = max - min;
3492  const unsigned int nbits = range.BitCount();
3493 
3494  do
3495  {
3496  Randomize(rng, nbits);
3497  }
3498  while (*this > range);
3499 
3500  *this += min;
3501 }
3502 
3503 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3504 {
3505  return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
3506 }
3507 
3509 {
3510 public:
3511  KDF2_RNG(const byte *seed, size_t seedSize)
3512  : m_counter(0), m_counterAndSeed(seedSize + 4)
3513  {
3514  memcpy(m_counterAndSeed + 4, seed, seedSize);
3515  }
3516 
3517  void GenerateBlock(byte *output, size_t size)
3518  {
3519  CRYPTOPP_ASSERT(output && size); // NULL buffer
3520  PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
3521  ++m_counter;
3522  P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULLPTR, 0);
3523  }
3524 
3525 private:
3526  word32 m_counter;
3527  SecByteBlock m_counterAndSeed;
3528 };
3529 
3531 {
3532  Integer min = params.GetValueWithDefault("Min", Integer::Zero());
3533  Integer max;
3534  if (!params.GetValue("Max", max))
3535  {
3536  int bitLength;
3537  if (params.GetIntValue("BitLength", bitLength))
3538  max = Integer::Power2(bitLength);
3539  else
3540  throw InvalidArgument("Integer: missing Max argument");
3541  }
3542  if (min > max)
3543  throw InvalidArgument("Integer: Min must be no greater than Max");
3544 
3545  Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
3546  Integer mod = params.GetValueWithDefault("Mod", Integer::One());
3547 
3548  if (equiv.IsNegative() || equiv >= mod)
3549  throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
3550 
3551  Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
3552 
3553  member_ptr<KDF2_RNG> kdf2Rng;
3555  if (params.GetValue(Name::Seed(), seed))
3556  {
3557  ByteQueue bq;
3558  DERSequenceEncoder seq(bq);
3559  min.DEREncode(seq);
3560  max.DEREncode(seq);
3561  equiv.DEREncode(seq);
3562  mod.DEREncode(seq);
3563  DEREncodeUnsigned(seq, rnType);
3564  DEREncodeOctetString(seq, seed.begin(), seed.size());
3565  seq.MessageEnd();
3566 
3567  SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
3568  bq.Get(finalSeed, finalSeed.size());
3569  kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3570  }
3571  RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
3572 
3573  switch (rnType)
3574  {
3575  case ANY:
3576  if (mod == One())
3577  Randomize(rng, min, max);
3578  else
3579  {
3580  Integer min1 = min + (equiv-min)%mod;
3581  if (max < min1)
3582  return false;
3583  Randomize(rng, Zero(), (max - min1) / mod);
3584  *this *= mod;
3585  *this += min1;
3586  }
3587  return true;
3588 
3589  case PRIME:
3590  {
3591  const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULLPTR);
3592 
3593  int i;
3594  i = 0;
3595  while (1)
3596  {
3597  if (++i==16)
3598  {
3599  // check if there are any suitable primes in [min, max]
3600  Integer first = min;
3601  if (FirstPrime(first, max, equiv, mod, pSelector))
3602  {
3603  // if there is only one suitable prime, we're done
3604  *this = first;
3605  if (!FirstPrime(first, max, equiv, mod, pSelector))
3606  return true;
3607  }
3608  else
3609  return false;
3610  }
3611 
3612  Randomize(rng, min, max);
3613  if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3614  return true;
3615  }
3616  }
3617 
3618  default:
3619  throw InvalidArgument("Integer: invalid RandomNumberType argument");
3620  }
3621 }
3622 
3623 std::istream& operator>>(std::istream& in, Integer &a)
3624 {
3625  char c;
3626  unsigned int length = 0;
3627  SecBlock<char> str(length + 16);
3628 
3629  std::ws(in);
3630 
3631  do
3632  {
3633  in.read(&c, 1);
3634  str[length++] = c;
3635  if (length >= str.size())
3636  str.Grow(length + 16);
3637  }
3638  while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
3639 
3640  if (in.gcount())
3641  in.putback(c);
3642  str[length-1] = '\0';
3643  a = Integer(str);
3644 
3645  return in;
3646 }
3647 
3648 // Ensure base 10 is default
3649 inline int FlagToBase(long f) {
3650  return f == std::ios::hex ? 16 : (f == std::ios::oct ? 8 : 10);
3651 }
3652 
3653 inline char FlagToSuffix(long f) {
3654  return f == std::ios::hex ? 'h' : (f == std::ios::oct ? 'o' : '.');
3655 }
3656 
3657 // Ensure base 10 is default
3658 std::ostream& operator<<(std::ostream& out, const Integer &a)
3659 {
3660  // Get relevant conversion specifications from ostream.
3661  const long f = out.flags() & std::ios::basefield;
3662  const int base = FlagToBase(f);
3663  const char suffix = FlagToSuffix(f);
3664 
3665  Integer temp1=a, temp2;
3666  if (a.IsNegative())
3667  {
3668  out << '-';
3669  temp1.Negate();
3670  }
3671 
3672  if (!a)
3673  out << '0';
3674 
3675  static const char upper[]="0123456789ABCDEF";
3676  static const char lower[]="0123456789abcdef";
3677 
3678  const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3679  unsigned int i=0;
3680  SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
3681 
3682  while (!!temp1)
3683  {
3684  word digit;
3685  Integer::Divide(digit, temp2, temp1, base);
3686  s[i++]=vec[digit];
3687  temp1.swap(temp2);
3688  }
3689 
3690  while (i--)
3691  {
3692  out << s[i];
3693  }
3694 
3695 #ifdef CRYPTOPP_USE_STD_SHOWBASE
3696  if (out.flags() & std::ios_base::showbase)
3697  out << suffix;
3698 
3699  return out;
3700 #else
3701  return out << suffix;
3702 #endif
3703 }
3704 
3706 {
3707  if (NotNegative())
3708  {
3709  if (Increment(reg, reg.size()))
3710  {
3711  reg.CleanGrow(2*reg.size());
3712  reg[reg.size()/2]=1;
3713  }
3714  }
3715  else
3716  {
3717  word borrow = Decrement(reg, reg.size());
3718  CRYPTOPP_ASSERT(!borrow); CRYPTOPP_UNUSED(borrow);
3719 
3720  if (WordCount()==0)
3721  *this = Zero();
3722  }
3723  return *this;
3724 }
3725 
3727 {
3728  if (IsNegative())
3729  {
3730  if (Increment(reg, reg.size()))
3731  {
3732  reg.CleanGrow(2*reg.size());
3733  reg[reg.size()/2]=1;
3734  }
3735  }
3736  else
3737  {
3738  if (Decrement(reg, reg.size()))
3739  *this = -One();
3740  }
3741  return *this;
3742 }
3743 
3744 // This is a bit operation. We set sign to POSITIVE, so there's no need to
3745 // worry about negative zero. Also see http://stackoverflow.com/q/11644362.
3747 {
3748  if (this == &t)
3749  {
3750  return AbsoluteValue();
3751  }
3752  else if (reg.size() >= t.reg.size())
3753  {
3754  Integer result(t);
3755  AndWords(result.reg, reg, t.reg.size());
3756 
3757  result.sign = POSITIVE;
3758  return result;
3759  }
3760  else // reg.size() < t.reg.size()
3761  {
3762  Integer result(*this);
3763  AndWords(result.reg, t.reg, reg.size());
3764 
3765  result.sign = POSITIVE;
3766  return result;
3767  }
3768 }
3769 
3770 // This is a bit operation. We set sign to POSITIVE, so there's no need to
3771 // worry about negative zero. Also see http://stackoverflow.com/q/11644362.
3772 Integer Integer::Or(const Integer& t) const
3773 {
3774  if (this == &t)
3775  {
3776  return AbsoluteValue();
3777  }
3778  else if (reg.size() >= t.reg.size())
3779  {
3780  Integer result(*this);
3781  OrWords(result.reg, t.reg, t.reg.size());
3782 
3783  result.sign = POSITIVE;
3784  return result;
3785  }
3786  else // reg.size() < t.reg.size()
3787  {
3788  Integer result(t);
3789  OrWords(result.reg, reg, reg.size());
3790 
3791  result.sign = POSITIVE;
3792  return result;
3793  }
3794 }
3795 
3796 // This is a bit operation. We set sign to POSITIVE, so there's no need to
3797 // worry about negative zero. Also see http://stackoverflow.com/q/11644362.
3799 {
3800  if (this == &t)
3801  {
3802  return Integer::Zero();
3803  }
3804  else if (reg.size() >= t.reg.size())
3805  {
3806  Integer result(*this);
3807  XorWords(result.reg, t.reg, t.reg.size());
3808 
3809  result.sign = POSITIVE;
3810  return result;
3811  }
3812  else // reg.size() < t.reg.size()
3813  {
3814  Integer result(t);
3815  XorWords(result.reg, reg, reg.size());
3816 
3817  result.sign = POSITIVE;
3818  return result;
3819  }
3820 }
3821 
3822 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
3823 {
3824  // Profiling tells us the original second Else If was dominant, so it was promoted to the first If statement.
3825  // The code change occurred at Commit dc99266599a0e72d.
3826  int carry; const bool pre = (a.reg.size() == b.reg.size());
3827  if (!pre && a.reg.size() > b.reg.size())
3828  {
3829  carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3830  CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3831  carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3832  }
3833  else if (pre)
3834  {
3835  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3836  }
3837  else
3838  {
3839  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3840  CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3841  carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3842  }
3843 
3844  if (carry)
3845  {
3846  sum.reg.CleanGrow(2*sum.reg.size());
3847  sum.reg[sum.reg.size()/2] = 1;
3848  }
3849  sum.sign = Integer::POSITIVE;
3850 }
3851 
3852 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
3853 {
3854  unsigned aSize = a.WordCount();
3855  aSize += aSize%2;
3856  unsigned bSize = b.WordCount();
3857  bSize += bSize%2;
3858 
3859  // Profiling tells us the original second Else If was dominant, so it was promoted to the first If statement.
3860  // The code change occurred at Commit dc99266599a0e72d.
3861  if (aSize > bSize)
3862  {
3863  word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3864  CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3865  borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3866  CRYPTOPP_ASSERT(!borrow); CRYPTOPP_UNUSED(borrow);
3867  diff.sign = Integer::POSITIVE;
3868  }
3869  else if (aSize == bSize)
3870  {
3871  if (Compare(a.reg, b.reg, aSize) >= 0)
3872  {
3873  Subtract(diff.reg, a.reg, b.reg, aSize);
3874  diff.sign = Integer::POSITIVE;
3875  }
3876  else
3877  {
3878  Subtract(diff.reg, b.reg, a.reg, aSize);
3879  diff.sign = Integer::NEGATIVE;
3880  }
3881  }
3882  else
3883  {
3884  word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3885  CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3886  borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3887  CRYPTOPP_ASSERT(!borrow); CRYPTOPP_UNUSED(borrow);
3888  diff.sign = Integer::NEGATIVE;
3889  }
3890 }
3891 
3892 // MSVC .NET 2003 workaround
3893 template <class T> inline const T& STDMAX2(const T& a, const T& b)
3894 {
3895  return a < b ? b : a;
3896 }
3897 
3899 {
3900  Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
3901  if (NotNegative())
3902  {
3903  if (b.NotNegative())
3904  PositiveAdd(sum, *this, b);
3905  else
3906  PositiveSubtract(sum, *this, b);
3907  }
3908  else
3909  {
3910  if (b.NotNegative())
3911  PositiveSubtract(sum, b, *this);
3912  else
3913  {
3914  PositiveAdd(sum, *this, b);
3915  sum.sign = Integer::NEGATIVE;
3916  }
3917  }
3918  return sum;
3919 }
3920 
3922 {
3923  reg.CleanGrow(t.reg.size());
3924  if (NotNegative())
3925  {
3926  if (t.NotNegative())
3927  PositiveAdd(*this, *this, t);
3928  else
3929  PositiveSubtract(*this, *this, t);
3930  }
3931  else
3932  {
3933  if (t.NotNegative())
3934  PositiveSubtract(*this, t, *this);
3935  else
3936  {
3937  PositiveAdd(*this, *this, t);
3938  sign = Integer::NEGATIVE;
3939  }
3940  }
3941  return *this;
3942 }
3943 
3945 {
3946  Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
3947  if (NotNegative())
3948  {
3949  if (b.NotNegative())
3950  PositiveSubtract(diff, *this, b);
3951  else
3952  PositiveAdd(diff, *this, b);
3953  }
3954  else
3955  {
3956  if (b.NotNegative())
3957  {
3958  PositiveAdd(diff, *this, b);
3959  diff.sign = Integer::NEGATIVE;
3960  }
3961  else
3962  PositiveSubtract(diff, b, *this);
3963  }
3964  return diff;
3965 }
3966 
3968 {
3969  reg.CleanGrow(t.reg.size());
3970  if (NotNegative())
3971  {
3972  if (t.NotNegative())
3973  PositiveSubtract(*this, *this, t);
3974  else
3975  PositiveAdd(*this, *this, t);
3976  }
3977  else
3978  {
3979  if (t.NotNegative())
3980  {
3981  PositiveAdd(*this, *this, t);
3982  sign = Integer::NEGATIVE;
3983  }
3984  else
3985  PositiveSubtract(*this, t, *this);
3986  }
3987  return *this;
3988 }
3989 
3991 {
3992  const size_t wordCount = WordCount();
3993  const size_t shiftWords = n / WORD_BITS;
3994  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3995 
3996  reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
3997  ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
3998  ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
3999  return *this;
4000 }
4001 
4003 {
4004  const size_t wordCount = WordCount();
4005  const size_t shiftWords = n / WORD_BITS;
4006  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
4007 
4008  ShiftWordsRightByWords(reg, wordCount, shiftWords);
4009  if (wordCount > shiftWords)
4010  ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
4011  if (IsNegative() && WordCount()==0) // avoid -0
4012  *this = Zero();
4013  return *this;
4014 }
4015 
4017 {
4018  if (this != &t)
4019  {
4020  const size_t size = STDMIN(reg.size(), t.reg.size());
4021  reg.resize(size);
4022  AndWords(reg, t.reg, size);
4023  }
4024  sign = POSITIVE;
4025  return *this;
4026 }
4027 
4029 {
4030  if (this != &t)
4031  {
4032  if (reg.size() >= t.reg.size())
4033  {
4034  OrWords(reg, t.reg, t.reg.size());
4035  }
4036  else // reg.size() < t.reg.size()
4037  {
4038  const size_t head = reg.size();
4039  const size_t tail = t.reg.size() - reg.size();
4040  reg.resize(head+tail);
4041  OrWords(reg, t.reg, head);
4042  CopyWords(reg+head,t.reg+head,tail);
4043  }
4044  }
4045  sign = POSITIVE;
4046  return *this;
4047 }
4048 
4050 {
4051  if (this == &t)
4052  {
4053  *this = Zero();
4054  }
4055  else
4056  {
4057  if (reg.size() >= t.reg.size())
4058  {
4059  XorWords(reg, t.reg, t.reg.size());
4060  }
4061  else // reg.size() < t.reg.size()
4062  {
4063  const size_t head = reg.size();
4064  const size_t tail = t.reg.size() - reg.size();
4065  reg.resize(head+tail);
4066  XorWords(reg, t.reg, head);
4067  CopyWords(reg+head,t.reg+head,tail);
4068  }
4069  }
4070  sign = POSITIVE;
4071  return *this;
4072 }
4073 
4074 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
4075 {
4076  size_t aSize = RoundupSize(a.WordCount());
4077  size_t bSize = RoundupSize(b.WordCount());
4078 
4079  product.reg.CleanNew(RoundupSize(aSize+bSize));
4080  product.sign = Integer::POSITIVE;
4081 
4082  IntegerSecBlock workspace(aSize + bSize);
4083  AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
4084 }
4085 
4086 void Multiply(Integer &product, const Integer &a, const Integer &b)
4087 {
4088  PositiveMultiply(product, a, b);
4089 
4090  if (a.NotNegative() != b.NotNegative())
4091  product.Negate();
4092 }
4093 
4095 {
4096  Integer product;
4097  Multiply(product, *this, b);
4098  return product;
4099 }
4100 
4101 /*
4102 void PositiveDivide(Integer &remainder, Integer &quotient,
4103  const Integer &dividend, const Integer &divisor)
4104 {
4105  remainder.reg.CleanNew(divisor.reg.size());
4106  remainder.sign = Integer::POSITIVE;
4107  quotient.reg.New(0);
4108  quotient.sign = Integer::POSITIVE;
4109  unsigned i=dividend.BitCount();
4110  while (i--)
4111  {
4112  word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
4113  remainder.reg[0] |= dividend[i];
4114  if (overflow || remainder >= divisor)
4115  {
4116  Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
4117  quotient.SetBit(i);
4118  }
4119  }
4120 }
4121 */
4122 
4123 void PositiveDivide(Integer &remainder, Integer &quotient,
4124  const Integer &a, const Integer &b)
4125 {
4126  unsigned aSize = a.WordCount();
4127  unsigned bSize = b.WordCount();
4128 
4129  if (!bSize)
4130  throw Integer::DivideByZero();
4131 
4132  if (aSize < bSize)
4133  {
4134  remainder = a;
4135  remainder.sign = Integer::POSITIVE;
4136  quotient = Integer::Zero();
4137  return;
4138  }
4139 
4140  aSize += aSize%2; // round up to next even number
4141  bSize += bSize%2;
4142 
4143  remainder.reg.CleanNew(RoundupSize(bSize));
4144  remainder.sign = Integer::POSITIVE;
4145  quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
4146  quotient.sign = Integer::POSITIVE;
4147 
4148  IntegerSecBlock T(aSize+3*(bSize+2));
4149  Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
4150 }
4151 
4152 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
4153 {
4154  PositiveDivide(remainder, quotient, dividend, divisor);
4155 
4156  if (dividend.IsNegative())
4157  {
4158  quotient.Negate();
4159  if (remainder.NotZero())
4160  {
4161  --quotient;
4162  remainder = divisor.AbsoluteValue() - remainder;
4163  }
4164  }
4165 
4166  if (divisor.IsNegative())
4167  quotient.Negate();
4168 }
4169 
4170 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
4171 {
4172  q = a;
4173  q >>= n;
4174 
4175  const size_t wordCount = BitsToWords(n);
4176  if (wordCount <= a.WordCount())
4177  {
4178  r.reg.resize(RoundupSize(wordCount));
4179  CopyWords(r.reg, a.reg, wordCount);
4180  SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
4181  if (n % WORD_BITS != 0)
4182  r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
4183  }
4184  else
4185  {
4186  r.reg.resize(RoundupSize(a.WordCount()));
4187  CopyWords(r.reg, a.reg, r.reg.size());
4188  }
4189  r.sign = POSITIVE;
4190 
4191  if (a.IsNegative() && r.NotZero())
4192  {
4193  --q;
4194  r = Power2(n) - r;
4195  }
4196 }
4197 
4199 {
4200  Integer remainder, quotient;
4201  Integer::Divide(remainder, quotient, *this, b);
4202  return quotient;
4203 }
4204 
4206 {
4207  Integer remainder, quotient;
4208  Integer::Divide(remainder, quotient, *this, b);
4209  return remainder;
4210 }
4211 
4212 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
4213 {
4214  if (!divisor)
4215  throw Integer::DivideByZero();
4216 
4217  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4218  {
4219  quotient = dividend >> (BitPrecision(divisor)-1);
4220  remainder = dividend.reg[0] & (divisor-1);
4221  return;
4222  }
4223 
4224  unsigned int i = dividend.WordCount();
4225  quotient.reg.CleanNew(RoundupSize(i));
4226  remainder = 0;
4227  while (i--)
4228  {
4229  quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
4230  remainder = DWord(dividend.reg[i], remainder) % divisor;
4231  }
4232 
4233  if (dividend.NotNegative())
4234  quotient.sign = POSITIVE;
4235  else
4236  {
4237  quotient.sign = NEGATIVE;
4238  if (remainder)
4239  {
4240  --quotient;
4241  remainder = divisor - remainder;
4242  }
4243  }
4244 }
4245 
4247 {
4248  word remainder;
4249  Integer quotient;
4250  Integer::Divide(remainder, quotient, *this, b);
4251  return quotient;
4252 }
4253 
4254 word Integer::Modulo(word divisor) const
4255 {
4256  if (!divisor)
4257  throw Integer::DivideByZero();
4258 
4259  word remainder;
4260 
4261  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4262  // The code change occurred at Commit dc99266599a0e72d.
4263  if ((divisor & (divisor-1)) != 0) // divisor is not a power of 2
4264  {
4265  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4266  // The code change occurred at Commit dc99266599a0e72d.
4267  unsigned int i = WordCount();
4268  if (divisor > 5)
4269  {
4270  remainder = 0;
4271  while (i--)
4272  remainder = DWord(reg[i], remainder) % divisor;
4273  }
4274  else
4275  {
4276  DWord sum(0, 0);
4277  while (i--)
4278  sum += reg[i];
4279  remainder = sum % divisor;
4280  }
4281  }
4282  else // divisor is a power of 2
4283  {
4284  remainder = reg[0] & (divisor-1);
4285  }
4286 
4287  if (IsNegative() && remainder)
4288  remainder = divisor - remainder;
4289 
4290  return remainder;
4291 }
4292 
4294 {
4295  if (!!(*this)) // don't flip sign if *this==0
4296  sign = Sign(1-sign);
4297 }
4298 
4299 int Integer::PositiveCompare(const Integer& t) const
4300 {
4301  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4302  // The code change occurred at Commit dc99266599a0e72d.
4303  const unsigned size = WordCount(), tSize = t.WordCount();
4304  if (size != tSize)
4305  return size > tSize ? 1 : -1;
4306  else
4307  return CryptoPP::Compare(reg, t.reg, size);
4308 }
4309 
4310 int Integer::Compare(const Integer& t) const
4311 {
4312  if (NotNegative())
4313  {
4314  if (t.NotNegative())
4315  return PositiveCompare(t);
4316  else
4317  return 1;
4318  }
4319  else
4320  {
4321  if (t.NotNegative())
4322  return -1;
4323  else
4324  return -PositiveCompare(t);
4325  }
4326 }
4327 
4329 {
4330  if (!IsPositive())
4331  return Zero();
4332 
4333  // overestimate square root
4334  Integer x, y = Power2((BitCount()+1)/2);
4335  CRYPTOPP_ASSERT(y*y >= *this);
4336 
4337  do
4338  {
4339  x = y;
4340  y = (x + *this/x) >> 1;
4341  } while (y<x);
4342 
4343  return x;
4344 }
4345 
4346 bool Integer::IsSquare() const
4347 {
4348  Integer r = SquareRoot();
4349  return *this == r.Squared();
4350 }
4351 
4352 bool Integer::IsUnit() const
4353 {
4354  return (WordCount() == 1) && (reg[0] == 1);
4355 }
4356 
4358 {
4359  return IsUnit() ? *this : Zero();
4360 }
4361 
4362 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
4363 {
4364  CRYPTOPP_ASSERT(m != 0);
4365  if (m == 0)
4366  throw Integer::DivideByZero();
4367 
4368  return x*y%m;
4369 }
4370 
4371 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
4372 {
4373  CRYPTOPP_ASSERT(m != 0);
4374  if (m == 0)
4375  throw Integer::DivideByZero();
4376 
4377  ModularArithmetic mr(m);
4378  return mr.Exponentiate(x, e);
4379 }
4380 
4382 {
4383  return EuclideanDomainOf<Integer>().Gcd(a, b);
4384 }
4385 
4387 {
4389  CRYPTOPP_ASSERT(m.NotZero());
4390 
4391  if (IsNegative())
4392  return Modulo(m).InverseModNext(m);
4393 
4394  // http://github.com/weidai11/cryptopp/issues/602
4395  if (*this >= m)
4396  return Modulo(m).InverseModNext(m);
4397 
4398  return InverseModNext(m);
4399 }
4400 
4401 Integer Integer::InverseModNext(const Integer &m) const
4402 {
4404  CRYPTOPP_ASSERT(m.NotZero());
4405 
4406  if (m.IsEven())
4407  {
4408  if (!m || IsEven())
4409  return Zero(); // no inverse
4410  if (*this == One())
4411  return One();
4412 
4413  Integer u = m.Modulo(*this).InverseModNext(*this);
4414  return !u ? Zero() : (m*(*this-u)+1)/(*this);
4415  }
4416 
4417  // AlmostInverse requires a 4x workspace
4418  IntegerSecBlock T(m.reg.size() * 4);
4419  Integer r((word)0, m.reg.size());
4420  unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
4421  DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
4422  return r;
4423 }
4424 
4425 word Integer::InverseMod(word mod) const
4426 {
4427  CRYPTOPP_ASSERT(mod != 0);
4428 
4429  word g0 = mod, g1 = *this % mod;
4430  word v0 = 0, v1 = 1;
4431  word y;
4432 
4433  while (g1)
4434  {
4435  if (g1 == 1)
4436  return v1;
4437  y = g0 / g1;
4438  g0 = g0 % g1;
4439  v0 += y * v1;
4440 
4441  if (!g0)
4442  break;
4443  if (g0 == 1)
4444  return mod-v0;
4445  y = g1 / g0;
4446  g1 = g1 % g0;
4447  v1 += y * v0;
4448  }
4449  return 0;
4450 }
4451 
4452 // ********************************************************
4453 
4455 {
4456  BERSequenceDecoder seq(bt);
4457  OID oid(seq);
4458  if (oid != ASN1::prime_field())
4459  BERDecodeError();
4460  m_modulus.BERDecode(seq);
4461  seq.MessageEnd();
4462  m_result.reg.resize(m_modulus.reg.size());
4463 }
4464 
4466 {
4467  DERSequenceEncoder seq(bt);
4468  ASN1::prime_field().DEREncode(seq);
4469  m_modulus.DEREncode(seq);
4470  seq.MessageEnd();
4471 }
4472 
4474 {
4475  a.DEREncodeAsOctetString(out, MaxElementByteLength());
4476 }
4477 
4479 {
4480  a.BERDecodeAsOctetString(in, MaxElementByteLength());
4481 }
4482 
4484 {
4485  if (a.reg.size()==m_modulus.reg.size())
4486  {
4487  CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4488  return m_result;
4489  }
4490  else
4491  return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4492 }
4493 
4494 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
4495 {
4496  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4497  {
4498  if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
4499  || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4500  {
4501  CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4502  }
4503  return m_result;
4504  }
4505  else
4506  {
4507  m_result1 = a+b;
4508  if (m_result1 >= m_modulus)
4509  m_result1 -= m_modulus;
4510  return m_result1;
4511  }
4512 }
4513 
4515 {
4516  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4517  {
4518  if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4519  || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4520  {
4521  CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4522  }
4523  }
4524  else
4525  {
4526  a+=b;
4527  if (a>=m_modulus)
4528  a-=m_modulus;
4529  }
4530 
4531  return a;
4532 }
4533 
4534 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
4535 {
4536  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4537  {
4538  if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
4539  CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4540  return m_result;
4541  }
4542  else
4543  {
4544  m_result1 = a-b;
4545  if (m_result1.IsNegative())
4546  m_result1 += m_modulus;
4547  return m_result1;
4548  }
4549 }
4550 
4552 {
4553  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4554  {
4555  if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4556  CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4557  }
4558  else
4559  {
4560  a-=b;
4561  if (a.IsNegative())
4562  a+=m_modulus;
4563  }
4564 
4565  return a;
4566 }
4567 
4569 {
4570  if (!a)
4571  return a;
4572 
4573  CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
4574  if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
4575  Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
4576 
4577  return m_result;
4578 }
4579 
4580 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
4581 {
4582  if (m_modulus.IsOdd())
4583  {
4584  MontgomeryRepresentation dr(m_modulus);
4585  return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
4586  }
4587  else
4588  return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
4589 }
4590 
4591 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
4592 {
4593  if (m_modulus.IsOdd())
4594  {
4595  MontgomeryRepresentation dr(m_modulus);
4596  dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
4597  for (unsigned int i=0; i<exponentsCount; i++)
4598  results[i] = dr.ConvertOut(results[i]);
4599  }
4600  else
4601  AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
4602 }
4603 
4605  : ModularArithmetic(m),
4606  m_u((word)0, m_modulus.reg.size()),
4607  m_workspace(5*m_modulus.reg.size())
4608 {
4609  if (!m_modulus.IsOdd())
4610  throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4611 
4612  RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
4613 }
4614 
4616 {
4617  word *const T = m_workspace.begin();
4618  word *const R = m_result.reg.begin();
4619  const size_t N = m_modulus.reg.size();
4620  CRYPTOPP_ASSERT(a.reg.size()<=N && b.reg.size()<=N);
4621 
4622  AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4623  SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4624  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4625  return m_result;
4626 }
4627 
4629 {
4630  word *const T = m_workspace.begin();
4631  word *const R = m_result.reg.begin();
4632  const size_t N = m_modulus.reg.size();
4633  CRYPTOPP_ASSERT(a.reg.size()<=N);
4634 
4635  CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4636  SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4637  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4638  return m_result;
4639 }
4640 
4642 {
4643  word *const T = m_workspace.begin();
4644  word *const R = m_result.reg.begin();
4645  const size_t N = m_modulus.reg.size();
4646  CRYPTOPP_ASSERT(a.reg.size()<=N);
4647 
4648  CopyWords(T, a.reg, a.reg.size());
4649  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4650  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4651  return m_result;
4652 }
4653 
4655 {
4656 // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
4657  word *const T = m_workspace.begin();
4658  word *const R = m_result.reg.begin();
4659  const size_t N = m_modulus.reg.size();
4660  CRYPTOPP_ASSERT(a.reg.size()<=N);
4661 
4662  CopyWords(T, a.reg, a.reg.size());
4663  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4664  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4665  unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4666 
4667 // cout << "k=" << k << " N*32=" << 32*N << endl;
4668 
4669  if (k>N*WORD_BITS)
4670  DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4671  else
4672  MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4673 
4674  return m_result;
4675 }
4676 
4677 // Specialization declared in misc.h to allow us to print integers
4678 // with additional control options, like arbirary bases and uppercase.
4679 template <> CRYPTOPP_DLL
4680 std::string IntToString<Integer>(Integer value, unsigned int base)
4681 {
4682  // Hack... set the high bit for uppercase. Set the next bit fo a suffix.
4683  static const unsigned int BIT_32 = (1U << 31);
4684  const bool UPPER = !!(base & BIT_32);
4685  static const unsigned int BIT_31 = (1U << 30);
4686  const bool BASE = !!(base & BIT_31);
4687 
4688  const char CH = UPPER ? 'A' : 'a';
4689  base &= ~(BIT_32|BIT_31);
4690  CRYPTOPP_ASSERT(base >= 2 && base <= 32);
4691 
4692  if (value == 0)
4693  return "0";
4694 
4695  bool negative = false, zero = false;
4696  if (value.IsNegative())
4697  {
4698  negative = true;
4699  value.Negate();
4700  }
4701 
4702  if (!value)
4703  zero = true;
4704 
4705  SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
4706  Integer temp;
4707 
4708  unsigned int i=0;
4709  while (!!value)
4710  {
4711  word digit;
4712  Integer::Divide(digit, temp, value, word(base));
4713  s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
4714  value.swap(temp);
4715  }
4716 
4717  std::string result;
4718  result.reserve(i+2);
4719 
4720  if (negative)
4721  result += '-';
4722 
4723  if (zero)
4724  result += '0';
4725 
4726  while (i--)
4727  result += s[i];
4728 
4729  if (BASE)
4730  {
4731  if (base == 10)
4732  result += '.';
4733  else if (base == 16)
4734  result += 'h';
4735  else if (base == 8)
4736  result += 'o';
4737  else if (base == 2)
4738  result += 'b';
4739  }
4740 
4741  return result;
4742 }
4743 
4744 // Specialization declared in misc.h to avoid Coverity findings.
4745 template <> CRYPTOPP_DLL
4746 std::string IntToString<word64>(word64 value, unsigned int base)
4747 {
4748  // Hack... set the high bit for uppercase.
4749  static const unsigned int HIGH_BIT = (1U << 31);
4750  const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
4751  base &= ~HIGH_BIT;
4752 
4753  CRYPTOPP_ASSERT(base >= 2);
4754  if (value == 0)
4755  return "0";
4756 
4757  std::string result;
4758  while (value > 0)
4759  {
4760  word64 digit = value % base;
4761  result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
4762  value /= base;
4763  }
4764  return result;
4765 }
4766 
4767 #ifndef CRYPTOPP_NO_ASSIGN_TO_INTEGER
4768 // Allow the linker to discard Integer code if not needed.
4769 // Also see http://github.com/weidai11/cryptopp/issues/389.
4770 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
4771 {
4772  if (valueType != typeid(Integer))
4773  return false;
4774  *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
4775  return true;
4776 }
4777 #endif // CRYPTOPP_NO_ASSIGN_TO_INTEGER
4778 
4779 // *************************** C++ Static Initialization ***************************
4780 
4782 {
4783 public:
4784  InitInteger()
4785  {
4786  SetFunctionPointers();
4787  }
4788 };
4789 
4790 // This is not really needed because each Integer can dynamically initialize itself,
4791 // but we take a peephole optimization and initialize the class once if init priorities are
4792 // available. Dynamic initialization will be used if init priorities are not available.
4793 
4794 #if HAVE_GCC_INIT_PRIORITY
4795  const InitInteger s_init __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 10))) = InitInteger();
4796 #elif defined(HAVE_MSC_INIT_PRIORITY)
4797  #pragma warning(disable: 4075)
4798  #pragma init_seg(".CRT$XCU")
4799  const InitInteger s_init;
4800 # if defined(USE_MSC_INIT_PRIORITY)
4801  const Integer g_zero(0L);
4802  const Integer g_one(1L);
4803  const Integer g_two(2L);
4804 # endif
4805  #pragma warning(default: 4075)
4806 #else
4807  const InitInteger s_init;
4808 #endif
4809 
4810 // ***************** Library code ********************
4811 
4813 {
4814 #if defined(CRYPTOPP_CXX11_DYNAMIC_INIT)
4815  static Integer s_zero(0L);
4816  return s_zero;
4817 #elif defined(HAVE_MSC_INIT_PRIORITY) && defined(USE_MSC_INIT_PRIORITY)
4818  return g_zero;
4819 #else
4820  return Singleton<Integer, NewInteger<0L> >().Ref();
4821 #endif
4822 }
4823 
4825 {
4826 #if defined(CRYPTOPP_CXX11_DYNAMIC_INIT)
4827  static Integer s_one(1L);
4828  return s_one;
4829 #elif defined(HAVE_MSC_INIT_PRIORITY) && defined(USE_MSC_INIT_PRIORITY)
4830  return g_one;
4831 #else
4832  return Singleton<Integer, NewInteger<1L> >().Ref();
4833 #endif
4834 }
4835 
4837 {
4838 #if defined(CRYPTOPP_CXX11_DYNAMIC_INIT)
4839  static Integer s_two(2L);
4840  return s_two;
4841 #elif defined(HAVE_MSC_INIT_PRIORITY) && defined(USE_MSC_INIT_PRIORITY)
4842  return g_two;
4843 #else
4844  return Singleton<Integer, NewInteger<2L> >().Ref();
4845 #endif
4846 }
4847 
4848 NAMESPACE_END
4849 
4850 #endif // CRYPTOPP_IMPORTS
Used to pass byte array input as part of a NameValuePairs object.
Definition: algparam.h:20
An invalid argument was detected.
Definition: cryptlib.h:199
unsigned int WordCount() const
Determines the number of words required to represent the Integer.
Definition: integer.cpp:3305
Integer MultiplicativeInverse() const
Calculate multiplicative inverse.
Definition: integer.cpp:4357
Integer & Reduce(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4551
bool NotZero() const
Determines if the Integer is non-0.
Definition: integer.h:333
Classes for working with NameValuePairs.
Integer Or(const Integer &t) const
Bitwise OR.
Definition: integer.cpp:3772
Integer & operator|=(const Integer &t)
Bitwise OR Assignment.
Definition: integer.cpp:4028
void swap(SecBlock< T, A > &b)
Swap contents with another SecBlock.
Definition: secblock.h:805
Integer And(const Integer &t) const
Bitwise AND.
Definition: integer.cpp:3746
a number which is probabilistically prime
Definition: integer.h:95
Utility functions for the Crypto++ library.
Integer Plus(const Integer &b) const
Addition.
Definition: integer.cpp:3898
virtual size_t Peek(byte &outByte) const
Peek a 8-bit byte.
Definition: cryptlib.cpp:556
ByteOrder
Provides the byte ordering.
Definition: cryptlib.h:140
Restricts the instantiation of a class to one static object without locks.
Definition: misc.h:291
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:744
an unsigned value
Definition: integer.h:85
T GetValueWithDefault(const char *name, T defaultValue) const
Get a named value.
Definition: cryptlib.h:360
bool IsSquare() const
Determine whether this integer is a perfect square.
Definition: integer.cpp:4346
void DEREncode(BufferedTransformation &bt) const
Encode in DER format.
Definition: integer.cpp:3405
size_t DEREncodeUnsigned(BufferedTransformation &out, T w, byte asnTag=INTEGER)
DER Encode unsigned value.
Definition: asn.h:457
virtual void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: cryptlib.cpp:317
size_t size() const
Length of the memory block.
Definition: algparam.h:84
size_t BitsToBytes(size_t bitCount)
Returns the number of 8-bit bytes or octets required for the specified number of bits.
Definition: misc.h:790
This file contains helper classes/functions for implementing public key algorithms.
void DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
Encode absolute value as big-endian octet string.
Definition: integer.cpp:3428
Integer & operator=(const Integer &t)
Assignment.
Definition: integer.cpp:3067
bool FirstPrime(Integer &p, const Integer &max, const Integer &equiv, const Integer &mod, const PrimeSelector *pSelector)
Finds a random prime of special form.
Definition: nbtheory.cpp:379
static Integer Gcd(const Integer &a, const Integer &n)
Calculate greatest common divisor.
Definition: integer.cpp:4381
void resize(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:795
Integer & operator+=(const Integer &t)
Addition Assignment.
Definition: integer.cpp:3921
size_t BitsToWords(size_t bitCount)
Returns the number of words required for the specified number of bits.
Definition: misc.h:810
void PutWord(bool assumeAligned, ByteOrder order, byte *block, T value, const byte *xorBlock=NULL)
Access a block of memory.
Definition: misc.h:2295
unsigned int BytePrecision(const T &value)
Returns the number of 8-bit bytes or octets required for a value.
Definition: misc.h:671
void CleanGrow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:777
Integer & operator--()
Pre-decrement.
Definition: integer.cpp:3726
virtual Element CascadeExponentiate(const Element &x, const Integer &e1, const Element &y, const Integer &e2) const
TODO.
Definition: algebra.cpp:323
Secure memory block with allocator and cleanup.
Definition: secblock.h:454
const Integer & Inverse(const Integer &a) const
Inverts the element in the ring.
Definition: integer.cpp:4568
signed long ConvertToLong() const
Convert the Integer to Long.
Definition: integer.cpp:2988
const Integer & Subtract(const Integer &a, const Integer &b) const
Subtracts elements in the ring.
Definition: integer.cpp:4534
void OpenPGPDecode(const byte *input, size_t inputLen)
Decode from OpenPGP format.
Definition: integer.cpp:3461
Signedness
Used when importing and exporting integers.
Definition: integer.h:83
unsigned int MaxElementByteLength() const
Provides the maximum byte size of an element in the ring.
Definition: modarith.h:232
ASN.1 object identifiers for algorthms and schemes.
Square block cipher.
Definition: square.h:24
Classes for automatic resource management.
bool IsP4()
Determines if the CPU is an Intel P4.
Definition: cpu.h:206
bool IsNegative() const
Determines if the Integer is negative.
Definition: integer.h:336
lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: queue.h:33
Library configuration file.
MontgomeryRepresentation(const Integer &modulus)
Construct a MontgomeryRepresentation.
Definition: integer.cpp:4604
static void DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
Extended Division.
Definition: integer.cpp:4170
Ring of congruence classes modulo n.
Definition: modarith.h:38
Interface for random number generators.
Definition: cryptlib.h:1330
size_t BytesToWords(size_t byteCount)
Returns the number of words required for the specified number of bytes.
Definition: misc.h:800
Common C++ header files.
void Randomize(RandomNumberGenerator &rng, size_t bitCount)
Set this Integer to random integer.
Definition: integer.cpp:3476
std::string IntToString< word64 >(word64 value, unsigned int base)
Converts an unsigned value to a string.
Definition: integer.cpp:4746
void New(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:729
void SetByte(size_t n, byte value)
Set the n-th byte to value.
Definition: integer.cpp:3113
Integer InverseMod(const Integer &n) const
Calculate multiplicative inverse.
Definition: integer.cpp:4386
the value is negative
Definition: integer.h:77
SecBlock<byte> typedef.
Definition: secblock.h:822
BER Sequence Decoder.
Definition: asn.h:305
Interface for buffered transformations.
Definition: cryptlib.h:1545
bool IsPositive() const
Determines if the Integer is positive.
Definition: integer.h:342
bool NotNegative() const
Determines if the Integer is non-negative.
Definition: integer.h:339
const Integer & Add(const Integer &a, const Integer &b) const
Adds elements in the ring.
Definition: integer.cpp:4494
static const Integer & One()
Integer representing 1.
Definition: integer.cpp:4824
Integer & Accumulate(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4514
P1363 key derivation function.
Definition: pubkey.h:729
byte order is little-endian
Definition: cryptlib.h:142
Sign
Used internally to represent the integer.
Definition: integer.h:73
Integer ConvertIn(const Integer &a) const
Reduces an element in the congruence class.
Definition: modarith.h:292
bool operator!() const
Negation.
Definition: integer.cpp:3062
Pointer that overloads operator ->
Definition: smartptr.h:36
bool IsUnit() const
Determine if 1 or -1.
Definition: integer.cpp:4352
unsigned int ByteCount() const
Determines the number of bytes required to represent the Integer.
Definition: integer.cpp:3310
Classes and functions for secure memory allocations.
void DEREncodeElement(BufferedTransformation &out, const Element &a) const
Encodes element in DER format.
Definition: integer.cpp:4473
Integer Modulo(const Integer &b) const
Remainder.
Definition: integer.cpp:4205
Copy input to a memory buffer.
Definition: filters.h:1132
void BERDecodeElement(BufferedTransformation &in, Element &a) const
Decodes element in DER format.
Definition: integer.cpp:4478
size_t MinEncodedSize(Signedness sign=UNSIGNED) const
Minimum number of bytes to encode this integer.
Definition: integer.cpp:3367
Integer DividedBy(const Integer &b) const
Division.
Definition: integer.cpp:4198
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: integer.cpp:4580
Integer Times(const Integer &b) const
Multiplication.
Definition: integer.cpp:4094
a number with no special properties
Definition: integer.h:93
const byte * begin() const
Pointer to the first byte in the memory block.
Definition: algparam.h:80
size_t Put(byte inByte, bool blocking=true)
Input a byte for processing.
Definition: cryptlib.h:1567
Integer & operator++()
Pre-increment.
Definition: integer.cpp:3705
AlgorithmParameters MakeParameters(const char *name, const T &value, bool throwIfNotUsed=true)
Create an object that implements NameValuePairs.
Definition: algparam.h:502
void swap(Integer &a)
Swaps this Integer with another Integer.
Definition: integer.cpp:3143
Integer()
Creates the zero integer.
Definition: integer.cpp:2934
unsigned int TrailingZeros(word32 v)
Determines the number of trailing 0-bits in a value.
Definition: misc.h:719
Integer & operator<<=(size_t n)
Left-shift Assignment.
Definition: integer.cpp:3990
Exception thrown when an error is encountered decoding an OpenPGP integer.
Definition: integer.h:282
void Negate()
Reverse the Sign of the Integer.
Definition: integer.cpp:4293
int Compare(const Integer &a) const
Perform signed comparison.
Definition: integer.cpp:4310
T Crop(T value, size_t bits)
Truncates the value to the specified number of bits.
Definition: misc.h:778
void BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
Decode nonnegative value from big-endian octet string.
Definition: integer.cpp:3435
Application callback to signal suitability of a cabdidate prime.
Definition: nbtheory.h:113
void ConditionalSwapPointers(bool c, T &a, T &b)
Performs a branchless swap of pointers a and b if condition c is true.
Definition: misc.h:1151
static Integer Power2(size_t e)
Exponentiates to a power of 2.
Definition: integer.cpp:3055
Multiple precision integer with arithmetic operations.
Definition: integer.h:49
Precompiled header file.
virtual Element Exponentiate(const Element &a, const Integer &e) const
Raises a base to an exponent in the group.
Definition: algebra.cpp:316
const Integer & Multiply(const Integer &a, const Integer &b) const
Multiplies elements in the ring.
Definition: integer.cpp:4615
a signed value
Definition: integer.h:87
size_t OpenPGPEncode(byte *output, size_t bufferSize) const
Encode absolute value in OpenPGP format.
Definition: integer.cpp:3444
const Integer & Half(const Integer &a) const
Divides an element by 2.
Definition: integer.cpp:4483
static const Integer & Two()
Integer representing 2.
Definition: integer.cpp:4836
virtual lword Skip(lword skipMax=LWORD_MAX)
Discard skipMax bytes from the output buffer.
Definition: cryptlib.cpp:579
const Integer & Square(const Integer &a) const
Square an element in the ring.
Definition: integer.cpp:4628
Integer & operator^=(const Integer &t)
Bitwise XOR Assignment.
Definition: integer.cpp:4049
#define MEMORY_BARRIER
A memory barrier.
Definition: misc.h:255
RandomNumberType
Properties of a random integer.
Definition: integer.h:91
const char * Seed()
ConstByteArrayParameter.
Definition: argnames.h:19
bool IsEven() const
Determines if the Integer is even parity.
Definition: integer.h:348
byte order is big-endian
Definition: cryptlib.h:144
const T & STDMIN(const T &a, const T &b)
Replacement function for std::min.
Definition: misc.h:507
String-based implementation of Store interface.
Definition: filters.h:1191
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:60
static void Divide(Integer &r, Integer &q, const Integer &a, const Integer &d)
Extended Division.
Definition: integer.cpp:4152
ModularArithmetic(const Integer &modulus=Integer::One())
Construct a ModularArithmetic.
Definition: modarith.h:49
void BERDecodeError()
Raises a BERDecodeErr.
Definition: asn.h:69
Data structure used to store byte strings.
Definition: queue.h:18
Functions for CPU features and intrinsics.
virtual const Element & Gcd(const Element &a, const Element &b) const
Calculates the greatest common denominator in the ring.
Definition: algebra.cpp:56
Classes and functions for working with ANS.1 objects.
Classes for SHA-1 and SHA-2 family of message digests.
void SetBit(size_t n, bool value=1)
Set the n-th bit to value.
Definition: integer.cpp:3089
size_t GetWord16(word16 &value, ByteOrder order=BIG_ENDIAN_ORDER)
Retrieve a 16-bit word.
Definition: cryptlib.cpp:799
iterator begin()
Provides an iterator pointing to the first element in the memory block.
Definition: secblock.h:536
unsigned int BitCount() const
Determines the number of bits required to represent the Integer.
Definition: integer.cpp:3319
const char * PointerToPrimeSelector()
const PrimeSelector *
Definition: argnames.h:42
Implementation of BufferedTransformation's attachment interface.
Classes and functions for number theoretic operations.
byte GetByte(size_t i) const
Provides the i-th byte of the Integer.
Definition: integer.cpp:3103
Exception thrown when division by 0 is encountered.
Definition: integer.h:55
DER Sequence Encoder.
Definition: asn.h:315
void Encode(byte *output, size_t outputLen, Signedness sign=UNSIGNED) const
Encode in big-endian format.
Definition: integer.cpp:3383
Integer Squared() const
Multiply this integer by itself.
Definition: integer.h:609
T1 SaturatingSubtract1(const T1 &a, const T2 &b)
Performs a saturating subtract clamped at 1.
Definition: misc.h:907
virtual lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: cryptlib.cpp:510
Exception thrown when a random number cannot be found that satisfies the condition.
Definition: integer.h:63
bool GenerateRandomNoThrow(RandomNumberGenerator &rng, const NameValuePairs &params=g_nullNameValuePairs)
Generate a random number.
Definition: integer.cpp:3530
Performs modular arithmetic in Montgomery representation for increased speed.
Definition: modarith.h:274
DER General Encoder.
Definition: asn.h:287
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:114
Integer Minus(const Integer &b) const
Subtraction.
Definition: integer.cpp:3944
void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: integer.cpp:3517
void Decode(const byte *input, size_t inputLen, Signedness sign=UNSIGNED)
Decode from big-endian byte array.
Definition: integer.cpp:3328
size_t PutWord16(word16 value, ByteOrder order=BIG_ENDIAN_ORDER, bool blocking=true)
Input a 16-bit word for processing.
Definition: cryptlib.cpp:751
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: modarith.h:306
size_t DEREncodeOctetString(BufferedTransformation &bt, const byte *str, size_t strLen)
DER encode octet string.
Definition: asn.cpp:104
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: integer.cpp:4591
Multiple precision integer with arithmetic operations.
Integer Xor(const Integer &t) const
Bitwise XOR.
Definition: integer.cpp:3798
static const Integer & Zero()
Integer representing 0.
Definition: integer.cpp:4812
const T & STDMAX(const T &a, const T &b)
Replacement function for std::max.
Definition: misc.h:518
Euclidean domain.
Definition: algebra.h:315
void Grow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:759
void BERDecode(const byte *input, size_t inputLen)
Decode from BER format.
Definition: integer.cpp:3412
std::string IntToString< Integer >(Integer value, unsigned int base)
Converts an Integer to a string.
Definition: integer.cpp:4680
virtual size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: cryptlib.cpp:533
Class file for performing modular arithmetic.
Crypto++ library namespace.
bool GetValue(const char *name, T &value) const
Get a named value.
Definition: cryptlib.h:347
Integer & operator>>=(size_t n)
Right-shift Assignment.
Definition: integer.cpp:4002
bool GetBit(size_t i) const
Provides the i-th bit of the Integer.
Definition: integer.cpp:3079
Integer SquareRoot() const
Extract square root.
Definition: integer.cpp:4328
BER General Decoder.
Definition: asn.h:254
bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:383
bool IsConvertableToLong() const
Determines if the Integer is convertable to Long.
Definition: integer.cpp:2974
Object Identifier.
Definition: asn.h:166
size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: queue.cpp:300
Integer AbsoluteValue() const
Retrieve the absolute value of this integer.
Definition: integer.cpp:3136
Integer & operator&=(const Integer &t)
Bitwise AND Assignment.
Definition: integer.cpp:4016
Integer & operator-=(const Integer &t)
Subtraction Assignment.
Definition: integer.cpp:3967
unsigned int BitPrecision(const T &value)
Returns the number of bits required for a value.
Definition: misc.h:694
size_type size() const
Provides the count of elements in the SecBlock.
Definition: secblock.h:561
void DEREncode(BufferedTransformation &bt) const
Encodes in DER format.
Definition: integer.cpp:4465
lword GetBits(size_t i, size_t n) const
Provides the low order bits of the Integer.
Definition: integer.cpp:3120
Integer operator-() const
Subtraction.
Definition: integer.cpp:3129
the value is positive or 0
Definition: integer.h:75
const Integer & MultiplicativeInverse(const Integer &a) const
Calculate the multiplicative inverse of an element in the ring.
Definition: integer.cpp:4654
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: modarith.h:309
bool IsOdd() const
Determines if the Integer is odd parity.
Definition: integer.h:351
virtual void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the Ring.
Definition: algebra.cpp:334
Interface for retrieving values given their names.
Definition: cryptlib.h:290
Integer ConvertOut(const Integer &a) const
Reduces an element in the congruence class.
Definition: integer.cpp:4641