84#ifndef CRYPTOPP_IMPORTS
85#ifndef CRYPTOPP_GENERATE_X64_MASM
95# ifndef CRYPTOPP_DEBUG
96# pragma optimize("", off)
97# pragma optimize("ts", on)
104#if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE))
105# define CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS 1
// Pointer casts for SSE2 intrinsics: route through void* first so the
// compiler does not warn about alignment/strict-aliasing when converting
// byte pointers to __m128i pointers.
#define M128I_CAST(x) ((__m128i *)(void *)(x))
#define CONST_M128I_CAST(x) ((const __m128i *)(const void *)(x))
112#if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
113# if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
// 16-byte-aligned encryption table used by the SSE2/MASM assembly path.
// It lives in a named namespace so the assembly can reference it by a
// predictable mangled name; the using-directive makes it visible here as
// plain `Te`. (256+2: two extra entries — see the zero fill at orig L290.)
namespace rdtable {CRYPTOPP_ALIGN_DATA(16) word64 Te[256+2];}
using namespace rdtable;
121# if defined(CRYPTOPP_X64_MASM_AVAILABLE)
123namespace rdtable {CRYPTOPP_ALIGN_DATA(16)
word64 Te[256+2];}
125CRYPTOPP_ALIGN_DATA(16) static
word32 Te[256*4];
126CRYPTOPP_ALIGN_DATA(16) static
word32 Td[256*4];
// Lazy-initialization flags set once FillEncTable/FillDecTable have
// populated Te/Td. NOTE(review): volatile here acts only as a cheap
// "already filled" guard, not as real thread synchronization — confirm
// the callers tolerate a benign double fill.
static volatile bool s_TeFilled = false, s_TdFilled = false;
131ANONYMOUS_NAMESPACE_BEGIN
133#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86
158static inline bool AliasedWithTable(
const byte *begin,
const byte *end)
160 ptrdiff_t s0 = uintptr_t(begin)%4096, s1 = uintptr_t(end)%4096;
161 ptrdiff_t t0 = uintptr_t(Te)%4096, t1 = (uintptr_t(Te)+
sizeof(Te))%4096;
163 return (s0 >= t0 && s0 < t1) || (s1 > t0 && s1 <= t1);
165 return (s0 < t1 || s1 <= t1) || (s0 >= t0 || s1 > t0);
170 word32 subkeys[4*12], workspace[8];
171 const byte *inBlocks, *inXorBlocks, *outXorBlocks;
173 size_t inIncrement, inXorIncrement, outXorIncrement, outIncrement;
174 size_t regSpill, lengthAndCounterFlag, keysBegin;
// Work-area geometry for the assembly path: allocate one page plus a
// 256-byte slack block so the `Locals` frame can be slid to an address
// that does not alias the Te table modulo the page size (see
// AliasedWithTable).
const size_t s_aliasPageSize = 4096;
const size_t s_aliasBlockSize = 256;
const size_t s_sizeToAllocate = s_aliasPageSize + s_aliasBlockSize + sizeof(Locals);
183ANONYMOUS_NAMESPACE_END
187#define QUARTER_ROUND(L, T, t, a, b, c, d) \
188 a ^= L(T, 3, byte(t)); t >>= 8;\
189 b ^= L(T, 2, byte(t)); t >>= 8;\
190 c ^= L(T, 1, byte(t)); t >>= 8;\
// Final encryption round, little-endian layout: for each byte of t, read
// byte 1 of the corresponding word64 Te entry (the S-box byte) into the
// output scratch block, consuming t one byte at a time.
#define QUARTER_ROUND_LE(t, a, b, c, d) \
	tempBlock[a] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
	tempBlock[b] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
	tempBlock[c] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
	tempBlock[d] = ((byte *)(Te+t))[1];
199#if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
// Final decryption round when unaligned access is allowed: Td entries are
// word64s; GetNativeByteOrder()*7 selects byte 0 of the entry on
// little-endian targets and byte 7 on big-endian ones.
#define QUARTER_ROUND_LD(t, a, b, c, d) \
	tempBlock[a] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
	tempBlock[b] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
	tempBlock[c] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
	tempBlock[d] = ((byte *)(Td+t))[GetNativeByteOrder()*7];
// Portable fallback for the final decryption round: use the plain inverse
// S-box Sd directly instead of peeking inside the Td table entries.
#define QUARTER_ROUND_LD(t, a, b, c, d) \
	tempBlock[a] = Sd[byte(t)]; t >>= 8;\
	tempBlock[b] = Sd[byte(t)]; t >>= 8;\
	tempBlock[c] = Sd[byte(t)]; t >>= 8;\
	tempBlock[d] = Sd[t];
// Inner-round quarter rounds for encryption (Te) and decryption (Td),
// built on the TL_M table-lookup macro (defined per byte order and
// access mode below).
#define QUARTER_ROUND_E(t, a, b, c, d) QUARTER_ROUND(TL_M, Te, t, a, b, c, d)
#define QUARTER_ROUND_D(t, a, b, c, d) QUARTER_ROUND(TL_M, Td, t, a, b, c, d)
216#if (CRYPTOPP_LITTLE_ENDIAN)
217 #define QUARTER_ROUND_FE(t, a, b, c, d) QUARTER_ROUND(TL_F, Te, t, d, c, b, a)
218 #define QUARTER_ROUND_FD(t, a, b, c, d) QUARTER_ROUND(TL_F, Td, t, d, c, b, a)
219 #if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
220 #define TL_F(T, i, x) (*(word32 *)(void *)((byte *)T + x*8 + (6-i)%4+1))
221 #define TL_M(T, i, x) (*(word32 *)(void *)((byte *)T + x*8 + (i+3)%4+1))
223 #define TL_F(T, i, x) rotrFixed(T[x], (3-i)*8)
224 #define TL_M(T, i, x) T[i*256 + x]
227 #define QUARTER_ROUND_FE(t, a, b, c, d) QUARTER_ROUND(TL_F, Te, t, a, b, c, d)
228 #define QUARTER_ROUND_FD(t, a, b, c, d) QUARTER_ROUND(TL_F, Td, t, a, b, c, d)
229 #if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
230 #define TL_F(T, i, x) (*(word32 *)(void *)((byte *)T + x*8 + (4-i)%4))
233 #define TL_F(T, i, x) rotrFixed(T[x], i*8)
234 #define TL_M(T, i, x) T[i*256 + x]
// GF(2^8) multiplication helpers for the AES field (reduction polynomial
// x^8 + x^4 + x^3 + x + 1, i.e. 0x11b). fN(x) multiplies the byte value x
// by N in the field. Inputs are byte values held in a wider integer, so
// the conditional reduction is done branch-free: each high bit shifted out
// selects a (multiple of the) 0x11b reduction via mask-and-multiply.
// Fix: macro parameters are now parenthesized so expressions such as
// f2(a ^ b) expand with the intended precedence (<< binds tighter than ^).
#define f2(x) (((x)<<1)^((((x)>>7)&1)*0x11b))
#define f4(x) (((x)<<2)^((((x)>>6)&1)*0x11b)^((((x)>>6)&2)*0x11b))
#define f8(x) (((x)<<3)^((((x)>>5)&1)*0x11b)^((((x)>>5)&2)*0x11b)^((((x)>>5)&4)*0x11b))

// Multiplication by composite constants (3, 9, 0x0b, 0x0d, 0x0e), built
// from the power-of-two helpers above; used for InvMixColumns-style math.
#define f3(x) (f2(x) ^ (x))
#define f9(x) (f8(x) ^ (x))
#define fb(x) (f8(x) ^ f2(x) ^ (x))
#define fd(x) (f8(x) ^ f4(x) ^ (x))
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
249unsigned int Rijndael::Base::OptimalDataAlignment()
const
251#if (CRYPTOPP_AESNI_AVAILABLE)
255#if (CRYPTOPP_ARM_AES_AVAILABLE)
259#if (CRYPTOGAMS_ARM_AES)
265#if (CRYPTOPP_POWER8_AES_AVAILABLE)
272void Rijndael::Base::FillEncTable()
274 for (
int i=0; i<256; i++)
277#if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
279 Te[i] =
word64(y | f3(x))<<32 | y;
282 for (
int j=0; j<4; j++)
285 y = rotrConstant<8>(y);
289#if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
290 Te[256] = Te[257] = 0;
295void Rijndael::Base::FillDecTable()
297 for (
int i=0; i<256; i++)
300#if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
302 Td[i] =
word64(y | fb(x))<<32 | y | x;
305 for (
int j=0; j<4; j++)
308 y = rotrConstant<8>(y);
315#if (CRYPTOPP_AESNI_AVAILABLE)
316extern void Rijndael_UncheckedSetKey_SSE4_AESNI(
const byte *userKey,
size_t keyLen,
word32* rk);
317extern void Rijndael_UncheckedSetKeyRev_AESNI(
word32 *key,
unsigned int rounds);
319extern size_t Rijndael_Enc_AdvancedProcessBlocks_AESNI(
const word32 *subkeys,
size_t rounds,
320 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags);
321extern size_t Rijndael_Dec_AdvancedProcessBlocks_AESNI(
const word32 *subkeys,
size_t rounds,
322 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags);
325#if (CRYPTOPP_ARM_AES_AVAILABLE)
326extern size_t Rijndael_Enc_AdvancedProcessBlocks_ARMV8(
const word32 *subkeys,
size_t rounds,
327 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags);
328extern size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(
const word32 *subkeys,
size_t rounds,
329 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags);
332#if (CRYPTOGAMS_ARM_AES)
333extern "C" int cryptogams_AES_set_encrypt_key(
const unsigned char *userKey,
const int bitLen,
word32 *rkey);
334extern "C" int cryptogams_AES_set_decrypt_key(
const unsigned char *userKey,
const int bitLen,
word32 *rkey);
335extern "C" void cryptogams_AES_encrypt_block(
const unsigned char *in,
unsigned char *out,
const word32 *rkey);
336extern "C" void cryptogams_AES_decrypt_block(
const unsigned char *in,
unsigned char *out,
const word32 *rkey);
339#if (CRYPTOPP_POWER8_AES_AVAILABLE)
340extern void Rijndael_UncheckedSetKey_POWER8(
const byte* userKey,
size_t keyLen,
341 word32* rk,
const byte* Se);
343extern size_t Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(
const word32 *subkeys,
size_t rounds,
344 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags);
345extern size_t Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(
const word32 *subkeys,
size_t rounds,
346 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags);
349#if (CRYPTOGAMS_ARM_AES)
350int CRYPTOGAMS_set_encrypt_key(
const byte *userKey,
const int bitLen,
word32 *rkey)
352 return cryptogams_AES_set_encrypt_key(userKey, bitLen, rkey);
354int CRYPTOGAMS_set_decrypt_key(
const byte *userKey,
const int bitLen,
word32 *rkey)
356 return cryptogams_AES_set_decrypt_key(userKey, bitLen, rkey);
358void CRYPTOGAMS_encrypt(
const byte *inBlock,
const byte *xorBlock,
byte *outBlock,
const word32 *rkey)
360 cryptogams_AES_encrypt_block(inBlock, outBlock, rkey);
362 xorbuf (outBlock, xorBlock, 16);
364void CRYPTOGAMS_decrypt(
const byte *inBlock,
const byte *xorBlock,
byte *outBlock,
const word32 *rkey)
366 cryptogams_AES_decrypt_block(inBlock, outBlock, rkey);
368 xorbuf (outBlock, xorBlock, 16);
372std::string Rijndael::Base::AlgorithmProvider()
const
374#if (CRYPTOPP_AESNI_AVAILABLE)
378#if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
382#if (CRYPTOPP_ARM_AES_AVAILABLE)
386#if (CRYPTOGAMS_ARM_AES)
390#if (CRYPTOPP_POWER8_AES_AVAILABLE)
397void Rijndael::Base::UncheckedSetKey(
const byte *userKey,
unsigned int keyLen,
const NameValuePairs &)
399 AssertValidKeyLength(keyLen);
401#if (CRYPTOGAMS_ARM_AES)
404 m_rounds = keyLen/4 + 6;
405 m_key.New(4*(14+1)+4);
407 if (IsForwardTransformation())
408 CRYPTOGAMS_set_encrypt_key(userKey, keyLen*8, m_key.begin());
410 CRYPTOGAMS_set_decrypt_key(userKey, keyLen*8, m_key.begin());
415#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86
416 m_aliasBlock.New(s_sizeToAllocate);
419 m_aliasBlock.SetMark(0);
422 m_rounds = keyLen/4 + 6;
423 m_key.New(4*(m_rounds+1));
426#if (CRYPTOPP_AESNI_AVAILABLE && CRYPTOPP_SSE41_AVAILABLE && (!defined(_MSC_VER) || _MSC_VER >= 1600 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32))
428 if (HasAESNI() && HasSSE41())
432 Rijndael_UncheckedSetKey_SSE4_AESNI(userKey, keyLen, rk);
433 if (!IsForwardTransformation())
434 Rijndael_UncheckedSetKeyRev_AESNI(m_key, m_rounds);
440#if CRYPTOPP_POWER8_AES_AVAILABLE
445 Rijndael_UncheckedSetKey_POWER8(userKey, keyLen, rk, Se);
456 temp = rk[keyLen/4-1];
457 word32 x = (
word32(Se[GETBYTE(temp, 2)]) << 24) ^ (
word32(Se[GETBYTE(temp, 1)]) << 16) ^
458 (
word32(Se[GETBYTE(temp, 0)]) << 8) ^ Se[GETBYTE(temp, 3)];
459 rk[keyLen/4] = rk[0] ^ x ^ *(rc++);
460 rk[keyLen/4+1] = rk[1] ^ rk[keyLen/4];
461 rk[keyLen/4+2] = rk[2] ^ rk[keyLen/4+1];
462 rk[keyLen/4+3] = rk[3] ^ rk[keyLen/4+2];
464 if (rk + keyLen/4 + 4 == m_key.end())
469 rk[10] = rk[ 4] ^ rk[ 9];
470 rk[11] = rk[ 5] ^ rk[10];
472 else if (keyLen == 32)
475 rk[12] = rk[ 4] ^ (
word32(Se[GETBYTE(temp, 3)]) << 24) ^ (
word32(Se[GETBYTE(temp, 2)]) << 16) ^ (
word32(Se[GETBYTE(temp, 1)]) << 8) ^ Se[GETBYTE(temp, 0)];
476 rk[13] = rk[ 5] ^ rk[12];
477 rk[14] = rk[ 6] ^ rk[13];
478 rk[15] = rk[ 7] ^ rk[14];
485 if (IsForwardTransformation())
	// Applies the AES InvMixColumns transform to one round-key word:
	// each byte is first pushed through the forward S-box Se and then
	// through the Td table (the standard "equivalent inverse cipher"
	// key-schedule trick, since Td = InvMixColumns ∘ InvSubBytes ∘ Se).
	#define InverseMixColumn(x) \
		TL_M(Td, 0, Se[GETBYTE(x, 3)]) ^ TL_M(Td, 1, Se[GETBYTE(x, 2)]) ^ \
		TL_M(Td, 2, Se[GETBYTE(x, 1)]) ^ TL_M(Td, 3, Se[GETBYTE(x, 0)])
503 for (i = 4, j = 4*m_rounds-4; i < j; i += 4, j -= 4)
505 temp = InverseMixColumn(rk[i ]); rk[i ] = InverseMixColumn(rk[j ]); rk[j ] = temp;
506 temp = InverseMixColumn(rk[i + 1]); rk[i + 1] = InverseMixColumn(rk[j + 1]); rk[j + 1] = temp;
507 temp = InverseMixColumn(rk[i + 2]); rk[i + 2] = InverseMixColumn(rk[j + 2]); rk[j + 2] = temp;
508 temp = InverseMixColumn(rk[i + 3]); rk[i + 3] = InverseMixColumn(rk[j + 3]); rk[j + 3] = temp;
511 rk[i+0] = InverseMixColumn(rk[i+0]);
512 rk[i+1] = InverseMixColumn(rk[i+1]);
513 rk[i+2] = InverseMixColumn(rk[i+2]);
514 rk[i+3] = InverseMixColumn(rk[i+3]);
522#if CRYPTOPP_AESNI_AVAILABLE
526#if CRYPTOPP_ARM_AES_AVAILABLE
532void Rijndael::Enc::ProcessAndXorBlock(
const byte *inBlock,
const byte *xorBlock,
byte *outBlock)
const
534#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE) || CRYPTOPP_AESNI_AVAILABLE
535# if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
541 (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
546#if (CRYPTOPP_ARM_AES_AVAILABLE)
549 (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
554#if (CRYPTOGAMS_ARM_AES)
557 CRYPTOGAMS_encrypt(inBlock, xorBlock, outBlock, m_key.begin());
562#if (CRYPTOPP_POWER8_AES_AVAILABLE)
565 (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
572 word32 s0, s1, s2, s3, t0, t1, t2, t3;
573 Block::Get(inBlock)(s0)(s1)(s2)(s3);
588 const int cacheLineSize = GetCacheLineSize();
592#if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
593 for (i=0; i<2048; i+=cacheLineSize)
595 for (i=0; i<1024; i+=cacheLineSize)
597 u &= *(
const word32 *)(
const void *)(((
const byte *)Te)+i);
599 s0 |= u; s1 |= u; s2 |= u; s3 |= u;
601 QUARTER_ROUND_FE(s3, t0, t1, t2, t3)
602 QUARTER_ROUND_FE(s2, t3, t0, t1, t2)
603 QUARTER_ROUND_FE(s1, t2, t3, t0, t1)
604 QUARTER_ROUND_FE(s0, t1, t2, t3, t0)
607 unsigned int r = m_rounds/2 - 1;
610 s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
612 QUARTER_ROUND_E(t3, s0, s1, s2, s3)
613 QUARTER_ROUND_E(t2, s3, s0, s1, s2)
614 QUARTER_ROUND_E(t1, s2, s3, s0, s1)
615 QUARTER_ROUND_E(t0, s1, s2, s3, s0)
617 t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
619 QUARTER_ROUND_E(s3, t0, t1, t2, t3)
620 QUARTER_ROUND_E(s2, t3, t0, t1, t2)
621 QUARTER_ROUND_E(s1, t2, t3, t0, t1)
622 QUARTER_ROUND_E(s0, t1, t2, t3, t0)
628 byte *
const tempBlock = (
byte *)tbw;
630 QUARTER_ROUND_LE(t2, 15, 2, 5, 8)
631 QUARTER_ROUND_LE(t1, 11, 14, 1, 4)
632 QUARTER_ROUND_LE(t0, 7, 10, 13, 0)
633 QUARTER_ROUND_LE(t3, 3, 6, 9, 12)
635 Block::Put(xorBlock, outBlock)(tbw[0]^rk[0])(tbw[1]^rk[1])(tbw[2]^rk[2])(tbw[3]^rk[3]);
638void Rijndael::Dec::ProcessAndXorBlock(const
byte *inBlock, const
byte *xorBlock,
byte *outBlock)
const
640#if CRYPTOPP_AESNI_AVAILABLE
643 (void)Rijndael::Dec::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
648#if (CRYPTOPP_ARM_AES_AVAILABLE)
651 (void)Rijndael::Dec::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
656#if (CRYPTOGAMS_ARM_AES)
659 CRYPTOGAMS_decrypt(inBlock, xorBlock, outBlock, m_key.begin());
664#if (CRYPTOPP_POWER8_AES_AVAILABLE)
667 (void)Rijndael::Dec::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
674 word32 s0, s1, s2, s3, t0, t1, t2, t3;
675 Block::Get(inBlock)(s0)(s1)(s2)(s3);
690 const int cacheLineSize = GetCacheLineSize();
694#if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
695 for (i=0; i<2048; i+=cacheLineSize)
697 for (i=0; i<1024; i+=cacheLineSize)
699 u &= *(
const word32 *)(
const void *)(((
const byte *)Td)+i);
701 s0 |= u; s1 |= u; s2 |= u; s3 |= u;
703 QUARTER_ROUND_FD(s3, t2, t1, t0, t3)
704 QUARTER_ROUND_FD(s2, t1, t0, t3, t2)
705 QUARTER_ROUND_FD(s1, t0, t3, t2, t1)
706 QUARTER_ROUND_FD(s0, t3, t2, t1, t0)
709 unsigned int r = m_rounds/2 - 1;
712 s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
714 QUARTER_ROUND_D(t3, s2, s1, s0, s3)
715 QUARTER_ROUND_D(t2, s1, s0, s3, s2)
716 QUARTER_ROUND_D(t1, s0, s3, s2, s1)
717 QUARTER_ROUND_D(t0, s3, s2, s1, s0)
719 t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
721 QUARTER_ROUND_D(s3, t2, t1, t0, t3)
722 QUARTER_ROUND_D(s2, t1, t0, t3, t2)
723 QUARTER_ROUND_D(s1, t0, t3, t2, t1)
724 QUARTER_ROUND_D(s0, t3, t2, t1, t0)
729#if !(defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS))
734 for (i=0; i<256; i+=cacheLineSize)
735 u &= *(
const word32 *)(
const void *)(Sd+i);
736 u &= *(
const word32 *)(
const void *)(Sd+252);
737 t0 |= u; t1 |= u; t2 |= u; t3 |= u;
741 byte *
const tempBlock = (
byte *)tbw;
743 QUARTER_ROUND_LD(t2, 7, 2, 13, 8)
744 QUARTER_ROUND_LD(t1, 3, 14, 9, 4)
745 QUARTER_ROUND_LD(t0, 15, 10, 5, 0)
746 QUARTER_ROUND_LD(t3, 11, 6, 1, 12)
748 Block::Put(xorBlock, outBlock)(tbw[0]^rk[0])(tbw[1]^rk[1])(tbw[2]^rk[2])(tbw[3]^rk[3]);
753#if CRYPTOPP_MSC_VERSION
754# pragma warning(disable: 4731)
759#if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
761CRYPTOPP_NAKED
void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks_SSE2(
void *locals,
const word32 *k)
763 CRYPTOPP_UNUSED(locals); CRYPTOPP_UNUSED(k);
// Stack-frame layout for the assembly routine, 32-bit pointer variant:
// the in/xor/out block pointers are 4 bytes apart (+4/+8/+12).
// NOTE(review): this appears to be the x86 branch of an #if whose guard
// lines are not visible in this excerpt — confirm against the full file.
#define L_INDEX(i) (L_REG+768+i)
#define L_INXORBLOCKS L_INBLOCKS+4
#define L_OUTXORBLOCKS L_INBLOCKS+8
#define L_OUTBLOCKS L_INBLOCKS+12
#define L_INCREMENTS L_INDEX(16*15)
#define L_SP L_INDEX(16*16)
#define L_LENGTH L_INDEX(16*16+4)
#define L_KEYS_BEGIN L_INDEX(16*16+8)
782 AS2( movd mm7, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
783 AS2( pxor MM(a), mm7)\
787 AS2( movd MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
// Same frame layout, 64-bit pointer variant: block pointers are 8 bytes
// apart (+8/+16/+24), so all subsequent offsets shift accordingly.
// NOTE(review): guard lines for this branch are elided in this excerpt —
// confirm which #if arm this belongs to in the full file.
#define L_INDEX(i) (L_REG+i)
#define L_INXORBLOCKS L_INBLOCKS+8
#define L_OUTXORBLOCKS L_INBLOCKS+16
#define L_OUTBLOCKS L_INBLOCKS+24
#define L_INCREMENTS L_INDEX(16*16)
#define L_LENGTH L_INDEX(16*18+8)
#define L_KEYS_BEGIN L_INDEX(16*19)
812 AS2( xor MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
816 AS2( mov MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
820#define L_SUBKEYS L_INDEX(0)
821#define L_SAVED_X L_SUBKEYS
822#define L_KEY12 L_INDEX(16*12)
823#define L_LASTROUND L_INDEX(16*13)
824#define L_INBLOCKS L_INDEX(16*14)
825#define MAP0TO4(i) (ASM_MOD(i+3,4)+1)
829 AS2( xor a, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
833 AS2( mov a, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
835#ifdef CRYPTOPP_GENERATE_X64_MASM
837 Rijndael_Enc_AdvancedProcessBlocks PROC FRAME
844 mov AS_REG_7, ?Te@rdtable@
CryptoPP@@3PA_KA
845 mov edi, DWORD PTR [?g_cacheLineSize@
CryptoPP@@3IA]
846#elif defined(__GNUC__)
855 AS2( mov AS_REG_7, WORD_REG(si))
861 AS2( lea AS_REG_7, [Te])
862 AS2( mov edi, [g_cacheLineSize])
866 AS2( mov [ecx+16*12+16*4], esp)
867 AS2( lea esp, [ecx-768])
871 AS2( mov WORD_REG(si), [L_KEYS_BEGIN])
872 AS2( mov WORD_REG(ax), 16)
873 AS2( and WORD_REG(ax), WORD_REG(si))
874 AS2( movdqa xmm3, XMMWORD_PTR [WORD_REG(dx)+16+WORD_REG(ax)])
875 AS2( movdqa [L_KEY12], xmm3)
876 AS2( lea WORD_REG(ax), [WORD_REG(dx)+WORD_REG(ax)+2*16])
877 AS2( sub WORD_REG(ax), WORD_REG(si))
879 AS2( movdqa xmm0, [WORD_REG(ax)+WORD_REG(si)])
880 AS2( movdqa XMMWORD_PTR [L_SUBKEYS+WORD_REG(si)], xmm0)
881 AS2( add WORD_REG(si), 16)
882 AS2( cmp WORD_REG(si), 16*12)
888 AS2( movdqa xmm4, [WORD_REG(ax)+WORD_REG(si)])
889 AS2( movdqa xmm1, [WORD_REG(dx)])
890 AS2( MOVD MM(1), [WORD_REG(dx)+4*4])
891 AS2( mov ebx, [WORD_REG(dx)+5*4])
892 AS2( mov ecx, [WORD_REG(dx)+6*4])
893 AS2( mov edx, [WORD_REG(dx)+7*4])
896 AS2( xor WORD_REG(ax), WORD_REG(ax))
898 AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
899 AS2( add WORD_REG(ax), WORD_REG(di))
900 AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
901 AS2( add WORD_REG(ax), WORD_REG(di))
902 AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
903 AS2( add WORD_REG(ax), WORD_REG(di))
904 AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
905 AS2( add WORD_REG(ax), WORD_REG(di))
906 AS2( cmp WORD_REG(ax), 2048)
912 AS2( test DWORD PTR [L_LENGTH], 1)
918 AS2( mov WORD_REG(si), [L_INBLOCKS])
919 AS2( movdqu xmm2, [WORD_REG(si)])
920 AS2( pxor xmm2, xmm1)
921 AS2( psrldq xmm1, 14)
923 AS2( mov al, BYTE PTR [WORD_REG(si)+15])
924 AS2( MOVD MM(2), eax)
963 AS2( mov eax, [L_KEY12+0*4])
964 AS2( mov edi, [L_KEY12+2*4])
965 AS2( MOVD MM(0), [L_KEY12+3*4])
972 AS2( xor ebx, [L_KEY12+1*4])
984 AS2( MOVD edx, MM(1))
985 AS2( MOVD [L_SAVED_X+3*4], MM(0))
986 AS2( mov [L_SAVED_X+0*4], eax)
987 AS2( mov [L_SAVED_X+1*4], ebx)
988 AS2( mov [L_SAVED_X+2*4], edi)
994 AS2( MOVD MM(1), [L_KEY12+0*4])
995 AS2( mov ebx, [L_KEY12+1*4])
996 AS2( mov ecx, [L_KEY12+2*4])
997 AS2( mov edx, [L_KEY12+3*4])
999 AS2( mov WORD_REG(ax), [L_INBLOCKS])
1000 AS2( movdqu xmm2, [WORD_REG(ax)])
1001 AS2( mov WORD_REG(si), [L_INXORBLOCKS])
1002 AS2( movdqu xmm5, [WORD_REG(si)])
1003 AS2( pxor xmm2, xmm1)
1004 AS2( pxor xmm2, xmm5)
1007 AS2( movd eax, xmm2)
1008 AS2( psrldq xmm2, 4)
1009 AS2( movd edi, xmm2)
1010 AS2( psrldq xmm2, 4)
1017 AS2( movd edi, xmm2)
1018 AS2( psrldq xmm2, 4)
1025 AS2( movd edi, xmm2)
1037 AS2( MOVD eax, MM(1))
1039 AS2( add L_REG, [L_KEYS_BEGIN])
1040 AS2( add L_REG, 4*16)
1046 AS2( MOVD ecx, MM(2))
1047 AS2( MOVD edx, MM(1))
1048 AS2( mov eax, [L_SAVED_X+0*4])
1049 AS2( mov ebx, [L_SAVED_X+1*4])
1051 AS2( and WORD_REG(cx), 255)
1054 AS2( paddb MM(2), mm3)
1059 AS2( xor edx, DWORD PTR [AS_REG_7+WORD_REG(cx)*8+3])
1063 AS2( xor ecx, [L_SAVED_X+2*4])
1066 AS2( xor edx, [L_SAVED_X+3*4])
1068 AS2( add L_REG, [L_KEYS_BEGIN])
1069 AS2( add L_REG, 3*16)
1100 AS2( MOVD MM(0), [L_SUBKEYS-4*16+3*4])
1101 AS2( mov edi, [L_SUBKEYS-4*16+2*4])
1104 AS2( xor eax, [L_SUBKEYS-4*16+0*4])
1105 AS2( xor ebx, [L_SUBKEYS-4*16+1*4])
1106 AS2( MOVD edx, MM(0))
1109 AS2( MOVD MM(0), [L_SUBKEYS-4*16+7*4])
1110 AS2( mov edi, [L_SUBKEYS-4*16+6*4])
1113 AS2( xor eax, [L_SUBKEYS-4*16+4*4])
1114 AS2( xor ebx, [L_SUBKEYS-4*16+5*4])
1115 AS2( MOVD edx, MM(0))
1118 AS2( test L_REG, 255)
1122 AS2( sub L_REG, 16*16)
1124#define LAST(a, b, c) \
1125 AS2( movzx esi, a )\
1126 AS2( movzx edi, BYTE PTR [AS_REG_7+WORD_REG(si)*8+1] )\
1127 AS2( movzx esi, b )\
1128 AS2( xor edi, DWORD PTR [AS_REG_7+WORD_REG(si)*8+0] )\
1129 AS2( mov WORD PTR [L_LASTROUND+c], di )\
1145 AS2( mov WORD_REG(ax), [L_OUTXORBLOCKS])
1146 AS2( mov WORD_REG(bx), [L_OUTBLOCKS])
1148 AS2( mov WORD_REG(cx), [L_LENGTH])
1149 AS2( sub WORD_REG(cx), 16)
1151 AS2( movdqu xmm2, [WORD_REG(ax)])
1152 AS2( pxor xmm2, xmm4)
1155 AS2( movdqa xmm0, [L_INCREMENTS])
1156 AS2( paddd xmm0, [L_INBLOCKS])
1157 AS2( movdqa [L_INBLOCKS], xmm0)
1159 AS2( movdqa xmm0, [L_INCREMENTS+16])
1160 AS2( paddq xmm0, [L_INBLOCKS+16])
1161 AS2( movdqa [L_INBLOCKS+16], xmm0)
1164 AS2( pxor xmm2, [L_LASTROUND])
1165 AS2( movdqu [WORD_REG(bx)], xmm2)
1170 AS2( mov [L_LENGTH], WORD_REG(cx))
1171 AS2( test WORD_REG(cx), 1)
1176 AS2( movdqa xmm0, [L_INCREMENTS])
1177 AS2( paddq xmm0, [L_INBLOCKS])
1178 AS2( movdqa [L_INBLOCKS], xmm0)
1186 AS2( xorps xmm0, xmm0)
1187 AS2( lea WORD_REG(ax), [L_SUBKEYS+7*16])
1188 AS2( movaps [WORD_REG(ax)-7*16], xmm0)
1189 AS2( movaps [WORD_REG(ax)-6*16], xmm0)
1190 AS2( movaps [WORD_REG(ax)-5*16], xmm0)
1191 AS2( movaps [WORD_REG(ax)-4*16], xmm0)
1192 AS2( movaps [WORD_REG(ax)-3*16], xmm0)
1193 AS2( movaps [WORD_REG(ax)-2*16], xmm0)
1194 AS2( movaps [WORD_REG(ax)-1*16], xmm0)
1195 AS2( movaps [WORD_REG(ax)+0*16], xmm0)
1196 AS2( movaps [WORD_REG(ax)+1*16], xmm0)
1197 AS2( movaps [WORD_REG(ax)+2*16], xmm0)
1198 AS2( movaps [WORD_REG(ax)+3*16], xmm0)
1199 AS2( movaps [WORD_REG(ax)+4*16], xmm0)
1200 AS2( movaps [WORD_REG(ax)+5*16], xmm0)
1201 AS2( movaps [WORD_REG(ax)+6*16], xmm0)
1203 AS2( mov esp, [L_SP])
1213#ifdef CRYPTOPP_GENERATE_X64_MASM
1219 Rijndael_Enc_AdvancedProcessBlocks ENDP
1224 :
"c" (locals),
"d" (k),
"S" (Te),
"D" (g_cacheLineSize)
1225 :
"memory",
"cc",
"%eax"
1227 ,
"%rbx",
"%r8",
"%r9",
"%r10",
"%r11",
"%r12"
1235#ifndef CRYPTOPP_GENERATE_X64_MASM
1237#ifdef CRYPTOPP_X64_MASM_AVAILABLE
1239void Rijndael_Enc_AdvancedProcessBlocks_SSE2(
void *locals,
const word32 *k);
1243#if CRYPTOPP_RIJNDAEL_ADVANCED_PROCESS_BLOCKS
1244size_t Rijndael::Enc::AdvancedProcessBlocks(
const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
const
1246#if CRYPTOPP_AESNI_AVAILABLE
1248 return Rijndael_Enc_AdvancedProcessBlocks_AESNI(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1250#if CRYPTOPP_ARM_AES_AVAILABLE
1252 return Rijndael_Enc_AdvancedProcessBlocks_ARMV8(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1254#if CRYPTOPP_POWER8_AES_AVAILABLE
1256 return Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1259#if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
1262 if (length < BLOCKSIZE)
1265 static const byte *zeros = (
const byte*)(Te+256);
1266 m_aliasBlock.SetMark(m_aliasBlock.size());
1267 byte *space = NULLPTR, *originalSpace =
const_cast<byte*
>(m_aliasBlock.data());
1270 space = originalSpace + (s_aliasBlockSize - (uintptr_t)originalSpace % s_aliasBlockSize) % s_aliasBlockSize;
1271 while (AliasedWithTable(space, space +
sizeof(Locals)))
1277 size_t increment = BLOCKSIZE;
1278 if (flags & BT_ReverseDirection)
1281 inBlocks += length - BLOCKSIZE;
1282 xorBlocks += length - BLOCKSIZE;
1283 outBlocks += length - BLOCKSIZE;
1284 increment = 0-increment;
1287 Locals &locals = *(Locals *)(
void *)space;
1289 locals.inBlocks = inBlocks;
1290 locals.inXorBlocks = (flags & BT_XorInput) && xorBlocks ? xorBlocks : zeros;
1291 locals.outXorBlocks = (flags & BT_XorInput) || !xorBlocks ? zeros : xorBlocks;
1292 locals.outBlocks = outBlocks;
1294 locals.inIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : increment;
1295 locals.inXorIncrement = (flags & BT_XorInput) && xorBlocks ? increment : 0;
1296 locals.outXorIncrement = (flags & BT_XorInput) || !xorBlocks ? 0 : increment;
1297 locals.outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : increment;
1299 locals.lengthAndCounterFlag = length - (length%16) -
bool(flags & BT_InBlockIsCounter);
1300 int keysToCopy = m_rounds - (flags & BT_InBlockIsCounter ? 3 : 2);
1301 locals.keysBegin = (12-keysToCopy)*16;
1303 Rijndael_Enc_AdvancedProcessBlocks_SSE2(&locals, m_key);
1305 return length % BLOCKSIZE;
1312size_t Rijndael::Dec::AdvancedProcessBlocks(
const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
const
1314#if CRYPTOPP_AESNI_AVAILABLE
1316 return Rijndael_Dec_AdvancedProcessBlocks_AESNI(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1318#if CRYPTOPP_ARM_AES_AVAILABLE
1320 return Rijndael_Dec_AdvancedProcessBlocks_ARMV8(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1322#if CRYPTOPP_POWER8_AES_AVAILABLE
1324 return Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
Interface for retrieving values given their names.
Library configuration file.
#define CRYPTOPP_BOOL_X86
32-bit x86 platform
#define CRYPTOPP_BOOL_X64
64-bit x64 platform
unsigned int word32
32-bit unsigned datatype
unsigned long long word64
64-bit unsigned datatype
Functions for CPU features and intrinsics.
@ BIG_ENDIAN_ORDER
byte order is big-endian
Utility functions for the Crypto++ library.
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
CRYPTOPP_DLL void xorbuf(byte *buf, const byte *mask, size_t count)
Performs an XOR of a buffer with a mask.
Crypto++ library namespace.
Classes for Rijndael encryption algorithm.
Access a block of memory.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.