diff options
author | Tom Musta <tommusta@gmail.com> | 2014-02-12 15:23:17 -0600 |
---|---|---|
committer | Alexander Graf <agraf@suse.de> | 2014-03-05 03:06:59 +0100 |
commit | 557d52fa697c938aeff2784b79df55952c3bfcc1 (patch) | |
tree | cadb732d788c6cae4a14700ef5909da8f47724d8 /target-ppc | |
parent | e8f7b27b9942d02ece7df34ae2b2a09cb9da7196 (diff) | |
download | qemu-557d52fa697c938aeff2784b79df55952c3bfcc1.zip qemu-557d52fa697c938aeff2784b79df55952c3bfcc1.tar.gz qemu-557d52fa697c938aeff2784b79df55952c3bfcc1.tar.bz2 |
target-ppc: Altivec 2.07: AES Instructions
This patch adds the Vector AES instructions introduced in Power ISA
Version 2.07:
- Vector AES Cipher (vcipher)
- Vector AES Cipher Last (vcipherlast)
- Vector AES Inverse Cipher (vncipher)
- Vector AES Inverse Cipher Last (vncipherlast)
- Vector AES SubBytes (vsbox)
Note that the implementation of vncipher deviates from the RTL in
ISA V2.07. However it does match the verbal description in the
third paragraph. The RTL will be fixed in ISA V2.07B. The
implementation here has been tested against actual P8 hardware.
Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
Diffstat (limited to 'target-ppc')
-rw-r--r-- | target-ppc/helper.h | 6 | ||||
-rw-r--r-- | target-ppc/int_helper.c | 280 | ||||
-rw-r--r-- | target-ppc/translate.c | 29 |
3 files changed, 315 insertions, 0 deletions
diff --git a/target-ppc/helper.h b/target-ppc/helper.h index ef6aa58..93e549e 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -316,6 +316,12 @@ DEF_HELPER_3(vpmsumh, void, avr, avr, avr) DEF_HELPER_3(vpmsumw, void, avr, avr, avr) DEF_HELPER_3(vpmsumd, void, avr, avr, avr) +DEF_HELPER_2(vsbox, void, avr, avr) +DEF_HELPER_3(vcipher, void, avr, avr, avr) +DEF_HELPER_3(vcipherlast, void, avr, avr, avr) +DEF_HELPER_3(vncipher, void, avr, avr, avr) +DEF_HELPER_3(vncipherlast, void, avr, avr, avr) + DEF_HELPER_4(bcdadd, i32, avr, avr, avr, i32) DEF_HELPER_4(bcdsub, i32, avr, avr, avr, i32) diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index ce7c6a0..cd04e8a 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -2338,6 +2338,286 @@ uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) return helper_bcdadd(r, a, &bcopy, ps); } +static uint8_t SBOX[256] = { +0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, +0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, +0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, +0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, +0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, +0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, +0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, +0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, +0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, +0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, +0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, +0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, +0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, +0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, +0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, +0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, +0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, +0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, +0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, +0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, +0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, +0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, +0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, +0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, +0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, +0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, +0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, +0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, +0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, +0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, +0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, +0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16, +}; + +static void SubBytes(ppc_avr_t *r, ppc_avr_t *a) +{ + int i; + VECTOR_FOR_INORDER_I(i, u8) { + r->u8[i] = SBOX[a->u8[i]]; + } +} + +static uint8_t InvSBOX[256] = { +0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, +0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB, +0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, +0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, +0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, +0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E, +0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, +0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25, +0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, +0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, +0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, +0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84, +0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, +0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06, +0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, +0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, +0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, +0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73, +0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, +0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E, +0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, +0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, +0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, +0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4, +0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, +0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F, +0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, +0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, +0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, +0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61, +0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, +0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D, +}; + +static void InvSubBytes(ppc_avr_t *r, ppc_avr_t *a) +{ + int i; + VECTOR_FOR_INORDER_I(i, u8) { + r->u8[i] = InvSBOX[a->u8[i]]; + } +} + +static uint8_t ROTL8(uint8_t x, int n) +{ + return (x << n) | (x >> (8-n)); +} + +static inline int BIT8(uint8_t x, int n) +{ + return (x & (0x80 >> n)) != 0; +} + +static uint8_t GFx02(uint8_t x) +{ + return ROTL8(x, 1) ^ (BIT8(x, 0) ? 0x1A : 0); +} + +static uint8_t GFx03(uint8_t x) +{ + return x ^ ROTL8(x, 1) ^ (BIT8(x, 0) ? 0x1A : 0); +} + +static uint8_t GFx09(uint8_t x) +{ + uint8_t term2 = ROTL8(x, 3); + uint8_t term3 = (BIT8(x, 0) ? 0x68 : 0) | (BIT8(x, 1) ? 0x14 : 0) | + (BIT8(x, 2) ? 0x02 : 0); + uint8_t term4 = (BIT8(x, 1) ? 0x20 : 0) | (BIT8(x, 2) ? 0x18 : 0); + return x ^ term2 ^ term3 ^ term4; +} + +static uint8_t GFx0B(uint8_t x) +{ + uint8_t term2 = ROTL8(x, 1); + uint8_t term3 = (x << 3) | (BIT8(x, 0) ? 0x06 : 0) | + (BIT8(x, 2) ? 0x01 : 0); + uint8_t term4 = (BIT8(x, 0) ? 0x70 : 0) | (BIT8(x, 1) ? 0x06 : 0) | + (BIT8(x, 2) ? 0x08 : 0); + uint8_t term5 = (BIT8(x, 1) ? 0x30 : 0) | (BIT8(x, 2) ? 0x02 : 0); + uint8_t term6 = BIT8(x, 2) ? 0x10 : 0; + return x ^ term2 ^ term3 ^ term4 ^ term5 ^ term6; +} + +static uint8_t GFx0D(uint8_t x) +{ + uint8_t term2 = ROTL8(x, 2); + uint8_t term3 = (x << 3) | (BIT8(x, 1) ? 0x04 : 0) | + (BIT8(x, 2) ? 0x03 : 0); + uint8_t term4 = (BIT8(x, 0) ? 0x58 : 0) | (BIT8(x, 1) ? 0x20 : 0); + uint8_t term5 = (BIT8(x, 1) ? 0x08 : 0) | (BIT8(x, 2) ? 0x10 : 0); + uint8_t term6 = BIT8(x, 2) ? 0x08 : 0; + return x ^ term2 ^ term3 ^ term4 ^ term5 ^ term6; +} + +static uint8_t GFx0E(uint8_t x) +{ + uint8_t term1 = ROTL8(x, 1); + uint8_t term2 = (x << 2) | (BIT8(x, 2) ? 0x02 : 0) | + (BIT8(x, 1) ? 0x01 : 0); + uint8_t term3 = (x << 3) | (BIT8(x, 1) ? 0x04 : 0) | + (BIT8(x, 2) ? 0x01 : 0); + uint8_t term4 = (BIT8(x, 0) ? 0x40 : 0) | (BIT8(x, 1) ? 0x28 : 0) | + (BIT8(x, 2) ? 0x10 : 0); + uint8_t term5 = (BIT8(x, 2) ? 0x08 : 0); + return term1 ^ term2 ^ term3 ^ term4 ^ term5; +} + +#if defined(HOST_WORDS_BIGENDIAN) +#define MCB(x, i, b) ((x)->u8[(i)*4 + (b)]) +#else +#define MCB(x, i, b) ((x)->u8[15 - ((i)*4 + (b))]) +#endif + +static void MixColumns(ppc_avr_t *r, ppc_avr_t *x) +{ + int i; + for (i = 0; i < 4; i++) { + MCB(r, i, 0) = GFx02(MCB(x, i, 0)) ^ GFx03(MCB(x, i, 1)) ^ + MCB(x, i, 2) ^ MCB(x, i, 3); + MCB(r, i, 1) = MCB(x, i, 0) ^ GFx02(MCB(x, i, 1)) ^ + GFx03(MCB(x, i, 2)) ^ MCB(x, i, 3); + MCB(r, i, 2) = MCB(x, i, 0) ^ MCB(x, i, 1) ^ + GFx02(MCB(x, i, 2)) ^ GFx03(MCB(x, i, 3)); + MCB(r, i, 3) = GFx03(MCB(x, i, 0)) ^ MCB(x, i, 1) ^ + MCB(x, i, 2) ^ GFx02(MCB(x, i, 3)); + } +} + +static void InvMixColumns(ppc_avr_t *r, ppc_avr_t *x) +{ + int i; + for (i = 0; i < 4; i++) { + MCB(r, i, 0) = GFx0E(MCB(x, i, 0)) ^ GFx0B(MCB(x, i, 1)) ^ + GFx0D(MCB(x, i, 2)) ^ GFx09(MCB(x, i, 3)); + MCB(r, i, 1) = GFx09(MCB(x, i, 0)) ^ GFx0E(MCB(x, i, 1)) ^ + GFx0B(MCB(x, i, 2)) ^ GFx0D(MCB(x, i, 3)); + MCB(r, i, 2) = GFx0D(MCB(x, i, 0)) ^ GFx09(MCB(x, i, 1)) ^ + GFx0E(MCB(x, i, 2)) ^ GFx0B(MCB(x, i, 3)); + MCB(r, i, 3) = GFx0B(MCB(x, i, 0)) ^ GFx0D(MCB(x, i, 1)) ^ + GFx09(MCB(x, i, 2)) ^ GFx0E(MCB(x, i, 3)); + } +} + +static void ShiftRows(ppc_avr_t *r, ppc_avr_t *x) +{ + MCB(r, 0, 0) = MCB(x, 0, 0); + MCB(r, 1, 0) = MCB(x, 1, 0); + MCB(r, 2, 0) = MCB(x, 2, 0); + MCB(r, 3, 0) = MCB(x, 3, 0); + + MCB(r, 0, 1) = MCB(x, 1, 1); + MCB(r, 1, 1) = MCB(x, 2, 1); + MCB(r, 2, 1) = MCB(x, 3, 1); + MCB(r, 3, 1) = MCB(x, 0, 1); + + MCB(r, 0, 2) = MCB(x, 2, 2); + MCB(r, 1, 2) = MCB(x, 3, 2); + MCB(r, 2, 2) = MCB(x, 0, 2); + MCB(r, 3, 2) = MCB(x, 1, 2); + + MCB(r, 0, 3) = MCB(x, 3, 3); + MCB(r, 1, 3) = MCB(x, 0, 3); + MCB(r, 2, 3) = MCB(x, 1, 3); + MCB(r, 3, 3) = MCB(x, 2, 3); +} + +static void InvShiftRows(ppc_avr_t *r, ppc_avr_t *x) +{ + MCB(r, 0, 0) = MCB(x, 0, 0); + MCB(r, 1, 0) = MCB(x, 1, 0); + MCB(r, 2, 0) = MCB(x, 2, 0); + MCB(r, 3, 0) = MCB(x, 3, 0); + + MCB(r, 0, 1) = MCB(x, 3, 1); + MCB(r, 1, 1) = MCB(x, 0, 1); + MCB(r, 2, 1) = MCB(x, 1, 1); + MCB(r, 3, 1) = MCB(x, 2, 1); + + MCB(r, 0, 2) = MCB(x, 2, 2); + MCB(r, 1, 2) = MCB(x, 3, 2); + MCB(r, 2, 2) = MCB(x, 0, 2); + MCB(r, 3, 2) = MCB(x, 1, 2); + + MCB(r, 0, 3) = MCB(x, 1, 3); + MCB(r, 1, 3) = MCB(x, 2, 3); + MCB(r, 2, 3) = MCB(x, 3, 3); + MCB(r, 3, 3) = MCB(x, 0, 3); +} + +#undef MCB + +void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) +{ + SubBytes(r, a); +} + +void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + ppc_avr_t vtemp1, vtemp2, vtemp3; + SubBytes(&vtemp1, a); + ShiftRows(&vtemp2, &vtemp1); + MixColumns(&vtemp3, &vtemp2); + r->u64[0] = vtemp3.u64[0] ^ b->u64[0]; + r->u64[1] = vtemp3.u64[1] ^ b->u64[1]; +} + +void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + ppc_avr_t vtemp1, vtemp2; + SubBytes(&vtemp1, a); + ShiftRows(&vtemp2, &vtemp1); + r->u64[0] = vtemp2.u64[0] ^ b->u64[0]; + r->u64[1] = vtemp2.u64[1] ^ b->u64[1]; +} + +void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + /* This differs from what is written in ISA V2.07. The RTL is */ + /* incorrect and will be fixed in V2.07B. */ + ppc_avr_t vtemp1, vtemp2, vtemp3; + InvShiftRows(&vtemp1, a); + InvSubBytes(&vtemp2, &vtemp1); + vtemp3.u64[0] = vtemp2.u64[0] ^ b->u64[0]; + vtemp3.u64[1] = vtemp2.u64[1] ^ b->u64[1]; + InvMixColumns(r, &vtemp3); +} + +void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + ppc_avr_t vtemp1, vtemp2; + InvShiftRows(&vtemp1, a); + InvSubBytes(&vtemp2, &vtemp1); + r->u64[0] = vtemp2.u64[0] ^ b->u64[0]; + r->u64[1] = vtemp2.u64[1] ^ b->u64[1]; +} + #undef VECTOR_FOR_INORDER_I #undef HI_IDX #undef LO_IDX diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 2b46cd6..0d43b1c 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -7414,6 +7414,30 @@ GEN_VXFORM_DUAL(vsubuhm, PPC_ALTIVEC, PPC_NONE, \ GEN_VXFORM_DUAL(vsubuhs, PPC_ALTIVEC, PPC_NONE, \ bcdsub, PPC_NONE, PPC2_ALTIVEC_207) +static void gen_vsbox(DisasContext *ctx) +{ + TCGv_ptr ra, rd; + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + ra = gen_avr_ptr(rA(ctx->opcode)); + rd = gen_avr_ptr(rD(ctx->opcode)); + gen_helper_vsbox(rd, ra); + tcg_temp_free_ptr(ra); + tcg_temp_free_ptr(rd); +} + +GEN_VXFORM(vcipher, 4, 20) +GEN_VXFORM(vcipherlast, 4, 20) +GEN_VXFORM(vncipher, 4, 21) +GEN_VXFORM(vncipherlast, 4, 21) + +GEN_VXFORM_DUAL(vcipher, PPC_NONE, PPC2_ALTIVEC_207, + vcipherlast, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXFORM_DUAL(vncipher, PPC_NONE, PPC2_ALTIVEC_207, + vncipherlast, PPC_NONE, PPC2_ALTIVEC_207) + /*** VSX extension ***/ static inline TCGv_i64 cpu_vsrh(int n) @@ -10669,6 +10693,11 @@ GEN_VXFORM_207(vpmsumh, 4, 17), GEN_VXFORM_207(vpmsumw, 4, 18), GEN_VXFORM_207(vpmsumd, 4, 19), +GEN_VXFORM_207(vsbox, 4, 23), + +GEN_VXFORM_DUAL(vcipher, vcipherlast, 4, 20, PPC_NONE, PPC2_ALTIVEC_207), +GEN_VXFORM_DUAL(vncipher, vncipherlast, 4, 21, PPC_NONE, PPC2_ALTIVEC_207), + GEN_HANDLER_E(lxsdx, 0x1F, 0x0C, 0x12, 0, PPC_NONE, PPC2_VSX), GEN_HANDLER_E(lxsiwax, 0x1F, 0x0C, 0x02, 0, PPC_NONE, PPC2_VSX207), GEN_HANDLER_E(lxsiwzx, 0x1F, 0x0C, 0x00, 0, PPC_NONE, PPC2_VSX207), |