aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/PowerPC')
-rw-r--r-- llvm/test/CodeGen/PowerPC/annotate-metadata.ll 15
-rw-r--r-- llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll 91
-rw-r--r-- llvm/test/CodeGen/PowerPC/mma-acc-memops.ll 170
-rw-r--r-- llvm/test/CodeGen/PowerPC/mma-acc-spill.ll 102
-rw-r--r-- llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll 166
-rw-r--r-- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll 517
-rw-r--r-- llvm/test/CodeGen/PowerPC/mma-outer-product.ll 1266
-rw-r--r-- llvm/test/CodeGen/PowerPC/mma-phi-accs.ll 202
-rw-r--r-- llvm/test/CodeGen/PowerPC/peephole-mma-phi-liveness.ll 39
-rw-r--r-- llvm/test/CodeGen/PowerPC/vec_rounding.ll 195
-rw-r--r-- llvm/test/CodeGen/PowerPC/vp-ld-st.ll 160
11 files changed, 2865 insertions, 58 deletions
diff --git a/llvm/test/CodeGen/PowerPC/annotate-metadata.ll b/llvm/test/CodeGen/PowerPC/annotate-metadata.ll
new file mode 100644
index 0000000..4149b56
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/annotate-metadata.ll
@@ -0,0 +1,15 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff < \
+; RUN: %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64le-unknown-linux < \
+; RUN: %s | FileCheck %s
+; The three globals below all carry section "llvm.metadata"; the table holds one { ptr, ptr, ptr, i32, ptr } entry per annotated variable.
+@.str = private unnamed_addr constant [12 x i8] c"MY_METADATA\00", section "llvm.metadata"
+@.str.1 = private unnamed_addr constant [10 x i8] c"my_file.c\00", section "llvm.metadata"
+@global.annotations = appending global [3 x { ptr, ptr, ptr, i32, ptr }] [{ ptr, ptr, ptr, i32, ptr } { ptr @a, ptr @.str, ptr @.str.1, i32 100, ptr null }, { ptr, ptr, ptr, i32, ptr } { ptr @b, ptr @.str, ptr @.str.1, i32 200, ptr null }, { ptr, ptr, ptr, i32, ptr } { ptr @c, ptr @.str, ptr @.str.1, i32 300, ptr null }], section "llvm.metadata"
+; Plain i32 globals referenced by the first field of each table entry above.
+@a = global i32 1
+@b = global i32 2
+@c = global i32 3
+; The FileCheck directives below reject llc output, for either triple above, that contains the text "metadata" or "annotations".
+; CHECK-NOT: metadata
+; CHECK-NOT: annotations
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll b/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll
index 7e2f744..94121f0 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll
@@ -5,6 +5,12 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-LE-WACC
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC
define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, ptr nocapture noundef writeonly %c) local_unnamed_addr #0 {
; CHECK-LABEL: testMultiply:
@@ -91,6 +97,91 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound
; CHECK-BE-NEXT: ld r30, -16(r1)
; CHECK-BE-NEXT: mtlr r0
; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-WACC-LABEL: testMultiply:
+; CHECK-LE-WACC: # %bb.0: # %entry
+; CHECK-LE-WACC-NEXT: mflr r0
+; CHECK-LE-WACC-NEXT: std r30, -16(r1)
+; CHECK-LE-WACC-NEXT: std r0, 16(r1)
+; CHECK-LE-WACC-NEXT: clrldi r0, r1, 59
+; CHECK-LE-WACC-NEXT: subfic r0, r0, -128
+; CHECK-LE-WACC-NEXT: mr r30, r1
+; CHECK-LE-WACC-NEXT: stdux r1, r1, r0
+; CHECK-LE-WACC-NEXT: stxv v30, -64(r30) # 16-byte Folded Spill
+; CHECK-LE-WACC-NEXT: stxv v31, -48(r30) # 16-byte Folded Spill
+; CHECK-LE-WACC-NEXT: lxv v31, 0(r3)
+; CHECK-LE-WACC-NEXT: lxv v30, 0(r4)
+; CHECK-LE-WACC-NEXT: addi r3, r1, 32
+; CHECK-LE-WACC-NEXT: std r29, -24(r30) # 8-byte Folded Spill
+; CHECK-LE-WACC-NEXT: vmr v2, v31
+; CHECK-LE-WACC-NEXT: vmr v3, v30
+; CHECK-LE-WACC-NEXT: mr r29, r5
+; CHECK-LE-WACC-NEXT: bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_@notoc
+; CHECK-LE-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-LE-WACC-NEXT: xvf32gerpp wacc0, v31, v30
+; CHECK-LE-WACC-NEXT: lxv vs0, 48(r1)
+; CHECK-LE-WACC-NEXT: lxv vs1, 32(r1)
+; CHECK-LE-WACC-NEXT: xvf32gerpp wacc0, vs1, vs0
+; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-LE-WACC-NEXT: stxv v5, 0(r29)
+; CHECK-LE-WACC-NEXT: pstxv v4, 8(r29), 0
+; CHECK-LE-WACC-NEXT: stxv v3, 16(r29)
+; CHECK-LE-WACC-NEXT: pstxv v2, 24(r29), 0
+; CHECK-LE-WACC-NEXT: lxv v31, -48(r30) # 16-byte Folded Reload
+; CHECK-LE-WACC-NEXT: lxv v30, -64(r30) # 16-byte Folded Reload
+; CHECK-LE-WACC-NEXT: ld r29, -24(r30) # 8-byte Folded Reload
+; CHECK-LE-WACC-NEXT: mr r1, r30
+; CHECK-LE-WACC-NEXT: ld r0, 16(r1)
+; CHECK-LE-WACC-NEXT: ld r30, -16(r1)
+; CHECK-LE-WACC-NEXT: mtlr r0
+; CHECK-LE-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: testMultiply:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: mflr r0
+; CHECK-BE-WACC-NEXT: std r30, -16(r1)
+; CHECK-BE-WACC-NEXT: std r0, 16(r1)
+; CHECK-BE-WACC-NEXT: clrldi r0, r1, 59
+; CHECK-BE-WACC-NEXT: subfic r0, r0, -224
+; CHECK-BE-WACC-NEXT: mr r30, r1
+; CHECK-BE-WACC-NEXT: stdux r1, r1, r0
+; CHECK-BE-WACC-NEXT: stxv v30, -64(r30) # 16-byte Folded Spill
+; CHECK-BE-WACC-NEXT: stxv v31, -48(r30) # 16-byte Folded Spill
+; CHECK-BE-WACC-NEXT: lxv v31, 0(r3)
+; CHECK-BE-WACC-NEXT: lxv v30, 0(r4)
+; CHECK-BE-WACC-NEXT: addi r3, r1, 128
+; CHECK-BE-WACC-NEXT: std r29, -24(r30) # 8-byte Folded Spill
+; CHECK-BE-WACC-NEXT: vmr v2, v31
+; CHECK-BE-WACC-NEXT: vmr v3, v30
+; CHECK-BE-WACC-NEXT: mr r29, r5
+; CHECK-BE-WACC-NEXT: bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_
+; CHECK-BE-WACC-NEXT: nop
+; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, v31, v30
+; CHECK-BE-WACC-NEXT: lxv vs0, 128(r1)
+; CHECK-BE-WACC-NEXT: lxv vs1, 144(r1)
+; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, vs0, vs1
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: vmr v1, v2
+; CHECK-BE-WACC-NEXT: vmr v7, v4
+; CHECK-BE-WACC-NEXT: vmr v0, v3
+; CHECK-BE-WACC-NEXT: vmr v6, v5
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r29)
+; CHECK-BE-WACC-NEXT: pstxv v3, 8(r29), 0
+; CHECK-BE-WACC-NEXT: stxv v4, 16(r29)
+; CHECK-BE-WACC-NEXT: pstxv v5, 24(r29), 0
+; CHECK-BE-WACC-NEXT: lxv v31, -48(r30) # 16-byte Folded Reload
+; CHECK-BE-WACC-NEXT: lxv v30, -64(r30) # 16-byte Folded Reload
+; CHECK-BE-WACC-NEXT: ld r29, -24(r30) # 8-byte Folded Reload
+; CHECK-BE-WACC-NEXT: mr r1, r30
+; CHECK-BE-WACC-NEXT: ld r0, 16(r1)
+; CHECK-BE-WACC-NEXT: ld r30, -16(r1)
+; CHECK-BE-WACC-NEXT: mtlr r0
+; CHECK-BE-WACC-NEXT: blr
entry:
%vP = alloca <256 x i1>, align 32
call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %vP)
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
index 059d60a..bc5d5be 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -3,10 +3,18 @@
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -disable-auto-paired-vec-st=false < %s | FileCheck %s \
; RUN: --check-prefix=LE-PAIRED
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: -disable-auto-paired-vec-st=false < %s | FileCheck %s \
+; RUN: --check-prefix=LE-PAIRED-WACC
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr -disable-auto-paired-vec-st=false < %s | \
; RUN: FileCheck %s --check-prefix=BE-PAIRED
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -disable-auto-paired-vec-st=false < %s | \
+; RUN: FileCheck %s --check-prefix=BE-PAIRED-WACC
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
; RUN: | FileCheck %s --check-prefix=LE-PWR9
@@ -36,6 +44,20 @@ define dso_local void @testLdSt(i64 %SrcIdx, i64 %DstIdx) {
; LE-PAIRED-NEXT: pstxv vs3, f@PCREL+128(0), 1
; LE-PAIRED-NEXT: blr
;
+; LE-PAIRED-WACC-LABEL: testLdSt:
+; LE-PAIRED-WACC: # %bb.0: # %entry
+; LE-PAIRED-WACC-NEXT: plxv v3, f@PCREL+64(0), 1
+; LE-PAIRED-WACC-NEXT: plxv v5, f@PCREL+96(0), 1
+; LE-PAIRED-WACC-NEXT: plxv v2, f@PCREL+80(0), 1
+; LE-PAIRED-WACC-NEXT: plxv v4, f@PCREL+112(0), 1
+; LE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; LE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; LE-PAIRED-WACC-NEXT: pstxv v4, f@PCREL+176(0), 1
+; LE-PAIRED-WACC-NEXT: pstxv v5, f@PCREL+160(0), 1
+; LE-PAIRED-WACC-NEXT: pstxv v2, f@PCREL+144(0), 1
+; LE-PAIRED-WACC-NEXT: pstxv v3, f@PCREL+128(0), 1
+; LE-PAIRED-WACC-NEXT: blr
+;
; BE-PAIRED-LABEL: testLdSt:
; BE-PAIRED: # %bb.0: # %entry
; BE-PAIRED-NEXT: addis r3, r2, f@toc@ha
@@ -50,6 +72,22 @@ define dso_local void @testLdSt(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-NEXT: stxv vs2, 160(r3)
; BE-PAIRED-NEXT: blr
;
+; BE-PAIRED-WACC-LABEL: testLdSt:
+; BE-PAIRED-WACC: # %bb.0: # %entry
+; BE-PAIRED-WACC-NEXT: addis r3, r2, f@toc@ha
+; BE-PAIRED-WACC-NEXT: addi r3, r3, f@toc@l
+; BE-PAIRED-WACC-NEXT: lxv v3, 112(r3)
+; BE-PAIRED-WACC-NEXT: lxv v5, 80(r3)
+; BE-PAIRED-WACC-NEXT: lxv v2, 96(r3)
+; BE-PAIRED-WACC-NEXT: lxv v4, 64(r3)
+; BE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; BE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; BE-PAIRED-WACC-NEXT: stxv v5, 176(r3)
+; BE-PAIRED-WACC-NEXT: stxv v4, 160(r3)
+; BE-PAIRED-WACC-NEXT: stxv v3, 144(r3)
+; BE-PAIRED-WACC-NEXT: stxv v2, 128(r3)
+; BE-PAIRED-WACC-NEXT: blr
+;
; LE-PWR9-LABEL: testLdSt:
; LE-PWR9: # %bb.0: # %entry
; LE-PWR9-NEXT: addis r3, r2, f@toc@ha
@@ -147,6 +185,25 @@ define dso_local void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) {
; LE-PAIRED-NEXT: stxv vs2, 16(r4)
; LE-PAIRED-NEXT: blr
;
+; LE-PAIRED-WACC-LABEL: testXLdSt:
+; LE-PAIRED-WACC: # %bb.0: # %entry
+; LE-PAIRED-WACC-NEXT: paddi r5, 0, f@PCREL, 1
+; LE-PAIRED-WACC-NEXT: sldi r3, r3, 6
+; LE-PAIRED-WACC-NEXT: add r6, r5, r3
+; LE-PAIRED-WACC-NEXT: lxvx v3, r5, r3
+; LE-PAIRED-WACC-NEXT: lxv v2, 16(r6)
+; LE-PAIRED-WACC-NEXT: lxv v5, 32(r6)
+; LE-PAIRED-WACC-NEXT: lxv v4, 48(r6)
+; LE-PAIRED-WACC-NEXT: sldi r3, r4, 6
+; LE-PAIRED-WACC-NEXT: add r4, r5, r3
+; LE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; LE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; LE-PAIRED-WACC-NEXT: stxvx v3, r5, r3
+; LE-PAIRED-WACC-NEXT: stxv v4, 48(r4)
+; LE-PAIRED-WACC-NEXT: stxv v5, 32(r4)
+; LE-PAIRED-WACC-NEXT: stxv v2, 16(r4)
+; LE-PAIRED-WACC-NEXT: blr
+;
; BE-PAIRED-LABEL: testXLdSt:
; BE-PAIRED: # %bb.0: # %entry
; BE-PAIRED-NEXT: addis r5, r2, f@toc@ha
@@ -165,6 +222,26 @@ define dso_local void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-NEXT: stxv vs2, 32(r4)
; BE-PAIRED-NEXT: blr
;
+; BE-PAIRED-WACC-LABEL: testXLdSt:
+; BE-PAIRED-WACC: # %bb.0: # %entry
+; BE-PAIRED-WACC-NEXT: addis r5, r2, f@toc@ha
+; BE-PAIRED-WACC-NEXT: addi r5, r5, f@toc@l
+; BE-PAIRED-WACC-NEXT: sldi r3, r3, 6
+; BE-PAIRED-WACC-NEXT: add r6, r5, r3
+; BE-PAIRED-WACC-NEXT: lxvx v2, r5, r3
+; BE-PAIRED-WACC-NEXT: lxv v5, 48(r6)
+; BE-PAIRED-WACC-NEXT: lxv v3, 16(r6)
+; BE-PAIRED-WACC-NEXT: lxv v4, 32(r6)
+; BE-PAIRED-WACC-NEXT: sldi r3, r4, 6
+; BE-PAIRED-WACC-NEXT: add r4, r5, r3
+; BE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; BE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; BE-PAIRED-WACC-NEXT: stxvx v2, r5, r3
+; BE-PAIRED-WACC-NEXT: stxv v5, 48(r4)
+; BE-PAIRED-WACC-NEXT: stxv v4, 32(r4)
+; BE-PAIRED-WACC-NEXT: stxv v3, 16(r4)
+; BE-PAIRED-WACC-NEXT: blr
+;
; LE-PWR9-LABEL: testXLdSt:
; LE-PWR9: # %bb.0: # %entry
; LE-PWR9-NEXT: addis r5, r2, f@toc@ha
@@ -263,6 +340,20 @@ define dso_local void @testUnalignedLdSt() {
; LE-PAIRED-NEXT: pstxv vs3, f@PCREL+19(0), 1
; LE-PAIRED-NEXT: blr
;
+; LE-PAIRED-WACC-LABEL: testUnalignedLdSt:
+; LE-PAIRED-WACC: # %bb.0: # %entry
+; LE-PAIRED-WACC-NEXT: plxv v3, f@PCREL+11(0), 1
+; LE-PAIRED-WACC-NEXT: plxv v5, f@PCREL+43(0), 1
+; LE-PAIRED-WACC-NEXT: plxv v2, f@PCREL+27(0), 1
+; LE-PAIRED-WACC-NEXT: plxv v4, f@PCREL+59(0), 1
+; LE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; LE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; LE-PAIRED-WACC-NEXT: pstxv v4, f@PCREL+67(0), 1
+; LE-PAIRED-WACC-NEXT: pstxv v5, f@PCREL+51(0), 1
+; LE-PAIRED-WACC-NEXT: pstxv v2, f@PCREL+35(0), 1
+; LE-PAIRED-WACC-NEXT: pstxv v3, f@PCREL+19(0), 1
+; LE-PAIRED-WACC-NEXT: blr
+;
; BE-PAIRED-LABEL: testUnalignedLdSt:
; BE-PAIRED: # %bb.0: # %entry
; BE-PAIRED-NEXT: addis r3, r2, f@toc@ha
@@ -277,6 +368,22 @@ define dso_local void @testUnalignedLdSt() {
; BE-PAIRED-NEXT: pstxv vs2, 51(r3), 0
; BE-PAIRED-NEXT: blr
;
+; BE-PAIRED-WACC-LABEL: testUnalignedLdSt:
+; BE-PAIRED-WACC: # %bb.0: # %entry
+; BE-PAIRED-WACC-NEXT: addis r3, r2, f@toc@ha
+; BE-PAIRED-WACC-NEXT: addi r3, r3, f@toc@l
+; BE-PAIRED-WACC-NEXT: plxv v3, 59(r3), 0
+; BE-PAIRED-WACC-NEXT: plxv v5, 27(r3), 0
+; BE-PAIRED-WACC-NEXT: plxv v2, 43(r3), 0
+; BE-PAIRED-WACC-NEXT: plxv v4, 11(r3), 0
+; BE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; BE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; BE-PAIRED-WACC-NEXT: pstxv v5, 67(r3), 0
+; BE-PAIRED-WACC-NEXT: pstxv v4, 51(r3), 0
+; BE-PAIRED-WACC-NEXT: pstxv v3, 35(r3), 0
+; BE-PAIRED-WACC-NEXT: pstxv v2, 19(r3), 0
+; BE-PAIRED-WACC-NEXT: blr
+;
; LE-PWR9-LABEL: testUnalignedLdSt:
; LE-PWR9: # %bb.0: # %entry
; LE-PWR9-NEXT: addis r3, r2, f@toc@ha
@@ -381,6 +488,14 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) {
; LE-PAIRED-NEXT: pstxv vs1, g@PCREL+64(0), 1
; LE-PAIRED-NEXT: blr
;
+; LE-PAIRED-WACC-LABEL: testLdStPair:
+; LE-PAIRED-WACC: # %bb.0: # %entry
+; LE-PAIRED-WACC-NEXT: plxv vs0, g@PCREL+48(0), 1
+; LE-PAIRED-WACC-NEXT: plxv vs1, g@PCREL+32(0), 1
+; LE-PAIRED-WACC-NEXT: pstxv vs0, g@PCREL+80(0), 1
+; LE-PAIRED-WACC-NEXT: pstxv vs1, g@PCREL+64(0), 1
+; LE-PAIRED-WACC-NEXT: blr
+;
; BE-PAIRED-LABEL: testLdStPair:
; BE-PAIRED: # %bb.0: # %entry
; BE-PAIRED-NEXT: addis r3, r2, g@toc@ha
@@ -391,6 +506,16 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-NEXT: stxv vs0, 64(r3)
; BE-PAIRED-NEXT: blr
;
+; BE-PAIRED-WACC-LABEL: testLdStPair:
+; BE-PAIRED-WACC: # %bb.0: # %entry
+; BE-PAIRED-WACC-NEXT: addis r3, r2, g@toc@ha
+; BE-PAIRED-WACC-NEXT: addi r3, r3, g@toc@l
+; BE-PAIRED-WACC-NEXT: lxv vs0, 48(r3)
+; BE-PAIRED-WACC-NEXT: lxv vs1, 32(r3)
+; BE-PAIRED-WACC-NEXT: stxv vs0, 80(r3)
+; BE-PAIRED-WACC-NEXT: stxv vs1, 64(r3)
+; BE-PAIRED-WACC-NEXT: blr
+;
; LE-PWR9-LABEL: testLdStPair:
; LE-PWR9: # %bb.0: # %entry
; LE-PWR9-NEXT: addis r3, r2, g@toc@ha
@@ -460,6 +585,19 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
; LE-PAIRED-NEXT: stxv vs1, 16(r4)
; LE-PAIRED-NEXT: blr
;
+; LE-PAIRED-WACC-LABEL: testXLdStPair:
+; LE-PAIRED-WACC: # %bb.0: # %entry
+; LE-PAIRED-WACC-NEXT: sldi r3, r3, 5
+; LE-PAIRED-WACC-NEXT: paddi r5, 0, g@PCREL, 1
+; LE-PAIRED-WACC-NEXT: add r6, r5, r3
+; LE-PAIRED-WACC-NEXT: lxvx vs0, r5, r3
+; LE-PAIRED-WACC-NEXT: lxv vs1, 16(r6)
+; LE-PAIRED-WACC-NEXT: sldi r3, r4, 5
+; LE-PAIRED-WACC-NEXT: add r4, r5, r3
+; LE-PAIRED-WACC-NEXT: stxvx vs0, r5, r3
+; LE-PAIRED-WACC-NEXT: stxv vs1, 16(r4)
+; LE-PAIRED-WACC-NEXT: blr
+;
; BE-PAIRED-LABEL: testXLdStPair:
; BE-PAIRED: # %bb.0: # %entry
; BE-PAIRED-NEXT: addis r5, r2, g@toc@ha
@@ -474,6 +612,20 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-NEXT: stxv vs1, 16(r4)
; BE-PAIRED-NEXT: blr
;
+; BE-PAIRED-WACC-LABEL: testXLdStPair:
+; BE-PAIRED-WACC: # %bb.0: # %entry
+; BE-PAIRED-WACC-NEXT: addis r5, r2, g@toc@ha
+; BE-PAIRED-WACC-NEXT: sldi r3, r3, 5
+; BE-PAIRED-WACC-NEXT: addi r5, r5, g@toc@l
+; BE-PAIRED-WACC-NEXT: add r6, r5, r3
+; BE-PAIRED-WACC-NEXT: lxvx vs0, r5, r3
+; BE-PAIRED-WACC-NEXT: lxv vs1, 16(r6)
+; BE-PAIRED-WACC-NEXT: sldi r3, r4, 5
+; BE-PAIRED-WACC-NEXT: add r4, r5, r3
+; BE-PAIRED-WACC-NEXT: stxvx vs0, r5, r3
+; BE-PAIRED-WACC-NEXT: stxv vs1, 16(r4)
+; BE-PAIRED-WACC-NEXT: blr
+;
; LE-PWR9-LABEL: testXLdStPair:
; LE-PWR9: # %bb.0: # %entry
; LE-PWR9-NEXT: addis r5, r2, g@toc@ha
@@ -548,6 +700,14 @@ define dso_local void @testUnalignedLdStPair() {
; LE-PAIRED-NEXT: pstxv vs1, g@PCREL+19(0), 1
; LE-PAIRED-NEXT: blr
;
+; LE-PAIRED-WACC-LABEL: testUnalignedLdStPair:
+; LE-PAIRED-WACC: # %bb.0: # %entry
+; LE-PAIRED-WACC-NEXT: plxv vs0, g@PCREL+27(0), 1
+; LE-PAIRED-WACC-NEXT: plxv vs1, g@PCREL+11(0), 1
+; LE-PAIRED-WACC-NEXT: pstxv vs0, g@PCREL+35(0), 1
+; LE-PAIRED-WACC-NEXT: pstxv vs1, g@PCREL+19(0), 1
+; LE-PAIRED-WACC-NEXT: blr
+;
; BE-PAIRED-LABEL: testUnalignedLdStPair:
; BE-PAIRED: # %bb.0: # %entry
; BE-PAIRED-NEXT: addis r3, r2, g@toc@ha
@@ -558,6 +718,16 @@ define dso_local void @testUnalignedLdStPair() {
; BE-PAIRED-NEXT: pstxv vs0, 19(r3), 0
; BE-PAIRED-NEXT: blr
;
+; BE-PAIRED-WACC-LABEL: testUnalignedLdStPair:
+; BE-PAIRED-WACC: # %bb.0: # %entry
+; BE-PAIRED-WACC-NEXT: addis r3, r2, g@toc@ha
+; BE-PAIRED-WACC-NEXT: addi r3, r3, g@toc@l
+; BE-PAIRED-WACC-NEXT: plxv vs0, 27(r3), 0
+; BE-PAIRED-WACC-NEXT: plxv vs1, 11(r3), 0
+; BE-PAIRED-WACC-NEXT: pstxv vs0, 35(r3), 0
+; BE-PAIRED-WACC-NEXT: pstxv vs1, 19(r3), 0
+; BE-PAIRED-WACC-NEXT: blr
+;
; LE-PWR9-LABEL: testUnalignedLdStPair:
; LE-PWR9: # %bb.0: # %entry
; LE-PWR9-NEXT: addis r3, r2, g@toc@ha
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
index abc65be..9db8ba1 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
@@ -13,6 +13,13 @@
; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-LE-WACC
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC
+
declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>)
declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
declare void @foo()
@@ -119,6 +126,101 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-BE-NEXT: ld r0, 16(r1)
; CHECK-BE-NEXT: mtlr r0
; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-WACC-LABEL: intrinsics1:
+; CHECK-LE-WACC: # %bb.0:
+; CHECK-LE-WACC-NEXT: mflr r0
+; CHECK-LE-WACC-NEXT: std r0, 16(r1)
+; CHECK-LE-WACC-NEXT: stdu r1, -176(r1)
+; CHECK-LE-WACC-NEXT: .cfi_def_cfa_offset 176
+; CHECK-LE-WACC-NEXT: .cfi_offset lr, 16
+; CHECK-LE-WACC-NEXT: .cfi_offset r30, -16
+; CHECK-LE-WACC-NEXT: .cfi_offset v28, -80
+; CHECK-LE-WACC-NEXT: .cfi_offset v29, -64
+; CHECK-LE-WACC-NEXT: .cfi_offset v30, -48
+; CHECK-LE-WACC-NEXT: .cfi_offset v31, -32
+; CHECK-LE-WACC-NEXT: stxv v28, 96(r1) # 16-byte Folded Spill
+; CHECK-LE-WACC-NEXT: stxv v29, 112(r1) # 16-byte Folded Spill
+; CHECK-LE-WACC-NEXT: stxv v30, 128(r1) # 16-byte Folded Spill
+; CHECK-LE-WACC-NEXT: stxv v31, 144(r1) # 16-byte Folded Spill
+; CHECK-LE-WACC-NEXT: vmr v31, v5
+; CHECK-LE-WACC-NEXT: vmr v29, v3
+; CHECK-LE-WACC-NEXT: vmr v30, v4
+; CHECK-LE-WACC-NEXT: vmr v28, v2
+; CHECK-LE-WACC-NEXT: std r30, 160(r1) # 8-byte Folded Spill
+; CHECK-LE-WACC-NEXT: ld r30, 272(r1)
+; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp60, vsp62, 0
+; CHECK-LE-WACC-NEXT: xvf16ger2pp wacc0, v2, v4
+; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; CHECK-LE-WACC-NEXT: stxvp vsp36, 64(r1)
+; CHECK-LE-WACC-NEXT: stxvp vsp34, 32(r1)
+; CHECK-LE-WACC-NEXT: bl foo@notoc
+; CHECK-LE-WACC-NEXT: lxvp vsp34, 64(r1)
+; CHECK-LE-WACC-NEXT: lxvp vsp36, 32(r1)
+; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-LE-WACC-NEXT: xvf16ger2pp wacc0, v28, v30
+; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-LE-WACC-NEXT: stxv v4, 48(r30)
+; CHECK-LE-WACC-NEXT: stxv v5, 32(r30)
+; CHECK-LE-WACC-NEXT: stxv v2, 16(r30)
+; CHECK-LE-WACC-NEXT: stxv v3, 0(r30)
+; CHECK-LE-WACC-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload
+; CHECK-LE-WACC-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload
+; CHECK-LE-WACC-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload
+; CHECK-LE-WACC-NEXT: lxv v28, 96(r1) # 16-byte Folded Reload
+; CHECK-LE-WACC-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
+; CHECK-LE-WACC-NEXT: addi r1, r1, 176
+; CHECK-LE-WACC-NEXT: ld r0, 16(r1)
+; CHECK-LE-WACC-NEXT: mtlr r0
+; CHECK-LE-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: intrinsics1:
+; CHECK-BE-WACC: # %bb.0:
+; CHECK-BE-WACC-NEXT: mflr r0
+; CHECK-BE-WACC-NEXT: std r0, 16(r1)
+; CHECK-BE-WACC-NEXT: stdu r1, -256(r1)
+; CHECK-BE-WACC-NEXT: .cfi_def_cfa_offset 256
+; CHECK-BE-WACC-NEXT: .cfi_offset lr, 16
+; CHECK-BE-WACC-NEXT: .cfi_offset r30, -16
+; CHECK-BE-WACC-NEXT: .cfi_offset v28, -80
+; CHECK-BE-WACC-NEXT: .cfi_offset v29, -64
+; CHECK-BE-WACC-NEXT: .cfi_offset v30, -48
+; CHECK-BE-WACC-NEXT: .cfi_offset v31, -32
+; CHECK-BE-WACC-NEXT: stxv v28, 176(r1) # 16-byte Folded Spill
+; CHECK-BE-WACC-NEXT: stxv v29, 192(r1) # 16-byte Folded Spill
+; CHECK-BE-WACC-NEXT: stxv v30, 208(r1) # 16-byte Folded Spill
+; CHECK-BE-WACC-NEXT: stxv v31, 224(r1) # 16-byte Folded Spill
+; CHECK-BE-WACC-NEXT: vmr v31, v5
+; CHECK-BE-WACC-NEXT: vmr v29, v3
+; CHECK-BE-WACC-NEXT: vmr v30, v4
+; CHECK-BE-WACC-NEXT: vmr v28, v2
+; CHECK-BE-WACC-NEXT: std r30, 240(r1) # 8-byte Folded Spill
+; CHECK-BE-WACC-NEXT: ld r30, 368(r1)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp60, vsp62, 0
+; CHECK-BE-WACC-NEXT: xvf16ger2pp wacc0, v2, v4
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxvp vsp36, 112(r1)
+; CHECK-BE-WACC-NEXT: stxvp vsp34, 144(r1)
+; CHECK-BE-WACC-NEXT: bl foo
+; CHECK-BE-WACC-NEXT: nop
+; CHECK-BE-WACC-NEXT: lxvp vsp34, 112(r1)
+; CHECK-BE-WACC-NEXT: lxvp vsp36, 144(r1)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvf16ger2pp wacc0, v28, v30
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r30)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r30)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r30)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r30)
+; CHECK-BE-WACC-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload
+; CHECK-BE-WACC-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload
+; CHECK-BE-WACC-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload
+; CHECK-BE-WACC-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload
+; CHECK-BE-WACC-NEXT: ld r30, 240(r1) # 8-byte Folded Reload
+; CHECK-BE-WACC-NEXT: addi r1, r1, 256
+; CHECK-BE-WACC-NEXT: ld r0, 16(r1)
+; CHECK-BE-WACC-NEXT: mtlr r0
+; CHECK-BE-WACC-NEXT: blr
%1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4)
%2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc3)
tail call void @foo()
diff --git a/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll
index e932aec..7b36fa4 100644
--- a/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll
@@ -5,6 +5,12 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-LE-WACC
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC
; Function Attrs: nofree nounwind writeonly
define dso_local void @test1(ptr nocapture readnone %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) {
@@ -27,6 +33,26 @@ define dso_local void @test1(ptr nocapture readnone %vqp, ptr nocapture readnone
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-WACC-LABEL: test1:
+; CHECK-LE-WACC: # %bb.0: # %entry
+; CHECK-LE-WACC-NEXT: xvi16ger2 wacc0, v2, v2
+; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-LE-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-LE-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-LE-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-LE-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-LE-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test1:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: xvi16ger2 wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> %vc, <16 x i8> %vc)
store <512 x i1> %0, ptr %resp, align 64
@@ -57,6 +83,26 @@ define dso_local void @test2(ptr nocapture readnone %vqp, ptr nocapture readnone
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-WACC-LABEL: test2:
+; CHECK-LE-WACC: # %bb.0: # %entry
+; CHECK-LE-WACC-NEXT: pmxvi16ger2 wacc0, v2, v2, 0, 0, 0
+; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-LE-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-LE-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-LE-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-LE-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-LE-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test2:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: pmxvi16ger2 wacc0, v2, v2, 0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
store <512 x i1> %0, ptr %resp, align 64
@@ -97,6 +143,36 @@ define dso_local void @test3(ptr nocapture readonly %vqp, ptr nocapture readnone
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-WACC-LABEL: test3:
+; CHECK-LE-WACC: # %bb.0: # %entry
+; CHECK-LE-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-LE-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-LE-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-LE-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-LE-WACC-NEXT: xvi8ger4spp wacc0, v2, v2
+; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-LE-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-LE-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-LE-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-LE-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-LE-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test3:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvi8ger4spp wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
@@ -138,6 +214,36 @@ define dso_local void @test4(ptr nocapture readonly %vqp, ptr nocapture readnone
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-WACC-LABEL: test4:
+; CHECK-LE-WACC: # %bb.0: # %entry
+; CHECK-LE-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-LE-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-LE-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-LE-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-LE-WACC-NEXT: xvi16ger2pp wacc0, v2, v2
+; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-LE-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-LE-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-LE-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-LE-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-LE-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test4:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvi16ger2pp wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
@@ -179,6 +285,36 @@ define dso_local void @test5(ptr nocapture readonly %vqp, ptr nocapture readnone
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-WACC-LABEL: test5:
+; CHECK-LE-WACC: # %bb.0: # %entry
+; CHECK-LE-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-LE-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-LE-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-LE-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-LE-WACC-NEXT: pmxvi8ger4spp wacc0, v2, v2, 0, 0, 0
+; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-LE-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-LE-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-LE-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-LE-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-LE-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test5:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: pmxvi8ger4spp wacc0, v2, v2, 0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
@@ -220,6 +356,36 @@ define dso_local void @test6(ptr nocapture readonly %vqp, ptr nocapture readnone
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-WACC-LABEL: test6:
+; CHECK-LE-WACC: # %bb.0: # %entry
+; CHECK-LE-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-LE-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-LE-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-LE-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-LE-WACC-NEXT: pmxvi16ger2pp wacc0, v2, v2, 0, 0, 0
+; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-LE-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-LE-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-LE-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-LE-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-LE-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test6:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: pmxvi16ger2pp wacc0, v2, v2, 0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
index 8fbc9d7..3505cbb 100644
--- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -5,6 +5,12 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-WACC
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC
; assemble_acc
declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
@@ -32,6 +38,28 @@ define void @ass_acc(ptr %ptr, <16 x i8> %vc) {
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: ass_acc:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: vmr v3, v2
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r3)
+; CHECK-WACC-NEXT: stxv v5, 32(r3)
+; CHECK-WACC-NEXT: stxv v2, 16(r3)
+; CHECK-WACC-NEXT: stxv v3, 0(r3)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: ass_acc:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: vmr v3, v2
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r3)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r3)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
store <512 x i1> %0, ptr %ptr, align 64
@@ -66,6 +94,28 @@ define void @int_xxmtacc(ptr %ptr, <16 x i8> %vc) {
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: int_xxmtacc:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: vmr v3, v2
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r3)
+; CHECK-WACC-NEXT: stxv v5, 32(r3)
+; CHECK-WACC-NEXT: stxv v2, 16(r3)
+; CHECK-WACC-NEXT: stxv v3, 0(r3)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: int_xxmtacc:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: vmr v3, v2
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r3)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r3)
+; CHECK-BE-WACC-NEXT: blr
entry:
; One xxmtacc is generated from the call to assemble.acc then one xxmtacc is
; generated from the call to xxmtacc then one xxmfacc is generated for the store
@@ -101,6 +151,28 @@ define void @int_xxmfacc(ptr %ptr, <16 x i8> %vc) {
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: int_xxmfacc:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: vmr v3, v2
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r3)
+; CHECK-WACC-NEXT: stxv v5, 32(r3)
+; CHECK-WACC-NEXT: stxv v2, 16(r3)
+; CHECK-WACC-NEXT: stxv v3, 0(r3)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: int_xxmfacc:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: vmr v3, v2
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r3)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r3)
+; CHECK-BE-WACC-NEXT: blr
entry:
; One xxmtacc is generated from the call to assemble.acc then one xxmfacc is
; generated from the call to xxmfacc then one xxmfacc is generated for the store
@@ -132,6 +204,26 @@ define void @int_xxsetaccz(ptr %ptr) {
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: int_xxsetaccz:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r3)
+; CHECK-WACC-NEXT: stxv v5, 32(r3)
+; CHECK-WACC-NEXT: stxv v2, 16(r3)
+; CHECK-WACC-NEXT: stxv v3, 0(r3)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: int_xxsetaccz:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r3)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r3)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
store <512 x i1> %0, ptr %ptr, align 64
@@ -160,6 +252,26 @@ define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
; CHECK-BE-NEXT: stxv vs2, 0(r5)
; CHECK-BE-NEXT: stxv vs3, 0(r6)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: disass_acc:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v5, 0(r3)
+; CHECK-WACC-NEXT: stxv v4, 0(r4)
+; CHECK-WACC-NEXT: stxv v3, 0(r5)
+; CHECK-WACC-NEXT: stxv v2, 0(r6)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: disass_acc:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 0(r4)
+; CHECK-BE-WACC-NEXT: stxv v4, 0(r5)
+; CHECK-BE-WACC-NEXT: stxv v5, 0(r6)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
%1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0)
@@ -219,6 +331,50 @@ define void @testBranch(ptr %ptr, <16 x i8> %vc, i32 %val) {
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: testBranch:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: cmplwi r7, 0
+; CHECK-WACC-NEXT: beq cr0, .LBB5_2
+; CHECK-WACC-NEXT: # %bb.1: # %if.then
+; CHECK-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-WACC-NEXT: b .LBB5_3
+; CHECK-WACC-NEXT: .LBB5_2: # %if.else
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: xvi4ger8pp wacc0, v2, v2
+; CHECK-WACC-NEXT: .LBB5_3: # %if.end
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r3)
+; CHECK-WACC-NEXT: stxv v5, 32(r3)
+; CHECK-WACC-NEXT: stxv v2, 16(r3)
+; CHECK-WACC-NEXT: stxv v3, 0(r3)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: testBranch:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: cmplwi r7, 0
+; CHECK-BE-WACC-NEXT: beq cr0, .LBB5_2
+; CHECK-BE-WACC-NEXT: # %bb.1: # %if.then
+; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-BE-WACC-NEXT: b .LBB5_3
+; CHECK-BE-WACC-NEXT: .LBB5_2: # %if.else
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvi4ger8pp wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: .LBB5_3: # %if.end
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r3)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r3)
+; CHECK-BE-WACC-NEXT: blr
entry:
%tobool = icmp eq i32 %val, 0
br i1 %tobool, label %if.else, label %if.then
@@ -273,6 +429,36 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
; CHECK-BE-NEXT: stxv vs3, 112(r3)
; CHECK-BE-NEXT: stxv vs2, 96(r3)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: testcse:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-WACC-NEXT: xvf32gerpp wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r3)
+; CHECK-WACC-NEXT: stxv v5, 32(r3)
+; CHECK-WACC-NEXT: stxv v2, 16(r3)
+; CHECK-WACC-NEXT: stxv v3, 0(r3)
+; CHECK-WACC-NEXT: stxv v4, 112(r3)
+; CHECK-WACC-NEXT: stxv v5, 96(r3)
+; CHECK-WACC-NEXT: stxv v2, 80(r3)
+; CHECK-WACC-NEXT: stxv v3, 64(r3)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: testcse:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r3)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r3)
+; CHECK-BE-WACC-NEXT: stxv v5, 112(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 96(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 80(r3)
+; CHECK-BE-WACC-NEXT: stxv v2, 64(r3)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
%1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
@@ -320,6 +506,42 @@ define void @testcse2(ptr %res, <16 x i8> %vc) {
; CHECK-BE-NEXT: stxv vs3, 112(r3)
; CHECK-BE-NEXT: stxv vs2, 96(r3)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: testcse2:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: dmxxsetaccz wacc1
+; CHECK-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-WACC-NEXT: xvf32gerpp wacc1, v2, v2
+; CHECK-WACC-NEXT: xvf32gerpn wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r3)
+; CHECK-WACC-NEXT: stxv v5, 32(r3)
+; CHECK-WACC-NEXT: stxv v2, 16(r3)
+; CHECK-WACC-NEXT: stxv v3, 0(r3)
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 112(r3)
+; CHECK-WACC-NEXT: stxv v5, 96(r3)
+; CHECK-WACC-NEXT: stxv v2, 80(r3)
+; CHECK-WACC-NEXT: stxv v3, 64(r3)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: testcse2:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc1
+; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-BE-WACC-NEXT: xvf32gerpp wacc1, v2, v2
+; CHECK-BE-WACC-NEXT: xvf32gerpn wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r3)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 112(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 96(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 80(r3)
+; CHECK-BE-WACC-NEXT: stxv v2, 64(r3)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
%1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
@@ -367,6 +589,42 @@ define void @testcse3(ptr %res, <16 x i8> %vc) {
; CHECK-BE-NEXT: stxv vs3, 112(r3)
; CHECK-BE-NEXT: stxv vs2, 96(r3)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: testcse3:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: dmxxsetaccz wacc1
+; CHECK-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-WACC-NEXT: xvf32gerpp wacc1, v2, v2
+; CHECK-WACC-NEXT: xvf32gerpn wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r3)
+; CHECK-WACC-NEXT: stxv v5, 32(r3)
+; CHECK-WACC-NEXT: stxv v2, 16(r3)
+; CHECK-WACC-NEXT: stxv v3, 0(r3)
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 112(r3)
+; CHECK-WACC-NEXT: stxv v5, 96(r3)
+; CHECK-WACC-NEXT: stxv v2, 80(r3)
+; CHECK-WACC-NEXT: stxv v3, 64(r3)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: testcse3:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc1
+; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-BE-WACC-NEXT: xvf32gerpp wacc1, v2, v2
+; CHECK-BE-WACC-NEXT: xvf32gerpn wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r3)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 112(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 96(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 80(r3)
+; CHECK-BE-WACC-NEXT: stxv v2, 64(r3)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
%1 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
@@ -475,6 +733,104 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) {
; CHECK-BE-NEXT: bdnz .LBB9_2
; CHECK-BE-NEXT: # %bb.3: # %for.cond.cleanup
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: testcse4:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: cmpwi r4, 1
+; CHECK-WACC-NEXT: bltlr cr0
+; CHECK-WACC-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-WACC-NEXT: clrldi r4, r4, 32
+; CHECK-WACC-NEXT: mtctr r4
+; CHECK-WACC-NEXT: li r4, 0
+; CHECK-WACC-NEXT: li r6, 0
+; CHECK-WACC-NEXT: .p2align 4
+; CHECK-WACC-NEXT: .LBB9_2: # %for.body
+; CHECK-WACC-NEXT: #
+; CHECK-WACC-NEXT: rldic r7, r6, 4, 28
+; CHECK-WACC-NEXT: add r8, r5, r7
+; CHECK-WACC-NEXT: lxvx vs0, r5, r7
+; CHECK-WACC-NEXT: lxv vs1, 16(r8)
+; CHECK-WACC-NEXT: dmxxsetaccz wacc2
+; CHECK-WACC-NEXT: dmxxsetaccz wacc1
+; CHECK-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-WACC-NEXT: xvf32gerpp wacc2, vs0, vs1
+; CHECK-WACC-NEXT: lxv vs0, 32(r8)
+; CHECK-WACC-NEXT: lxv vs1, 48(r8)
+; CHECK-WACC-NEXT: rldic r7, r4, 6, 26
+; CHECK-WACC-NEXT: addi r4, r4, 3
+; CHECK-WACC-NEXT: addi r6, r6, 6
+; CHECK-WACC-NEXT: xvf32gerpn wacc1, vs0, vs1
+; CHECK-WACC-NEXT: lxv vs0, 64(r8)
+; CHECK-WACC-NEXT: lxv vs1, 80(r8)
+; CHECK-WACC-NEXT: add r8, r3, r7
+; CHECK-WACC-NEXT: xvf32gernp wacc0, vs0, vs1
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc2, 0
+; CHECK-WACC-NEXT: stxvx v3, r3, r7
+; CHECK-WACC-NEXT: stxv v4, 48(r8)
+; CHECK-WACC-NEXT: stxv v5, 32(r8)
+; CHECK-WACC-NEXT: stxv v2, 16(r8)
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-WACC-NEXT: stxv v4, 112(r8)
+; CHECK-WACC-NEXT: stxv v5, 96(r8)
+; CHECK-WACC-NEXT: stxv v2, 80(r8)
+; CHECK-WACC-NEXT: stxv v3, 64(r8)
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 176(r8)
+; CHECK-WACC-NEXT: stxv v5, 160(r8)
+; CHECK-WACC-NEXT: stxv v2, 144(r8)
+; CHECK-WACC-NEXT: stxv v3, 128(r8)
+; CHECK-WACC-NEXT: bdnz .LBB9_2
+; CHECK-WACC-NEXT: # %bb.3: # %for.cond.cleanup
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: testcse4:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: cmpwi r4, 1
+; CHECK-BE-WACC-NEXT: bltlr cr0
+; CHECK-BE-WACC-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-BE-WACC-NEXT: clrldi r4, r4, 32
+; CHECK-BE-WACC-NEXT: mtctr r4
+; CHECK-BE-WACC-NEXT: li r4, 0
+; CHECK-BE-WACC-NEXT: li r6, 0
+; CHECK-BE-WACC-NEXT: .p2align 4
+; CHECK-BE-WACC-NEXT: .LBB9_2: # %for.body
+; CHECK-BE-WACC-NEXT: #
+; CHECK-BE-WACC-NEXT: rldic r7, r6, 4, 28
+; CHECK-BE-WACC-NEXT: add r8, r5, r7
+; CHECK-BE-WACC-NEXT: lxvx vs0, r5, r7
+; CHECK-BE-WACC-NEXT: lxv vs1, 16(r8)
+; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc2
+; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc1
+; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-BE-WACC-NEXT: xvf32gerpp wacc2, vs0, vs1
+; CHECK-BE-WACC-NEXT: lxv vs0, 32(r8)
+; CHECK-BE-WACC-NEXT: lxv vs1, 48(r8)
+; CHECK-BE-WACC-NEXT: rldic r7, r4, 6, 26
+; CHECK-BE-WACC-NEXT: addi r4, r4, 3
+; CHECK-BE-WACC-NEXT: addi r6, r6, 6
+; CHECK-BE-WACC-NEXT: xvf32gerpn wacc1, vs0, vs1
+; CHECK-BE-WACC-NEXT: lxv vs0, 64(r8)
+; CHECK-BE-WACC-NEXT: lxv vs1, 80(r8)
+; CHECK-BE-WACC-NEXT: add r8, r3, r7
+; CHECK-BE-WACC-NEXT: xvf32gernp wacc0, vs0, vs1
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc2, 0
+; CHECK-BE-WACC-NEXT: stxvx v2, r3, r7
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r8)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r8)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r8)
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 112(r8)
+; CHECK-BE-WACC-NEXT: stxv v4, 96(r8)
+; CHECK-BE-WACC-NEXT: stxv v3, 80(r8)
+; CHECK-BE-WACC-NEXT: stxv v2, 64(r8)
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 176(r8)
+; CHECK-BE-WACC-NEXT: stxv v4, 160(r8)
+; CHECK-BE-WACC-NEXT: stxv v3, 144(r8)
+; CHECK-BE-WACC-NEXT: stxv v2, 128(r8)
+; CHECK-BE-WACC-NEXT: bdnz .LBB9_2
+; CHECK-BE-WACC-NEXT: # %bb.3: # %for.cond.cleanup
+; CHECK-BE-WACC-NEXT: blr
entry:
%cmp55 = icmp sgt i32 %lim, 0
br i1 %cmp55, label %for.body.preheader, label %for.cond.cleanup
@@ -600,6 +956,71 @@ define void @testRedundantPrimeUnprime(ptr %dst, <16 x i8> %vc) nounwind {
; CHECK-BE-NEXT: ld r0, 16(r1)
; CHECK-BE-NEXT: mtlr r0
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: testRedundantPrimeUnprime:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: mflr r0
+; CHECK-WACC-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-WACC-NEXT: std r0, 16(r1)
+; CHECK-WACC-NEXT: stdu r1, -112(r1)
+; CHECK-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
+; CHECK-WACC-NEXT: stxv v0, 48(r3)
+; CHECK-WACC-NEXT: stxv v1, 32(r3)
+; CHECK-WACC-NEXT: stxv v4, 16(r3)
+; CHECK-WACC-NEXT: stxv v5, 0(r3)
+; CHECK-WACC-NEXT: xvf32gerpp wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; CHECK-WACC-NEXT: mr r30, r3
+; CHECK-WACC-NEXT: stxvp vsp36, 64(r1)
+; CHECK-WACC-NEXT: stxvp vsp34, 32(r1)
+; CHECK-WACC-NEXT: bl testRedundantPrimeUnprimeF@notoc
+; CHECK-WACC-NEXT: lxvp vsp34, 64(r1)
+; CHECK-WACC-NEXT: lxvp vsp36, 32(r1)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 112(r30)
+; CHECK-WACC-NEXT: stxv v5, 96(r30)
+; CHECK-WACC-NEXT: stxv v2, 80(r30)
+; CHECK-WACC-NEXT: stxv v3, 64(r30)
+; CHECK-WACC-NEXT: addi r1, r1, 112
+; CHECK-WACC-NEXT: ld r0, 16(r1)
+; CHECK-WACC-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-WACC-NEXT: mtlr r0
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: testRedundantPrimeUnprime:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: mflr r0
+; CHECK-BE-WACC-NEXT: std r0, 16(r1)
+; CHECK-BE-WACC-NEXT: stdu r1, -192(r1)
+; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-BE-WACC-NEXT: std r30, 176(r1) # 8-byte Folded Spill
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v1, 48(r3)
+; CHECK-BE-WACC-NEXT: stxv v0, 32(r3)
+; CHECK-BE-WACC-NEXT: stxv v5, 16(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 0(r3)
+; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; CHECK-BE-WACC-NEXT: mr r30, r3
+; CHECK-BE-WACC-NEXT: stxvp vsp36, 112(r1)
+; CHECK-BE-WACC-NEXT: stxvp vsp34, 144(r1)
+; CHECK-BE-WACC-NEXT: bl testRedundantPrimeUnprimeF
+; CHECK-BE-WACC-NEXT: nop
+; CHECK-BE-WACC-NEXT: lxvp vsp34, 112(r1)
+; CHECK-BE-WACC-NEXT: lxvp vsp36, 144(r1)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 112(r30)
+; CHECK-BE-WACC-NEXT: stxv v4, 96(r30)
+; CHECK-BE-WACC-NEXT: stxv v3, 80(r30)
+; CHECK-BE-WACC-NEXT: stxv v2, 64(r30)
+; CHECK-BE-WACC-NEXT: ld r30, 176(r1) # 8-byte Folded Reload
+; CHECK-BE-WACC-NEXT: addi r1, r1, 192
+; CHECK-BE-WACC-NEXT: ld r0, 16(r1)
+; CHECK-BE-WACC-NEXT: mtlr r0
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
store <512 x i1> %0, ptr %dst, align 64
@@ -646,6 +1067,38 @@ define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, p
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test_ldst_1:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: plxvp vsp36, 8(r4), 0
+; CHECK-WACC-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test_ldst_1:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: plxvp vsp36, 8(r4), 0
+; CHECK-BE-WACC-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = getelementptr i8, ptr %vpp, i64 8
@@ -688,6 +1141,38 @@ define void @test_ldst_2(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, p
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test_ldst_2:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: lxvp vsp36, 0(r4)
+; CHECK-WACC-NEXT: xvf64gernp wacc0, vsp36, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test_ldst_2:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: lxvp vsp36, 0(r4)
+; CHECK-BE-WACC-NEXT: xvf64gernp wacc0, vsp36, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %vpp)
@@ -729,6 +1214,38 @@ define void @test_ldst_3(ptr nocapture readonly %vqp, i64 %offs, ptr %vpp, <16 x
; CHECK-BE-NEXT: stxv vs3, 48(r9)
; CHECK-BE-NEXT: stxv vs2, 32(r9)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test_ldst_3:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: lxvp vsp36, 0(r5)
+; CHECK-WACC-NEXT: xvf64gernp wacc0, vsp36, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r9)
+; CHECK-WACC-NEXT: stxv v5, 32(r9)
+; CHECK-WACC-NEXT: stxv v2, 16(r9)
+; CHECK-WACC-NEXT: stxv v3, 0(r9)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test_ldst_3:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: lxvp vsp36, 0(r5)
+; CHECK-BE-WACC-NEXT: xvf64gernp wacc0, vsp36, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r9)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r9)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r9)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r9)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %vpp)
diff --git a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
index ac6ad41..ff860b8 100644
--- a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
@@ -5,6 +5,12 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -enable-subreg-liveness -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -enable-subreg-liveness -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-WACC
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=future -enable-subreg-liveness -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC
declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>)
@@ -56,6 +62,46 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: intrinsics1:
+; CHECK-WACC: # %bb.0:
+; CHECK-WACC-NEXT: vmr v1, v4
+; CHECK-WACC-NEXT: vmr v4, v3
+; CHECK-WACC-NEXT: vmr v0, v2
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: xvi4ger8pp wacc0, v2, v4
+; CHECK-WACC-NEXT: ld r3, 96(r1)
+; CHECK-WACC-NEXT: xvf16ger2pp wacc0, v0, v1
+; CHECK-WACC-NEXT: vmr v3, v2
+; CHECK-WACC-NEXT: vmr v2, v5
+; CHECK-WACC-NEXT: pmxvf32gerpn wacc0, v4, v5, 0, 0
+; CHECK-WACC-NEXT: pmxvf64gernp wacc0, vsp34, v0, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r3)
+; CHECK-WACC-NEXT: stxv v5, 32(r3)
+; CHECK-WACC-NEXT: stxv v2, 16(r3)
+; CHECK-WACC-NEXT: stxv v3, 0(r3)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: intrinsics1:
+; CHECK-BE-WACC: # %bb.0:
+; CHECK-BE-WACC-NEXT: vmr v1, v4
+; CHECK-BE-WACC-NEXT: vmr v4, v3
+; CHECK-BE-WACC-NEXT: vmr v0, v2
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvi4ger8pp wacc0, v2, v4
+; CHECK-BE-WACC-NEXT: ld r3, 112(r1)
+; CHECK-BE-WACC-NEXT: xvf16ger2pp wacc0, v0, v1
+; CHECK-BE-WACC-NEXT: vmr v3, v2
+; CHECK-BE-WACC-NEXT: vmr v2, v5
+; CHECK-BE-WACC-NEXT: pmxvf32gerpn wacc0, v4, v5, 0, 0
+; CHECK-BE-WACC-NEXT: pmxvf64gernp wacc0, vsp34, v0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r3)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r3)
+; CHECK-BE-WACC-NEXT: blr
%1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc3, <16 x i8> %vc2, <16 x i8> %vc4)
%2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc2)
%3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3)
@@ -115,6 +161,46 @@ define void @intrinsics2(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr) {
; CHECK-BE-NEXT: stxv vs2, 0(r5)
; CHECK-BE-NEXT: stxv vs3, 0(r6)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: intrinsics2:
+; CHECK-WACC: # %bb.0:
+; CHECK-WACC-NEXT: lxv v2, 0(r3)
+; CHECK-WACC-NEXT: lxv v4, 0(r5)
+; CHECK-WACC-NEXT: lxv v3, 0(r4)
+; CHECK-WACC-NEXT: lxv v5, 0(r6)
+; CHECK-WACC-NEXT: vmr v1, v2
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-WACC-NEXT: xvi8ger4pp wacc0, v2, v3
+; CHECK-WACC-NEXT: xvf16ger2pn wacc0, v2, v4
+; CHECK-WACC-NEXT: vmr v0, v5
+; CHECK-WACC-NEXT: pmxvf32gernn wacc0, v3, v5, 0, 0
+; CHECK-WACC-NEXT: pmxvf64gernn wacc0, vsp32, v2, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v5, 0(r3)
+; CHECK-WACC-NEXT: stxv v4, 0(r4)
+; CHECK-WACC-NEXT: stxv v3, 0(r5)
+; CHECK-WACC-NEXT: stxv v2, 0(r6)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: intrinsics2:
+; CHECK-BE-WACC: # %bb.0:
+; CHECK-BE-WACC-NEXT: lxv v2, 0(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 0(r5)
+; CHECK-BE-WACC-NEXT: lxv v3, 0(r4)
+; CHECK-BE-WACC-NEXT: lxv v5, 0(r6)
+; CHECK-BE-WACC-NEXT: vmr v1, v2
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvi8ger4pp wacc0, v2, v3
+; CHECK-BE-WACC-NEXT: xvf16ger2pn wacc0, v2, v4
+; CHECK-BE-WACC-NEXT: vmr v0, v5
+; CHECK-BE-WACC-NEXT: pmxvf32gernn wacc0, v3, v5, 0, 0
+; CHECK-BE-WACC-NEXT: pmxvf64gernn wacc0, vsp32, v2, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 0(r4)
+; CHECK-BE-WACC-NEXT: stxv v4, 0(r5)
+; CHECK-BE-WACC-NEXT: stxv v5, 0(r6)
+; CHECK-BE-WACC-NEXT: blr
%vc1 = load <16 x i8>, ptr %ptr1, align 16
%vc2 = load <16 x i8>, ptr %ptr2, align 16
%vc3 = load <16 x i8>, ptr %ptr3, align 16
@@ -157,6 +243,26 @@ define void @test1(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test1:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: xvi4ger8 wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test1:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: xvi4ger8 wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> %vc, <16 x i8> %vc)
store <512 x i1> %0, ptr %resp, align 64
@@ -196,6 +302,36 @@ define void @test2(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test2:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: xvi4ger8pp wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test2:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvi4ger8pp wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
@@ -226,6 +362,26 @@ define void @test3(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test3:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: pmxvi4ger8 wacc0, v2, v2, 0, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test3:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: pmxvi4ger8 wacc0, v2, v2, 0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
store <512 x i1> %0, ptr %resp, align 64
@@ -265,6 +421,36 @@ define void @test4(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test4:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: pmxvi4ger8pp wacc0, v2, v2, 0, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test4:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: pmxvi4ger8pp wacc0, v2, v2, 0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
@@ -295,6 +481,26 @@ define void @test5(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test5:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: xvi8ger4 wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test5:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: xvi8ger4 wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> %vc, <16 x i8> %vc)
store <512 x i1> %0, ptr %resp, align 64
@@ -334,6 +540,36 @@ define void @test6(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test6:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: xvi8ger4pp wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test6:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvi8ger4pp wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
@@ -364,6 +600,26 @@ define void @test7(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test7:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: pmxvi8ger4 wacc0, v2, v2, 0, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test7:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: pmxvi8ger4 wacc0, v2, v2, 0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
store <512 x i1> %0, ptr %resp, align 64
@@ -403,6 +659,36 @@ define void @test8(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test8:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: pmxvi8ger4pp wacc0, v2, v2, 0, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test8:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: pmxvi8ger4pp wacc0, v2, v2, 0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
@@ -433,6 +719,26 @@ define void @test9(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test9:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: xvi16ger2s wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test9:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: xvi16ger2s wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> %vc, <16 x i8> %vc)
store <512 x i1> %0, ptr %resp, align 64
@@ -472,6 +778,36 @@ define void @test10(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test10:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: xvi16ger2spp wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test10:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvi16ger2spp wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
@@ -502,6 +838,26 @@ define void @test11(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test11:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: pmxvi16ger2s wacc0, v2, v2, 0, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test11:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: pmxvi16ger2s wacc0, v2, v2, 0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
store <512 x i1> %0, ptr %resp, align 64
@@ -541,6 +897,36 @@ define void @test12(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test12:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: pmxvi16ger2spp wacc0, v2, v2, 0, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test12:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: pmxvi16ger2spp wacc0, v2, v2, 0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
@@ -571,6 +957,26 @@ define void @test13(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test13:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: xvf16ger2 wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test13:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: xvf16ger2 wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> %vc, <16 x i8> %vc)
store <512 x i1> %0, ptr %resp, align 64
@@ -610,6 +1016,36 @@ define void @test14(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test14:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: xvf16ger2pp wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test14:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvf16ger2pp wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
@@ -650,6 +1086,36 @@ define void @test15(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test15:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: xvf16ger2pn wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test15:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvf16ger2pn wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
@@ -690,6 +1156,36 @@ define void @test16(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test16:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: xvf16ger2np wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test16:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvf16ger2np wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
@@ -730,6 +1226,36 @@ define void @test17(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test17:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: xvf16ger2nn wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test17:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvf16ger2nn wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
@@ -760,6 +1286,26 @@ define void @test18(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test18:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: pmxvf16ger2 wacc0, v2, v2, 0, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test18:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: pmxvf16ger2 wacc0, v2, v2, 0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
store <512 x i1> %0, ptr %resp, align 64
@@ -799,6 +1345,36 @@ define void @test19(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test19:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: pmxvf16ger2pp wacc0, v2, v2, 0, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test19:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: pmxvf16ger2pp wacc0, v2, v2, 0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
@@ -839,6 +1415,36 @@ define void @test20(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test20:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: pmxvf16ger2pn wacc0, v2, v2, 0, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test20:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: pmxvf16ger2pn wacc0, v2, v2, 0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
@@ -879,6 +1485,36 @@ define void @test21(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test21:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: pmxvf16ger2np wacc0, v2, v2, 0, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test21:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: pmxvf16ger2np wacc0, v2, v2, 0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
@@ -919,6 +1555,36 @@ define void @test22(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test22:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: pmxvf16ger2nn wacc0, v2, v2, 0, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test22:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: pmxvf16ger2nn wacc0, v2, v2, 0, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
@@ -949,6 +1615,26 @@ define void @test23(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test23:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: xvf32ger wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test23:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: xvf32ger wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %vc, <16 x i8> %vc)
store <512 x i1> %0, ptr %resp, align 64
@@ -988,6 +1674,36 @@ define void @test24(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test24:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: xvf32gerpp wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test24:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
@@ -1028,6 +1744,36 @@ define void @test25(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test25:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: xvf32gerpn wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test25:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvf32gerpn wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
@@ -1068,6 +1814,36 @@ define void @test26(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test26:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: xvf32gernp wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test26:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvf32gernp wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
@@ -1108,6 +1884,36 @@ define void @test27(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test27:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: xvf32gernn wacc0, v2, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test27:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: xvf32gernn wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
@@ -1138,6 +1944,26 @@ define void @test28(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test28:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: pmxvf32ger wacc0, v2, v2, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test28:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: pmxvf32ger wacc0, v2, v2, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
store <512 x i1> %0, ptr %resp, align 64
@@ -1177,6 +2003,36 @@ define void @test29(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test29:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: pmxvf32gerpp wacc0, v2, v2, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test29:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: pmxvf32gerpp wacc0, v2, v2, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
@@ -1217,6 +2073,36 @@ define void @test30(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test30:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: pmxvf32gerpn wacc0, v2, v2, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test30:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: pmxvf32gerpn wacc0, v2, v2, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
@@ -1257,6 +2143,36 @@ define void @test31(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test31:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: pmxvf32gernp wacc0, v2, v2, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test31:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: pmxvf32gernp wacc0, v2, v2, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
@@ -1297,6 +2213,36 @@ define void @test32(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test32:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: pmxvf32gernn wacc0, v2, v2, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test32:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: pmxvf32gernn wacc0, v2, v2, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
@@ -1331,6 +2277,30 @@ define void @test33(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test33:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v4, 16(r4)
+; CHECK-WACC-NEXT: lxv v5, 0(r4)
+; CHECK-WACC-NEXT: xvf64ger wacc0, vsp36, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test33:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v4, 0(r4)
+; CHECK-BE-WACC-NEXT: lxv v5, 16(r4)
+; CHECK-BE-WACC-NEXT: xvf64ger wacc0, vsp36, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <256 x i1>, ptr %vpp, align 32
%1 = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %0, <16 x i8> %vc)
@@ -1375,6 +2345,40 @@ define void @test34(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test34:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: lxv v4, 16(r4)
+; CHECK-WACC-NEXT: lxv v5, 0(r4)
+; CHECK-WACC-NEXT: xvf64gerpp wacc0, vsp36, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test34:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: lxv v4, 0(r4)
+; CHECK-BE-WACC-NEXT: lxv v5, 16(r4)
+; CHECK-BE-WACC-NEXT: xvf64gerpp wacc0, vsp36, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = load <256 x i1>, ptr %vpp, align 32
@@ -1420,6 +2424,40 @@ define void @test35(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test35:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: lxv v4, 16(r4)
+; CHECK-WACC-NEXT: lxv v5, 0(r4)
+; CHECK-WACC-NEXT: xvf64gerpn wacc0, vsp36, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test35:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: lxv v4, 0(r4)
+; CHECK-BE-WACC-NEXT: lxv v5, 16(r4)
+; CHECK-BE-WACC-NEXT: xvf64gerpn wacc0, vsp36, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = load <256 x i1>, ptr %vpp, align 32
@@ -1465,6 +2503,40 @@ define void @test36(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test36:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: lxv v4, 16(r4)
+; CHECK-WACC-NEXT: lxv v5, 0(r4)
+; CHECK-WACC-NEXT: xvf64gernp wacc0, vsp36, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test36:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: lxv v4, 0(r4)
+; CHECK-BE-WACC-NEXT: lxv v5, 16(r4)
+; CHECK-BE-WACC-NEXT: xvf64gernp wacc0, vsp36, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = load <256 x i1>, ptr %vpp, align 32
@@ -1510,6 +2582,40 @@ define void @test37(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test37:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: lxv v4, 16(r4)
+; CHECK-WACC-NEXT: lxv v5, 0(r4)
+; CHECK-WACC-NEXT: xvf64gernn wacc0, vsp36, v2
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test37:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: lxv v4, 0(r4)
+; CHECK-BE-WACC-NEXT: lxv v5, 16(r4)
+; CHECK-BE-WACC-NEXT: xvf64gernn wacc0, vsp36, v2
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = load <256 x i1>, ptr %vpp, align 32
@@ -1545,6 +2651,30 @@ define void @test38(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test38:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v4, 16(r4)
+; CHECK-WACC-NEXT: lxv v5, 0(r4)
+; CHECK-WACC-NEXT: pmxvf64ger wacc0, vsp36, v2, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test38:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v4, 0(r4)
+; CHECK-BE-WACC-NEXT: lxv v5, 16(r4)
+; CHECK-BE-WACC-NEXT: pmxvf64ger wacc0, vsp36, v2, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <256 x i1>, ptr %vpp, align 32
%1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %0, <16 x i8> %vc, i32 0, i32 0)
@@ -1589,6 +2719,40 @@ define void @test39(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test39:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: lxv v4, 16(r4)
+; CHECK-WACC-NEXT: lxv v5, 0(r4)
+; CHECK-WACC-NEXT: pmxvf64gerpp wacc0, vsp36, v2, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test39:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: lxv v4, 0(r4)
+; CHECK-BE-WACC-NEXT: lxv v5, 16(r4)
+; CHECK-BE-WACC-NEXT: pmxvf64gerpp wacc0, vsp36, v2, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = load <256 x i1>, ptr %vpp, align 32
@@ -1634,6 +2798,40 @@ define void @test40(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test40:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: lxv v4, 16(r4)
+; CHECK-WACC-NEXT: lxv v5, 0(r4)
+; CHECK-WACC-NEXT: pmxvf64gerpn wacc0, vsp36, v2, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test40:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: lxv v4, 0(r4)
+; CHECK-BE-WACC-NEXT: lxv v5, 16(r4)
+; CHECK-BE-WACC-NEXT: pmxvf64gerpn wacc0, vsp36, v2, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = load <256 x i1>, ptr %vpp, align 32
@@ -1679,6 +2877,40 @@ define void @test41(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test41:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: lxv v4, 16(r4)
+; CHECK-WACC-NEXT: lxv v5, 0(r4)
+; CHECK-WACC-NEXT: pmxvf64gernp wacc0, vsp36, v2, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test41:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: lxv v4, 0(r4)
+; CHECK-BE-WACC-NEXT: lxv v5, 16(r4)
+; CHECK-BE-WACC-NEXT: pmxvf64gernp wacc0, vsp36, v2, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = load <256 x i1>, ptr %vpp, align 32
@@ -1724,6 +2956,40 @@ define void @test42(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: test42:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v5, 0(r3)
+; CHECK-WACC-NEXT: lxv v1, 32(r3)
+; CHECK-WACC-NEXT: lxv v4, 16(r3)
+; CHECK-WACC-NEXT: lxv v0, 48(r3)
+; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-WACC-NEXT: lxv v4, 16(r4)
+; CHECK-WACC-NEXT: lxv v5, 0(r4)
+; CHECK-WACC-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r7)
+; CHECK-WACC-NEXT: stxv v5, 32(r7)
+; CHECK-WACC-NEXT: stxv v2, 16(r7)
+; CHECK-WACC-NEXT: stxv v3, 0(r7)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: test42:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
+; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
+; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-BE-WACC-NEXT: lxv v4, 0(r4)
+; CHECK-BE-WACC-NEXT: lxv v5, 16(r4)
+; CHECK-BE-WACC-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r7)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <512 x i1>, ptr %vqp, align 64
%1 = load <256 x i1>, ptr %vpp, align 32
diff --git a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
index 89e5147..37d0e69 100644
--- a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
@@ -5,6 +5,12 @@
; RUN: llc -O3 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -O3 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-WACC
+; RUN: llc -O3 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC
declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>)
declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
@@ -64,6 +70,60 @@ define void @testPHI1(ptr %Dst, ptr %Src, i32 signext %Len) {
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: testPHI1:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: cmpwi r5, 3
+; CHECK-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-WACC-NEXT: blt cr0, .LBB0_3
+; CHECK-WACC-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-WACC-NEXT: clrldi r5, r5, 32
+; CHECK-WACC-NEXT: addi r5, r5, -2
+; CHECK-WACC-NEXT: lxv v2, 0(r4)
+; CHECK-WACC-NEXT: lxv v3, 16(r4)
+; CHECK-WACC-NEXT: mtctr r5
+; CHECK-WACC-NEXT: addi r4, r4, 32
+; CHECK-WACC-NEXT: .p2align 4
+; CHECK-WACC-NEXT: .LBB0_2: # %for.body
+; CHECK-WACC-NEXT: #
+; CHECK-WACC-NEXT: lxv vs0, 0(r4)
+; CHECK-WACC-NEXT: addi r4, r4, 16
+; CHECK-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0
+; CHECK-WACC-NEXT: bdnz .LBB0_2
+; CHECK-WACC-NEXT: .LBB0_3: # %for.cond.cleanup
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v5, 0(r3)
+; CHECK-WACC-NEXT: stxv v4, 16(r3)
+; CHECK-WACC-NEXT: stxv v3, 32(r3)
+; CHECK-WACC-NEXT: stxv v2, 48(r3)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: testPHI1:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: cmpwi r5, 3
+; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0
+; CHECK-BE-WACC-NEXT: blt cr0, .LBB0_3
+; CHECK-BE-WACC-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-BE-WACC-NEXT: clrldi r5, r5, 32
+; CHECK-BE-WACC-NEXT: addi r5, r5, -2
+; CHECK-BE-WACC-NEXT: lxv v2, 0(r4)
+; CHECK-BE-WACC-NEXT: lxv v3, 16(r4)
+; CHECK-BE-WACC-NEXT: mtctr r5
+; CHECK-BE-WACC-NEXT: addi r4, r4, 32
+; CHECK-BE-WACC-NEXT: .p2align 4
+; CHECK-BE-WACC-NEXT: .LBB0_2: # %for.body
+; CHECK-BE-WACC-NEXT: #
+; CHECK-BE-WACC-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-WACC-NEXT: addi r4, r4, 16
+; CHECK-BE-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0
+; CHECK-BE-WACC-NEXT: bdnz .LBB0_2
+; CHECK-BE-WACC-NEXT: .LBB0_3: # %for.cond.cleanup
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <16 x i8>, ptr %Src, align 16
%arrayidx1 = getelementptr inbounds <16 x i8>, ptr %Src, i64 1
@@ -161,6 +221,62 @@ define dso_local void @testPHI2(ptr %Dst, ptr %Src, i32 signext %Len) {
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: testPHI2:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: lxv v2, 0(r4)
+; CHECK-WACC-NEXT: lxv v3, 16(r4)
+; CHECK-WACC-NEXT: lxv vs0, 32(r4)
+; CHECK-WACC-NEXT: cmpwi r5, 4
+; CHECK-WACC-NEXT: xvf64ger wacc0, vsp34, vs0
+; CHECK-WACC-NEXT: blt cr0, .LBB1_3
+; CHECK-WACC-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-WACC-NEXT: clrldi r5, r5, 32
+; CHECK-WACC-NEXT: addi r5, r5, -3
+; CHECK-WACC-NEXT: mtctr r5
+; CHECK-WACC-NEXT: addi r4, r4, 48
+; CHECK-WACC-NEXT: .p2align 4
+; CHECK-WACC-NEXT: .LBB1_2: # %for.body
+; CHECK-WACC-NEXT: #
+; CHECK-WACC-NEXT: lxv vs0, 0(r4)
+; CHECK-WACC-NEXT: addi r4, r4, 16
+; CHECK-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0
+; CHECK-WACC-NEXT: bdnz .LBB1_2
+; CHECK-WACC-NEXT: .LBB1_3: # %for.cond.cleanup
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v5, 0(r3)
+; CHECK-WACC-NEXT: stxv v4, 16(r3)
+; CHECK-WACC-NEXT: stxv v3, 32(r3)
+; CHECK-WACC-NEXT: stxv v2, 48(r3)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: testPHI2:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: lxv v2, 0(r4)
+; CHECK-BE-WACC-NEXT: lxv v3, 16(r4)
+; CHECK-BE-WACC-NEXT: lxv vs0, 32(r4)
+; CHECK-BE-WACC-NEXT: cmpwi r5, 4
+; CHECK-BE-WACC-NEXT: xvf64ger wacc0, vsp34, vs0
+; CHECK-BE-WACC-NEXT: blt cr0, .LBB1_3
+; CHECK-BE-WACC-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-BE-WACC-NEXT: clrldi r5, r5, 32
+; CHECK-BE-WACC-NEXT: addi r5, r5, -3
+; CHECK-BE-WACC-NEXT: mtctr r5
+; CHECK-BE-WACC-NEXT: addi r4, r4, 48
+; CHECK-BE-WACC-NEXT: .p2align 4
+; CHECK-BE-WACC-NEXT: .LBB1_2: # %for.body
+; CHECK-BE-WACC-NEXT: #
+; CHECK-BE-WACC-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-WACC-NEXT: addi r4, r4, 16
+; CHECK-BE-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0
+; CHECK-BE-WACC-NEXT: bdnz .LBB1_2
+; CHECK-BE-WACC-NEXT: .LBB1_3: # %for.cond.cleanup
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r3)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r3)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r3)
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r3)
+; CHECK-BE-WACC-NEXT: blr
entry:
%0 = load <16 x i8>, ptr %Src, align 16
%arrayidx1 = getelementptr inbounds <16 x i8>, ptr %Src, i64 1
@@ -229,6 +345,28 @@ define void @testImplicitDef(ptr %ptr) {
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs3, 0(r3)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: testImplicitDef:
+; CHECK-WACC: # %bb.0: # %label1
+; CHECK-WACC-NEXT: # implicit-def: $wacc0
+; CHECK-WACC-NEXT: bc 12, 4*cr5+lt, .LBB2_2
+; CHECK-WACC-NEXT: # %bb.1: # %label2
+; CHECK-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0
+; CHECK-WACC-NEXT: .LBB2_2: # %label3
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: stxv v2, 0(r3)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: testImplicitDef:
+; CHECK-BE-WACC: # %bb.0: # %label1
+; CHECK-BE-WACC-NEXT: # implicit-def: $wacc0
+; CHECK-BE-WACC-NEXT: bc 12, 4*cr5+lt, .LBB2_2
+; CHECK-BE-WACC-NEXT: # %bb.1: # %label2
+; CHECK-BE-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0
+; CHECK-BE-WACC-NEXT: .LBB2_2: # %label3
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 0(r3)
+; CHECK-BE-WACC-NEXT: blr
label1:
br i1 undef, label %label3, label %label2
@@ -312,6 +450,70 @@ define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %coun
; CHECK-BE-NEXT: stxv vs3, 48(r5)
; CHECK-BE-NEXT: stxv vs2, 32(r5)
; CHECK-BE-NEXT: blr
+;
+; CHECK-WACC-LABEL: testNestedPHI:
+; CHECK-WACC: # %bb.0: # %entry
+; CHECK-WACC-NEXT: cmplwi r3, 0
+; CHECK-WACC-NEXT: beq cr0, .LBB3_2
+; CHECK-WACC-NEXT: # %bb.1: # %if.then
+; CHECK-WACC-NEXT: xvf32gernp wacc0, v2, v2
+; CHECK-WACC-NEXT: cmpwi r4, 1
+; CHECK-WACC-NEXT: bge cr0, .LBB3_3
+; CHECK-WACC-NEXT: b .LBB3_5
+; CHECK-WACC-NEXT: .LBB3_2:
+; CHECK-WACC-NEXT: # implicit-def: $wacc0
+; CHECK-WACC-NEXT: cmpwi r4, 1
+; CHECK-WACC-NEXT: blt cr0, .LBB3_5
+; CHECK-WACC-NEXT: .LBB3_3: # %for.body.preheader
+; CHECK-WACC-NEXT: addi r3, r4, -1
+; CHECK-WACC-NEXT: clrldi r3, r3, 32
+; CHECK-WACC-NEXT: addi r3, r3, 1
+; CHECK-WACC-NEXT: mtctr r3
+; CHECK-WACC-NEXT: .p2align 4
+; CHECK-WACC-NEXT: .LBB3_4: # %for.body
+; CHECK-WACC-NEXT: #
+; CHECK-WACC-NEXT: xvf32gernp wacc0, v2, v2
+; CHECK-WACC-NEXT: bdnz .LBB3_4
+; CHECK-WACC-NEXT: .LBB3_5: # %for.cond.cleanup
+; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-WACC-NEXT: li r3, 0
+; CHECK-WACC-NEXT: stxv v4, 48(r5)
+; CHECK-WACC-NEXT: stxv v5, 32(r5)
+; CHECK-WACC-NEXT: stxv v2, 16(r5)
+; CHECK-WACC-NEXT: stxv v3, 0(r5)
+; CHECK-WACC-NEXT: blr
+;
+; CHECK-BE-WACC-LABEL: testNestedPHI:
+; CHECK-BE-WACC: # %bb.0: # %entry
+; CHECK-BE-WACC-NEXT: cmplwi r3, 0
+; CHECK-BE-WACC-NEXT: beq cr0, .LBB3_2
+; CHECK-BE-WACC-NEXT: # %bb.1: # %if.then
+; CHECK-BE-WACC-NEXT: xvf32gernp wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: cmpwi r4, 1
+; CHECK-BE-WACC-NEXT: bge cr0, .LBB3_3
+; CHECK-BE-WACC-NEXT: b .LBB3_5
+; CHECK-BE-WACC-NEXT: .LBB3_2:
+; CHECK-BE-WACC-NEXT: # implicit-def: $wacc0
+; CHECK-BE-WACC-NEXT: cmpwi r4, 1
+; CHECK-BE-WACC-NEXT: blt cr0, .LBB3_5
+; CHECK-BE-WACC-NEXT: .LBB3_3: # %for.body.preheader
+; CHECK-BE-WACC-NEXT: addi r3, r4, -1
+; CHECK-BE-WACC-NEXT: clrldi r3, r3, 32
+; CHECK-BE-WACC-NEXT: addi r3, r3, 1
+; CHECK-BE-WACC-NEXT: mtctr r3
+; CHECK-BE-WACC-NEXT: .p2align 4
+; CHECK-BE-WACC-NEXT: .LBB3_4: # %for.body
+; CHECK-BE-WACC-NEXT: #
+; CHECK-BE-WACC-NEXT: xvf32gernp wacc0, v2, v2
+; CHECK-BE-WACC-NEXT: bdnz .LBB3_4
+; CHECK-BE-WACC-NEXT: .LBB3_5: # %for.cond.cleanup
+; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-WACC-NEXT: li r3, 0
+; CHECK-BE-WACC-NEXT: stxv v5, 48(r5)
+; CHECK-BE-WACC-NEXT: stxv v4, 32(r5)
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r5)
+; CHECK-BE-WACC-NEXT: stxv v2, 0(r5)
+; CHECK-BE-WACC-NEXT: blr
entry:
%tobool.not = icmp eq i32 %cond, 0
br i1 %tobool.not, label %if.end, label %if.then
diff --git a/llvm/test/CodeGen/PowerPC/peephole-mma-phi-liveness.ll b/llvm/test/CodeGen/PowerPC/peephole-mma-phi-liveness.ll
index 291cf97..929bf5f 100644
--- a/llvm/test/CodeGen/PowerPC/peephole-mma-phi-liveness.ll
+++ b/llvm/test/CodeGen/PowerPC/peephole-mma-phi-liveness.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-ibm-aix < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=future \
+; RUN: -mtriple=powerpc64-ibm-aix < %s | FileCheck %s --check-prefix=CHECK-WACC
target datalayout = "E-m:a-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
@@ -38,6 +40,43 @@ define void @baz(i64 %arg) local_unnamed_addr #0 {
; CHECK-NEXT: xxswapd 0, 0
; CHECK-NEXT: stxv 0, 0(3)
; CHECK-NEXT: blr
+;
+; CHECK-WACC-LABEL: baz:
+; CHECK-WACC: # %bb.0: # %bb
+; CHECK-WACC-NEXT: dmxxextfdmr512 34, 36, 0, 0
+; CHECK-WACC-NEXT: xxmrgld 1, 34, 36
+; CHECK-WACC-NEXT: xxswapd 2, 1
+; CHECK-WACC-NEXT: xxlxor 0, 0, 0
+; CHECK-WACC-NEXT: xvnegdp 1, 1
+; CHECK-WACC-NEXT: xvnegdp 2, 2
+; CHECK-WACC-NEXT: xvsubdp 1, 1, 0
+; CHECK-WACC-NEXT: xvsubdp 2, 2, 37
+; CHECK-WACC-NEXT: xvmuldp 1, 1, 0
+; CHECK-WACC-NEXT: xvmuldp 2, 2, 0
+; CHECK-WACC-NEXT: xvmaddadp 1, 0, 0
+; CHECK-WACC-NEXT: xvmaddadp 2, 0, 0
+; CHECK-WACC-NEXT: stxv 1, 0(3)
+; CHECK-WACC-NEXT: stxv 2, 0(3)
+; CHECK-WACC-NEXT: # implicit-def: $wacc0
+; CHECK-WACC-NEXT: bc 12, 20, L..BB0_2
+; CHECK-WACC-NEXT: # %bb.1: # %bb10
+; CHECK-WACC-NEXT: xvf64gerpp 0, 34, 0
+; CHECK-WACC-NEXT: L..BB0_2: # %bb12
+; CHECK-WACC-NEXT: cmpdi 3, 0
+; CHECK-WACC-NEXT: .align 4
+; CHECK-WACC-NEXT: L..BB0_3: # %bb13
+; CHECK-WACC-NEXT: #
+; CHECK-WACC-NEXT: bc 4, 2, L..BB0_3
+; CHECK-WACC-NEXT: # %bb.4: # %bb14
+; CHECK-WACC-NEXT: dmxxextfdmr512 34, 36, 0, 0
+; CHECK-WACC-NEXT: xxlxor 0, 0, 0
+; CHECK-WACC-NEXT: xvsubdp 1, 0, 35
+; CHECK-WACC-NEXT: xxlxor 2, 2, 2
+; CHECK-WACC-NEXT: xvmaddadp 2, 1, 2
+; CHECK-WACC-NEXT: xvadddp 0, 2, 0
+; CHECK-WACC-NEXT: xxswapd 0, 0
+; CHECK-WACC-NEXT: stxv 0, 0(3)
+; CHECK-WACC-NEXT: blr
bb:
%call = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> poison)
%extractvalue = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %call, 0
diff --git a/llvm/test/CodeGen/PowerPC/vec_rounding.ll b/llvm/test/CodeGen/PowerPC/vec_rounding.ll
index 2f16a43..438c8eb 100644
--- a/llvm/test/CodeGen/PowerPC/vec_rounding.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_rounding.ll
@@ -1,172 +1,251 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
; Check vector round to single-precision toward -infinity (vrfim)
; instruction generation using Altivec.
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
-target triple = "powerpc64-unknown-linux-gnu"
-
declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)
define <2 x double> @floor_v2f64(<2 x double> %p)
+; CHECK-LABEL: floor_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: frim 1, 1
+; CHECK-NEXT: frim 2, 2
+; CHECK-NEXT: blr
{
%t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
ret <2 x double> %t
}
-; CHECK-LABEL: floor_v2f64:
-; CHECK: frim
-; CHECK: frim
declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
define <4 x double> @floor_v4f64(<4 x double> %p)
+; CHECK-LABEL: floor_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: frim 1, 1
+; CHECK-NEXT: frim 2, 2
+; CHECK-NEXT: frim 3, 3
+; CHECK-NEXT: frim 4, 4
+; CHECK-NEXT: blr
{
%t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
ret <4 x double> %t
}
-; CHECK-LABEL: floor_v4f64:
-; CHECK: frim
-; CHECK: frim
-; CHECK: frim
-; CHECK: frim
declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
define <2 x double> @ceil_v2f64(<2 x double> %p)
+; CHECK-LABEL: ceil_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: frip 1, 1
+; CHECK-NEXT: frip 2, 2
+; CHECK-NEXT: blr
{
%t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
ret <2 x double> %t
}
-; CHECK-LABEL: ceil_v2f64:
-; CHECK: frip
-; CHECK: frip
declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
define <4 x double> @ceil_v4f64(<4 x double> %p)
+; CHECK-LABEL: ceil_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: frip 1, 1
+; CHECK-NEXT: frip 2, 2
+; CHECK-NEXT: frip 3, 3
+; CHECK-NEXT: frip 4, 4
+; CHECK-NEXT: blr
{
%t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
ret <4 x double> %t
}
-; CHECK-LABEL: ceil_v4f64:
-; CHECK: frip
-; CHECK: frip
-; CHECK: frip
-; CHECK: frip
declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
define <2 x double> @trunc_v2f64(<2 x double> %p)
+; CHECK-LABEL: trunc_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: friz 1, 1
+; CHECK-NEXT: friz 2, 2
+; CHECK-NEXT: blr
{
%t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
ret <2 x double> %t
}
-; CHECK-LABEL: trunc_v2f64:
-; CHECK: friz
-; CHECK: friz
declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
define <4 x double> @trunc_v4f64(<4 x double> %p)
+; CHECK-LABEL: trunc_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: friz 1, 1
+; CHECK-NEXT: friz 2, 2
+; CHECK-NEXT: friz 3, 3
+; CHECK-NEXT: friz 4, 4
+; CHECK-NEXT: blr
{
%t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
ret <4 x double> %t
}
-; CHECK-LABEL: trunc_v4f64:
-; CHECK: friz
-; CHECK: friz
-; CHECK: friz
-; CHECK: friz
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
-define <2 x double> @nearbyint_v2f64(<2 x double> %p)
+define <2 x double> @nearbyint_v2f64(<2 x double> %p) nounwind
+; CHECK-LABEL: nearbyint_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: stdu 1, -128(1)
+; CHECK-NEXT: std 0, 144(1)
+; CHECK-NEXT: stfd 30, 112(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 31, 120(1) # 8-byte Folded Spill
+; CHECK-NEXT: fmr 31, 2
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: nop
+; CHECK-NEXT: fmr 30, 1
+; CHECK-NEXT: fmr 1, 31
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: nop
+; CHECK-NEXT: fmr 2, 1
+; CHECK-NEXT: fmr 1, 30
+; CHECK-NEXT: lfd 31, 120(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 30, 112(1) # 8-byte Folded Reload
+; CHECK-NEXT: addi 1, 1, 128
+; CHECK-NEXT: ld 0, 16(1)
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: blr
{
%t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
ret <2 x double> %t
}
-; CHECK-LABEL: nearbyint_v2f64:
-; CHECK: bl nearbyint
-; CHECK: bl nearbyint
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
-define <4 x double> @nearbyint_v4f64(<4 x double> %p)
+define <4 x double> @nearbyint_v4f64(<4 x double> %p) nounwind
+; CHECK-LABEL: nearbyint_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: stdu 1, -144(1)
+; CHECK-NEXT: std 0, 160(1)
+; CHECK-NEXT: stfd 28, 112(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 29, 120(1) # 8-byte Folded Spill
+; CHECK-NEXT: fmr 29, 2
+; CHECK-NEXT: stfd 30, 128(1) # 8-byte Folded Spill
+; CHECK-NEXT: fmr 30, 3
+; CHECK-NEXT: stfd 31, 136(1) # 8-byte Folded Spill
+; CHECK-NEXT: fmr 31, 4
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: nop
+; CHECK-NEXT: fmr 28, 1
+; CHECK-NEXT: fmr 1, 29
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: nop
+; CHECK-NEXT: fmr 29, 1
+; CHECK-NEXT: fmr 1, 30
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: nop
+; CHECK-NEXT: fmr 30, 1
+; CHECK-NEXT: fmr 1, 31
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: nop
+; CHECK-NEXT: fmr 4, 1
+; CHECK-NEXT: fmr 1, 28
+; CHECK-NEXT: lfd 31, 136(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 28, 112(1) # 8-byte Folded Reload
+; CHECK-NEXT: fmr 2, 29
+; CHECK-NEXT: fmr 3, 30
+; CHECK-NEXT: lfd 30, 128(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 29, 120(1) # 8-byte Folded Reload
+; CHECK-NEXT: addi 1, 1, 144
+; CHECK-NEXT: ld 0, 16(1)
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: blr
{
%t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
ret <4 x double> %t
}
-; CHECK-LABEL: nearbyint_v4f64:
-; CHECK: bl nearbyint
-; CHECK: bl nearbyint
-; CHECK: bl nearbyint
-; CHECK: bl nearbyint
declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)
define <4 x float> @floor_v4f32(<4 x float> %p)
+; CHECK-LABEL: floor_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrfim 2, 2
+; CHECK-NEXT: blr
{
%t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
ret <4 x float> %t
}
-; CHECK-LABEL: floor_v4f32:
-; CHECK: vrfim
declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
define <8 x float> @floor_v8f32(<8 x float> %p)
+; CHECK-LABEL: floor_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrfim 2, 2
+; CHECK-NEXT: vrfim 3, 3
+; CHECK-NEXT: blr
{
%t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
ret <8 x float> %t
}
-; CHECK-LABEL: floor_v8f32:
-; CHECK: vrfim
-; CHECK: vrfim
declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
define <4 x float> @ceil_v4f32(<4 x float> %p)
+; CHECK-LABEL: ceil_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrfip 2, 2
+; CHECK-NEXT: blr
{
%t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
ret <4 x float> %t
}
-; CHECK-LABEL: ceil_v4f32:
-; CHECK: vrfip
declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
define <8 x float> @ceil_v8f32(<8 x float> %p)
+; CHECK-LABEL: ceil_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrfip 2, 2
+; CHECK-NEXT: vrfip 3, 3
+; CHECK-NEXT: blr
{
%t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
ret <8 x float> %t
}
-; CHECK-LABEL: ceil_v8f32:
-; CHECK: vrfip
-; CHECK: vrfip
declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
define <4 x float> @trunc_v4f32(<4 x float> %p)
+; CHECK-LABEL: trunc_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrfiz 2, 2
+; CHECK-NEXT: blr
{
%t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
ret <4 x float> %t
}
-; CHECK-LABEL: trunc_v4f32:
-; CHECK: vrfiz
declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
define <8 x float> @trunc_v8f32(<8 x float> %p)
+; CHECK-LABEL: trunc_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrfiz 2, 2
+; CHECK-NEXT: vrfiz 3, 3
+; CHECK-NEXT: blr
{
%t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
ret <8 x float> %t
}
-; CHECK-LABEL: trunc_v8f32:
-; CHECK: vrfiz
-; CHECK: vrfiz
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
define <4 x float> @nearbyint_v4f32(<4 x float> %p)
+; CHECK-LABEL: nearbyint_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrfin 2, 2
+; CHECK-NEXT: blr
{
%t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
ret <4 x float> %t
}
-; CHECK-LABEL: nearbyint_v4f32:
-; CHECK: vrfin
declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
define <8 x float> @nearbyint_v8f32(<8 x float> %p)
+; CHECK-LABEL: nearbyint_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrfin 2, 2
+; CHECK-NEXT: vrfin 3, 3
+; CHECK-NEXT: blr
{
%t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
ret <8 x float> %t
}
-; CHECK-LABEL: nearbyint_v8f32:
-; CHECK: vrfin
-; CHECK: vrfin
diff --git a/llvm/test/CodeGen/PowerPC/vp-ld-st.ll b/llvm/test/CodeGen/PowerPC/vp-ld-st.ll
new file mode 100644
index 0000000..f0f9943
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vp-ld-st.ll
@@ -0,0 +1,160 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
+; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=future \
+; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -check-prefix=FUTURE %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
+; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=future \
+; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck --check-prefix=FUTURE %s
+
+; Length-controlled store of <16 x i8>: llvm.vp.store with an all-true mask and %c (truncated to i32) as its vector-length operand.
+define void @stxvl1(<16 x i8> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 3, 6, 56
+; CHECK-NEXT: stxvl 34, 5, 3
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: stxvl1:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: stxvrl 34, 5, 6
+; FUTURE-NEXT: blr
+entry:
+ %cconv = trunc i64 %c to i32
+ tail call void @llvm.vp.store.v16i8.p0(<16 x i8> %a, ptr %b, <16 x i1> splat (i1 true), i32 %cconv)
+ ret void
+}
+
+; Length-controlled store of <8 x i16>: llvm.vp.store with an all-true mask and %c (truncated to i32) as its vector-length operand.
+define void @stxvl2(<8 x i16> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 3, 6, 57
+; CHECK-NEXT: stxvl 34, 5, 3
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: stxvl2:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 3, 6, 1
+; FUTURE-NEXT: stxvrl 34, 5, 3
+; FUTURE-NEXT: blr
+entry:
+ %cconv = trunc i64 %c to i32
+ tail call void @llvm.vp.store.v8i16.p0(<8 x i16> %a, ptr %b, <8 x i1> splat (i1 true), i32 %cconv)
+ ret void
+}
+
+; Length-controlled store of <4 x i32>: llvm.vp.store with an all-true mask and %c (truncated to i32) as its vector-length operand.
+define void @stxvl4(<4 x i32> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 3, 6, 58
+; CHECK-NEXT: stxvl 34, 5, 3
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: stxvl4:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 3, 6, 2
+; FUTURE-NEXT: stxvrl 34, 5, 3
+; FUTURE-NEXT: blr
+entry:
+ %cconv = trunc i64 %c to i32
+ tail call void @llvm.vp.store.v4i32.p0(<4 x i32> %a, ptr %b, <4 x i1> splat (i1 true), i32 %cconv)
+ ret void
+}
+
+; Length-controlled store of <2 x i64>: llvm.vp.store with an all-true mask and %c (truncated to i32) as its vector-length operand.
+define void @stxvl8(<2 x i64> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 3, 6, 59
+; CHECK-NEXT: stxvl 34, 5, 3
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: stxvl8:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 3, 6, 3
+; FUTURE-NEXT: stxvrl 34, 5, 3
+; FUTURE-NEXT: blr
+entry:
+ %cconv = trunc i64 %c to i32
+ tail call void @llvm.vp.store.v2i64.p0(<2 x i64> %a, ptr %b, <2 x i1> splat (i1 true), i32 %cconv)
+ ret void
+}
+
+; Length-controlled load of <16 x i8>: llvm.vp.load with an all-true mask and %b (truncated to i32) as its vector-length operand.
+define <16 x i8> @lxvl1(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 4, 4, 56
+; CHECK-NEXT: lxvl 34, 3, 4
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: lxvl1:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: lxvrl 34, 3, 4
+; FUTURE-NEXT: blr
+entry:
+ %bconv = trunc i64 %b to i32
+ %0 = tail call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %a, <16 x i1> splat (i1 true), i32 %bconv)
+ ret <16 x i8> %0
+}
+
+; Length-controlled load of <8 x i16>: llvm.vp.load with an all-true mask and %b (truncated to i32) as its vector-length operand.
+define <8 x i16> @lxvl2(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 4, 4, 57
+; CHECK-NEXT: lxvl 34, 3, 4
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: lxvl2:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 4, 4, 1
+; FUTURE-NEXT: lxvrl 34, 3, 4
+; FUTURE-NEXT: blr
+entry:
+ %bconv = trunc i64 %b to i32
+ %0 = tail call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %a, <8 x i1> splat (i1 true), i32 %bconv)
+ ret <8 x i16> %0
+}
+
+; Length-controlled load of <4 x i32>: llvm.vp.load with an all-true mask and %b (truncated to i32) as its vector-length operand.
+define <4 x i32> @lxvl4(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 4, 4, 58
+; CHECK-NEXT: lxvl 34, 3, 4
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: lxvl4:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 4, 4, 2
+; FUTURE-NEXT: lxvrl 34, 3, 4
+; FUTURE-NEXT: blr
+entry:
+ %bconv = trunc i64 %b to i32
+ %0 = tail call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %a, <4 x i1> splat (i1 true), i32 %bconv)
+ ret <4 x i32> %0
+}
+
+; Length-controlled load of <2 x i64>: llvm.vp.load with an all-true mask and %b (truncated to i32) as its vector-length operand.
+define <2 x i64> @lxvl8(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 4, 4, 59
+; CHECK-NEXT: lxvl 34, 3, 4
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: lxvl8:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 4, 4, 3
+; FUTURE-NEXT: lxvrl 34, 3, 4
+; FUTURE-NEXT: blr
+entry:
+ %bconv = trunc i64 %b to i32
+ %0 = tail call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %a, <2 x i1> splat (i1 true), i32 %bconv)
+ ret <2 x i64> %0
+}