; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=future -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
; RUN:   -mcpu=future -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE

; This file checks the code llc emits for <1024 x i1> values on -mcpu=future,
; once for a little-endian Linux triple and once for a big-endian AIX triple.
; The expected assembly below is autogenerated; regenerate it with
; utils/update_llc_test_checks.py rather than editing it by hand.

; Unoptimized-style IR: every argument is spilled to its own alloca first.
; A <1024 x i1> is loaded through %vdmrp into the local %vdmr, a <256 x i1> is
; loaded through %vpp into the local %vp, and the result of dmxvi8gerx4(%vp, %vc)
; is then written to %vdmr and finally copied out through %resp.
define void @test_wacc_copy(ptr noundef %vdmrp, ptr noundef %vpp, <16 x i8> noundef %vc, ptr noundef %resp) #0 {
; CHECK-LABEL: test_wacc_copy:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    std r31, -8(r1)
; CHECK-NEXT:    std r30, -16(r1)
; CHECK-NEXT:    mr r30, r1
; CHECK-NEXT:    clrldi r0, r1, 57
; CHECK-NEXT:    subfic r0, r0, -384
; CHECK-NEXT:    stdux r1, r1, r0
; CHECK-NEXT:    .cfi_def_cfa_register r30
; CHECK-NEXT:    .cfi_offset r31, -8
; CHECK-NEXT:    .cfi_offset r30, -16
; CHECK-NEXT:    mr r31, r1
; CHECK-NEXT:    std r3, 360(r31)
; CHECK-NEXT:    std r4, 352(r31)
; CHECK-NEXT:    stxv v2, 336(r31)
; CHECK-NEXT:    std r7, 328(r31)
; CHECK-NEXT:    ld r3, 360(r31)
; CHECK-NEXT:    lxvp vsp34, 0(r3)
; CHECK-NEXT:    lxvp vsp36, 32(r3)
; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT:    lxvp vsp34, 64(r3)
; CHECK-NEXT:    lxvp vsp36, 96(r3)
; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 224(r31)
; CHECK-NEXT:    stxvp vsp36, 192(r31)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 160(r31)
; CHECK-NEXT:    stxvp vsp36, 128(r31)
; CHECK-NEXT:    ld r3, 352(r31)
; CHECK-NEXT:    lxv v2, 16(r3)
; CHECK-NEXT:    lxv v3, 0(r3)
; CHECK-NEXT:    stxv v2, 112(r31)
; CHECK-NEXT:    stxv v3, 96(r31)
; CHECK-NEXT:    lxv v2, 112(r31)
; CHECK-NEXT:    lxv v3, 96(r31)
; CHECK-NEXT:    lxv vs0, 336(r31)
; CHECK-NEXT:    dmxvi8gerx4 dmr0, vsp34, vs0
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 224(r31)
; CHECK-NEXT:    stxvp vsp36, 192(r31)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 160(r31)
; CHECK-NEXT:    stxvp vsp36, 128(r31)
; CHECK-NEXT:    lxvp vsp34, 128(r31)
; CHECK-NEXT:    lxvp vsp36, 160(r31)
; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT:    lxvp vsp34, 192(r31)
; CHECK-NEXT:    lxvp vsp36, 224(r31)
; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-NEXT:    ld r3, 328(r31)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 96(r3)
; CHECK-NEXT:    stxvp vsp36, 64(r3)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 32(r3)
; CHECK-NEXT:    stxvp vsp36, 0(r3)
; CHECK-NEXT:    mr r1, r30
; CHECK-NEXT:    ld r31, -8(r1)
; CHECK-NEXT:    ld r30, -16(r1)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_wacc_copy:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    std r31, -8(r1)
; CHECK-BE-NEXT:    std r30, -16(r1)
; CHECK-BE-NEXT:    mr r30, r1
; CHECK-BE-NEXT:    clrldi r0, r1, 57
; CHECK-BE-NEXT:    subfic r0, r0, -384
; CHECK-BE-NEXT:    stdux r1, r1, r0
; CHECK-BE-NEXT:    mr r31, r1
; CHECK-BE-NEXT:    std r3, 360(r31)
; CHECK-BE-NEXT:    std r4, 352(r31)
; CHECK-BE-NEXT:    stxv v2, 336(r31)
; CHECK-BE-NEXT:    std r5, 328(r31)
; CHECK-BE-NEXT:    ld r3, 360(r31)
; CHECK-BE-NEXT:    lxvp vsp34, 96(r3)
; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT:    lxvp vsp34, 32(r3)
; CHECK-BE-NEXT:    lxvp vsp36, 0(r3)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 224(r31)
; CHECK-BE-NEXT:    stxvp vsp34, 192(r31)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 160(r31)
; CHECK-BE-NEXT:    stxvp vsp34, 128(r31)
; CHECK-BE-NEXT:    ld r3, 352(r31)
; CHECK-BE-NEXT:    lxv v2, 0(r3)
; CHECK-BE-NEXT:    lxv v3, 16(r3)
; CHECK-BE-NEXT:    stxv v3, 112(r31)
; CHECK-BE-NEXT:    stxv v2, 96(r31)
; CHECK-BE-NEXT:    lxv v2, 96(r31)
; CHECK-BE-NEXT:    lxv v3, 112(r31)
; CHECK-BE-NEXT:    lxv vs0, 336(r31)
; CHECK-BE-NEXT:    dmxvi8gerx4 dmr0, vsp34, vs0
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 224(r31)
; CHECK-BE-NEXT:    stxvp vsp34, 192(r31)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 160(r31)
; CHECK-BE-NEXT:    stxvp vsp34, 128(r31)
; CHECK-BE-NEXT:    lxvp vsp34, 224(r31)
; CHECK-BE-NEXT:    lxvp vsp36, 192(r31)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT:    lxvp vsp34, 160(r31)
; CHECK-BE-NEXT:    lxvp vsp36, 128(r31)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-BE-NEXT:    ld r3, 328(r31)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 96(r3)
; CHECK-BE-NEXT:    stxvp vsp34, 64(r3)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r3)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r3)
; CHECK-BE-NEXT:    mr r1, r30
; CHECK-BE-NEXT:    ld r31, -8(r1)
; CHECK-BE-NEXT:    ld r30, -16(r1)
; CHECK-BE-NEXT:    blr
entry:
  ; One stack slot per incoming argument, plus locals for the 1024-bit and
  ; 256-bit values.
  %vdmrp.addr = alloca ptr, align 8
  %vpp.addr = alloca ptr, align 8
  %vc.addr = alloca <16 x i8>, align 16
  %resp.addr = alloca ptr, align 8
  %vdmr = alloca <1024 x i1>, align 128
  %vp = alloca <256 x i1>, align 32
  store ptr %vdmrp, ptr %vdmrp.addr, align 8
  store ptr %vpp, ptr %vpp.addr, align 8
  store <16 x i8> %vc, ptr %vc.addr, align 16
  store ptr %resp, ptr %resp.addr, align 8
  ; Copy the caller's <1024 x i1> into %vdmr.
  %0 = load ptr, ptr %vdmrp.addr, align 8
  %1 = load <1024 x i1>, ptr %0, align 128
  store <1024 x i1> %1, ptr %vdmr, align 128
  ; Copy the caller's <256 x i1> into %vp.
  %2 = load ptr, ptr %vpp.addr, align 8
  %3 = load <256 x i1>, ptr %2, align 32
  store <256 x i1> %3, ptr %vp, align 32
  ; The intrinsic result replaces the value previously copied into %vdmr.
  %4 = load <256 x i1>, ptr %vp, align 32
  %5 = load <16 x i8>, ptr %vc.addr, align 16
  %6 = call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> %4, <16 x i8> %5)
  store <1024 x i1> %6, ptr %vdmr, align 128
  ; Reload %vdmr and write it out through %resp.
  %7 = load <1024 x i1>, ptr %vdmr, align 128
  %8 = load ptr, ptr %resp.addr, align 8
  store <1024 x i1> %7, ptr %8, align 128
  ret void
}

; The single dmsetdmrz result %0 is an operand of two separate dmxor calls, one
; with the value loaded from %p1 and one with the value loaded from %p2. Each
; dmxor result is passed through dmmr and stored to %res1 and %res2 respectively.
define void @foo(ptr noundef readonly captures(none) %p1, ptr noundef readonly captures(none) %p2, ptr noundef writeonly captures(none) initializes((0, 128)) %res1, ptr noundef writeonly captures(none) initializes((0, 128)) %res2) local_unnamed_addr #0 {
; CHECK-LABEL: foo:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    dmsetdmrz dmr0
; CHECK-NEXT:    lxvp vsp34, 0(r3)
; CHECK-NEXT:    lxvp vsp36, 32(r3)
; CHECK-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
; CHECK-NEXT:    lxvp vsp34, 64(r3)
; CHECK-NEXT:    lxvp vsp36, 96(r3)
; CHECK-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
; CHECK-NEXT:    dmmr dmr2, dmr0
; CHECK-NEXT:    dmxor dmr2, dmr1
; CHECK-NEXT:    lxvp vsp34, 0(r4)
; CHECK-NEXT:    lxvp vsp36, 32(r4)
; CHECK-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
; CHECK-NEXT:    lxvp vsp34, 64(r4)
; CHECK-NEXT:    lxvp vsp36, 96(r4)
; CHECK-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
; CHECK-NEXT:    dmxor dmr0, dmr1
; CHECK-NEXT:    dmmr dmr1, dmr2
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc1, 0
; CHECK-NEXT:    stxvp vsp34, 96(r5)
; CHECK-NEXT:    stxvp vsp36, 64(r5)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
; CHECK-NEXT:    stxvp vsp34, 32(r5)
; CHECK-NEXT:    stxvp vsp36, 0(r5)
; CHECK-NEXT:    dmmr dmr0, dmr0
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 96(r6)
; CHECK-NEXT:    stxvp vsp36, 64(r6)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 32(r6)
; CHECK-NEXT:    stxvp vsp36, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: foo:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    dmsetdmrz dmr0
; CHECK-BE-NEXT:    lxvp vsp34, 96(r3)
; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
; CHECK-BE-NEXT:    lxvp vsp34, 32(r3)
; CHECK-BE-NEXT:    lxvp vsp36, 0(r3)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
; CHECK-BE-NEXT:    dmmr dmr2, dmr0
; CHECK-BE-NEXT:    dmxor dmr2, dmr1
; CHECK-BE-NEXT:    lxvp vsp34, 96(r4)
; CHECK-BE-NEXT:    lxvp vsp36, 64(r4)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
; CHECK-BE-NEXT:    lxvp vsp34, 32(r4)
; CHECK-BE-NEXT:    lxvp vsp36, 0(r4)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
; CHECK-BE-NEXT:    dmxor dmr0, dmr1
; CHECK-BE-NEXT:    dmmr dmr1, dmr2
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
; CHECK-BE-NEXT:    stxvp vsp36, 96(r5)
; CHECK-BE-NEXT:    stxvp vsp34, 64(r5)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc1, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r5)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r5)
; CHECK-BE-NEXT:    dmmr dmr0, dmr0
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 96(r6)
; CHECK-BE-NEXT:    stxvp vsp34, 64(r6)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r6)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r6)
; CHECK-BE-NEXT:    blr
entry:
  ; %0 is used by both dmxor calls below.
  %0 = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
  %1 = load <1024 x i1>, ptr %p1, align 128
  %2 = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %0, <1024 x i1> %1)
  %3 = load <1024 x i1>, ptr %p2, align 128
  %4 = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %0, <1024 x i1> %3)
  ; Each result goes through dmmr before it is stored.
  %5 = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %2)
  store <1024 x i1> %5, ptr %res1, align 128
  %6 = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %4)
  store <1024 x i1> %6, ptr %res2, align 128
  ret void
}

; Intrinsics called by the functions above.
declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)
declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1>, <16 x i8>)

; Attribute group referenced by both function definitions in this file.
attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="future" "target-features"="+64bit,+allow-unaligned-fp-access,+altivec,+bpermd,+cmpb,+crbits,+crypto,+direct-move,+extdiv,+fast-MFLR,+fcpsgn,+fpcvt,+fprnd,+fpu,+fre,+fres,+frsqrte,+frsqrtes,+fsqrt,+fuse-add-logical,+fuse-arith-add,+fuse-logical,+fuse-logical-add,+fuse-sha3,+fuse-store,+fusion,+hard-float,+icbt,+isa-future-instructions,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+isa-v31-instructions,+isel,+ldbrx,+lfiwax,+mfocrf,+mma,+paired-vector-memops,+partword-atomics,+pcrelative-memops,+popcntd,+power10-vector,+power8-altivec,+power8-vector,+power9-altivec,+power9-vector,+ppc-postra-sched,+ppc-prera-sched,+predictable-select-expensive,+prefix-instrs,+quadword-atomics,+recipprec,+stfiwx,+two-const-nr,+vsx" }