From 9fd2e2c2fd0dbd5d11a5899bd6bb4db0fd3f2c35 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 8 Apr 2024 08:53:27 +0100 Subject: [DAG][AArch64] Support masked loads/stores with nontemporal flags (#87608) SVE has some non-temporal masked loads and stores. The metadata coming from the nodes is not copied to the MMO at the moment though, meaning it will generate a normal instruction. This patch ensures that the right flags are set if the instruction has non-temporal metadata. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 +++--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 12 ++++++++++-- llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll | 11 ++++++----- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f20080c..8fe0746 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11824,8 +11824,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { !MST->isCompressingStore() && !MST->isTruncatingStore()) return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(), MST->getBasePtr(), MST->getPointerInfo(), - MST->getOriginalAlign(), MachineMemOperand::MOStore, - MST->getAAInfo()); + MST->getOriginalAlign(), + MST->getMemOperand()->getFlags(), MST->getAAInfo()); // Try transforming N to an indexed store. 
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) @@ -11962,7 +11962,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { SDValue NewLd = DAG.getLoad( N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(), MLD->getPointerInfo(), MLD->getOriginalAlign(), - MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges()); + MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges()); return CombineTo(N, NewLd, NewLd.getValue(1)); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 618bdee..4ba2715 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4754,8 +4754,12 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, EVT VT = Src0.getValueType(); + auto MMOFlags = MachineMemOperand::MOStore; + if (I.hasMetadata(LLVMContext::MD_nontemporal)) + MMOFlags |= MachineMemOperand::MONonTemporal; + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, + MachinePointerInfo(PtrOperand), MMOFlags, LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata()); SDValue StoreNode = DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, @@ -4924,8 +4928,12 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); + auto MMOFlags = MachineMemOperand::MOLoad; + if (I.hasMetadata(LLVMContext::MD_nontemporal)) + MMOFlags |= MachineMemOperand::MONonTemporal; + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + MachinePointerInfo(PtrOperand), MMOFlags, LocationSize::beforeOrAfterPointer(), Alignment, AAInfo, Ranges); SDValue Load = diff --git a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll index bcfc7b3..bcb878a 100644 --- a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll +++ b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll @@ -9,7 +9,7 @@ define <4 x i32> @masked_load_v4i32(ptr %a, <4 x i1> %mask) nounwind { ; CHECK-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ldnt1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %load = call <4 x i32> @llvm.masked.load.v4i32(ptr %a, i32 1, <4 x i1> %mask, <4 x i32> undef), !nontemporal !0 @@ -25,7 +25,7 @@ define void @masked_store_v4i32(<4 x i32> %x, ptr %a, <4 x i1> %mask) nounwind { ; CHECK-NEXT: shl v1.4s, v1.4s, #31 ; CHECK-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0 -; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: stnt1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.v4i32.p0(<4 x i32> %x, ptr %a, i32 1, <4 x i1> %mask), !nontemporal !0 ret void @@ -43,7 +43,8 @@ define <4 x i32> @load_v4i32(ptr %a) nounwind { define void @store_v4i32(<4 x i32> %x, ptr %a) nounwind { ; CHECK-LABEL: store_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: stnp d0, d1, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.v4i32.p0(<4 x i32> %x, ptr %a, i32 1, <4 x i1> ), !nontemporal !0 ret void @@ 
-52,7 +53,7 @@ define void @store_v4i32(<4 x i32> %x, ptr %a) nounwind { define <vscale x 4 x i32> @masked_load_nxv4i32(ptr %a, <vscale x 4 x i1> %mask) nounwind { ; CHECK-LABEL: masked_load_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ldnt1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ret %load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef), !nontemporal !0 ret <vscale x 4 x i32> %load @@ -61,7 +62,7 @@ define <vscale x 4 x i32> @masked_load_nxv4i32(ptr %a, <vscale x 4 x i1> %mask) define void @masked_store_nxv4i32(<vscale x 4 x i32> %x, ptr %a, <vscale x 4 x i1> %mask) nounwind { ; CHECK-LABEL: masked_store_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: stnt1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %x, ptr %a, i32 1, <vscale x 4 x i1> %mask), !nontemporal !0 ret void -- cgit v1.1