[AMDGPU] Select VMEM prefetch for llvm.prefetch on gfx1250 (#150493)

We have a choice to use a scalar or vector prefetch for a uniform pointer. Since we do not have scalar stores, our scalar cache is practically read-only. The rw argument of the prefetch intrinsic is used to force a vector operation even for a uniform case. On GFX12 scalar prefetch will be used anyway; it is still useful, but it will only bring data to L2.
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2025-07-24 13:22:50 -0700
committer: GitHub <noreply@github.com> 2025-07-24 13:22:50 -0700
commit: 96e5eed92af267b151c29a95f2c208f2bc0a32b3 (patch)
tree: a9cd2c189bcfe3b6076288677a5c15c8e9e66455 /llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
parent: 7884c077ffda1efbff7443d4b3a8e6c163b44509 (diff)
download: llvm-96e5eed92af267b151c29a95f2c208f2bc0a32b3.zip
llvm-96e5eed92af267b151c29a95f2c208f2bc0a32b3.tar.gz
llvm-96e5eed92af267b151c29a95f2c208f2bc0a32b3.tar.bz2
1 files changed, 11 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 877c3ac..8ca9a97 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -7068,6 +7068,17 @@ void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
   MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
 }
 
+void AMDGPUInstructionSelector::renderPrefetchLoc(MachineInstrBuilder &MIB,
+                                                  const MachineInstr &MI,
+                                                  int OpIdx) const { // OpIdx is not used; operand 2 of MI is read directly
+  uint32_t V = MI.getOperand(2).getImm(); // raw immediate from operand 2 (presumably the prefetch locality -- confirm)
+  V = (AMDGPU::CPol::SCOPE_MASK - (V & AMDGPU::CPol::SCOPE_MASK)) // keep only the SCOPE_MASK bits, then subtract them from SCOPE_MASK
+      << AMDGPU::CPol::SCOPE_SHIFT; // shift the result left by SCOPE_SHIFT
+  if (!Subtarget->hasSafeCUPrefetch())
+    V = std::max(V, (uint32_t)AMDGPU::CPol::SCOPE_SE); // CU scope is unsafe: raise V to at least SCOPE_SE
+  MIB.addImm(V); // append the computed value to MIB as an immediate operand
+}
+
 /// Convert from 2-bit value to enum values used for op_sel* source modifiers.
 void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
     MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
author	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	2025-07-24 13:22:50 -0700
committer	GitHub <noreply@github.com>	2025-07-24 13:22:50 -0700
commit	96e5eed92af267b151c29a95f2c208f2bc0a32b3 (patch)
tree	a9cd2c189bcfe3b6076288677a5c15c8e9e66455 /llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
parent	7884c077ffda1efbff7443d4b3a8e6c163b44509 (diff)
download	llvm-96e5eed92af267b151c29a95f2c208f2bc0a32b3.zip llvm-96e5eed92af267b151c29a95f2c208f2bc0a32b3.tar.gz llvm-96e5eed92af267b151c29a95f2c208f2bc0a32b3.tar.bz2