about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp 10
-rw-r--r-- llvm/test/CodeGen/AMDGPU/sdwa-stack.mir 32
2 files changed, 42 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 87bacc5..4774041 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -992,6 +992,16 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
if (Opc == AMDGPU::V_CNDMASK_B32_e32)
return false;
+ if (MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)) {
+ if (!Src0->isReg() && !Src0->isImm())
+ return false;
+ }
+
+ if (MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1)) {
+ if (!Src1->isReg() && !Src1->isImm())
+ return false;
+ }
+
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-stack.mir b/llvm/test/CodeGen/AMDGPU/sdwa-stack.mir
new file mode 100644
index 0000000..d804605
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-stack.mir
@@ -0,0 +1,32 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+# Do not fold stack objects into SDWA.
+
+---
+# GCN-LABEL: name: sdwa_stack_object_src0
+# GCN: V_ADD_U32_e64 %stack.0, killed %1
+name: sdwa_stack_object_src0
+stack:
+ - { id: 0, type: default, offset: 0, size: 32, alignment: 4, stack-id: default }
+body: |
+ bb.0:
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:vgpr_32 = V_AND_B32_e32 255, %0, implicit $exec
+ %2:vgpr_32 = V_ADD_U32_e64 %stack.0, killed %1, 0, implicit $exec
+ S_ENDPGM 0, implicit %2
+
+...
+---
+name: sdwa_stack_object_src1
+# GCN-LABEL: name: sdwa_stack_object_src1
+# GCN: V_ADD_U32_e64 killed %1, %stack.0
+stack:
+ - { id: 0, type: default, offset: 0, size: 32, alignment: 4, stack-id: default }
+body: |
+ bb.0:
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:vgpr_32 = V_AND_B32_e32 255, %0, implicit $exec
+ %2:vgpr_32 = V_ADD_U32_e64 killed %1, %stack.0, 0, implicit $exec
+ S_ENDPGM 0, implicit %2
+
+...