author    Matt Arsenault <Matthew.Arsenault@amd.com>    2019-06-05 19:06:41 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com>    2019-06-05 19:06:41 +0000
commit    836f1e2be4cf0e458bbf9333dcb01d5c65dc3c56 (patch)
tree      b5807cf97743d38f95633a1166a4300e7a259f2c
parent    0489682ef3b5659eaf05dd7d51ab6c049575ca6b (diff)
Merging r359891:
------------------------------------------------------------------------
r359891 | arsenm | 2019-05-03 07:40:10 -0700 (Fri, 03 May 2019) | 9 lines

AMDGPU: Replace shrunk instruction with dummy implicit_def

This was broken if the original operand was killed. The kill flag
would appear on both instructions, and fail the verifier. Keep the
kill flag, but remove the operands from the old instruction. This
has an added benefit of really reducing the use count for future
folds.

Ideally the pass would be structured more like what PeepholeOptimizer
does to avoid this hack to avoid breaking instruction iterators.
------------------------------------------------------------------------

llvm-svn: 362634
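
The core of the fix is visible in the SIFoldOperands.cpp hunk below: the old, already-folded instruction is kept in place so block iterators stay valid, but it is stripped down to a dummy IMPLICIT_DEF so it no longer uses (or kills) anything. A minimal sketch of that idiom, assuming the 2019-era MachineInstr API used in the patch; the helper name turnIntoImplicitDef and the includes are illustrative, not part of the change:

#include "SIInstrInfo.h"                  // in-tree AMDGPU header, for AMDGPU::IMPLICIT_DEF
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Sketch: neutralize an instruction that has already been replaced by its
// shrunk form. Operand 0 is the def; removing the use operands from the back
// keeps the remaining indices valid, and the kill flags on those uses go away
// with them, so only the new shrunk instruction carries the kill flag.
static void turnIntoImplicitDef(MachineInstr &MI, const SIInstrInfo &TII) {
  for (unsigned I = MI.getNumOperands() - 1; I > 0; --I)
    MI.RemoveOperand(I);                        // drop every use operand
  MI.setDesc(TII.get(AMDGPU::IMPLICIT_DEF));    // leave only a dummy def
}

In the actual patch the def is also retargeted to a freshly created virtual register (NewReg0) before the operands are removed, so the old instruction no longer defines the original result register, which the shrunk 32-bit instruction now defines.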
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp                    |  12
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir   |  56
2 files changed, 64 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 4e29b73..50a109d 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -218,8 +218,6 @@ static bool updateOperand(FoldCandidate &Fold,
const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
- const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
- unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
@@ -229,9 +227,15 @@ static bool updateOperand(FoldCandidate &Fold,
}
// Keep the old instruction around to avoid breaking iterators, but
- // replace the outputs with dummy registers.
+ // replace it with a dummy instruction to remove uses.
+ //
+ // FIXME: We should not invert how this pass looks at operands to avoid
+ // this. Should track set of foldable movs instead of looking for uses
+ // when looking at a use.
Dst0.setReg(NewReg0);
- Dst1.setReg(NewReg1);
+ for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
+ MI->RemoveOperand(I);
+ MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
if (Fold.isCommuted())
TII.commuteInstruction(*Inst32, false);
diff --git a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
index e4ea36f..ce02e37 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
@@ -590,3 +590,59 @@ body: |
S_ENDPGM implicit %2
...
+
+---
+name: shrink_add_kill_flags_src0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GCN-LABEL: name: shrink_add_kill_flags_src0
+ ; GCN: liveins: $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+ ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 killed [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 killed %1, %0, 0, implicit $exec
+ S_ENDPGM 0, implicit %2
+...
+
+---
+name: shrink_add_kill_flags_src1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GCN-LABEL: name: shrink_add_kill_flags_src1
+ ; GCN: liveins: $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+ ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], killed [[COPY]], implicit-def $vcc, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %1, killed %0, 0, implicit $exec
+ S_ENDPGM 0, implicit %2
+...
+
+---
+name: shrink_addc_kill_flags_src2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vcc
+ ; GCN-LABEL: name: shrink_addc_kill_flags_src2
+ ; GCN: liveins: $vgpr0, $vcc
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+ ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $vcc
+ ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[V_MOV_B32_e32_]], [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]]
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+ %2:sreg_64_xexec = COPY $vcc
+ %3:vgpr_32, %4:sreg_64_xexec = V_ADDC_U32_e64 %1, %0, %2, 0, implicit $exec
+ S_ENDPGM 0, implicit %3
+...
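
For context, MIR tests like fold-immediate-operand-shrink.mir are driven by a FileCheck RUN line at the top of the file, which this hunk does not show. Assuming the file uses the standard registered pass name for SIFoldOperands, the invocation looks roughly like:

# RUN: llc -march=amdgcn -run-pass=si-fold-operands -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

The GCN prefix matches the check lines added above; -verify-machineinstrs runs the machine verifier that, per the commit message, the duplicated kill flag used to trip.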