aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorBrox Chen <guochen2@amd.com>2025-07-18 12:55:11 -0400
committerGitHub <noreply@github.com>2025-07-18 12:55:11 -0400
commit5138b61a25f11eb8675d0031712c1ee6b4cb8be4 (patch)
tree0e32157b1ceeb31c8733a51e3330da29a63ba26b /llvm/lib/Target
parent73e4b589ba9526c72f495ca6898ed18d730d2db4 (diff)
downloadllvm-5138b61a25f11eb8675d0031712c1ee6b4cb8be4.zip
llvm-5138b61a25f11eb8675d0031712c1ee6b4cb8be4.tar.gz
llvm-5138b61a25f11eb8675d0031712c1ee6b4cb8be4.tar.bz2
[AMDGPU][True16][Codegen] remove packed build_vector pattern from true16 (#148715)
Some of the packed build_vector patterns use vgpr_32 for i16/f16/bf16. On gfx11, bf16 arithmetic gets promoted to f32, and this is done via a v2i16 pack. In true16 mode this v2i16 pack is selected to a build_vector/v_lshlrev pattern which only accepts VGPR32. This causes ISel to insert an illegal copy "vgpr32 = copy vgpr16" between the def and the use. In the end, this illegal copy confuses the CSE pass and triggers incorrect code elimination. Remove the packed build_vector patterns from true16. After removal, ISel will use the vgpr16 build_vector patterns instead.
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td18
1 files changed, 10 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 2a6fcad..991d9f8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3427,30 +3427,32 @@ def : GCNPat <
(S_LSHL_B32 SReg_32:$src1, (i16 16))
>;
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let True16Predicate = p in {
def : GCNPat <
(v2i16 (DivergentBinFrag<build_vector> (i16 0), (i16 VGPR_32:$src1))),
(v2i16 (V_LSHLREV_B32_e64 (i16 16), VGPR_32:$src1))
>;
-
def : GCNPat <
- (v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src1), (i16 0))),
- (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
+ (v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src1), (i16 0))),
+ (v2i16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
>;
def : GCNPat <
- (v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src1), (i16 0))),
- (v2i16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
+ (v2f16 (DivergentBinFrag<build_vector> (f16 VGPR_32:$src1), (f16 FP_ZERO))),
+ (v2f16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
>;
+}
def : GCNPat <
- (v2f16 (UniformBinFrag<build_vector> (f16 SReg_32:$src1), (f16 FP_ZERO))),
+ (v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src1), (i16 0))),
(S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
>;
def : GCNPat <
- (v2f16 (DivergentBinFrag<build_vector> (f16 VGPR_32:$src1), (f16 FP_ZERO))),
- (v2f16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
+ (v2f16 (UniformBinFrag<build_vector> (f16 SReg_32:$src1), (f16 FP_ZERO))),
+ (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
>;
foreach vecTy = [v2i16, v2f16, v2bf16] in {