Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h | 10
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.td              |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/DSInstructions.td      | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/FLATInstructions.td    | 12
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td      | 64
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP1Instructions.td    |  3
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP2Instructions.td    |  3
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3Instructions.td    |  6
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3PInstructions.td   |  6
9 files changed, 53 insertions, 67 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index d8072d1..e472e7d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -303,6 +303,16 @@ public:
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
+ /// Return true if it is profitable to fold a pair of shifts into a mask.
+ bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
+ EVT VT = Y.getValueType();
+
+ if (VT.isVector())
+ return false;
+
+ return VT.getScalarSizeInBits() <= 64;
+ }
+
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
unsigned SelectOpcode, SDValue X,
SDValue Y) const override;
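
[Note: for context on the hook overridden above: shouldFoldMaskToVariableShiftPair lets a target tell the DAG combiner that clearing the low bits of a scalar is better done with two variable shifts than with a materialized mask. A minimal C++ sketch of the two equivalent forms; function names are mine, not LLVM's:

#include <cstdint>

// Two equivalent ways to clear the low y bits of x. With the override
// above returning true for scalar types up to 64 bits wide, the
// shift-pair form is preferred; it needs no mask constant in a register.
uint64_t clear_low_mask(uint64_t x, unsigned y) {
    return x & (~0ULL << y);   // mask form: x & (-1 << y)
}

uint64_t clear_low_shifts(uint64_t x, unsigned y) {
    return (x >> y) << y;      // shift-pair form
}
]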
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index eaa1870..7003a40 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2589,6 +2589,8 @@ def NotHasTrue16BitInsts : True16PredicateClass<"!Subtarget->hasTrue16BitInsts()
// only allow 32-bit registers in operands and use low halves thereof.
def UseRealTrue16Insts : True16PredicateClass<"Subtarget->useRealTrue16Insts()">,
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>;
+def NotUseRealTrue16Insts : True16PredicateClass<"!Subtarget->useRealTrue16Insts()">,
+ AssemblerPredicate<(not (all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts))>;
def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() && "
"!Subtarget->useRealTrue16Insts()">,
AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;
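
[Note: the new NotUseRealTrue16Insts predicate is what lets the rest of this diff collapse each repeated "foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts]" loop into a single let. Reading the predicate strings above as the Subtarget queries they wrap, the two old cases partition the new one; a C++ sketch of that equivalence, with argument names of my choosing:

// NotHasTrue16BitInsts : !hasTrue16BitInsts()
// UseFakeTrue16Insts   :  hasTrue16BitInsts() && !useRealTrue16Insts()
// NotUseRealTrue16Insts: !useRealTrue16Insts()
bool notUseRealTrue16(bool hasTrue16, bool useRealTrue16) {
    bool notHas  = !hasTrue16;
    bool useFake = hasTrue16 && !useRealTrue16;
    // Assuming useRealTrue16 implies hasTrue16 (real True16 requires the
    // True16 feature), notHas || useFake simplifies to !useRealTrue16.
    return notHas || useFake;
}

So one pattern guarded by NotUseRealTrue16Insts covers exactly what the two duplicated instantiations covered before.]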
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index f2e432f..b2ff5a1 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -969,10 +969,9 @@ multiclass DSReadPat_t16<DS_Pseudo inst, ValueType vt, string frag> {
}
let OtherPredicates = [NotLDSRequiresM0Init] in {
- foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
- let True16Predicate = p in {
- def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
- }
+ let True16Predicate = NotUseRealTrue16Insts in {
+ def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
+ }
let True16Predicate = UseRealTrue16Insts in {
def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
}
@@ -1050,10 +1049,9 @@ multiclass DSWritePat_t16 <DS_Pseudo inst, ValueType vt, string frag> {
}
let OtherPredicates = [NotLDSRequiresM0Init] in {
- foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
- let True16Predicate = p in {
- def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
- }
+ let True16Predicate = NotUseRealTrue16Insts in {
+ def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
+ }
let True16Predicate = UseRealTrue16Insts in {
def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
}
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 9f33bac..5a22b23 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1982,8 +1982,7 @@ defm : FlatLoadPats <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
defm : FlatLoadPats <FLAT_LOAD_SSHORT, atomic_load_sext_16_flat, i32>;
defm : FlatLoadPats <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
defm : FlatLoadPats <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
defm : FlatLoadPats <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
defm : FlatLoadPats <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
@@ -2127,8 +2126,7 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
@@ -2187,8 +2185,7 @@ defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = p in {
+let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = NotUseRealTrue16Insts in {
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>;
@@ -2356,8 +2353,7 @@ defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 59fd2f1..be084a9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1466,8 +1466,7 @@ class VOPSelectPat_t16 <ValueType vt> : GCNPat <
def : VOPSelectModsPat <i32>;
def : VOPSelectModsPat <f32>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : VOPSelectPat <f16>;
def : VOPSelectPat <i16>;
} // End True16Predicate = p
@@ -2137,8 +2136,7 @@ def : GCNPat <
>;
foreach fp16vt = [f16, bf16] in {
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let SubtargetPredicate = p in {
+let SubtargetPredicate = NotUseRealTrue16Insts in {
def : GCNPat <
(fabs (fp16vt VGPR_32:$src)),
(V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
@@ -2230,8 +2228,7 @@ def : GCNPat <
}
foreach fp16vt = [f16, bf16] in {
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(fcopysign fp16vt:$src0, fp16vt:$src1),
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
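
[Note: the fabs and fcopysign patterns above implement 16-bit sign-bit manipulation with 32-bit bit operations on the containing VGPR. A sketch of what the selected instructions compute, assuming v_bfi_b32 is the usual bitfield insert, D = (S0 & S1) | (~S0 & S2):

#include <cstdint>

// The f16/bf16 value lives in the low half of a 32-bit VGPR;
// bit 15 is its sign bit.
uint32_t fabs16(uint32_t src) {
    return 0x00007fffu & src;   // v_and_b32: clear the sign bit
}

uint32_t fcopysign16(uint32_t mag, uint32_t sign) {
    uint32_t mask = 0x00007fffu;   // s_mov_b32
    // v_bfi_b32: magnitude bits from $src0, sign bit from $src1.
    return (mask & mag) | (~mask & sign);
}
]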
@@ -2354,23 +2351,21 @@ def : GCNPat <
(S_MOV_B32 $ga)
>;
-foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
- let True16Predicate = pred in {
- def : GCNPat <
- (VGPRImm<(i16 imm)>:$imm),
- (V_MOV_B32_e32 imm:$imm)
- >;
+let True16Predicate = NotUseRealTrue16Insts in {
+ def : GCNPat <
+ (VGPRImm<(i16 imm)>:$imm),
+ (V_MOV_B32_e32 imm:$imm)
+ >;
- // FIXME: Workaround for ordering issue with peephole optimizer where
- // a register class copy interferes with immediate folding. Should
- // use s_mov_b32, which can be shrunk to s_movk_i32
+ // FIXME: Workaround for ordering issue with peephole optimizer where
+ // a register class copy interferes with immediate folding. Should
+ // use s_mov_b32, which can be shrunk to s_movk_i32
- foreach vt = [f16, bf16] in {
- def : GCNPat <
- (VGPRImm<(vt fpimm)>:$imm),
- (V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm)))
- >;
- }
+ foreach vt = [f16, bf16] in {
+ def : GCNPat <
+ (VGPRImm<(vt fpimm)>:$imm),
+ (V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm)))
+ >;
}
}
@@ -2859,8 +2854,7 @@ def : GCNPat<
(i32 (DivergentSextInreg<i1> i32:$src)),
(V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(i16 (DivergentSextInreg<i1> i16:$src)),
(V_BFE_I32_e64 $src, (i32 0), (i32 1))
@@ -3205,8 +3199,7 @@ def : GCNPat<
}
} // AddedComplexity = 1
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat<
(i32 (DivergentUnaryFrag<zext> i16:$src)),
(V_AND_B32_e64 (S_MOV_B32 (i32 0xffff)), $src)
@@ -3416,8 +3409,7 @@ def : GCNPat <
// Magic number: 1 | (0 << 8) | (12 << 16) | (12 << 24)
// The 12s emit 0s.
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(i16 (bswap i16:$a)),
(V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
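
[Note: the magic-number comment above decodes the v_perm_b32 selector. A sketch of the byte shuffle, assuming each selector byte picks the matching destination byte, with values 0-3 indexing the bytes of $src1 ($a here) and value 12 emitting 0x00:

#include <cstdint>

// Selector 0x0c0c0001 = bytes {1, 0, 12, 12}, low byte first:
// dst byte0 <- $a byte1, dst byte1 <- $a byte0, dst bytes 2-3 <- zero.
// That is a byte swap of the low 16 bits with the high half cleared.
uint32_t bswap16_via_perm(uint32_t a) {
    uint8_t b0 = a & 0xff;
    uint8_t b1 = (a >> 8) & 0xff;
    return (uint32_t)b1 | ((uint32_t)b0 << 8);
}
]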
@@ -3670,8 +3662,7 @@ def : GCNPat <
(S_LSHL_B32 SReg_32:$src1, (i16 16))
>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(v2i16 (DivergentBinFrag<build_vector> (i16 0), (i16 VGPR_32:$src1))),
(v2i16 (V_LSHLREV_B32_e64 (i16 16), VGPR_32:$src1))
@@ -3707,8 +3698,7 @@ def : GCNPat <
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$src0), (Ty undef))),
(COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
@@ -3735,8 +3725,7 @@ def : GCNPat <
>;
let SubtargetPredicate = HasVOP3PInsts in {
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
def : GCNPat <
(v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 VGPR_32:$src1))),
(v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0))))
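
[Note: the build_vector pattern above packs two 16-bit elements with a single fused shift-or. A sketch, assuming v_lshl_or_b32 computes (S0 << S1) | S2:

#include <cstdint>

// Mask off $src0's undefined high half, then merge $src1 into the
// high 16 bits of the result.
uint32_t pack_v2i16(uint32_t src0, uint32_t src1) {
    uint32_t lo = src0 & 0xffffu;   // v_and_b32 with 0xffff
    return (src1 << 16) | lo;       // v_lshl_or_b32 $src1, 16, lo
}
]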
@@ -3766,8 +3755,7 @@ def : GCNPat <
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
// Take the lower 16 bits from each VGPR_32 and concat them
def : GCNPat <
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$a), (Ty VGPR_32:$b))),
@@ -3838,8 +3826,7 @@ def : GCNPat <
>;
// Take the upper 16 bits from each VGPR_32 and concat them
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
def : GCNPat <
(vecTy (DivergentBinFrag<build_vector>
(Ty !if(!eq(Ty, i16),
@@ -3881,8 +3868,7 @@ def : GCNPat <
(v2i16 (S_PACK_HL_B32_B16 SReg_32:$src0, SReg_32:$src1))
>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(v2f16 (scalar_to_vector f16:$src0)),
(COPY $src0)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 6230c17..77df721 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1561,8 +1561,7 @@ def : GCNPat <
} // End OtherPredicates = [isGFX8Plus]
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let OtherPredicates = [isGFX8Plus, p] in {
+let OtherPredicates = [isGFX8Plus, NotUseRealTrue16Insts] in {
def : GCNPat<
(i32 (anyext i16:$src)),
(COPY $src)
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 37d92bc..30dab55 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1378,8 +1378,7 @@ class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
$src)
>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(and i16:$src0, i16:$src1),
(V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index e6a7c35..4a2b54d 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -387,8 +387,7 @@ let SchedRW = [Write64Bit] in {
} // End SchedRW = [Write64Bit]
} // End isReMaterializable = 1
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
def : GCNPat<
(i32 (DivergentUnaryFrag<sext> i16:$src)),
(i32 (V_BFE_I32_e64 i16:$src, (i32 0), (i32 0x10)))
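
[Note: the sext pattern above reuses the signed bitfield extract: v_bfe_i32 with offset 0 and width 0x10 extracts the low 16 bits sign-extended, which is exactly i16 -> i32 sign extension. A sketch in shift form; the function name is mine:

#include <cstdint>

// Equivalent to a signed bitfield extract of bits [15:0].
int32_t sext16_via_bfe(uint32_t src) {
    return (int32_t)(src << 16) >> 16;
}
]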
@@ -501,8 +500,7 @@ def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
def : GCNPat<
(i64 (DivergentUnaryFrag<sext> i16:$src)),
(REG_SEQUENCE VReg_64,
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 52ee1e8..5daf860 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -402,8 +402,7 @@ defm V_FMA_MIX_F16_t16 : VOP3_VOP3PInst_t16<"v_fma_mix_f16_t16", VOP3P_Mix_Profi
defm : MadFmaMixFP32Pats<fma, V_FMA_MIX_F32>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
defm : MadFmaMixFP16Pats<fma, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
let True16Predicate = UseRealTrue16Insts in
defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_F16_t16>;
@@ -428,8 +427,7 @@ defm V_FMA_MIX_BF16_t16 : VOP3_VOP3PInst_t16<"v_fma_mix_bf16_t16", VOP3P_Mix_Pro
} // End isCommutable = 1
defm : MadFmaMixFP32Pats<fma, V_FMA_MIX_F32_BF16, bf16>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
defm : MadFmaMixFP16Pats<fma, V_FMA_MIXLO_BF16, V_FMA_MIXHI_BF16, bf16, v2bf16>;
let True16Predicate = UseRealTrue16Insts in
defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_BF16_t16>;