Diffstat (limited to 'llvm/lib/Target')
33 files changed, 1096 insertions, 163 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 9078675d..45f5235 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -18867,21 +18867,25 @@ performActiveLaneMaskCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, (!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming()))) return SDValue(); - unsigned NumUses = N->use_size(); + // Count the number of users which are extract_vectors. + unsigned NumExts = count_if(N->users(), [](SDNode *Use) { + return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR; + }); + auto MaskEC = N->getValueType(0).getVectorElementCount(); - if (!MaskEC.isKnownMultipleOf(NumUses)) + if (!MaskEC.isKnownMultipleOf(NumExts)) return SDValue(); - ElementCount ExtMinEC = MaskEC.divideCoefficientBy(NumUses); + ElementCount ExtMinEC = MaskEC.divideCoefficientBy(NumExts); if (ExtMinEC.getKnownMinValue() < 2) return SDValue(); - SmallVector<SDNode *> Extracts(NumUses, nullptr); + SmallVector<SDNode *> Extracts(NumExts, nullptr); for (SDNode *Use : N->users()) { if (Use->getOpcode() != ISD::EXTRACT_SUBVECTOR) - return SDValue(); + continue; - // Ensure the extract type is correct (e.g. if NumUses is 4 and + // Ensure the extract type is correct (e.g. if NumExts is 4 and // the mask return type is nxv8i1, each extract should be nxv2i1. if (Use->getValueType(0).getVectorElementCount() != ExtMinEC) return SDValue(); @@ -18902,32 +18906,39 @@ performActiveLaneMaskCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SDValue Idx = N->getOperand(0); SDValue TC = N->getOperand(1); - EVT OpVT = Idx.getValueType(); - if (OpVT != MVT::i64) { + if (Idx.getValueType() != MVT::i64) { Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx); TC = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, TC); } // Create the whilelo_x2 intrinsics from each pair of extracts EVT ExtVT = Extracts[0]->getValueType(0); + EVT DoubleExtVT = ExtVT.getDoubleNumVectorElementsVT(*DAG.getContext()); auto R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, {ExtVT, ExtVT}, {ID, Idx, TC}); DCI.CombineTo(Extracts[0], R.getValue(0)); DCI.CombineTo(Extracts[1], R.getValue(1)); + SmallVector<SDValue> Concats = {DAG.getNode( + ISD::CONCAT_VECTORS, DL, DoubleExtVT, R.getValue(0), R.getValue(1))}; - if (NumUses == 2) - return SDValue(N, 0); + if (NumExts == 2) { + assert(N->getValueType(0) == DoubleExtVT); + return Concats[0]; + } - auto Elts = DAG.getElementCount(DL, OpVT, ExtVT.getVectorElementCount() * 2); - for (unsigned I = 2; I < NumUses; I += 2) { + auto Elts = + DAG.getElementCount(DL, MVT::i64, ExtVT.getVectorElementCount() * 2); + for (unsigned I = 2; I < NumExts; I += 2) { // After the first whilelo_x2, we need to increment the starting value. 
- Idx = DAG.getNode(ISD::UADDSAT, DL, OpVT, Idx, Elts); + Idx = DAG.getNode(ISD::UADDSAT, DL, MVT::i64, Idx, Elts); R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, {ExtVT, ExtVT}, {ID, Idx, TC}); DCI.CombineTo(Extracts[I], R.getValue(0)); DCI.CombineTo(Extracts[I + 1], R.getValue(1)); + Concats.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, DoubleExtVT, + R.getValue(0), R.getValue(1))); } - return SDValue(N, 0); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Concats); } // Turn a v8i8/v16i8 extended vecreduce into a udot/sdot and vecreduce diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index d8072d1..e472e7d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -303,6 +303,16 @@ public: bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override; + /// Return true if it is profitable to fold a pair of shifts into a mask. + bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override { + EVT VT = Y.getValueType(); + + if (VT.isVector()) + return false; + + return VT.getScalarSizeInBits() <= 64; + } + bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override; diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp index 7947469..09b3643 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp @@ -541,6 +541,13 @@ void AArch64PrologueEmitter::emitPrologue() { // to determine the end of the prologue. DebugLoc DL; + // In some cases, particularly with CallingConv::SwiftTail, it is possible to + // have a tail-call where the caller only needs to adjust the stack pointer in + // the epilogue. In this case, we still need to emit a SEH prologue sequence. + // See `seh-minimal-prologue-epilogue.ll` test cases. + if (AFI->getArgumentStackToRestore()) + HasWinCFI = true; + if (AFI->shouldSignReturnAddress(MF)) { // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions // are inserted by emitPacRetPlusLeafHardening(). diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp index cced0fa..4749748 100644 --- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp @@ -22,7 +22,7 @@ // To handle ZA state across control flow, we make use of edge bundling. This // assigns each block an "incoming" and "outgoing" edge bundle (representing // incoming and outgoing edges). Initially, these are unique to each block; -// then, in the process of forming bundles, the outgoing block of a block is +// then, in the process of forming bundles, the outgoing bundle of a block is // joined with the incoming bundle of all successors. The result is that each // bundle can be assigned a single ZA state, which ensures the state required by // all a blocks' successors is the same, and that each basic block will always diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index eaa1870..7003a40 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2589,6 +2589,8 @@ def NotHasTrue16BitInsts : True16PredicateClass<"!Subtarget->hasTrue16BitInsts() // only allow 32-bit registers in operands and use low halves thereof. 
def UseRealTrue16Insts : True16PredicateClass<"Subtarget->useRealTrue16Insts()">, AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>; +def NotUseRealTrue16Insts : True16PredicateClass<"!Subtarget->useRealTrue16Insts()">, + AssemblerPredicate<(not (all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts))>; def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() && " "!Subtarget->useRealTrue16Insts()">, AssemblerPredicate<(all_of FeatureTrue16BitInsts)>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 0776d14..f413bbc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -840,7 +840,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}}); // clang-format on - addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD}, StandardB) + addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT, + G_AMDGPU_TBUFFER_LOAD_FORMAT}, + StandardB) .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}) .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}) .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}) diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index f2e432f..b2ff5a1 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -969,10 +969,9 @@ multiclass DSReadPat_t16<DS_Pseudo inst, ValueType vt, string frag> { } let OtherPredicates = [NotLDSRequiresM0Init] in { - foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in - let True16Predicate = p in { - def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>; - } + let True16Predicate = NotUseRealTrue16Insts in { + def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>; + } let True16Predicate = UseRealTrue16Insts in { def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>; } @@ -1050,10 +1049,9 @@ multiclass DSWritePat_t16 <DS_Pseudo inst, ValueType vt, string frag> { } let OtherPredicates = [NotLDSRequiresM0Init] in { - foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in - let True16Predicate = p in { - def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>; - } + let True16Predicate = NotUseRealTrue16Insts in { + def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>; + } let True16Predicate = UseRealTrue16Insts in { def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>; } diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 9f33bac..5a22b23 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1982,8 +1982,7 @@ defm : FlatLoadPats <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>; defm : FlatLoadPats <FLAT_LOAD_SSHORT, atomic_load_sext_16_flat, i32>; defm : FlatLoadPats <FLAT_LOAD_DWORDX3, load_flat, v3i32>; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { defm : FlatLoadPats <FLAT_LOAD_UBYTE, extloadi8_flat, i16>; defm : FlatLoadPats <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>; defm : FlatLoadPats <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>; @@ -2127,8 +2126,7 @@ defm 
: GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>; @@ -2187,8 +2185,7 @@ defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>; defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>; defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = p in { +let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = NotUseRealTrue16Insts in { defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>; defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>; defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>; @@ -2356,8 +2353,7 @@ defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>; defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>; defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>; defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>; defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 31a2d55..c2252af 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1006,9 +1006,8 @@ public: Opcode == AMDGPU::S_BARRIER_INIT_M0 || Opcode == AMDGPU::S_BARRIER_INIT_IMM || Opcode == AMDGPU::S_BARRIER_JOIN_IMM || - Opcode == AMDGPU::S_BARRIER_LEAVE || - Opcode == AMDGPU::S_BARRIER_LEAVE_IMM || - Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER; + Opcode == AMDGPU::S_BARRIER_LEAVE || Opcode == AMDGPU::DS_GWS_INIT || + Opcode == AMDGPU::DS_GWS_BARRIER; } static bool isF16PseudoScalarTrans(unsigned Opcode) { diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 59fd2f1..be084a9 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1466,8 +1466,7 @@ class VOPSelectPat_t16 <ValueType vt> : GCNPat < def : VOPSelectModsPat <i32>; def : VOPSelectModsPat <f32>; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : VOPSelectPat <f16>; def : VOPSelectPat <i16>; } // End True16Predicate = p @@ -2137,8 +2136,7 @@ def : GCNPat < >; foreach fp16vt = [f16, bf16] in { -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let SubtargetPredicate = p in { +let SubtargetPredicate = NotUseRealTrue16Insts in { def : GCNPat < (fabs (fp16vt VGPR_32:$src)), (V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src) @@ -2230,8 +2228,7 @@ def : GCNPat < } foreach fp16vt = [f16, bf16] in { -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let 
True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat < (fcopysign fp16vt:$src0, fp16vt:$src1), (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1) @@ -2354,23 +2351,21 @@ def : GCNPat < (S_MOV_B32 $ga) >; -foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in { - let True16Predicate = pred in { - def : GCNPat < - (VGPRImm<(i16 imm)>:$imm), - (V_MOV_B32_e32 imm:$imm) - >; +let True16Predicate = NotUseRealTrue16Insts in { + def : GCNPat < + (VGPRImm<(i16 imm)>:$imm), + (V_MOV_B32_e32 imm:$imm) + >; - // FIXME: Workaround for ordering issue with peephole optimizer where - // a register class copy interferes with immediate folding. Should - // use s_mov_b32, which can be shrunk to s_movk_i32 + // FIXME: Workaround for ordering issue with peephole optimizer where + // a register class copy interferes with immediate folding. Should + // use s_mov_b32, which can be shrunk to s_movk_i32 - foreach vt = [f16, bf16] in { - def : GCNPat < - (VGPRImm<(vt fpimm)>:$imm), - (V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm))) - >; - } + foreach vt = [f16, bf16] in { + def : GCNPat < + (VGPRImm<(vt fpimm)>:$imm), + (V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm))) + >; } } @@ -2859,8 +2854,7 @@ def : GCNPat< (i32 (DivergentSextInreg<i1> i32:$src)), (V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat < (i16 (DivergentSextInreg<i1> i16:$src)), (V_BFE_I32_e64 $src, (i32 0), (i32 1)) @@ -3205,8 +3199,7 @@ def : GCNPat< } } // AddedComplexity = 1 -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat< (i32 (DivergentUnaryFrag<zext> i16:$src)), (V_AND_B32_e64 (S_MOV_B32 (i32 0xffff)), $src) @@ -3416,8 +3409,7 @@ def : GCNPat < // Magic number: 1 | (0 << 8) | (12 << 16) | (12 << 24) // The 12s emit 0s. 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat < (i16 (bswap i16:$a)), (V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001))) @@ -3670,8 +3662,7 @@ def : GCNPat < (S_LSHL_B32 SReg_32:$src1, (i16 16)) >; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat < (v2i16 (DivergentBinFrag<build_vector> (i16 0), (i16 VGPR_32:$src1))), (v2i16 (V_LSHLREV_B32_e64 (i16 16), VGPR_32:$src1)) @@ -3707,8 +3698,7 @@ def : GCNPat < (COPY_TO_REGCLASS SReg_32:$src0, SReg_32) >; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat < (vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$src0), (Ty undef))), (COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32) @@ -3735,8 +3725,7 @@ def : GCNPat < >; let SubtargetPredicate = HasVOP3PInsts in { -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in +let True16Predicate = NotUseRealTrue16Insts in def : GCNPat < (v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 VGPR_32:$src1))), (v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0)))) @@ -3766,8 +3755,7 @@ def : GCNPat < (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1) >; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { // Take the lower 16 bits from each VGPR_32 and concat them def : GCNPat < (vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$a), (Ty VGPR_32:$b))), @@ -3838,8 +3826,7 @@ def : GCNPat < >; // Take the upper 16 bits from each VGPR_32 and concat them -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in +let True16Predicate = NotUseRealTrue16Insts in def : GCNPat < (vecTy (DivergentBinFrag<build_vector> (Ty !if(!eq(Ty, i16), @@ -3881,8 +3868,7 @@ def : GCNPat < (v2i16 (S_PACK_HL_B32_B16 SReg_32:$src0, SReg_32:$src1)) >; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat < (v2f16 (scalar_to_vector f16:$src0)), (COPY $src0) diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 296ce5a..b3fd8c7 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1616,7 +1616,8 @@ def S_BARRIER_WAIT : SOPP_Pseudo <"s_barrier_wait", (ins i16imm:$simm16), "$simm let isConvergent = 1; } -def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins)> { + def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", + (ins), "", [(int_amdgcn_s_barrier_leave (i16 srcvalue))] > { let SchedRW = [WriteBarrier]; let simm16 = 0; let fixed_imm = 1; @@ -1624,9 +1625,6 @@ def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins)> { let Defs = [SCC]; } -def S_BARRIER_LEAVE_IMM : SOPP_Pseudo <"s_barrier_leave", - (ins i16imm:$simm16), "$simm16", [(int_amdgcn_s_barrier_leave timm:$simm16)]>; - def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > { let SubtargetPredicate = isGFX8Plus; let simm16 = 0; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 6230c17..77df721 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ 
-1561,8 +1561,7 @@ def : GCNPat < } // End OtherPredicates = [isGFX8Plus] -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let OtherPredicates = [isGFX8Plus, p] in { +let OtherPredicates = [isGFX8Plus, NotUseRealTrue16Insts] in { def : GCNPat< (i32 (anyext i16:$src)), (COPY $src) diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 37d92bc..30dab55 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -1378,8 +1378,7 @@ class ZExt_i16_i1_Pat <SDNode ext> : GCNPat < $src) >; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat < (and i16:$src0, i16:$src1), (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index e6a7c35..4a2b54d 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -387,8 +387,7 @@ let SchedRW = [Write64Bit] in { } // End SchedRW = [Write64Bit] } // End isReMaterializable = 1 -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in +let True16Predicate = NotUseRealTrue16Insts in def : GCNPat< (i32 (DivergentUnaryFrag<sext> i16:$src)), (i32 (V_BFE_I32_e64 i16:$src, (i32 0), (i32 0x10))) @@ -501,8 +500,7 @@ def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32 } // End SubtargetPredicate = Has16BitInsts, isCommutable = 1 -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in +let True16Predicate = NotUseRealTrue16Insts in def : GCNPat< (i64 (DivergentUnaryFrag<sext> i16:$src)), (REG_SEQUENCE VReg_64, diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 52ee1e8..5daf860 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -402,8 +402,7 @@ defm V_FMA_MIX_F16_t16 : VOP3_VOP3PInst_t16<"v_fma_mix_f16_t16", VOP3P_Mix_Profi defm : MadFmaMixFP32Pats<fma, V_FMA_MIX_F32>; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in +let True16Predicate = NotUseRealTrue16Insts in defm : MadFmaMixFP16Pats<fma, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>; let True16Predicate = UseRealTrue16Insts in defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_F16_t16>; @@ -428,8 +427,7 @@ defm V_FMA_MIX_BF16_t16 : VOP3_VOP3PInst_t16<"v_fma_mix_bf16_t16", VOP3P_Mix_Pro } // End isCommutable = 1 defm : MadFmaMixFP32Pats<fma, V_FMA_MIX_F32_BF16, bf16>; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in +let True16Predicate = NotUseRealTrue16Insts in defm : MadFmaMixFP16Pats<fma, V_FMA_MIXLO_BF16, V_FMA_MIXHI_BF16, bf16, v2bf16>; let True16Predicate = UseRealTrue16Insts in defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_BF16_t16>; diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index e94220a..2e8a676 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -960,17 +960,3 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, } return false; } - -bool ARMBaseRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC, - unsigned DefSubReg, - const TargetRegisterClass *SrcRC, - unsigned SrcSubReg) const { - // We can't extract an SPR from an arbitary DPR (as opposed to a DPR_VFP2). 
- if (DefRC == &ARM::SPRRegClass && DefSubReg == 0 && - SrcRC == &ARM::DPRRegClass && - (SrcSubReg == ARM::ssub_0 || SrcSubReg == ARM::ssub_1)) - return false; - - return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg, - SrcRC, SrcSubReg); -} diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 5b67b34..03b0fa0 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -158,11 +158,6 @@ public: const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override; - bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, - unsigned DefSubReg, - const TargetRegisterClass *SrcRC, - unsigned SrcSubReg) const override; - int getSEHRegNum(unsigned i) const { return getEncodingValue(i); } }; diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td index c3ab965..1aefea1 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -182,10 +182,113 @@ class XX3Form_XTAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = XT{5}; } +class XX3Form_XTAB6_S<bits<5> xo, dag OOL, dag IOL, string asmstr, + list<dag> pattern> + : I<59, OOL, IOL, asmstr, NoItinerary> { + bits<6> XT; + bits<6> XA; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6...10} = XT{4...0}; + let Inst{11...15} = XA{4...0}; + let Inst{16...20} = XB{4...0}; + let Inst{24...28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX3Form_XTAB6_S3<bits<5> xo, dag OOL, dag IOL, string asmstr, + list<dag> pattern> + : XX3Form_XTAB6_S<xo, OOL, IOL, asmstr, pattern> { + + bits<3> S; + let Inst{21...23} = S; +} + +class XX3Form_XTAB6_3S1<bits<5> xo, dag OOL, dag IOL, string asmstr, + list<dag> pattern> + : XX3Form_XTAB6_S<xo, OOL, IOL, asmstr, pattern> { + + bits<1> S0; + bits<1> S1; + bits<1> S2; + + let Inst{21} = S0; + let Inst{22} = S1; + let Inst{23} = S2; +} + +class XX3Form_XTAB6_2S1<bits<5> xo, dag OOL, dag IOL, string asmstr, + list<dag> pattern> + : XX3Form_XTAB6_S<xo, OOL, IOL, asmstr, pattern> { + + bits<1> S1; + bits<1> S2; + + let Inst{21} = 0; + let Inst{22} = S1; + let Inst{23} = S2; +} + +class XX3Form_XTAB6_P<bits<7> xo, dag OOL, dag IOL, string asmstr, + list<dag> pattern> + : I<59, OOL, IOL, asmstr, NoItinerary> { + + bits<6> XT; + bits<6> XA; + bits<6> XB; + bits<1> P; + + let Pattern = pattern; + + let Inst{6...10} = XT{4...0}; + let Inst{11...15} = XA{4...0}; + let Inst{16...20} = XB{4...0}; + let Inst{21} = P; + let Inst{22...28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +// Prefix instruction classes. + +class 8RR_XX4Form_XTABC6_P<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XA; + bits<6> XB; + bits<6> XC; + bits<1> P; + + let Pattern = pattern; + + // The prefix. + let Inst{6...7} = 1; + let Inst{8...11} = 0; + + // The instruction. 
+ let Inst{38...42} = XT{4...0}; + let Inst{43...47} = XA{4...0}; + let Inst{48...52} = XB{4...0}; + let Inst{53...57} = XC{4...0}; + let Inst{58} = 1; + let Inst{59} = P; + let Inst{60} = XC{5}; + let Inst{61} = XA{5}; + let Inst{62} = XB{5}; + let Inst{63} = XT{5}; +} + //-------------------------- Instruction definitions -------------------------// // Predicate combinations available: // [IsISAFuture] // [HasVSX, IsISAFuture] +// [HasVSX, PrefixInstrs, IsISAFuture] let Predicates = [IsISAFuture] in { defm SUBFUS : XOForm_RTAB5_L1r<31, 72, (outs g8rc:$RT), @@ -294,6 +397,78 @@ let Predicates = [HasVSX, IsISAFuture] in { "xvmulhuw $XT, $XA, $XB", []>; def XVMULHUH: XX3Form_XTAB6<60, 122, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvmulhuh $XT, $XA, $XB", []>; + + // Elliptic Curve Cryptography Acceleration Instructions. + def XXMULMUL + : XX3Form_XTAB6_S3<1, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u3imm:$S), + "xxmulmul $XT, $XA, $XB, $S", []>; + def XXMULMULHIADD + : XX3Form_XTAB6_3S1<9, (outs vsrc:$XT), + (ins vsrc:$XA, vsrc:$XB, u1imm:$S0, u1imm:$S1, + u1imm:$S2), + "xxmulmulhiadd $XT, $XA, $XB, $S0, $S1, $S2", []>; + def XXMULMULLOADD + : XX3Form_XTAB6_2S1<17, (outs vsrc:$XT), + (ins vsrc:$XA, vsrc:$XB, u1imm:$S1, u1imm:$S2), + "xxmulmulloadd $XT, $XA, $XB, $S1, $S2", []>; + def XXSSUMUDM + : XX3Form_XTAB6_P<25, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u1imm:$P), + "xxssumudm $XT, $XA, $XB, $P", []>; + def XXSSUMUDMC + : XX3Form_XTAB6_P<57, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u1imm:$P), + "xxssumudmc $XT, $XA, $XB, $P", []>; + def XSADDADDUQM + : XX3Form_XTAB6<59, 96, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsaddadduqm $XT, $XA, $XB", []>; + def XSADDADDSUQM + : XX3Form_XTAB6<59, 104, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsaddaddsuqm $XT, $XA, $XB", []>; + def XSADDSUBUQM + : XX3Form_XTAB6<59, 112, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsaddsubuqm $XT, $XA, $XB", []>; + def XSADDSUBSUQM + : XX3Form_XTAB6<59, 224, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsaddsubsuqm $XT, $XA, $XB", []>; + def XSMERGE2T1UQM + : XX3Form_XTAB6<59, 232, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsmerge2t1uqm $XT, $XA, $XB", []>; + def XSMERGE2T2UQM + : XX3Form_XTAB6<59, 240, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsmerge2t2uqm $XT, $XA, $XB", []>; + def XSMERGE2T3UQM + : XX3Form_XTAB6<59, 89, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsmerge2t3uqm $XT, $XA, $XB", []>; + def XSMERGE3T1UQM + : XX3Form_XTAB6<59, 121, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsmerge3t1uqm $XT, $XA, $XB", []>; + def XSREBASE2T1UQM + : XX3Form_XTAB6<59, 145, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsrebase2t1uqm $XT, $XA, $XB", []>; + def XSREBASE2T2UQM + : XX3Form_XTAB6<59, 177, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsrebase2t2uqm $XT, $XA, $XB", []>; + def XSREBASE2T3UQM + : XX3Form_XTAB6<59, 209, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsrebase2t3uqm $XT, $XA, $XB", []>; + def XSREBASE2T4UQM + : XX3Form_XTAB6<59, 217, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsrebase2t4uqm $XT, $XA, $XB", []>; + def XSREBASE3T1UQM + : XX3Form_XTAB6<59, 241, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsrebase3t1uqm $XT, $XA, $XB", []>; + def XSREBASE3T2UQM + : XX3Form_XTAB6<59, 249, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsrebase3t2uqm $XT, $XA, $XB", []>; + def XSREBASE3T3UQM + : XX3Form_XTAB6<59, 195, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xsrebase3t3uqm $XT, $XA, $XB", []>; +} + +let Predicates = [HasVSX, PrefixInstrs, IsISAFuture] in { + 
def XXSSUMUDMCEXT + : 8RR_XX4Form_XTABC6_P< + 34, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC, u1imm:$P), + "xxssumudmcext $XT, $XA, $XB, $XC, $P", IIC_VecGeneral, []>; } //---------------------------- Anonymous Patterns ----------------------------// diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp index c2a6e51..b765fec 100644 --- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp @@ -81,6 +81,7 @@ public: void outputExecutionMode(const Module &M); void outputAnnotations(const Module &M); void outputModuleSections(); + void outputFPFastMathDefaultInfo(); bool isHidden() { return MF->getFunction() .getFnAttribute(SPIRV_BACKEND_SERVICE_FUN_NAME) @@ -498,11 +499,27 @@ void SPIRVAsmPrinter::outputExecutionMode(const Module &M) { NamedMDNode *Node = M.getNamedMetadata("spirv.ExecutionMode"); if (Node) { for (unsigned i = 0; i < Node->getNumOperands(); i++) { + // If SPV_KHR_float_controls2 is enabled and we find any of + // FPFastMathDefault, ContractionOff or SignedZeroInfNanPreserve execution + // modes, skip it, it'll be done somewhere else. + if (ST->canUseExtension(SPIRV::Extension::SPV_KHR_float_controls2)) { + const auto EM = + cast<ConstantInt>( + cast<ConstantAsMetadata>((Node->getOperand(i))->getOperand(1)) + ->getValue()) + ->getZExtValue(); + if (EM == SPIRV::ExecutionMode::FPFastMathDefault || + EM == SPIRV::ExecutionMode::ContractionOff || + EM == SPIRV::ExecutionMode::SignedZeroInfNanPreserve) + continue; + } + MCInst Inst; Inst.setOpcode(SPIRV::OpExecutionMode); addOpsFromMDNode(cast<MDNode>(Node->getOperand(i)), Inst, MAI); outputMCInst(Inst); } + outputFPFastMathDefaultInfo(); } for (auto FI = M.begin(), E = M.end(); FI != E; ++FI) { const Function &F = *FI; @@ -552,12 +569,84 @@ void SPIRVAsmPrinter::outputExecutionMode(const Module &M) { } if (ST->isKernel() && !M.getNamedMetadata("spirv.ExecutionMode") && !M.getNamedMetadata("opencl.enable.FP_CONTRACT")) { - MCInst Inst; - Inst.setOpcode(SPIRV::OpExecutionMode); - Inst.addOperand(MCOperand::createReg(FReg)); - unsigned EM = static_cast<unsigned>(SPIRV::ExecutionMode::ContractionOff); - Inst.addOperand(MCOperand::createImm(EM)); - outputMCInst(Inst); + if (ST->canUseExtension(SPIRV::Extension::SPV_KHR_float_controls2)) { + // When SPV_KHR_float_controls2 is enabled, ContractionOff is + // deprecated. We need to use FPFastMathDefault with the appropriate + // flags instead. Since FPFastMathDefault takes a target type, we need + // to emit it for each floating-point type that exists in the module + // to match the effect of ContractionOff. As of now, there are 3 FP + // types: fp16, fp32 and fp64. + + // We only end up here because there is no "spirv.ExecutionMode" + // metadata, so that means no FPFastMathDefault. Therefore, we only + // need to make sure AllowContract is set to 0, as the rest of flags. + // We still need to emit the OpExecutionMode instruction, otherwise + // it's up to the client API to define the flags. Therefore, we need + // to find the constant with 0 value. + + // Collect the SPIRVTypes for fp16, fp32, and fp64 and the constant of + // type int32 with 0 value to represent the FP Fast Math Mode. + std::vector<const MachineInstr *> SPIRVFloatTypes; + const MachineInstr *ConstZero = nullptr; + for (const MachineInstr *MI : + MAI->getMSInstrs(SPIRV::MB_TypeConstVars)) { + // Skip if the instruction is not OpTypeFloat or OpConstant. 
+ unsigned OpCode = MI->getOpcode(); + if (OpCode != SPIRV::OpTypeFloat && OpCode != SPIRV::OpConstantNull) + continue; + + // Collect the SPIRV type if it's a float. + if (OpCode == SPIRV::OpTypeFloat) { + // Skip if the target type is not fp16, fp32, fp64. + const unsigned OpTypeFloatSize = MI->getOperand(1).getImm(); + if (OpTypeFloatSize != 16 && OpTypeFloatSize != 32 && + OpTypeFloatSize != 64) { + continue; + } + SPIRVFloatTypes.push_back(MI); + } else { + // Check if the constant is int32, if not skip it. + const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); + MachineInstr *TypeMI = MRI.getVRegDef(MI->getOperand(1).getReg()); + if (!TypeMI || TypeMI->getOperand(1).getImm() != 32) + continue; + + ConstZero = MI; + } + } + + // When SPV_KHR_float_controls2 is enabled, ContractionOff is + // deprecated. We need to use FPFastMathDefault with the appropriate + // flags instead. Since FPFastMathDefault takes a target type, we need + // to emit it for each floating-point type that exists in the module + // to match the effect of ContractionOff. As of now, there are 3 FP + // types: fp16, fp32 and fp64. + for (const MachineInstr *MI : SPIRVFloatTypes) { + MCInst Inst; + Inst.setOpcode(SPIRV::OpExecutionModeId); + Inst.addOperand(MCOperand::createReg(FReg)); + unsigned EM = + static_cast<unsigned>(SPIRV::ExecutionMode::FPFastMathDefault); + Inst.addOperand(MCOperand::createImm(EM)); + const MachineFunction *MF = MI->getMF(); + MCRegister TypeReg = + MAI->getRegisterAlias(MF, MI->getOperand(0).getReg()); + Inst.addOperand(MCOperand::createReg(TypeReg)); + assert(ConstZero && "There should be a constant zero."); + MCRegister ConstReg = MAI->getRegisterAlias( + ConstZero->getMF(), ConstZero->getOperand(0).getReg()); + Inst.addOperand(MCOperand::createReg(ConstReg)); + outputMCInst(Inst); + } + } else { + MCInst Inst; + Inst.setOpcode(SPIRV::OpExecutionMode); + Inst.addOperand(MCOperand::createReg(FReg)); + unsigned EM = + static_cast<unsigned>(SPIRV::ExecutionMode::ContractionOff); + Inst.addOperand(MCOperand::createImm(EM)); + outputMCInst(Inst); + } } } } @@ -606,6 +695,101 @@ void SPIRVAsmPrinter::outputAnnotations(const Module &M) { } } +void SPIRVAsmPrinter::outputFPFastMathDefaultInfo() { + // Collect the SPIRVTypes that are OpTypeFloat and the constants of type + // int32, that might be used as FP Fast Math Mode. + std::vector<const MachineInstr *> SPIRVFloatTypes; + // Hashtable to associate immediate values with the constant holding them. + std::unordered_map<int, const MachineInstr *> ConstMap; + for (const MachineInstr *MI : MAI->getMSInstrs(SPIRV::MB_TypeConstVars)) { + // Skip if the instruction is not OpTypeFloat or OpConstant. + unsigned OpCode = MI->getOpcode(); + if (OpCode != SPIRV::OpTypeFloat && OpCode != SPIRV::OpConstantI && + OpCode != SPIRV::OpConstantNull) + continue; + + // Collect the SPIRV type if it's a float. + if (OpCode == SPIRV::OpTypeFloat) { + SPIRVFloatTypes.push_back(MI); + } else { + // Check if the constant is int32, if not skip it. 
+ const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); + MachineInstr *TypeMI = MRI.getVRegDef(MI->getOperand(1).getReg()); + if (!TypeMI || TypeMI->getOpcode() != SPIRV::OpTypeInt || + TypeMI->getOperand(1).getImm() != 32) + continue; + + if (OpCode == SPIRV::OpConstantI) + ConstMap[MI->getOperand(2).getImm()] = MI; + else + ConstMap[0] = MI; + } + } + + for (const auto &[Func, FPFastMathDefaultInfoVec] : + MAI->FPFastMathDefaultInfoMap) { + if (FPFastMathDefaultInfoVec.empty()) + continue; + + for (const MachineInstr *MI : SPIRVFloatTypes) { + unsigned OpTypeFloatSize = MI->getOperand(1).getImm(); + unsigned Index = SPIRV::FPFastMathDefaultInfoVector:: + computeFPFastMathDefaultInfoVecIndex(OpTypeFloatSize); + assert(Index < FPFastMathDefaultInfoVec.size() && + "Index out of bounds for FPFastMathDefaultInfoVec"); + const auto &FPFastMathDefaultInfo = FPFastMathDefaultInfoVec[Index]; + assert(FPFastMathDefaultInfo.Ty && + "Expected target type for FPFastMathDefaultInfo"); + assert(FPFastMathDefaultInfo.Ty->getScalarSizeInBits() == + OpTypeFloatSize && + "Mismatched float type size"); + MCInst Inst; + Inst.setOpcode(SPIRV::OpExecutionModeId); + MCRegister FuncReg = MAI->getFuncReg(Func); + assert(FuncReg.isValid()); + Inst.addOperand(MCOperand::createReg(FuncReg)); + Inst.addOperand( + MCOperand::createImm(SPIRV::ExecutionMode::FPFastMathDefault)); + MCRegister TypeReg = + MAI->getRegisterAlias(MI->getMF(), MI->getOperand(0).getReg()); + Inst.addOperand(MCOperand::createReg(TypeReg)); + unsigned Flags = FPFastMathDefaultInfo.FastMathFlags; + if (FPFastMathDefaultInfo.ContractionOff && + (Flags & SPIRV::FPFastMathMode::AllowContract)) + report_fatal_error( + "Conflicting FPFastMathFlags: ContractionOff and AllowContract"); + + if (FPFastMathDefaultInfo.SignedZeroInfNanPreserve && + !(Flags & + (SPIRV::FPFastMathMode::NotNaN | SPIRV::FPFastMathMode::NotInf | + SPIRV::FPFastMathMode::NSZ))) { + if (FPFastMathDefaultInfo.FPFastMathDefault) + report_fatal_error("Conflicting FPFastMathFlags: " + "SignedZeroInfNanPreserve but at least one of " + "NotNaN/NotInf/NSZ is enabled."); + } + + // Don't emit if none of the execution modes was used. + if (Flags == SPIRV::FPFastMathMode::None && + !FPFastMathDefaultInfo.ContractionOff && + !FPFastMathDefaultInfo.SignedZeroInfNanPreserve && + !FPFastMathDefaultInfo.FPFastMathDefault) + continue; + + // Retrieve the constant instruction for the immediate value. + auto It = ConstMap.find(Flags); + if (It == ConstMap.end()) + report_fatal_error("Expected constant instruction for FP Fast Math " + "Mode operand of FPFastMathDefault execution mode."); + const MachineInstr *ConstMI = It->second; + MCRegister ConstReg = MAI->getRegisterAlias( + ConstMI->getMF(), ConstMI->getOperand(0).getReg()); + Inst.addOperand(MCOperand::createReg(ConstReg)); + outputMCInst(Inst); + } + } +} + void SPIRVAsmPrinter::outputModuleSections() { const Module *M = MMI->getModule(); // Get the global subtarget to output module-level info. @@ -614,7 +798,8 @@ void SPIRVAsmPrinter::outputModuleSections() { MAI = &SPIRVModuleAnalysis::MAI; assert(ST && TII && MAI && M && "Module analysis is required"); // Output instructions according to the Logical Layout of a Module: - // 1,2. All OpCapability instructions, then optional OpExtension instructions. + // 1,2. All OpCapability instructions, then optional OpExtension + // instructions. outputGlobalRequirements(); // 3. Optional OpExtInstImport instructions. 
outputOpExtInstImports(*M); @@ -622,7 +807,8 @@ void SPIRVAsmPrinter::outputModuleSections() { outputOpMemoryModel(); // 5. All entry point declarations, using OpEntryPoint. outputEntryPoints(); - // 6. Execution-mode declarations, using OpExecutionMode or OpExecutionModeId. + // 6. Execution-mode declarations, using OpExecutionMode or + // OpExecutionModeId. outputExecutionMode(*M); // 7a. Debug: all OpString, OpSourceExtension, OpSource, and // OpSourceContinued, without forward references. diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index f704d3a..0e0c454 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -1162,11 +1162,24 @@ static unsigned getNumSizeComponents(SPIRVType *imgType) { static bool generateExtInst(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, - SPIRVGlobalRegistry *GR) { + SPIRVGlobalRegistry *GR, const CallBase &CB) { // Lookup the extended instruction number in the TableGen records. const SPIRV::DemangledBuiltin *Builtin = Call->Builtin; uint32_t Number = SPIRV::lookupExtendedBuiltin(Builtin->Name, Builtin->Set)->Number; + // fmin_common and fmax_common are now deprecated, and we should use fmin and + // fmax with NotInf and NotNaN flags instead. Keep original number to add + // later the NoNans and NoInfs flags. + uint32_t OrigNumber = Number; + const SPIRVSubtarget &ST = + cast<SPIRVSubtarget>(MIRBuilder.getMF().getSubtarget()); + if (ST.canUseExtension(SPIRV::Extension::SPV_KHR_float_controls2) && + (Number == SPIRV::OpenCLExtInst::fmin_common || + Number == SPIRV::OpenCLExtInst::fmax_common)) { + Number = (Number == SPIRV::OpenCLExtInst::fmin_common) + ? SPIRV::OpenCLExtInst::fmin + : SPIRV::OpenCLExtInst::fmax; + } // Build extended instruction. auto MIB = @@ -1178,6 +1191,13 @@ static bool generateExtInst(const SPIRV::IncomingCall *Call, for (auto Argument : Call->Arguments) MIB.addUse(Argument); + MIB.getInstr()->copyIRFlags(CB); + if (OrigNumber == SPIRV::OpenCLExtInst::fmin_common || + OrigNumber == SPIRV::OpenCLExtInst::fmax_common) { + // Add NoNans and NoInfs flags to fmin/fmax instruction. + MIB.getInstr()->setFlag(MachineInstr::MIFlag::FmNoNans); + MIB.getInstr()->setFlag(MachineInstr::MIFlag::FmNoInfs); + } return true; } @@ -2908,7 +2928,7 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall, MachineIRBuilder &MIRBuilder, const Register OrigRet, const Type *OrigRetTy, const SmallVectorImpl<Register> &Args, - SPIRVGlobalRegistry *GR) { + SPIRVGlobalRegistry *GR, const CallBase &CB) { LLVM_DEBUG(dbgs() << "Lowering builtin call: " << DemangledCall << "\n"); // Lookup the builtin in the TableGen records. @@ -2931,7 +2951,7 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall, // Match the builtin with implementation based on the grouping. 
switch (Call->Builtin->Group) { case SPIRV::Extended: - return generateExtInst(Call.get(), MIRBuilder, GR); + return generateExtInst(Call.get(), MIRBuilder, GR, CB); case SPIRV::Relational: return generateRelationalInst(Call.get(), MIRBuilder, GR); case SPIRV::Group: diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.h b/llvm/lib/Target/SPIRV/SPIRVBuiltins.h index 1a8641a..f6a5234 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.h +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.h @@ -39,7 +39,7 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall, MachineIRBuilder &MIRBuilder, const Register OrigRet, const Type *OrigRetTy, const SmallVectorImpl<Register> &Args, - SPIRVGlobalRegistry *GR); + SPIRVGlobalRegistry *GR, const CallBase &CB); /// Helper function for finding a builtin function attributes /// by a demangled function name. Defined in SPIRVBuiltins.cpp. diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index a412887..1a7c02c 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -641,9 +641,9 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, GR->getPointerSize())); } } - if (auto Res = - SPIRV::lowerBuiltin(DemangledName, ST->getPreferredInstructionSet(), - MIRBuilder, ResVReg, OrigRetTy, ArgVRegs, GR)) + if (auto Res = SPIRV::lowerBuiltin( + DemangledName, ST->getPreferredInstructionSet(), MIRBuilder, + ResVReg, OrigRetTy, ArgVRegs, GR, *Info.CB)) return *Res; } diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 704edd3..9f2e075 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/TypedPointerType.h" #include "llvm/Transforms/Utils/Local.h" +#include <cassert> #include <queue> #include <unordered_set> @@ -152,6 +153,7 @@ class SPIRVEmitIntrinsics void insertPtrCastOrAssignTypeInstr(Instruction *I, IRBuilder<> &B); bool shouldTryToAddMemAliasingDecoration(Instruction *Inst); void insertSpirvDecorations(Instruction *I, IRBuilder<> &B); + void insertConstantsForFPFastMathDefault(Module &M); void processGlobalValue(GlobalVariable &GV, IRBuilder<> &B); void processParamTypes(Function *F, IRBuilder<> &B); void processParamTypesByFunHeader(Function *F, IRBuilder<> &B); @@ -2249,6 +2251,198 @@ void SPIRVEmitIntrinsics::insertSpirvDecorations(Instruction *I, } } +static SPIRV::FPFastMathDefaultInfoVector &getOrCreateFPFastMathDefaultInfoVec( + const Module &M, + DenseMap<Function *, SPIRV::FPFastMathDefaultInfoVector> + &FPFastMathDefaultInfoMap, + Function *F) { + auto it = FPFastMathDefaultInfoMap.find(F); + if (it != FPFastMathDefaultInfoMap.end()) + return it->second; + + // If the map does not contain the entry, create a new one. Initialize it to + // contain all 3 elements sorted by bit width of target type: {half, float, + // double}. 
+ SPIRV::FPFastMathDefaultInfoVector FPFastMathDefaultInfoVec; + FPFastMathDefaultInfoVec.emplace_back(Type::getHalfTy(M.getContext()), + SPIRV::FPFastMathMode::None); + FPFastMathDefaultInfoVec.emplace_back(Type::getFloatTy(M.getContext()), + SPIRV::FPFastMathMode::None); + FPFastMathDefaultInfoVec.emplace_back(Type::getDoubleTy(M.getContext()), + SPIRV::FPFastMathMode::None); + return FPFastMathDefaultInfoMap[F] = std::move(FPFastMathDefaultInfoVec); +} + +static SPIRV::FPFastMathDefaultInfo &getFPFastMathDefaultInfo( + SPIRV::FPFastMathDefaultInfoVector &FPFastMathDefaultInfoVec, + const Type *Ty) { + size_t BitWidth = Ty->getScalarSizeInBits(); + int Index = + SPIRV::FPFastMathDefaultInfoVector::computeFPFastMathDefaultInfoVecIndex( + BitWidth); + assert(Index >= 0 && Index < 3 && + "Expected FPFastMathDefaultInfo for half, float, or double"); + assert(FPFastMathDefaultInfoVec.size() == 3 && + "Expected FPFastMathDefaultInfoVec to have exactly 3 elements"); + return FPFastMathDefaultInfoVec[Index]; +} + +void SPIRVEmitIntrinsics::insertConstantsForFPFastMathDefault(Module &M) { + const SPIRVSubtarget *ST = TM->getSubtargetImpl(); + if (!ST->canUseExtension(SPIRV::Extension::SPV_KHR_float_controls2)) + return; + + // Store the FPFastMathDefaultInfo in the FPFastMathDefaultInfoMap. + // We need the entry point (function) as the key, and the target + // type and flags as the value. + // We also need to check ContractionOff and SignedZeroInfNanPreserve + // execution modes, as they are now deprecated and must be replaced + // with FPFastMathDefaultInfo. + auto Node = M.getNamedMetadata("spirv.ExecutionMode"); + if (!Node) { + if (!M.getNamedMetadata("opencl.enable.FP_CONTRACT")) { + // This requires emitting ContractionOff. However, because + // ContractionOff is now deprecated, we need to replace it with + // FPFastMathDefaultInfo with FP Fast Math Mode bitmask set to all 0. + // We need to create the constant for that. + + // Create constant instruction with the bitmask flags. + Constant *InitValue = + ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); + // TODO: Reuse constant if there is one already with the required + // value. + [[maybe_unused]] GlobalVariable *GV = + new GlobalVariable(M, // Module + Type::getInt32Ty(M.getContext()), // Type + true, // isConstant + GlobalValue::InternalLinkage, // Linkage + InitValue // Initializer + ); + } + return; + } + + // The table maps function pointers to their default FP fast math info. It + // can be assumed that the SmallVector is sorted by the bit width of the + // type. The first element is the smallest bit width, and the last element + // is the largest bit width, therefore, we will have {half, float, double} + // in the order of their bit widths. 
+ DenseMap<Function *, SPIRV::FPFastMathDefaultInfoVector> + FPFastMathDefaultInfoMap; + + for (unsigned i = 0; i < Node->getNumOperands(); i++) { + MDNode *MDN = cast<MDNode>(Node->getOperand(i)); + assert(MDN->getNumOperands() >= 2 && "Expected at least 2 operands"); + Function *F = cast<Function>( + cast<ConstantAsMetadata>(MDN->getOperand(0))->getValue()); + const auto EM = + cast<ConstantInt>( + cast<ConstantAsMetadata>(MDN->getOperand(1))->getValue()) + ->getZExtValue(); + if (EM == SPIRV::ExecutionMode::FPFastMathDefault) { + assert(MDN->getNumOperands() == 4 && + "Expected 4 operands for FPFastMathDefault"); + const Type *T = cast<ValueAsMetadata>(MDN->getOperand(2))->getType(); + unsigned Flags = + cast<ConstantInt>( + cast<ConstantAsMetadata>(MDN->getOperand(3))->getValue()) + ->getZExtValue(); + SPIRV::FPFastMathDefaultInfoVector &FPFastMathDefaultInfoVec = + getOrCreateFPFastMathDefaultInfoVec(M, FPFastMathDefaultInfoMap, F); + SPIRV::FPFastMathDefaultInfo &Info = + getFPFastMathDefaultInfo(FPFastMathDefaultInfoVec, T); + Info.FastMathFlags = Flags; + Info.FPFastMathDefault = true; + } else if (EM == SPIRV::ExecutionMode::ContractionOff) { + assert(MDN->getNumOperands() == 2 && + "Expected no operands for ContractionOff"); + + // We need to save this info for every possible FP type, i.e. {half, + // float, double, fp128}. + SPIRV::FPFastMathDefaultInfoVector &FPFastMathDefaultInfoVec = + getOrCreateFPFastMathDefaultInfoVec(M, FPFastMathDefaultInfoMap, F); + for (SPIRV::FPFastMathDefaultInfo &Info : FPFastMathDefaultInfoVec) { + Info.ContractionOff = true; + } + } else if (EM == SPIRV::ExecutionMode::SignedZeroInfNanPreserve) { + assert(MDN->getNumOperands() == 3 && + "Expected 1 operand for SignedZeroInfNanPreserve"); + unsigned TargetWidth = + cast<ConstantInt>( + cast<ConstantAsMetadata>(MDN->getOperand(2))->getValue()) + ->getZExtValue(); + // We need to save this info only for the FP type with TargetWidth. + SPIRV::FPFastMathDefaultInfoVector &FPFastMathDefaultInfoVec = + getOrCreateFPFastMathDefaultInfoVec(M, FPFastMathDefaultInfoMap, F); + int Index = SPIRV::FPFastMathDefaultInfoVector:: + computeFPFastMathDefaultInfoVecIndex(TargetWidth); + assert(Index >= 0 && Index < 3 && + "Expected FPFastMathDefaultInfo for half, float, or double"); + assert(FPFastMathDefaultInfoVec.size() == 3 && + "Expected FPFastMathDefaultInfoVec to have exactly 3 elements"); + FPFastMathDefaultInfoVec[Index].SignedZeroInfNanPreserve = true; + } + } + + std::unordered_map<unsigned, GlobalVariable *> GlobalVars; + for (auto &[Func, FPFastMathDefaultInfoVec] : FPFastMathDefaultInfoMap) { + if (FPFastMathDefaultInfoVec.empty()) + continue; + + for (const SPIRV::FPFastMathDefaultInfo &Info : FPFastMathDefaultInfoVec) { + assert(Info.Ty && "Expected target type for FPFastMathDefaultInfo"); + // Skip if none of the execution modes was used. + unsigned Flags = Info.FastMathFlags; + if (Flags == SPIRV::FPFastMathMode::None && !Info.ContractionOff && + !Info.SignedZeroInfNanPreserve && !Info.FPFastMathDefault) + continue; + + // Check if flags are compatible. 
+ if (Info.ContractionOff && (Flags & SPIRV::FPFastMathMode::AllowContract)) + report_fatal_error("Conflicting FPFastMathFlags: ContractionOff " + "and AllowContract"); + + if (Info.SignedZeroInfNanPreserve && + !(Flags & + (SPIRV::FPFastMathMode::NotNaN | SPIRV::FPFastMathMode::NotInf | + SPIRV::FPFastMathMode::NSZ))) { + if (Info.FPFastMathDefault) + report_fatal_error("Conflicting FPFastMathFlags: " + "SignedZeroInfNanPreserve but at least one of " + "NotNaN/NotInf/NSZ is enabled."); + } + + if ((Flags & SPIRV::FPFastMathMode::AllowTransform) && + !((Flags & SPIRV::FPFastMathMode::AllowReassoc) && + (Flags & SPIRV::FPFastMathMode::AllowContract))) { + report_fatal_error("Conflicting FPFastMathFlags: " + "AllowTransform requires AllowReassoc and " + "AllowContract to be set."); + } + + auto it = GlobalVars.find(Flags); + GlobalVariable *GV = nullptr; + if (it != GlobalVars.end()) { + // Reuse existing global variable. + GV = it->second; + } else { + // Create constant instruction with the bitmask flags. + Constant *InitValue = + ConstantInt::get(Type::getInt32Ty(M.getContext()), Flags); + // TODO: Reuse constant if there is one already with the required + // value. + GV = new GlobalVariable(M, // Module + Type::getInt32Ty(M.getContext()), // Type + true, // isConstant + GlobalValue::InternalLinkage, // Linkage + InitValue // Initializer + ); + GlobalVars[Flags] = GV; + } + } + } +} + void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I, IRBuilder<> &B) { auto *II = dyn_cast<IntrinsicInst>(I); @@ -2569,9 +2763,9 @@ GetElementPtrInst * SPIRVEmitIntrinsics::simplifyZeroLengthArrayGepInst(GetElementPtrInst *GEP) { // getelementptr [0 x T], P, 0 (zero), I -> getelementptr T, P, I. // If type is 0-length array and first index is 0 (zero), drop both the - // 0-length array type and the first index. This is a common pattern in the - // IR, e.g. when using a zero-length array as a placeholder for a flexible - // array such as unbound arrays. + // 0-length array type and the first index. This is a common pattern in + // the IR, e.g. when using a zero-length array as a placeholder for a + // flexible array such as unbound arrays. assert(GEP && "GEP is null"); Type *SrcTy = GEP->getSourceElementType(); SmallVector<Value *, 8> Indices(GEP->indices()); @@ -2633,8 +2827,9 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { processParamTypesByFunHeader(CurrF, B); - // StoreInst's operand type can be changed during the next transformations, - // so we need to store it in the set. Also store already transformed types. + // StoreInst's operand type can be changed during the next + // transformations, so we need to store it in the set. Also store already + // transformed types. for (auto &I : instructions(Func)) { StoreInst *SI = dyn_cast<StoreInst>(&I); if (!SI) @@ -2681,8 +2876,8 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { for (auto &I : llvm::reverse(instructions(Func))) deduceOperandElementType(&I, &IncompleteRets); - // Pass forward for PHIs only, their operands are not preceed the instruction - // in meaning of `instructions(Func)`. + // Pass forward for PHIs only, their operands are not preceed the + // instruction in meaning of `instructions(Func)`. 
for (BasicBlock &BB : Func) for (PHINode &Phi : BB.phis()) if (isPointerTy(Phi.getType())) @@ -2692,8 +2887,8 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { TrackConstants = true; if (!I->getType()->isVoidTy() || isa<StoreInst>(I)) setInsertPointAfterDef(B, I); - // Visitors return either the original/newly created instruction for further - // processing, nullptr otherwise. + // Visitors return either the original/newly created instruction for + // further processing, nullptr otherwise. I = visit(*I); if (!I) continue; @@ -2816,6 +3011,7 @@ bool SPIRVEmitIntrinsics::runOnModule(Module &M) { bool Changed = false; parseFunDeclarations(M); + insertConstantsForFPFastMathDefault(M); TodoType.clear(); for (auto &F : M) diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 115766c..6fd1c7e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -806,7 +806,7 @@ Register SPIRVGlobalRegistry::buildGlobalVariable( // arguments. MDNode *GVarMD = nullptr; if (GVar && (GVarMD = GVar->getMetadata("spirv.Decorations")) != nullptr) - buildOpSpirvDecorations(Reg, MIRBuilder, GVarMD); + buildOpSpirvDecorations(Reg, MIRBuilder, GVarMD, ST); return Reg; } diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp index 45e88fc..ba95ad8 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp @@ -132,7 +132,8 @@ bool SPIRVInstrInfo::isHeaderInstr(const MachineInstr &MI) const { } } -bool SPIRVInstrInfo::canUseFastMathFlags(const MachineInstr &MI) const { +bool SPIRVInstrInfo::canUseFastMathFlags(const MachineInstr &MI, + bool KHRFloatControls2) const { switch (MI.getOpcode()) { case SPIRV::OpFAddS: case SPIRV::OpFSubS: @@ -146,6 +147,24 @@ bool SPIRVInstrInfo::canUseFastMathFlags(const MachineInstr &MI) const { case SPIRV::OpFRemV: case SPIRV::OpFMod: return true; + case SPIRV::OpFNegateV: + case SPIRV::OpFNegate: + case SPIRV::OpOrdered: + case SPIRV::OpUnordered: + case SPIRV::OpFOrdEqual: + case SPIRV::OpFOrdNotEqual: + case SPIRV::OpFOrdLessThan: + case SPIRV::OpFOrdLessThanEqual: + case SPIRV::OpFOrdGreaterThan: + case SPIRV::OpFOrdGreaterThanEqual: + case SPIRV::OpFUnordEqual: + case SPIRV::OpFUnordNotEqual: + case SPIRV::OpFUnordLessThan: + case SPIRV::OpFUnordLessThanEqual: + case SPIRV::OpFUnordGreaterThan: + case SPIRV::OpFUnordGreaterThanEqual: + case SPIRV::OpExtInst: + return KHRFloatControls2 ? 
true : false; default: return false; } diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h index 72d2243..4de9d6a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h @@ -36,7 +36,8 @@ public: bool isTypeDeclInstr(const MachineInstr &MI) const; bool isDecorationInstr(const MachineInstr &MI) const; bool isAliasingInstr(const MachineInstr &MI) const; - bool canUseFastMathFlags(const MachineInstr &MI) const; + bool canUseFastMathFlags(const MachineInstr &MI, + bool KHRFloatControls2) const; bool canUseNSW(const MachineInstr &MI) const; bool canUseNUW(const MachineInstr &MI) const; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 1aadd9d..273edf3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -1073,7 +1073,8 @@ bool SPIRVInstructionSelector::selectExtInst(Register ResVReg, .addDef(ResVReg) .addUse(GR.getSPIRVTypeID(ResType)) .addImm(static_cast<uint32_t>(Set)) - .addImm(Opcode); + .addImm(Opcode) + .setMIFlags(I.getFlags()); const unsigned NumOps = I.getNumOperands(); unsigned Index = 1; if (Index < NumOps && @@ -2629,6 +2630,7 @@ bool SPIRVInstructionSelector::selectCmp(Register ResVReg, .addUse(GR.getSPIRVTypeID(ResType)) .addUse(Cmp0) .addUse(Cmp1) + .setMIFlags(I.getFlags()) .constrainAllUses(TII, TRI, RBI); } diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index bc159d5..dc717a6 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -248,6 +248,22 @@ static InstrSignature instrToSignature(const MachineInstr &MI, Register DefReg; InstrSignature Signature{MI.getOpcode()}; for (unsigned i = 0; i < MI.getNumOperands(); ++i) { + // The only decorations that can be applied more than once to a given <id> + // or structure member are UserSemantic(5635), CacheControlLoadINTEL (6442), + // and CacheControlStoreINTEL (6443). For all the rest of decorations, we + // will only add to the signature the Opcode, the id to which it applies, + // and the decoration id, disregarding any decoration flags. This will + // ensure that any subsequent decoration with the same id will be deemed as + // a duplicate. Then, at the call site, we will be able to handle duplicates + // in the best way. 
+ unsigned Opcode = MI.getOpcode(); + if ((Opcode == SPIRV::OpDecorate) && i >= 2) { + unsigned DecorationID = MI.getOperand(1).getImm(); + if (DecorationID != SPIRV::Decoration::UserSemantic && + DecorationID != SPIRV::Decoration::CacheControlLoadINTEL && + DecorationID != SPIRV::Decoration::CacheControlStoreINTEL) + continue; + } const MachineOperand &MO = MI.getOperand(i); size_t h; if (MO.isReg()) { @@ -559,8 +575,54 @@ static void collectOtherInstr(MachineInstr &MI, SPIRV::ModuleAnalysisInfo &MAI, MAI.setSkipEmission(&MI); InstrSignature MISign = instrToSignature(MI, MAI, true); auto FoundMI = IS.insert(std::move(MISign)); - if (!FoundMI.second) + if (!FoundMI.second) { + if (MI.getOpcode() == SPIRV::OpDecorate) { + assert(MI.getNumOperands() >= 2 && + "Decoration instructions must have at least 2 operands"); + assert(MSType == SPIRV::MB_Annotations && + "Only OpDecorate instructions can be duplicates"); + // For FPFastMathMode decoration, we need to merge the flags of the + // duplicate decoration with the original one, so we need to find the + // original instruction that has the same signature. For the rest of + // instructions, we will simply skip the duplicate. + if (MI.getOperand(1).getImm() != SPIRV::Decoration::FPFastMathMode) + return; // Skip duplicates of other decorations. + + const SPIRV::InstrList &Decorations = MAI.MS[MSType]; + for (const MachineInstr *OrigMI : Decorations) { + if (instrToSignature(*OrigMI, MAI, true) == MISign) { + assert(OrigMI->getNumOperands() == MI.getNumOperands() && + "Original instruction must have the same number of operands"); + assert( + OrigMI->getNumOperands() == 3 && + "FPFastMathMode decoration must have 3 operands for OpDecorate"); + unsigned OrigFlags = OrigMI->getOperand(2).getImm(); + unsigned NewFlags = MI.getOperand(2).getImm(); + if (OrigFlags == NewFlags) + return; // No need to merge, the flags are the same. + + // Emit warning about possible conflict between flags. + unsigned FinalFlags = OrigFlags | NewFlags; + llvm::errs() + << "Warning: Conflicting FPFastMathMode decoration flags " + "in instruction: " + << *OrigMI << "Original flags: " << OrigFlags + << ", new flags: " << NewFlags + << ". They will be merged on a best effort basis, but not " + "validated. Final flags: " + << FinalFlags << "\n"; + MachineInstr *OrigMINonConst = const_cast<MachineInstr *>(OrigMI); + MachineOperand &OrigFlagsOp = OrigMINonConst->getOperand(2); + OrigFlagsOp = + MachineOperand::CreateImm(static_cast<unsigned>(FinalFlags)); + return; // Merge done, so we found a duplicate; don't add it to MAI.MS + } + } + assert(false && "No original instruction found for the duplicate " + "OpDecorate, but we found one in IS."); + } return; // insert failed, so we found a duplicate; don't add it to MAI.MS + } // No duplicates, so add it. if (Append) MAI.MS[MSType].push_back(&MI); @@ -934,6 +996,11 @@ static void addOpDecorateReqs(const MachineInstr &MI, unsigned DecIndex, } else if (Dec == SPIRV::Decoration::FPMaxErrorDecorationINTEL) { Reqs.addRequirements(SPIRV::Capability::FPMaxErrorINTEL); Reqs.addExtension(SPIRV::Extension::SPV_INTEL_fp_max_error); + } else if (Dec == SPIRV::Decoration::FPFastMathMode) { + if (ST.canUseExtension(SPIRV::Extension::SPV_KHR_float_controls2)) { + Reqs.addRequirements(SPIRV::Capability::FloatControls2); + Reqs.addExtension(SPIRV::Extension::SPV_KHR_float_controls2); + } } } @@ -1994,10 +2061,13 @@ static void collectReqs(const Module &M, SPIRV::ModuleAnalysisInfo &MAI, // Collect requirements for OpExecutionMode instructions. 
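The duplicate-merge path in collectOtherInstr above boils down to a bitwise OR of the two FPFastMathMode literals. A minimal sketch of that arithmetic follows (the helper name is invented; the bit values are the ones added to SPIRVSymbolicOperands.td in this patch):

#include <cstdint>
#include <iostream>

// FPFastMathMode bit values as added to SPIRVSymbolicOperands.td below.
constexpr uint32_t NotNaN = 0x1, NSZ = 0x4, AllowContract = 0x10000;

// Hypothetical helper mirroring the best-effort merge above: when a duplicate
// FPFastMathMode decoration carries different flags, the two sets are OR-ed.
uint32_t mergeFPFastMathFlags(uint32_t OrigFlags, uint32_t NewFlags) {
  if (OrigFlags == NewFlags)
    return OrigFlags;            // identical duplicate, nothing to merge
  return OrigFlags | NewFlags;   // conflicting duplicate, union of the flags
}

int main() {
  // One decoration requested NotNaN|NSZ, a duplicate requested AllowContract.
  std::cout << std::hex << mergeFPFastMathFlags(NotNaN | NSZ, AllowContract)
            << '\n'; // prints 10005
}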
auto Node = M.getNamedMetadata("spirv.ExecutionMode"); if (Node) { - bool RequireFloatControls = false, RequireFloatControls2 = false, + bool RequireFloatControls = false, RequireIntelFloatControls2 = false, + RequireKHRFloatControls2 = false, VerLower14 = !ST.isAtLeastSPIRVVer(VersionTuple(1, 4)); - bool HasFloatControls2 = + bool HasIntelFloatControls2 = ST.canUseExtension(SPIRV::Extension::SPV_INTEL_float_controls2); + bool HasKHRFloatControls2 = + ST.canUseExtension(SPIRV::Extension::SPV_KHR_float_controls2); for (unsigned i = 0; i < Node->getNumOperands(); i++) { MDNode *MDN = cast<MDNode>(Node->getOperand(i)); const MDOperand &MDOp = MDN->getOperand(1); @@ -2010,7 +2080,6 @@ static void collectReqs(const Module &M, SPIRV::ModuleAnalysisInfo &MAI, switch (EM) { case SPIRV::ExecutionMode::DenormPreserve: case SPIRV::ExecutionMode::DenormFlushToZero: - case SPIRV::ExecutionMode::SignedZeroInfNanPreserve: case SPIRV::ExecutionMode::RoundingModeRTE: case SPIRV::ExecutionMode::RoundingModeRTZ: RequireFloatControls = VerLower14; @@ -2021,8 +2090,28 @@ static void collectReqs(const Module &M, SPIRV::ModuleAnalysisInfo &MAI, case SPIRV::ExecutionMode::RoundingModeRTNINTEL: case SPIRV::ExecutionMode::FloatingPointModeALTINTEL: case SPIRV::ExecutionMode::FloatingPointModeIEEEINTEL: - if (HasFloatControls2) { - RequireFloatControls2 = true; + if (HasIntelFloatControls2) { + RequireIntelFloatControls2 = true; + MAI.Reqs.getAndAddRequirements( + SPIRV::OperandCategory::ExecutionModeOperand, EM, ST); + } + break; + case SPIRV::ExecutionMode::FPFastMathDefault: { + if (HasKHRFloatControls2) { + RequireKHRFloatControls2 = true; + MAI.Reqs.getAndAddRequirements( + SPIRV::OperandCategory::ExecutionModeOperand, EM, ST); + } + break; + } + case SPIRV::ExecutionMode::ContractionOff: + case SPIRV::ExecutionMode::SignedZeroInfNanPreserve: + if (HasKHRFloatControls2) { + RequireKHRFloatControls2 = true; + MAI.Reqs.getAndAddRequirements( + SPIRV::OperandCategory::ExecutionModeOperand, + SPIRV::ExecutionMode::FPFastMathDefault, ST); + } else { MAI.Reqs.getAndAddRequirements( SPIRV::OperandCategory::ExecutionModeOperand, EM, ST); } @@ -2037,8 +2126,10 @@ static void collectReqs(const Module &M, SPIRV::ModuleAnalysisInfo &MAI, if (RequireFloatControls && ST.canUseExtension(SPIRV::Extension::SPV_KHR_float_controls)) MAI.Reqs.addExtension(SPIRV::Extension::SPV_KHR_float_controls); - if (RequireFloatControls2) + if (RequireIntelFloatControls2) MAI.Reqs.addExtension(SPIRV::Extension::SPV_INTEL_float_controls2); + if (RequireKHRFloatControls2) + MAI.Reqs.addExtension(SPIRV::Extension::SPV_KHR_float_controls2); } for (auto FI = M.begin(), E = M.end(); FI != E; ++FI) { const Function &F = *FI; @@ -2078,8 +2169,11 @@ static void collectReqs(const Module &M, SPIRV::ModuleAnalysisInfo &MAI, } } -static unsigned getFastMathFlags(const MachineInstr &I) { +static unsigned getFastMathFlags(const MachineInstr &I, + const SPIRVSubtarget &ST) { unsigned Flags = SPIRV::FPFastMathMode::None; + bool CanUseKHRFloatControls2 = + ST.canUseExtension(SPIRV::Extension::SPV_KHR_float_controls2); if (I.getFlag(MachineInstr::MIFlag::FmNoNans)) Flags |= SPIRV::FPFastMathMode::NotNaN; if (I.getFlag(MachineInstr::MIFlag::FmNoInfs)) @@ -2088,12 +2182,45 @@ static unsigned getFastMathFlags(const MachineInstr &I) { Flags |= SPIRV::FPFastMathMode::NSZ; if (I.getFlag(MachineInstr::MIFlag::FmArcp)) Flags |= SPIRV::FPFastMathMode::AllowRecip; - if (I.getFlag(MachineInstr::MIFlag::FmReassoc)) - Flags |= SPIRV::FPFastMathMode::Fast; + if 
(I.getFlag(MachineInstr::MIFlag::FmContract) && CanUseKHRFloatControls2) + Flags |= SPIRV::FPFastMathMode::AllowContract; + if (I.getFlag(MachineInstr::MIFlag::FmReassoc)) { + if (CanUseKHRFloatControls2) + // LLVM reassoc maps to SPIRV transform, see + // https://github.com/KhronosGroup/SPIRV-Registry/issues/326 for details. + // Because we are enabling AllowTransform, we must enable AllowReassoc and + // AllowContract too, as required by the SPIR-V spec. Also, we used to map + // MIFlag::FmReassoc to FPFastMathMode::Fast; that mapping is now + // replaced by setting all the other bits. Therefore, we're + // enabling every bit here except None and Fast. + Flags |= SPIRV::FPFastMathMode::NotNaN | SPIRV::FPFastMathMode::NotInf | + SPIRV::FPFastMathMode::NSZ | SPIRV::FPFastMathMode::AllowRecip | + SPIRV::FPFastMathMode::AllowTransform | + SPIRV::FPFastMathMode::AllowReassoc | + SPIRV::FPFastMathMode::AllowContract; + else + Flags |= SPIRV::FPFastMathMode::Fast; + } + + if (CanUseKHRFloatControls2) { + // Error out if SPIRV::FPFastMathMode::Fast is enabled. + assert(!(Flags & SPIRV::FPFastMathMode::Fast) && + "SPIRV::FPFastMathMode::Fast is deprecated and should not be used " + "anymore."); + + // Error out if AllowTransform is enabled without AllowReassoc and + // AllowContract. + assert((!(Flags & SPIRV::FPFastMathMode::AllowTransform) || + ((Flags & SPIRV::FPFastMathMode::AllowReassoc && + Flags & SPIRV::FPFastMathMode::AllowContract))) && + "SPIRV::FPFastMathMode::AllowTransform requires AllowReassoc and " + "AllowContract flags to be enabled as well."); + } + + return Flags; } -static bool isFastMathMathModeAvailable(const SPIRVSubtarget &ST) { +static bool isFastMathModeAvailable(const SPIRVSubtarget &ST) { if (ST.isKernel()) return true; if (ST.getSPIRVVersion() < VersionTuple(1, 2)) @@ -2101,9 +2228,10 @@ static bool isFastMathMathModeAvailable(const SPIRVSubtarget &ST) { return ST.canUseExtension(SPIRV::Extension::SPV_KHR_float_controls2); } -static void handleMIFlagDecoration(MachineInstr &I, const SPIRVSubtarget &ST, - const SPIRVInstrInfo &TII, - SPIRV::RequirementHandler &Reqs) { +static void handleMIFlagDecoration( + MachineInstr &I, const SPIRVSubtarget &ST, const SPIRVInstrInfo &TII, + SPIRV::RequirementHandler &Reqs, const SPIRVGlobalRegistry *GR, + SPIRV::FPFastMathDefaultInfoVector &FPFastMathDefaultInfoVec) { if (I.getFlag(MachineInstr::MIFlag::NoSWrap) && TII.canUseNSW(I) && getSymbolicOperandRequirements(SPIRV::OperandCategory::DecorationOperand, SPIRV::Decoration::NoSignedWrap, ST, Reqs) @@ -2119,13 +2247,53 @@ static void handleMIFlagDecoration(MachineInstr &I, const SPIRVSubtarget &ST, buildOpDecorate(I.getOperand(0).getReg(), I, TII, SPIRV::Decoration::NoUnsignedWrap, {}); } - if (!TII.canUseFastMathFlags(I)) - return; - unsigned FMFlags = getFastMathFlags(I); - if (FMFlags == SPIRV::FPFastMathMode::None) + if (!TII.canUseFastMathFlags( + I, ST.canUseExtension(SPIRV::Extension::SPV_KHR_float_controls2))) return; - if (isFastMathMathModeAvailable(ST)) { + unsigned FMFlags = getFastMathFlags(I, ST); + if (FMFlags == SPIRV::FPFastMathMode::None) { + // We also need to check if any FPFastMathDefault info was set for the + // types used in this instruction. + if (FPFastMathDefaultInfoVec.empty()) + return; + + // There are three types of instructions that can use fast math flags: + // 1. Arithmetic instructions (FAdd, FMul, FSub, FDiv, FRem, etc.) + // 2. Relational instructions (FCmp, FOrd, FUnord, etc.) + // 3.
Extended instructions (ExtInst) + // For arithmetic instructions, the floating point type can be in the + // result type or in the operands, but they all must be the same. + // For the relational and logical instructions, the floating point type + // can only be in the operands 1 and 2, not the result type. Also, the + // operands must have the same type. For the extended instructions, the + // floating point type can be in the result type or in the operands. It's + // unclear if the operands and the result type must be the same. Let's + // assume they must be. Therefore, for 1. and 2., we can check the first + // operand type, and for 3. we can check the result type. + assert(I.getNumOperands() >= 3 && "Expected at least 3 operands"); + Register ResReg = I.getOpcode() == SPIRV::OpExtInst + ? I.getOperand(1).getReg() + : I.getOperand(2).getReg(); + SPIRVType *ResType = GR->getSPIRVTypeForVReg(ResReg, I.getMF()); + const Type *Ty = GR->getTypeForSPIRVType(ResType); + Ty = Ty->isVectorTy() ? cast<VectorType>(Ty)->getElementType() : Ty; + + // Match instruction type with the FPFastMathDefaultInfoVec. + bool Emit = false; + for (SPIRV::FPFastMathDefaultInfo &Elem : FPFastMathDefaultInfoVec) { + if (Ty == Elem.Ty) { + FMFlags = Elem.FastMathFlags; + Emit = Elem.ContractionOff || Elem.SignedZeroInfNanPreserve || + Elem.FPFastMathDefault; + break; + } + } + + if (FMFlags == SPIRV::FPFastMathMode::None && !Emit) + return; + } + if (isFastMathModeAvailable(ST)) { Register DstReg = I.getOperand(0).getReg(); buildOpDecorate(DstReg, I, TII, SPIRV::Decoration::FPFastMathMode, {FMFlags}); @@ -2135,14 +2303,17 @@ static void handleMIFlagDecoration(MachineInstr &I, const SPIRVSubtarget &ST, // Walk all functions and add decorations related to MI flags. static void addDecorations(const Module &M, const SPIRVInstrInfo &TII, MachineModuleInfo *MMI, const SPIRVSubtarget &ST, - SPIRV::ModuleAnalysisInfo &MAI) { + SPIRV::ModuleAnalysisInfo &MAI, + const SPIRVGlobalRegistry *GR) { for (auto F = M.begin(), E = M.end(); F != E; ++F) { MachineFunction *MF = MMI->getMachineFunction(*F); if (!MF) continue; + for (auto &MBB : *MF) for (auto &MI : MBB) - handleMIFlagDecoration(MI, ST, TII, MAI.Reqs); + handleMIFlagDecoration(MI, ST, TII, MAI.Reqs, GR, + MAI.FPFastMathDefaultInfoMap[&(*F)]); } } @@ -2188,6 +2359,111 @@ static void patchPhis(const Module &M, SPIRVGlobalRegistry *GR, } } +static SPIRV::FPFastMathDefaultInfoVector &getOrCreateFPFastMathDefaultInfoVec( + const Module &M, SPIRV::ModuleAnalysisInfo &MAI, const Function *F) { + auto it = MAI.FPFastMathDefaultInfoMap.find(F); + if (it != MAI.FPFastMathDefaultInfoMap.end()) + return it->second; + + // If the map does not contain the entry, create a new one. Initialize it to + // contain all 3 elements sorted by bit width of target type: {half, float, + // double}. 
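The fallback path above (an instruction with no explicit fast-math flags picking up the per-function defaults recorded for its scalar FP type) can be sketched in isolation as follows. This is a simplified model, not the patch's code: the struct and helper names are invented, and the three entries stand in for the {half, float, double} vector described in the comment above.

#include <array>
#include <cassert>
#include <cstdint>

// Simplified stand-in for the per-function defaults: one entry per FP width,
// kept in the fixed order {half, float, double}.
struct DefaultInfo {
  unsigned BitWidth = 0;
  uint32_t FastMathFlags = 0;
  bool ContractionOff = false;
  bool SignedZeroInfNanPreserve = false;
  bool FPFastMathDefault = false;
};

// An instruction without explicit fast-math flags falls back to the entry
// matching its scalar FP type; a decoration is emitted only if some execution
// mode actually touched that entry.
uint32_t lookupDefaultFlags(const std::array<DefaultInfo, 3> &Defaults,
                            unsigned BitWidth, bool &Emit) {
  for (const DefaultInfo &D : Defaults) {
    if (D.BitWidth == BitWidth) {
      Emit = D.ContractionOff || D.SignedZeroInfNanPreserve || D.FPFastMathDefault;
      return D.FastMathFlags;
    }
  }
  Emit = false;
  return 0;
}

int main() {
  std::array<DefaultInfo, 3> Defaults = {DefaultInfo{16}, DefaultInfo{32},
                                         DefaultInfo{64}};
  Defaults[1].FastMathFlags = 0x10000; // FPFastMathDefault for float: AllowContract
  Defaults[1].FPFastMathDefault = true;
  bool Emit = false;
  assert(lookupDefaultFlags(Defaults, 32, Emit) == 0x10000 && Emit);
  assert(lookupDefaultFlags(Defaults, 64, Emit) == 0 && !Emit);
}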
+ SPIRV::FPFastMathDefaultInfoVector FPFastMathDefaultInfoVec; + FPFastMathDefaultInfoVec.emplace_back(Type::getHalfTy(M.getContext()), + SPIRV::FPFastMathMode::None); + FPFastMathDefaultInfoVec.emplace_back(Type::getFloatTy(M.getContext()), + SPIRV::FPFastMathMode::None); + FPFastMathDefaultInfoVec.emplace_back(Type::getDoubleTy(M.getContext()), + SPIRV::FPFastMathMode::None); + return MAI.FPFastMathDefaultInfoMap[F] = std::move(FPFastMathDefaultInfoVec); +} + +static SPIRV::FPFastMathDefaultInfo &getFPFastMathDefaultInfo( + SPIRV::FPFastMathDefaultInfoVector &FPFastMathDefaultInfoVec, + const Type *Ty) { + size_t BitWidth = Ty->getScalarSizeInBits(); + int Index = + SPIRV::FPFastMathDefaultInfoVector::computeFPFastMathDefaultInfoVecIndex( + BitWidth); + assert(Index >= 0 && Index < 3 && + "Expected FPFastMathDefaultInfo for half, float, or double"); + assert(FPFastMathDefaultInfoVec.size() == 3 && + "Expected FPFastMathDefaultInfoVec to have exactly 3 elements"); + return FPFastMathDefaultInfoVec[Index]; +} + +static void collectFPFastMathDefaults(const Module &M, + SPIRV::ModuleAnalysisInfo &MAI, + const SPIRVSubtarget &ST) { + if (!ST.canUseExtension(SPIRV::Extension::SPV_KHR_float_controls2)) + return; + + // Store the FPFastMathDefaultInfo in the FPFastMathDefaultInfoMap. + // We need the entry point (function) as the key, and the target + // type and flags as the value. + // We also need to check ContractionOff and SignedZeroInfNanPreserve + // execution modes, as they are now deprecated and must be replaced + // with FPFastMathDefaultInfo. + auto Node = M.getNamedMetadata("spirv.ExecutionMode"); + if (!Node) + return; + + for (unsigned i = 0; i < Node->getNumOperands(); i++) { + MDNode *MDN = cast<MDNode>(Node->getOperand(i)); + assert(MDN->getNumOperands() >= 2 && "Expected at least 2 operands"); + const Function *F = cast<Function>( + cast<ConstantAsMetadata>(MDN->getOperand(0))->getValue()); + const auto EM = + cast<ConstantInt>( + cast<ConstantAsMetadata>(MDN->getOperand(1))->getValue()) + ->getZExtValue(); + if (EM == SPIRV::ExecutionMode::FPFastMathDefault) { + assert(MDN->getNumOperands() == 4 && + "Expected 4 operands for FPFastMathDefault"); + + const Type *T = cast<ValueAsMetadata>(MDN->getOperand(2))->getType(); + unsigned Flags = + cast<ConstantInt>( + cast<ConstantAsMetadata>(MDN->getOperand(3))->getValue()) + ->getZExtValue(); + SPIRV::FPFastMathDefaultInfoVector &FPFastMathDefaultInfoVec = + getOrCreateFPFastMathDefaultInfoVec(M, MAI, F); + SPIRV::FPFastMathDefaultInfo &Info = + getFPFastMathDefaultInfo(FPFastMathDefaultInfoVec, T); + Info.FastMathFlags = Flags; + Info.FPFastMathDefault = true; + } else if (EM == SPIRV::ExecutionMode::ContractionOff) { + assert(MDN->getNumOperands() == 2 && + "Expected no operands for ContractionOff"); + + // We need to save this info for every possible FP type, i.e. {half, + // float, double, fp128}. + SPIRV::FPFastMathDefaultInfoVector &FPFastMathDefaultInfoVec = + getOrCreateFPFastMathDefaultInfoVec(M, MAI, F); + for (SPIRV::FPFastMathDefaultInfo &Info : FPFastMathDefaultInfoVec) { + Info.ContractionOff = true; + } + } else if (EM == SPIRV::ExecutionMode::SignedZeroInfNanPreserve) { + assert(MDN->getNumOperands() == 3 && + "Expected 1 operand for SignedZeroInfNanPreserve"); + unsigned TargetWidth = + cast<ConstantInt>( + cast<ConstantAsMetadata>(MDN->getOperand(2))->getValue()) + ->getZExtValue(); + // We need to save this info only for the FP type with TargetWidth. 
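The TargetWidth-to-slot mapping used just below (and defined as computeFPFastMathDefaultInfoVecIndex in SPIRVUtils.h later in this patch) is a fixed three-way switch. A standalone sketch of the same mapping, with an invented helper name:

#include <cassert>
#include <cstddef>

// Same fixed ordering as the per-function defaults vector:
// slot 0 = half (16 bits), slot 1 = float (32), slot 2 = double (64).
int slotForBitWidth(size_t BitWidth) {
  switch (BitWidth) {
  case 16: return 0;
  case 32: return 1;
  case 64: return 2;
  default: return -1; // the in-tree helper reports a fatal error instead
  }
}

int main() {
  assert(slotForBitWidth(16) == 0);
  assert(slotForBitWidth(32) == 1);
  // A SignedZeroInfNanPreserve mode with TargetWidth 64 marks only slot 2.
  assert(slotForBitWidth(64) == 2);
}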
+ SPIRV::FPFastMathDefaultInfoVector &FPFastMathDefaultInfoVec = + getOrCreateFPFastMathDefaultInfoVec(M, MAI, F); + int Index = SPIRV::FPFastMathDefaultInfoVector:: + computeFPFastMathDefaultInfoVecIndex(TargetWidth); + assert(Index >= 0 && Index < 3 && + "Expected FPFastMathDefaultInfo for half, float, or double"); + assert(FPFastMathDefaultInfoVec.size() == 3 && + "Expected FPFastMathDefaultInfoVec to have exactly 3 elements"); + FPFastMathDefaultInfoVec[Index].SignedZeroInfNanPreserve = true; + } + } +} + struct SPIRV::ModuleAnalysisInfo SPIRVModuleAnalysis::MAI; void SPIRVModuleAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { @@ -2209,7 +2485,8 @@ bool SPIRVModuleAnalysis::runOnModule(Module &M) { patchPhis(M, GR, *TII, MMI); addMBBNames(M, *TII, MMI, *ST, MAI); - addDecorations(M, *TII, MMI, *ST, MAI); + collectFPFastMathDefaults(M, MAI, *ST); + addDecorations(M, *TII, MMI, *ST, MAI, GR); collectReqs(M, MAI, MMI, *ST); diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h index 41c792a..d8376cd 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h @@ -159,6 +159,13 @@ struct ModuleAnalysisInfo { InstrList MS[NUM_MODULE_SECTIONS]; // The table maps MBB number to SPIR-V unique ID register. DenseMap<std::pair<const MachineFunction *, int>, MCRegister> BBNumToRegMap; + // The table maps function pointers to their default FP fast math info. It can + // be assumed that the SmallVector is sorted by the bit width of the type. The + // first element is the smallest bit width, and the last element is the + // largest bit width, therefore, we will have {half, float, double} in + // the order of their bit widths. + DenseMap<const Function *, SPIRV::FPFastMathDefaultInfoVector> + FPFastMathDefaultInfoMap; MCRegister getFuncReg(const Function *F) { assert(F && "Function is null"); diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index 1a08c6a..db6f2d6 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -839,6 +839,7 @@ static uint32_t convertFloatToSPIRVWord(float F) { static void insertSpirvDecorations(MachineFunction &MF, SPIRVGlobalRegistry *GR, MachineIRBuilder MIB) { + const SPIRVSubtarget &ST = cast<SPIRVSubtarget>(MIB.getMF().getSubtarget()); SmallVector<MachineInstr *, 10> ToErase; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { @@ -849,7 +850,7 @@ static void insertSpirvDecorations(MachineFunction &MF, SPIRVGlobalRegistry *GR, MIB.setInsertPt(*MI.getParent(), MI.getNextNode()); if (isSpvIntrinsic(MI, Intrinsic::spv_assign_decoration)) { buildOpSpirvDecorations(MI.getOperand(1).getReg(), MIB, - MI.getOperand(2).getMetadata()); + MI.getOperand(2).getMetadata(), ST); } else if (isSpvIntrinsic(MI, Intrinsic::spv_assign_fpmaxerror_decoration)) { ConstantFP *OpV = mdconst::dyn_extract<ConstantFP>( diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index 66ce5a2..6a32dba 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -802,6 +802,7 @@ defm RoundingModeRTPINTEL : ExecutionModeOperand<5620, [RoundToInfinityINTEL]>; defm RoundingModeRTNINTEL : ExecutionModeOperand<5621, [RoundToInfinityINTEL]>; defm FloatingPointModeALTINTEL : ExecutionModeOperand<5622, [FloatingPointModeINTEL]>; defm FloatingPointModeIEEEINTEL : ExecutionModeOperand<5623, 
[FloatingPointModeINTEL]>; +defm FPFastMathDefault : ExecutionModeOperand<6028, [FloatControls2]>; //===----------------------------------------------------------------------===// // Multiclass used to define StorageClass enum values and at the same time @@ -1153,6 +1154,9 @@ defm NotInf : FPFastMathModeOperand<0x2, [Kernel]>; defm NSZ : FPFastMathModeOperand<0x4, [Kernel]>; defm AllowRecip : FPFastMathModeOperand<0x8, [Kernel]>; defm Fast : FPFastMathModeOperand<0x10, [Kernel]>; +defm AllowContract : FPFastMathModeOperand<0x10000, [FloatControls2]>; +defm AllowReassoc : FPFastMathModeOperand<0x20000, [FloatControls2]>; +defm AllowTransform : FPFastMathModeOperand<0x40000, [FloatControls2]>; //===----------------------------------------------------------------------===// // Multiclass used to define FPRoundingMode enum values and at the same time diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index 820e56b..327c011 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -181,7 +181,7 @@ void buildOpMemberDecorate(Register Reg, MachineInstr &I, } void buildOpSpirvDecorations(Register Reg, MachineIRBuilder &MIRBuilder, - const MDNode *GVarMD) { + const MDNode *GVarMD, const SPIRVSubtarget &ST) { for (unsigned I = 0, E = GVarMD->getNumOperands(); I != E; ++I) { auto *OpMD = dyn_cast<MDNode>(GVarMD->getOperand(I)); if (!OpMD) @@ -193,6 +193,20 @@ void buildOpSpirvDecorations(Register Reg, MachineIRBuilder &MIRBuilder, if (!DecorationId) report_fatal_error("Expect SPIR-V <Decoration> operand to be the first " "element of the decoration"); + + // The goal of `spirv.Decorations` metadata is to provide a way to + // represent SPIR-V entities that do not map to LLVM in an obvious way. + // FP flags, however, do have obvious matches between LLVM IR and SPIR-V. + // Additionally, we have no guarantee at this point that the flags passed + // through the decoration have not already been violated by the optimizer + // passes. Therefore, we simply ignore FP flags, including NoContraction and + // FPFastMathMode. + if (DecorationId->getZExtValue() == + static_cast<uint32_t>(SPIRV::Decoration::NoContraction) || + DecorationId->getZExtValue() == + static_cast<uint32_t>(SPIRV::Decoration::FPFastMathMode)) { + continue; // Ignored. + } auto MIB = MIRBuilder.buildInstr(SPIRV::OpDecorate) .addUse(Reg) .addImm(static_cast<uint32_t>(DecorationId->getZExtValue())); diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index 45c520a..409a0fd 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -113,6 +113,54 @@ public: std::function<bool(BasicBlock *)> Op); }; +namespace SPIRV { +struct FPFastMathDefaultInfo { + const Type *Ty = nullptr; + unsigned FastMathFlags = 0; + // With SPV_KHR_float_controls2, ContractionOff and SignedZeroInfNanPreserve + // are deprecated, and we replace them with the appropriate FPFastMathDefault + // flags instead. However, we have no guarantee about the order in which we + // will process execution modes. Therefore, it could happen that we first + // process ContractionOff, setting the AllowContract bit to 0, and then we + // process FPFastMathDefault enabling the AllowContract bit, effectively + // invalidating ContractionOff. Because of that, it's best to keep separate + // bits for the different execution modes, and we will try to combine them + // later when we emit OpExecutionMode instructions.
+ bool ContractionOff = false; + bool SignedZeroInfNanPreserve = false; + bool FPFastMathDefault = false; + + FPFastMathDefaultInfo() = default; + FPFastMathDefaultInfo(const Type *Ty, unsigned FastMathFlags) + : Ty(Ty), FastMathFlags(FastMathFlags) {} + bool operator==(const FPFastMathDefaultInfo &Other) const { + return Ty == Other.Ty && FastMathFlags == Other.FastMathFlags && + ContractionOff == Other.ContractionOff && + SignedZeroInfNanPreserve == Other.SignedZeroInfNanPreserve && + FPFastMathDefault == Other.FPFastMathDefault; + } +}; + +struct FPFastMathDefaultInfoVector + : public SmallVector<SPIRV::FPFastMathDefaultInfo, 3> { + static size_t computeFPFastMathDefaultInfoVecIndex(size_t BitWidth) { + switch (BitWidth) { + case 16: // half + return 0; + case 32: // float + return 1; + case 64: // double + return 2; + default: + report_fatal_error("Expected BitWidth to be 16, 32, 64", false); + } + llvm_unreachable( + "Unreachable code in computeFPFastMathDefaultInfoVecIndex"); + } +}; + +} // namespace SPIRV + // Add the given string as a series of integer operand, inserting null // terminators and padding to make sure the operands all have 32-bit // little-endian words. @@ -161,7 +209,7 @@ void buildOpMemberDecorate(Register Reg, MachineInstr &I, // Add an OpDecorate instruction by "spirv.Decorations" metadata node. void buildOpSpirvDecorations(Register Reg, MachineIRBuilder &MIRBuilder, - const MDNode *GVarMD); + const MDNode *GVarMD, const SPIRVSubtarget &ST); // Return a valid position for the OpVariable instruction inside a function, // i.e., at the beginning of the first block of the function. @@ -508,6 +556,5 @@ unsigned getArrayComponentCount(const MachineRegisterInfo *MRI, const MachineInstr *ResType); MachineBasicBlock::iterator getFirstValidInstructionInsertPoint(MachineBasicBlock &BB); - } // namespace llvm #endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H |
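Finally, a numeric sanity check of the FPFastMathMode bit values introduced above (0x10000/0x20000/0x40000) and of the two invariants asserted in getFastMathFlags: Fast must never be set when SPV_KHR_float_controls2 is in use, and AllowTransform requires both AllowReassoc and AllowContract. The mask below is the "every bit except None and Fast" set that an FmReassoc instruction receives; this is a standalone sketch, not code from the patch.

#include <cassert>
#include <cstdint>

// FPFastMathMode bit values from SPIRVSymbolicOperands.td in this patch.
enum : uint32_t {
  None = 0x0,
  NotNaN = 0x1,
  NotInf = 0x2,
  NSZ = 0x4,
  AllowRecip = 0x8,
  Fast = 0x10,
  AllowContract = 0x10000,
  AllowReassoc = 0x20000,
  AllowTransform = 0x40000,
};

int main() {
  // "Every bit except None and Fast", as built for FmReassoc under
  // SPV_KHR_float_controls2.
  uint32_t ReassocFlags = NotNaN | NotInf | NSZ | AllowRecip | AllowTransform |
                          AllowReassoc | AllowContract;
  assert(ReassocFlags == 0x7000F);
  // Invariants checked by getFastMathFlags when the extension is available.
  assert(!(ReassocFlags & Fast));
  assert(!(ReassocFlags & AllowTransform) ||
         ((ReassocFlags & AllowReassoc) && (ReassocFlags & AllowContract)));
}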