diff options
Diffstat (limited to 'llvm/lib/Target')
45 files changed, 854 insertions, 311 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 47c1ac4..e8352be 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -308,9 +308,9 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits; } -bool AArch64TTIImpl::areTypesABICompatible( - const Function *Caller, const Function *Callee, - const ArrayRef<Type *> &Types) const { +bool AArch64TTIImpl::areTypesABICompatible(const Function *Caller, + const Function *Callee, + ArrayRef<Type *> Types) const { if (!BaseT::areTypesABICompatible(Caller, Callee, Types)) return false; @@ -2227,7 +2227,7 @@ static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC, return std::nullopt; } -template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc> +template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc> static std::optional<Instruction *> instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II, bool MergeIntoAddendOp) { diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index fe2e849..b39546a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -84,7 +84,7 @@ public: const Function *Callee) const override; bool areTypesABICompatible(const Function *Caller, const Function *Callee, - const ArrayRef<Type *> &Types) const override; + ArrayRef<Type *> Types) const override; unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index cd8b249..67042b7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -69,7 +69,7 @@ FunctionPass *createAMDGPUPreloadKernArgPrologLegacyPass(); ModulePass *createAMDGPUPreloadKernelArgumentsLegacyPass(const TargetMachine *); struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { - AMDGPUSimplifyLibCallsPass() {} + AMDGPUSimplifyLibCallsPass() = default; PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; @@ -371,13 +371,13 @@ public: class AMDGPUAnnotateUniformValuesPass : public PassInfoMixin<AMDGPUAnnotateUniformValuesPass> { public: - AMDGPUAnnotateUniformValuesPass() {} + AMDGPUAnnotateUniformValuesPass() = default; PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> { public: - SIModeRegisterPass() {} + SIModeRegisterPass() = default; PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM); }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h index 1064e57..dad94b8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h @@ -96,7 +96,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ArgDescriptor &Arg) { } struct KernArgPreloadDescriptor : public ArgDescriptor { - KernArgPreloadDescriptor() {} + KernArgPreloadDescriptor() = default; SmallVector<MCRegister> Regs; }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 9907c88f..8669978 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1555,7 +1555,7 @@ private: AMDGPU::ClusterDimsAttr Attr; - static constexpr const char AttrName[] = "amdgpu-cluster-dims"; + static constexpr char AttrName[] = "amdgpu-cluster-dims"; }; AAAMDGPUClusterDims & diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h index cf2ab825..a3be0f5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h @@ -48,7 +48,7 @@ private: FuncInfoMap FIM; public: - AMDGPUPerfHintAnalysis() {} + AMDGPUPerfHintAnalysis() = default; // OldPM bool runOnSCC(const GCNTargetMachine &TM, CallGraphSCC &SCC); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 103cdec..dd474ac 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -202,7 +202,7 @@ bool PredicateMapping::match(const MachineInstr &MI, return true; } -SetOfRulesForOpcode::SetOfRulesForOpcode() {} +SetOfRulesForOpcode::SetOfRulesForOpcode() = default; SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes) : FastTypes(FastTypes) {} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index b28c50e..b87b54f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -816,7 +816,7 @@ parseAMDGPUAtomicOptimizerStrategy(StringRef Params) { Params.consume_front("strategy="); auto Result = StringSwitch<std::optional<ScanOptions>>(Params) .Case("dpp", ScanOptions::DPP) - .Cases("iterative", "", ScanOptions::Iterative) + .Cases({"iterative", ""}, ScanOptions::Iterative) .Case("none", ScanOptions::None) .Default(std::nullopt); if (Result) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp index 61c5dcd..ded2f5a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp @@ -54,7 +54,7 @@ public: bool CullSGPRHazardsAtMemWait; unsigned CullSGPRHazardsMemWaitThreshold; - AMDGPUWaitSGPRHazards() {} + AMDGPUWaitSGPRHazards() = default; // Return the numeric ID 0-127 for a given SGPR. static std::optional<unsigned> sgprNumber(Register Reg, diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 975781f..f357981 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -183,7 +183,7 @@ class ScheduleMetrics { unsigned BubbleCycles; public: - ScheduleMetrics() {} + ScheduleMetrics() = default; ScheduleMetrics(unsigned L, unsigned BC) : ScheduleLength(L), BubbleCycles(BC) {} unsigned getLength() const { return ScheduleLength; } @@ -217,7 +217,7 @@ class RegionPressureMap { bool IsLiveOut; public: - RegionPressureMap() {} + RegionPressureMap() = default; RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut) : DAG(GCNDAG), IsLiveOut(LiveOut) {} // Build the Instr->LiveReg and RegionIdx->Instr maps diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 6dcbced..b7fa899 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1288,18 +1288,38 @@ void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) { } void WaitcntBrackets::applyXcnt(const AMDGPU::Waitcnt &Wait) { + // On entry to a block with multiple predescessors, there may + // be pending SMEM and VMEM events active at the same time. + // In such cases, only clear one active event at a time. + auto applyPendingXcntGroup = [this](unsigned E) { + unsigned LowerBound = getScoreLB(X_CNT); + applyWaitcnt(X_CNT, 0); + PendingEvents |= (1 << E); + setScoreLB(X_CNT, LowerBound); + }; + // Wait on XCNT is redundant if we are already waiting for a load to complete. // SMEM can return out of order, so only omit XCNT wait if we are waiting till // zero. - if (Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP)) - return applyWaitcnt(X_CNT, 0); + if (Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP)) { + if (hasPendingEvent(VMEM_GROUP)) + applyPendingXcntGroup(VMEM_GROUP); + else + applyWaitcnt(X_CNT, 0); + return; + } // If we have pending store we cannot optimize XCnt because we do not wait for // stores. VMEM loads retun in order, so if we only have loads XCnt is // decremented to the same number as LOADCnt. if (Wait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) && - !hasPendingEvent(STORE_CNT)) - return applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt)); + !hasPendingEvent(STORE_CNT)) { + if (hasPendingEvent(SMEM_GROUP)) + applyPendingXcntGroup(SMEM_GROUP); + else + applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt)); + return; + } applyWaitcnt(X_CNT, Wait.XCnt); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d9f76c9..45f5919 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6153,7 +6153,7 @@ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx, // information. if (AMDGPU::isPackedFP32Inst(MI.getOpcode()) && AMDGPU::isGFX12Plus(ST) && MO.isReg() && RI.isSGPRReg(MRI, MO.getReg())) { - constexpr const AMDGPU::OpName OpNames[] = { + constexpr AMDGPU::OpName OpNames[] = { AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2}; for (auto [I, OpName] : enumerate(OpNames)) { @@ -6215,8 +6215,8 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, bool SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand( const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO) const { - constexpr const unsigned NumOps = 3; - constexpr const AMDGPU::OpName OpNames[NumOps * 2] = { + constexpr unsigned NumOps = 3; + constexpr AMDGPU::OpName OpNames[NumOps * 2] = { AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers}; diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp index e5b4f6e..08f196b 100644 --- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp +++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp @@ -884,13 +884,13 @@ CSKYTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, .Case("{t4}", CSKY::R20) .Case("{t5}", CSKY::R21) .Case("{t6}", CSKY::R22) - .Cases("{t7}", "{fp}", CSKY::R23) - .Cases("{t8}", "{top}", CSKY::R24) - .Cases("{t9}", "{bsp}", CSKY::R25) + .Cases({"{t7}", "{fp}"}, CSKY::R23) + .Cases({"{t8}", "{top}"}, CSKY::R24) + .Cases({"{t9}", "{bsp}"}, CSKY::R25) .Case("{r26}", CSKY::R26) .Case("{r27}", CSKY::R27) - .Cases("{gb}", "{rgb}", "{rdb}", CSKY::R28) - .Cases("{tb}", "{rtb}", CSKY::R29) + .Cases({"{gb}", "{rgb}", "{rdb}"}, CSKY::R28) + .Cases({"{tb}", "{rtb}"}, CSKY::R29) .Case("{svbr}", CSKY::R30) .Case("{tls}", CSKY::R31) .Default(CSKY::NoRegister); @@ -907,38 +907,38 @@ CSKYTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // use the ABI names in register constraint lists. if (Subtarget.useHardFloat()) { unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) - .Cases("{fr0}", "{vr0}", CSKY::F0_32) - .Cases("{fr1}", "{vr1}", CSKY::F1_32) - .Cases("{fr2}", "{vr2}", CSKY::F2_32) - .Cases("{fr3}", "{vr3}", CSKY::F3_32) - .Cases("{fr4}", "{vr4}", CSKY::F4_32) - .Cases("{fr5}", "{vr5}", CSKY::F5_32) - .Cases("{fr6}", "{vr6}", CSKY::F6_32) - .Cases("{fr7}", "{vr7}", CSKY::F7_32) - .Cases("{fr8}", "{vr8}", CSKY::F8_32) - .Cases("{fr9}", "{vr9}", CSKY::F9_32) - .Cases("{fr10}", "{vr10}", CSKY::F10_32) - .Cases("{fr11}", "{vr11}", CSKY::F11_32) - .Cases("{fr12}", "{vr12}", CSKY::F12_32) - .Cases("{fr13}", "{vr13}", CSKY::F13_32) - .Cases("{fr14}", "{vr14}", CSKY::F14_32) - .Cases("{fr15}", "{vr15}", CSKY::F15_32) - .Cases("{fr16}", "{vr16}", CSKY::F16_32) - .Cases("{fr17}", "{vr17}", CSKY::F17_32) - .Cases("{fr18}", "{vr18}", CSKY::F18_32) - .Cases("{fr19}", "{vr19}", CSKY::F19_32) - .Cases("{fr20}", "{vr20}", CSKY::F20_32) - .Cases("{fr21}", "{vr21}", CSKY::F21_32) - .Cases("{fr22}", "{vr22}", CSKY::F22_32) - .Cases("{fr23}", "{vr23}", CSKY::F23_32) - .Cases("{fr24}", "{vr24}", CSKY::F24_32) - .Cases("{fr25}", "{vr25}", CSKY::F25_32) - .Cases("{fr26}", "{vr26}", CSKY::F26_32) - .Cases("{fr27}", "{vr27}", CSKY::F27_32) - .Cases("{fr28}", "{vr28}", CSKY::F28_32) - .Cases("{fr29}", "{vr29}", CSKY::F29_32) - .Cases("{fr30}", "{vr30}", CSKY::F30_32) - .Cases("{fr31}", "{vr31}", CSKY::F31_32) + .Cases({"{fr0}", "{vr0}"}, CSKY::F0_32) + .Cases({"{fr1}", "{vr1}"}, CSKY::F1_32) + .Cases({"{fr2}", "{vr2}"}, CSKY::F2_32) + .Cases({"{fr3}", "{vr3}"}, CSKY::F3_32) + .Cases({"{fr4}", "{vr4}"}, CSKY::F4_32) + .Cases({"{fr5}", "{vr5}"}, CSKY::F5_32) + .Cases({"{fr6}", "{vr6}"}, CSKY::F6_32) + .Cases({"{fr7}", "{vr7}"}, CSKY::F7_32) + .Cases({"{fr8}", "{vr8}"}, CSKY::F8_32) + .Cases({"{fr9}", "{vr9}"}, CSKY::F9_32) + .Cases({"{fr10}", "{vr10}"}, CSKY::F10_32) + .Cases({"{fr11}", "{vr11}"}, CSKY::F11_32) + .Cases({"{fr12}", "{vr12}"}, CSKY::F12_32) + .Cases({"{fr13}", "{vr13}"}, CSKY::F13_32) + .Cases({"{fr14}", "{vr14}"}, CSKY::F14_32) + .Cases({"{fr15}", "{vr15}"}, CSKY::F15_32) + .Cases({"{fr16}", "{vr16}"}, CSKY::F16_32) + .Cases({"{fr17}", "{vr17}"}, CSKY::F17_32) + .Cases({"{fr18}", "{vr18}"}, CSKY::F18_32) + .Cases({"{fr19}", "{vr19}"}, CSKY::F19_32) + .Cases({"{fr20}", "{vr20}"}, CSKY::F20_32) + .Cases({"{fr21}", "{vr21}"}, CSKY::F21_32) + .Cases({"{fr22}", "{vr22}"}, CSKY::F22_32) + .Cases({"{fr23}", "{vr23}"}, CSKY::F23_32) + .Cases({"{fr24}", "{vr24}"}, CSKY::F24_32) + .Cases({"{fr25}", "{vr25}"}, CSKY::F25_32) + .Cases({"{fr26}", "{vr26}"}, CSKY::F26_32) + .Cases({"{fr27}", "{vr27}"}, CSKY::F27_32) + .Cases({"{fr28}", "{vr28}"}, CSKY::F28_32) + .Cases({"{fr29}", "{vr29}"}, CSKY::F29_32) + .Cases({"{fr30}", "{vr30}"}, CSKY::F30_32) + .Cases({"{fr31}", "{vr31}"}, CSKY::F31_32) .Default(CSKY::NoRegister); if (FReg != CSKY::NoRegister) { assert(CSKY::F0_32 <= FReg && FReg <= CSKY::F31_32 && "Unknown fp-reg"); diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td index f4e36fa7..e661c94 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td @@ -26,6 +26,7 @@ def tc_20a4bbec : InstrItinClass; def tc_227864f7 : InstrItinClass; def tc_257f6f7c : InstrItinClass; def tc_26a377fe : InstrItinClass; +def tc_2a698a03 : InstrItinClass; def tc_2b4c548e : InstrItinClass; def tc_2c745bb8 : InstrItinClass; def tc_2d4051cd : InstrItinClass; @@ -52,6 +53,7 @@ def tc_561aaa58 : InstrItinClass; def tc_56c4f9fe : InstrItinClass; def tc_56e64202 : InstrItinClass; def tc_58d21193 : InstrItinClass; +def tc_57a4709c : InstrItinClass; def tc_5bf8afbb : InstrItinClass; def tc_5cdf8c84 : InstrItinClass; def tc_61bf7c03 : InstrItinClass; @@ -220,6 +222,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ [InstrStage<1, [SLOT2, SLOT3], 0>, InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -356,6 +363,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -812,6 +824,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ [InstrStage<1, [SLOT2, SLOT3], 0>, InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -948,6 +965,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -1404,6 +1426,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ [InstrStage<1, [SLOT2, SLOT3], 0>, InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -1540,6 +1567,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -1996,6 +2028,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ [InstrStage<1, [SLOT2, SLOT3], 0>, InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -2132,6 +2169,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -2588,6 +2630,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ [InstrStage<1, [SLOT2, SLOT3], 0>, InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -2724,6 +2771,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -3180,6 +3232,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ [InstrStage<1, [SLOT2, SLOT3], 0>, InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -3316,6 +3373,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -3772,6 +3834,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ [InstrStage<1, [SLOT2, SLOT3], 0>, InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -3908,6 +3975,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -4364,6 +4436,11 @@ class DepHVXItinV69 { InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ [InstrStage<1, [SLOT2, SLOT3], 0>, InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -4500,6 +4577,11 @@ class DepHVXItinV69 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -4956,6 +5038,11 @@ class DepHVXItinV71 { InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ [InstrStage<1, [SLOT2, SLOT3], 0>, InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -5092,6 +5179,11 @@ class DepHVXItinV71 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -5548,6 +5640,11 @@ class DepHVXItinV73 { InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ [InstrStage<1, [SLOT2, SLOT3], 0>, InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -5684,6 +5781,11 @@ class DepHVXItinV73 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -6140,6 +6242,11 @@ class DepHVXItinV75 { InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ [InstrStage<1, [SLOT2, SLOT3], 0>, InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -6276,6 +6383,11 @@ class DepHVXItinV75 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -6732,6 +6844,11 @@ class DepHVXItinV79 { InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ [InstrStage<1, [SLOT2, SLOT3], 0>, InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -6868,6 +6985,11 @@ class DepHVXItinV79 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -7324,6 +7446,11 @@ class DepHVXItinV81 { InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ [InstrStage<1, [SLOT2, SLOT3], 0>, InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -7460,6 +7587,11 @@ class DepHVXItinV81 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, InstrStage<1, [CVI_XLANE]>], [9, 2], diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td index f8f1c2a..b188134 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -29939,6 +29939,58 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vabs_qf16_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = vabs($Vu32.hf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabs_qf16_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = vabs($Vu32.qf16)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabs_qf32_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = vabs($Vu32.qf32)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabs_qf32_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = vabs($Vu32.sf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vabs_sf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), @@ -31302,6 +31354,21 @@ let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_valign4 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), +"$Vd32 = valign4($Vu32,$Vv32,$Rt8)", +tc_57a4709c, TypeCVI_VA>, Enc_a30110, Requires<[UseHVXV81]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b0; +let Inst{31-24} = 0b00011000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_valignb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), @@ -32583,6 +32650,32 @@ let isCVI = 1; let hasHvxTmp = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vconv_bf_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxWR:$Vuu32), +"$Vd32.bf = $Vuu32.qf32", +tc_2a698a03, TypeCVI_VS>, Enc_a33d04, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_f8_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.f8 = $Vu32.qf16", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vconv_h_hf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), @@ -32596,6 +32689,19 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vconv_h_hf_rnd : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.h = $Vu32.hf:rnd", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vconv_hf_h : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), @@ -32635,6 +32741,71 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vconv_qf16_f8 : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32), +"$Vdd32.qf16 = $Vu32.f8", +tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_qf16_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = $Vu32.hf", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_qf16_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = $Vu32.qf16", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_qf32_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = $Vu32.qf32", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_qf32_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = $Vu32.sf", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vconv_sf_qf32 : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), @@ -33720,6 +33891,122 @@ let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } +def V6_veqhf : HInst< +(outs HvxQR:$Qd4), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Qd4 = vcmp.eq($Vu32.hf,$Vv32.hf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b000111; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_veqhf_and : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 &= vcmp.eq($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b000111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqhf_or : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 |= vcmp.eq($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b010111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isAccumulator = 1; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqhf_xor : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 ^= vcmp.eq($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b100111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqsf : HInst< +(outs HvxQR:$Qd4), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Qd4 = vcmp.eq($Vu32.sf,$Vv32.sf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b000011; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_veqsf_and : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 &= vcmp.eq($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b000011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqsf_or : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 |= vcmp.eq($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b010011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isAccumulator = 1; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqsf_xor : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 ^= vcmp.eq($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b100011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} def V6_veqw : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -34538,6 +34825,58 @@ let Inst{31-24} = 0b00011110; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vilog2_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = vilog2($Vu32.hf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vilog2_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = vilog2($Vu32.qf16)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vilog2_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = vilog2($Vu32.qf32)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vilog2_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = vilog2($Vu32.sf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vinsertwr : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, IntRegs:$Rt32), @@ -37170,6 +37509,58 @@ let isCVI = 1; let isHVXALU = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vneg_qf16_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = vneg($Vu32.hf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vneg_qf16_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = vneg($Vu32.qf16)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vneg_qf32_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = vneg($Vu32.qf32)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vneg_qf32_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = vneg($Vu32.sf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vnormamth : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td index 23f4b3a..c11483b 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td +++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td @@ -3830,6 +3830,122 @@ def: Pat<(int_hexagon_V6_vsub_hf_f8_128B HvxVR:$src1, HvxVR:$src2), // V81 HVX Instructions. +def: Pat<(int_hexagon_V6_vabs_qf16_hf HvxVR:$src1), + (V6_vabs_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf16_hf_128B HvxVR:$src1), + (V6_vabs_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf16_qf16 HvxVR:$src1), + (V6_vabs_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf16_qf16_128B HvxVR:$src1), + (V6_vabs_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf32_qf32 HvxVR:$src1), + (V6_vabs_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf32_qf32_128B HvxVR:$src1), + (V6_vabs_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf32_sf HvxVR:$src1), + (V6_vabs_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf32_sf_128B HvxVR:$src1), + (V6_vabs_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), + (V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[UseHVXV81, UseHVX64B]>; +def: Pat<(int_hexagon_V6_valign4_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), + (V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[UseHVXV81, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_bf_qf32 HvxWR:$src1), + (V6_vconv_bf_qf32 HvxWR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_bf_qf32_128B HvxWR:$src1), + (V6_vconv_bf_qf32 HvxWR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_f8_qf16 HvxVR:$src1), + (V6_vconv_f8_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_f8_qf16_128B HvxVR:$src1), + (V6_vconv_f8_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_h_hf_rnd HvxVR:$src1), + (V6_vconv_h_hf_rnd HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vconv_h_hf_rnd_128B HvxVR:$src1), + (V6_vconv_h_hf_rnd HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_qf16_f8 HvxVR:$src1), + (V6_vconv_qf16_f8 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_f8_128B HvxVR:$src1), + (V6_vconv_qf16_f8 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_hf HvxVR:$src1), + (V6_vconv_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_hf_128B HvxVR:$src1), + (V6_vconv_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_qf16 HvxVR:$src1), + (V6_vconv_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_qf16_128B HvxVR:$src1), + (V6_vconv_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf32_qf32 HvxVR:$src1), + (V6_vconv_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf32_qf32_128B HvxVR:$src1), + (V6_vconv_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf32_sf HvxVR:$src1), + (V6_vconv_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf32_sf_128B HvxVR:$src1), + (V6_vconv_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf HvxVR:$src1, HvxVR:$src2), + (V6_veqhf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_128B HvxVR:$src1, HvxVR:$src2), + (V6_veqhf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf HvxVR:$src1, HvxVR:$src2), + (V6_veqsf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_128B HvxVR:$src1, HvxVR:$src2), + (V6_veqsf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_hf HvxVR:$src1), + (V6_vilog2_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_hf_128B HvxVR:$src1), + (V6_vilog2_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_qf16 HvxVR:$src1), + (V6_vilog2_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_qf16_128B HvxVR:$src1), + (V6_vilog2_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_qf32 HvxVR:$src1), + (V6_vilog2_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_qf32_128B HvxVR:$src1), + (V6_vilog2_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_sf HvxVR:$src1), + (V6_vilog2_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_sf_128B HvxVR:$src1), + (V6_vilog2_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf16_hf HvxVR:$src1), + (V6_vneg_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf16_hf_128B HvxVR:$src1), + (V6_vneg_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf16_qf16 HvxVR:$src1), + (V6_vneg_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf16_qf16_128B HvxVR:$src1), + (V6_vneg_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf32_qf32 HvxVR:$src1), + (V6_vneg_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf32_qf32_128B HvxVR:$src1), + (V6_vneg_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf32_sf HvxVR:$src1), + (V6_vneg_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf32_sf_128B HvxVR:$src1), + (V6_vneg_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; def: Pat<(int_hexagon_V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2), (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; def: Pat<(int_hexagon_V6_vsub_hf_mix_128B HvxVR:$src1, HvxVR:$src2), diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 7ee280d..eadf020 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -1815,7 +1815,7 @@ struct WeightedLeaf { int Weight; int InsertionOrder; - WeightedLeaf() {} + WeightedLeaf() = default; WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) : Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 904aabed..fe700e1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -375,6 +375,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FFLOOR, VT, Legal); setOperationAction(ISD::FTRUNC, VT, Legal); setOperationAction(ISD::FROUNDEVEN, VT, Legal); + setOperationAction(ISD::FMINNUM, VT, Legal); + setOperationAction(ISD::FMAXNUM, VT, Legal); } setOperationAction(ISD::CTPOP, GRLenVT, Legal); setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal); @@ -461,6 +463,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FFLOOR, VT, Legal); setOperationAction(ISD::FTRUNC, VT, Legal); setOperationAction(ISD::FROUNDEVEN, VT, Legal); + setOperationAction(ISD::FMINNUM, VT, Legal); + setOperationAction(ISD::FMAXNUM, VT, Legal); } } diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 610ba05..b502b056 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1558,6 +1558,10 @@ defm : PatXrXrF<fmul, "XVFMUL">; // XVFDIV_{S/D} defm : PatXrXrF<fdiv, "XVFDIV">; +// XVFMAX_{S/D}, XVFMIN_{S/D} +defm : PatXrXrF<fmaxnum, "XVFMAX">; +defm : PatXrXrF<fminnum, "XVFMIN">; + // XVFMADD_{S/D} def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), (XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 6470842..6b74a4b 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1760,6 +1760,10 @@ defm : PatVrVrF<fmul, "VFMUL">; // VFDIV_{S/D} defm : PatVrVrF<fdiv, "VFDIV">; +// VFMAX_{S/D}, VFMIN_{S/D} +defm : PatVrVrF<fmaxnum, "VFMAX">; +defm : PatVrVrF<fminnum, "VFMIN">; + // VFMADD_{S/D} def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), (VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp index 7d54565..6d69af5 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp @@ -39,7 +39,7 @@ LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit) : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH, /*HasRelocationAddend=*/true) {} -LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {} +LoongArchELFObjectWriter::~LoongArchELFObjectWriter() = default; unsigned LoongArchELFObjectWriter::getRelocType(const MCFixup &Fixup, const MCValue &Target, diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp index f0e2bc4..08fa51d 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -38,7 +38,7 @@ public: LoongArchMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII) : Ctx(ctx), MCII(MCII) {} - ~LoongArchMCCodeEmitter() override {} + ~LoongArchMCCodeEmitter() override = default; void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB, SmallVectorImpl<MCFixup> &Fixups, diff --git a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp index e37f3a66..fb5cd5c2 100644 --- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp +++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp @@ -690,9 +690,9 @@ bool M68kAsmParser::parseRegisterName(MCRegister &RegNo, SMLoc Loc, } else { // Floating point control register. RegNo = StringSwitch<unsigned>(RegisterNameLower) - .Cases("fpc", "fpcr", M68k::FPC) - .Cases("fps", "fpsr", M68k::FPS) - .Cases("fpi", "fpiar", M68k::FPIAR) + .Cases({"fpc", "fpcr"}, M68k::FPC) + .Cases({"fps", "fpsr"}, M68k::FPS) + .Cases({"fpi", "fpiar"}, M68k::FPIAR) .Default(M68k::NoRegister); assert(RegNo != M68k::NoRegister && "Unrecognized FP control register name"); diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp index fe83dc6..51bafe4 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp @@ -49,7 +49,7 @@ public: M68kAsmBackend(const Target &T, const MCSubtargetInfo &STI) : MCAsmBackend(llvm::endianness::big), Allows32BitBranch(llvm::StringSwitch<bool>(STI.getCPU()) - .CasesLower("m68020", "m68030", "m68040", true) + .CasesLower({"m68020", "m68030", "m68040"}, true) .Default(false)) {} void applyFixup(const MCFragment &, const MCFixup &, const MCValue &, diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 97379d7..f588e56 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -6176,7 +6176,7 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) { CC = StringSwitch<unsigned>(Name) .Case("zero", 0) - .Cases("at", "AT", 1) + .Cases({"at", "AT"}, 1) .Case("a0", 4) .Case("a1", 5) .Case("a2", 6) diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h index caef8fe7..b832b82 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h +++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h @@ -20,7 +20,7 @@ class MemoryLocation; class NVPTXAAResult : public AAResultBase { public: - NVPTXAAResult() {} + NVPTXAAResult() = default; NVPTXAAResult(NVPTXAAResult &&Arg) : AAResultBase(std::move(Arg)) {} /// Handle invalidation events from the new pass manager. diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index e8758aa..50827bd 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1562,12 +1562,17 @@ def : Pat<(int_nvvm_saturate_d f64:$a), (CVT_f64_f64 $a, CvtSAT)>; // Exp2 Log2 // -def : Pat<(int_nvvm_ex2_approx_ftz_f f32:$a), (EX2_APPROX_f32 $a, FTZ)>; -def : Pat<(int_nvvm_ex2_approx_f f32:$a), (EX2_APPROX_f32 $a, NoFTZ)>; +def : Pat<(f32 (int_nvvm_ex2_approx_ftz f32:$a)), (EX2_APPROX_f32 $a, FTZ)>; +def : Pat<(f32 (int_nvvm_ex2_approx f32:$a)), (EX2_APPROX_f32 $a, NoFTZ)>; let Predicates = [hasPTX<70>, hasSM<75>] in { - def : Pat<(int_nvvm_ex2_approx_f16 f16:$a), (EX2_APPROX_f16 $a)>; - def : Pat<(int_nvvm_ex2_approx_f16x2 v2f16:$a), (EX2_APPROX_f16x2 $a)>; + def : Pat<(f16 (int_nvvm_ex2_approx f16:$a)), (EX2_APPROX_f16 $a)>; + def : Pat<(v2f16 (int_nvvm_ex2_approx v2f16:$a)), (EX2_APPROX_f16x2 $a)>; +} + +let Predicates = [hasPTX<78>, hasSM<90>] in { + def : Pat<(bf16 (int_nvvm_ex2_approx_ftz bf16:$a)), (EX2_APPROX_bf16 $a)>; + def : Pat<(v2bf16 (int_nvvm_ex2_approx_ftz v2bf16:$a)), (EX2_APPROX_bf16x2 $a)>; } def LG2_APPROX_f32 : diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index 729c077..64593e6 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -318,7 +318,7 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC, // answer. These include: // // - nvvm_cos_approx_{f,ftz_f} - // - nvvm_ex2_approx_{d,f,ftz_f} + // - nvvm_ex2_approx(_ftz) // - nvvm_lg2_approx_{d,f,ftz_f} // - nvvm_sin_approx_{f,ftz_f} // - nvvm_sqrt_approx_{f,ftz_f} diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index bcb3f50..780e124 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -2702,7 +2702,7 @@ static bool isSpecialLLVMGlobalArrayToSkip(const GlobalVariable *GV) { static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) { return StringSwitch<bool>(GV->getName()) - .Cases("llvm.global_ctors", "llvm.global_dtors", true) + .Cases({"llvm.global_ctors", "llvm.global_dtors"}, true) .Default(false); } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 2fba090..b04e887 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -912,7 +912,7 @@ bool PPCTTIImpl::areInlineCompatible(const Function *Caller, bool PPCTTIImpl::areTypesABICompatible(const Function *Caller, const Function *Callee, - const ArrayRef<Type *> &Types) const { + ArrayRef<Type *> Types) const { // We need to ensure that argument promotion does not // attempt to promote pointers to MMA types (__vector_pair diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 475472a..8d7f255 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -147,7 +147,7 @@ public: bool areInlineCompatible(const Function *Caller, const Function *Callee) const override; bool areTypesABICompatible(const Function *Caller, const Function *Callee, - const ArrayRef<Type *> &Types) const override; + ArrayRef<Type *> Types) const override; bool supportsTailCallFor(const CallBase *CB) const override; private: diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index c6a8b84..e0cf739 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -23946,7 +23946,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, .Case("{t0}", RISCV::X5) .Case("{t1}", RISCV::X6) .Case("{t2}", RISCV::X7) - .Cases("{s0}", "{fp}", RISCV::X8) + .Cases({"{s0}", "{fp}"}, RISCV::X8) .Case("{s1}", RISCV::X9) .Case("{a0}", RISCV::X10) .Case("{a1}", RISCV::X11) @@ -23983,38 +23983,38 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // use the ABI names in register constraint lists. if (Subtarget.hasStdExtF()) { unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) - .Cases("{f0}", "{ft0}", RISCV::F0_F) - .Cases("{f1}", "{ft1}", RISCV::F1_F) - .Cases("{f2}", "{ft2}", RISCV::F2_F) - .Cases("{f3}", "{ft3}", RISCV::F3_F) - .Cases("{f4}", "{ft4}", RISCV::F4_F) - .Cases("{f5}", "{ft5}", RISCV::F5_F) - .Cases("{f6}", "{ft6}", RISCV::F6_F) - .Cases("{f7}", "{ft7}", RISCV::F7_F) - .Cases("{f8}", "{fs0}", RISCV::F8_F) - .Cases("{f9}", "{fs1}", RISCV::F9_F) - .Cases("{f10}", "{fa0}", RISCV::F10_F) - .Cases("{f11}", "{fa1}", RISCV::F11_F) - .Cases("{f12}", "{fa2}", RISCV::F12_F) - .Cases("{f13}", "{fa3}", RISCV::F13_F) - .Cases("{f14}", "{fa4}", RISCV::F14_F) - .Cases("{f15}", "{fa5}", RISCV::F15_F) - .Cases("{f16}", "{fa6}", RISCV::F16_F) - .Cases("{f17}", "{fa7}", RISCV::F17_F) - .Cases("{f18}", "{fs2}", RISCV::F18_F) - .Cases("{f19}", "{fs3}", RISCV::F19_F) - .Cases("{f20}", "{fs4}", RISCV::F20_F) - .Cases("{f21}", "{fs5}", RISCV::F21_F) - .Cases("{f22}", "{fs6}", RISCV::F22_F) - .Cases("{f23}", "{fs7}", RISCV::F23_F) - .Cases("{f24}", "{fs8}", RISCV::F24_F) - .Cases("{f25}", "{fs9}", RISCV::F25_F) - .Cases("{f26}", "{fs10}", RISCV::F26_F) - .Cases("{f27}", "{fs11}", RISCV::F27_F) - .Cases("{f28}", "{ft8}", RISCV::F28_F) - .Cases("{f29}", "{ft9}", RISCV::F29_F) - .Cases("{f30}", "{ft10}", RISCV::F30_F) - .Cases("{f31}", "{ft11}", RISCV::F31_F) + .Cases({"{f0}", "{ft0}"}, RISCV::F0_F) + .Cases({"{f1}", "{ft1}"}, RISCV::F1_F) + .Cases({"{f2}", "{ft2}"}, RISCV::F2_F) + .Cases({"{f3}", "{ft3}"}, RISCV::F3_F) + .Cases({"{f4}", "{ft4}"}, RISCV::F4_F) + .Cases({"{f5}", "{ft5}"}, RISCV::F5_F) + .Cases({"{f6}", "{ft6}"}, RISCV::F6_F) + .Cases({"{f7}", "{ft7}"}, RISCV::F7_F) + .Cases({"{f8}", "{fs0}"}, RISCV::F8_F) + .Cases({"{f9}", "{fs1}"}, RISCV::F9_F) + .Cases({"{f10}", "{fa0}"}, RISCV::F10_F) + .Cases({"{f11}", "{fa1}"}, RISCV::F11_F) + .Cases({"{f12}", "{fa2}"}, RISCV::F12_F) + .Cases({"{f13}", "{fa3}"}, RISCV::F13_F) + .Cases({"{f14}", "{fa4}"}, RISCV::F14_F) + .Cases({"{f15}", "{fa5}"}, RISCV::F15_F) + .Cases({"{f16}", "{fa6}"}, RISCV::F16_F) + .Cases({"{f17}", "{fa7}"}, RISCV::F17_F) + .Cases({"{f18}", "{fs2}"}, RISCV::F18_F) + .Cases({"{f19}", "{fs3}"}, RISCV::F19_F) + .Cases({"{f20}", "{fs4}"}, RISCV::F20_F) + .Cases({"{f21}", "{fs5}"}, RISCV::F21_F) + .Cases({"{f22}", "{fs6}"}, RISCV::F22_F) + .Cases({"{f23}", "{fs7}"}, RISCV::F23_F) + .Cases({"{f24}", "{fs8}"}, RISCV::F24_F) + .Cases({"{f25}", "{fs9}"}, RISCV::F25_F) + .Cases({"{f26}", "{fs10}"}, RISCV::F26_F) + .Cases({"{f27}", "{fs11}"}, RISCV::F27_F) + .Cases({"{f28}", "{ft8}"}, RISCV::F28_F) + .Cases({"{f29}", "{ft9}"}, RISCV::F29_F) + .Cases({"{f30}", "{ft10}"}, RISCV::F30_F) + .Cases({"{f31}", "{ft11}"}, RISCV::F31_F) .Default(RISCV::NoRegister); if (FReg != RISCV::NoRegister) { assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); diff --git a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp index a1c8e23..c58a5c0 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp @@ -48,7 +48,7 @@ class VXRMInfo { } State = Uninitialized; public: - VXRMInfo() {} + VXRMInfo() = default; static VXRMInfo getUnknown() { VXRMInfo Info; diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp index 0a318e0..ed6d355 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp @@ -15,4 +15,4 @@ using namespace llvm; SPIRVTargetStreamer::SPIRVTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} -SPIRVTargetStreamer::~SPIRVTargetStreamer() {} +SPIRVTargetStreamer::~SPIRVTargetStreamer() = default; diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h index 2d19f6de..44b6c66 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h @@ -81,7 +81,7 @@ private: void initAvailableCapabilitiesForVulkan(const SPIRVSubtarget &ST); public: - RequirementHandler() {} + RequirementHandler() = default; void clear() { MinimalCaps.clear(); AllCaps.clear(); diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 7dd0b95..5ba0356 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -69,7 +69,7 @@ static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) { } // Pin SPIRVTargetObjectFile's vtables to this file. -SPIRVTargetObjectFile::~SPIRVTargetObjectFile() {} +SPIRVTargetObjectFile::~SPIRVTargetObjectFile() = default; SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h index 9d0adbb..87ec256 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h @@ -16,7 +16,7 @@ namespace llvm { /// This implementation is used for SystemZ ELF targets. class SystemZELFTargetObjectFile : public TargetLoweringObjectFileELF { public: - SystemZELFTargetObjectFile() {} + SystemZELFTargetObjectFile() = default; /// Describe a TLS variable address within debug info. const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h index 7845cdf..1bfc61f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h @@ -76,7 +76,7 @@ public: BlockSet.insert(MBB); } ArrayRef<MachineBasicBlock *> getBlocks() const { return Blocks; } - using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator; + using block_iterator = ArrayRef<MachineBasicBlock *>::const_iterator; block_iterator block_begin() const { return getBlocks().begin(); } block_iterator block_end() const { return getBlocks().end(); } inline iterator_range<block_iterator> blocks() const { @@ -96,7 +96,7 @@ public: void addSubException(std::unique_ptr<WebAssemblyException> E) { SubExceptions.push_back(std::move(E)); } - using iterator = typename decltype(SubExceptions)::const_iterator; + using iterator = decltype(SubExceptions)::const_iterator; iterator begin() const { return SubExceptions.begin(); } iterator end() const { return SubExceptions.end(); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index ff4d6469..ee575e3 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -207,8 +207,7 @@ template <> struct MappingTraits<WebAssemblyFunctionInfo> { template <> struct CustomMappingTraits<BBNumberMap> { static void inputOne(IO &YamlIO, StringRef Key, BBNumberMap &SrcToUnwindDest) { - YamlIO.mapRequired(Key.str().c_str(), - SrcToUnwindDest[std::atoi(Key.str().c_str())]); + YamlIO.mapRequired(Key, SrcToUnwindDest[std::atoi(Key.str().c_str())]); } static void output(IO &YamlIO, BBNumberMap &SrcToUnwindDest) { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h index e92bf17..96b8a4e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h @@ -35,7 +35,7 @@ public: virtual MachineBasicBlock *getHeader() const = 0; virtual bool contains(const MachineBasicBlock *MBB) const = 0; virtual unsigned getNumBlocks() const = 0; - using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator; + using block_iterator = ArrayRef<MachineBasicBlock *>::const_iterator; virtual iterator_range<block_iterator> blocks() const = 0; virtual bool isLoop() const = 0; }; diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index b7ea672..bac3692 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2470,10 +2470,10 @@ bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID, // Report back its kind, or IOK_INVALID if does not evaluated as a known one unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) { return StringSwitch<unsigned>(Name) - .Cases("TYPE","type",IOK_TYPE) - .Cases("SIZE","size",IOK_SIZE) - .Cases("LENGTH","length",IOK_LENGTH) - .Default(IOK_INVALID); + .Cases({"TYPE", "type"}, IOK_TYPE) + .Cases({"SIZE", "size"}, IOK_SIZE) + .Cases({"LENGTH", "length"}, IOK_LENGTH) + .Default(IOK_INVALID); } /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator @@ -2516,8 +2516,8 @@ unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) { unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) { return StringSwitch<unsigned>(Name.lower()) .Case("type", MOK_TYPE) - .Cases("size", "sizeof", MOK_SIZEOF) - .Cases("length", "lengthof", MOK_LENGTHOF) + .Cases({"size", "sizeof"}, MOK_SIZEOF) + .Cases({"length", "lengthof"}, MOK_LENGTHOF) .Default(MOK_INVALID); } @@ -2581,21 +2581,21 @@ bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) { bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size, StringRef *SizeStr) { Size = StringSwitch<unsigned>(getTok().getString()) - .Cases("BYTE", "byte", 8) - .Cases("WORD", "word", 16) - .Cases("DWORD", "dword", 32) - .Cases("FLOAT", "float", 32) - .Cases("LONG", "long", 32) - .Cases("FWORD", "fword", 48) - .Cases("DOUBLE", "double", 64) - .Cases("QWORD", "qword", 64) - .Cases("MMWORD","mmword", 64) - .Cases("XWORD", "xword", 80) - .Cases("TBYTE", "tbyte", 80) - .Cases("XMMWORD", "xmmword", 128) - .Cases("YMMWORD", "ymmword", 256) - .Cases("ZMMWORD", "zmmword", 512) - .Default(0); + .Cases({"BYTE", "byte"}, 8) + .Cases({"WORD", "word"}, 16) + .Cases({"DWORD", "dword"}, 32) + .Cases({"FLOAT", "float"}, 32) + .Cases({"LONG", "long"}, 32) + .Cases({"FWORD", "fword"}, 48) + .Cases({"DOUBLE", "double"}, 64) + .Cases({"QWORD", "qword"}, 64) + .Cases({"MMWORD", "mmword"}, 64) + .Cases({"XWORD", "xword"}, 80) + .Cases({"TBYTE", "tbyte"}, 80) + .Cases({"XMMWORD", "xmmword"}, 128) + .Cases({"YMMWORD", "ymmword"}, 256) + .Cases({"ZMMWORD", "zmmword"}, 512) + .Default(0); if (Size) { if (SizeStr) *SizeStr = getTok().getString(); @@ -2886,22 +2886,22 @@ bool X86AsmParser::parseATTOperand(OperandVector &Operands) { // otherwise the EFLAGS Condition Code enumerator. X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) { return StringSwitch<X86::CondCode>(CC) - .Case("o", X86::COND_O) // Overflow - .Case("no", X86::COND_NO) // No Overflow - .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal - .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below - .Cases("e", "z", X86::COND_E) // Equal/Zero - .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero - .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above - .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal - .Case("s", X86::COND_S) // Sign - .Case("ns", X86::COND_NS) // No Sign - .Cases("p", "pe", X86::COND_P) // Parity/Parity Even - .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd - .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal - .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less - .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater - .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal + .Case("o", X86::COND_O) // Overflow + .Case("no", X86::COND_NO) // No Overflow + .Cases({"b", "nae"}, X86::COND_B) // Below/Neither Above nor Equal + .Cases({"ae", "nb"}, X86::COND_AE) // Above or Equal/Not Below + .Cases({"e", "z"}, X86::COND_E) // Equal/Zero + .Cases({"ne", "nz"}, X86::COND_NE) // Not Equal/Not Zero + .Cases({"be", "na"}, X86::COND_BE) // Below or Equal/Not Above + .Cases({"a", "nbe"}, X86::COND_A) // Above/Neither Below nor Equal + .Case("s", X86::COND_S) // Sign + .Case("ns", X86::COND_NS) // No Sign + .Cases({"p", "pe"}, X86::COND_P) // Parity/Parity Even + .Cases({"np", "po"}, X86::COND_NP) // No Parity/Parity Odd + .Cases({"l", "nge"}, X86::COND_L) // Less/Neither Greater nor Equal + .Cases({"ge", "nl"}, X86::COND_GE) // Greater or Equal/Not Less + .Cases({"le", "ng"}, X86::COND_LE) // Less or Equal/Not Greater + .Cases({"g", "nle"}, X86::COND_G) // Greater/Neither Less nor Equal .Default(X86::COND_INVALID); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 007074c..133406b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -22861,6 +22861,13 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, if (!OpVT.isScalarInteger() || OpSize < 128) return SDValue(); + // Don't do this if we're not supposed to use the FPU. + bool NoImplicitFloatOps = + DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat); + if (Subtarget.useSoftFloat() || NoImplicitFloatOps) + return SDValue(); + // Ignore a comparison with zero because that gets special treatment in // EmitTest(). But make an exception for the special case of a pair of // logically-combined vector-sized operands compared to zero. This pattern may @@ -22883,13 +22890,9 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, // Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands. // Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands. // Otherwise use PCMPEQ (plus AND) and mask testing. - bool NoImplicitFloatOps = - DAG.getMachineFunction().getFunction().hasFnAttribute( - Attribute::NoImplicitFloat); - if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps && - ((OpSize == 128 && Subtarget.hasSSE2()) || - (OpSize == 256 && Subtarget.hasAVX()) || - (OpSize == 512 && Subtarget.useAVX512Regs()))) { + if ((OpSize == 128 && Subtarget.hasSSE2()) || + (OpSize == 256 && Subtarget.hasAVX()) || + (OpSize == 512 && Subtarget.useAVX512Regs())) { bool HasPT = Subtarget.hasSSE41(); // PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened @@ -53344,105 +53347,6 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, return SDValue(); } -// Look for a RMW operation that only touches one bit of a larger than legal -// type and fold it to a BTC/BTR/BTS or bit insertion pattern acting on a single -// i32 sub value. -static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, - SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - using namespace SDPatternMatch; - - // Only handle normal stores and its chain was a matching normal load. - auto *Ld = dyn_cast<LoadSDNode>(St->getChain()); - if (!ISD::isNormalStore(St) || !St->isSimple() || !Ld || - !ISD::isNormalLoad(Ld) || !Ld->isSimple() || - Ld->getBasePtr() != St->getBasePtr() || - Ld->getOffset() != St->getOffset()) - return SDValue(); - - SDValue LoadVal(Ld, 0); - SDValue StoredVal = St->getValue(); - EVT VT = StoredVal.getValueType(); - - // Only narrow larger than legal scalar integers. - if (!VT.isScalarInteger() || - VT.getSizeInBits() <= (Subtarget.is64Bit() ? 64 : 32)) - return SDValue(); - - // BTR: X & ~(1 << ShAmt) - // BTS: X | (1 << ShAmt) - // BTC: X ^ (1 << ShAmt) - // - // BitInsert: (X & ~(1 << ShAmt)) | (InsertBit << ShAmt) - SDValue InsertBit, ShAmt; - if (!StoredVal.hasOneUse() || - !(sd_match(StoredVal, m_And(m_Specific(LoadVal), - m_Not(m_Shl(m_One(), m_Value(ShAmt))))) || - sd_match(StoredVal, - m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) || - sd_match(StoredVal, - m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) || - sd_match(StoredVal, - m_Or(m_And(m_Specific(LoadVal), - m_Not(m_Shl(m_One(), m_Value(ShAmt)))), - m_Shl(m_Value(InsertBit), m_Deferred(ShAmt)))))) - return SDValue(); - - // Ensure the shift amount is in bounds. - KnownBits KnownAmt = DAG.computeKnownBits(ShAmt); - if (KnownAmt.getMaxValue().uge(VT.getSizeInBits())) - return SDValue(); - - // If we're inserting a bit then it must be the LSB. - if (InsertBit) { - KnownBits KnownInsert = DAG.computeKnownBits(InsertBit); - if (KnownInsert.countMinLeadingZeros() < (VT.getSizeInBits() - 1)) - return SDValue(); - } - - // Split the shift into an alignment shift that moves the active i32 block to - // the bottom bits for truncation and a modulo shift that can act on the i32. - EVT AmtVT = ShAmt.getValueType(); - SDValue AlignAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, - DAG.getSignedConstant(-32LL, DL, AmtVT)); - SDValue ModuloAmt = - DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT)); - ModuloAmt = DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8); - - // Compute the byte offset for the i32 block that is changed by the RMW. - // combineTruncate will adjust the load for us in a similar way. - EVT PtrVT = St->getBasePtr().getValueType(); - SDValue PtrBitOfs = DAG.getZExtOrTrunc(AlignAmt, DL, PtrVT); - SDValue PtrByteOfs = DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs, - DAG.getShiftAmountConstant(3, PtrVT, DL)); - SDValue NewPtr = DAG.getMemBasePlusOffset(St->getBasePtr(), PtrByteOfs, DL, - SDNodeFlags::NoUnsignedWrap); - - // Reconstruct the BTC/BTR/BTS pattern for the i32 block and store. - SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt); - X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); - - SDValue Mask = DAG.getNode(ISD::SHL, DL, MVT::i32, - DAG.getConstant(1, DL, MVT::i32), ModuloAmt); - - SDValue Res; - if (InsertBit) { - SDValue BitMask = - DAG.getNode(ISD::SHL, DL, MVT::i32, - DAG.getZExtOrTrunc(InsertBit, DL, MVT::i32), ModuloAmt); - Res = - DAG.getNode(ISD::AND, DL, MVT::i32, X, DAG.getNOT(DL, Mask, MVT::i32)); - Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, BitMask); - } else { - if (StoredVal.getOpcode() == ISD::AND) - Mask = DAG.getNOT(DL, Mask, MVT::i32); - Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask); - } - - return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(), - Align(), St->getMemOperand()->getFlags()); -} - static SDValue combineStore(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -53669,9 +53573,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, } } - if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget)) - return R; - // Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC) // store(cmov(x, load(p), CC), p) to cstore(x, p, InvertCC) if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) && @@ -54604,9 +54505,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, // truncation, see if we can convert the shift into a pointer offset instead. // Limit this to normal (non-ext) scalar integer loads. if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL && - Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) && - (Src.getOperand(0).hasOneUse() || - !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, SrcVT))) { + Src.hasOneUse() && Src.getOperand(0).hasOneUse() && + ISD::isNormalLoad(Src.getOperand(0).getNode())) { auto *Ld = cast<LoadSDNode>(Src.getOperand(0)); if (Ld->isSimple() && VT.isByteSized() && isPowerOf2_64(VT.getSizeInBits())) { @@ -56406,7 +56306,6 @@ static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC, static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { - using namespace SDPatternMatch; const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); const SDValue LHS = N->getOperand(0); const SDValue RHS = N->getOperand(1); @@ -56465,37 +56364,6 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, if (SDValue AndN = MatchAndCmpEq(RHS, LHS)) return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); - // If we're performing a bit test on a larger than legal type, attempt - // to (aligned) shift down the value to the bottom 32-bits and then - // perform the bittest on the i32 value. - // ICMP_ZERO(AND(X,SHL(1,IDX))) - // --> ICMP_ZERO(AND(TRUNC(SRL(X,AND(IDX,-32))),SHL(1,AND(IDX,31)))) - if (isNullConstant(RHS) && - OpVT.getScalarSizeInBits() > (Subtarget.is64Bit() ? 64 : 32)) { - SDValue X, ShAmt; - if (sd_match(LHS, m_OneUse(m_And(m_Value(X), - m_Shl(m_One(), m_Value(ShAmt)))))) { - // Only attempt this if the shift amount is known to be in bounds. - KnownBits KnownAmt = DAG.computeKnownBits(ShAmt); - if (KnownAmt.getMaxValue().ult(OpVT.getScalarSizeInBits())) { - EVT AmtVT = ShAmt.getValueType(); - SDValue AlignAmt = - DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, - DAG.getSignedConstant(-32LL, DL, AmtVT)); - SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, - DAG.getConstant(31, DL, AmtVT)); - SDValue Mask = DAG.getNode( - ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32), - DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8)); - X = DAG.getNode(ISD::SRL, DL, OpVT, X, AlignAmt); - X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); - X = DAG.getNode(ISD::AND, DL, MVT::i32, X, Mask); - return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, MVT::i32), - CC); - } - } - } - // cmpeq(trunc(x),C) --> cmpeq(x,C) // cmpne(trunc(x),C) --> cmpne(x,C) // iff x upper bits are zero. diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp index 090060e..b655183 100644 --- a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp +++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp @@ -115,9 +115,9 @@ struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> { static constexpr MachineInstr *const ArgNodeSentinel = nullptr; using GraphT = ImmutableGraph<MachineInstr *, int>; - using Node = typename GraphT::Node; - using Edge = typename GraphT::Edge; - using size_type = typename GraphT::size_type; + using Node = GraphT::Node; + using Edge = GraphT::Edge; + using size_type = GraphT::size_type; MachineGadgetGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges, size_type NodesSize, size_type EdgesSize, int NumFences = 0, int NumGadgets = 0) @@ -191,10 +191,10 @@ template <> struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits { using GraphType = MachineGadgetGraph; using Traits = llvm::GraphTraits<GraphType *>; - using NodeRef = typename Traits::NodeRef; - using EdgeRef = typename Traits::EdgeRef; - using ChildIteratorType = typename Traits::ChildIteratorType; - using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType; + using NodeRef = Traits::NodeRef; + using EdgeRef = Traits::EdgeRef; + using ChildIteratorType = Traits::ChildIteratorType; + using ChildEdgeIteratorType = Traits::ChildEdgeIteratorType; DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} @@ -335,7 +335,7 @@ X86LoadValueInjectionLoadHardeningPass::getGadgetGraph( L.computePhiInfo(); GraphBuilder Builder; - using GraphIter = typename GraphBuilder::BuilderNodeRef; + using GraphIter = GraphBuilder::BuilderNodeRef; DenseMap<MachineInstr *, GraphIter> NodeMap; int FenceCount = 0, GadgetCount = 0; auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) { diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 3d8d0a23..0b1430e 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -6562,7 +6562,7 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller, bool X86TTIImpl::areTypesABICompatible(const Function *Caller, const Function *Callee, - const ArrayRef<Type *> &Types) const { + ArrayRef<Type *> Types) const { if (!BaseT::areTypesABICompatible(Caller, Callee, Types)) return false; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 133b366..de5e1c2 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -296,7 +296,7 @@ public: bool areInlineCompatible(const Function *Caller, const Function *Callee) const override; bool areTypesABICompatible(const Function *Caller, const Function *Callee, - const ArrayRef<Type *> &Type) const override; + ArrayRef<Type *> Type) const override; uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override { return ST->getMaxInlineSizeThreshold(); |
