Diffstat (limited to 'llvm/lib/Target')
54 files changed, 1144 insertions, 347 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp index 7e03b97..45b7120 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp @@ -370,6 +370,22 @@ SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const {            {ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};  } +SVEStackAllocations AArch64PrologueEpilogueCommon::getSVEStackAllocations( +    SVEFrameSizes const &SVE) { +  StackOffset AfterZPRs = SVE.ZPR.LocalsSize; +  StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize; +  StackOffset AfterPPRs = {}; +  if (SVELayout == SVEStackLayout::Split) { +    BeforePPRs = SVE.PPR.CalleeSavesSize; +    // If there are no ZPR CSRs, place all local allocations after the ZPRs. +    if (SVE.ZPR.CalleeSavesSize) +      AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize; +    else +      AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals. +  } +  return {BeforePPRs, AfterPPRs, AfterZPRs}; +} +  struct SVEPartitions {    struct {      MachineBasicBlock::iterator Begin, End; @@ -687,16 +703,19 @@ void AArch64PrologueEmitter::emitPrologue() {    // All of the remaining stack allocations are for locals.    determineLocalsStackSize(NumBytes, PrologueSaveSize); +  auto [PPR, ZPR] = getSVEStackFrameSizes(); +  SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR}); +    MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;    if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { +    assert(!SVEAllocs.AfterPPRs && +           "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");      // If we're doing SVE saves first, we need to immediately allocate space      // for fixed objects, then space for the SVE callee saves.      //      // Windows unwind requires that the scalable size is a multiple of 16;      // that's handled when the callee-saved size is computed. -    auto SaveSize = -        StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) + -        StackOffset::getFixed(FixedObject); +    auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject);      allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},                         /*FollowupAllocs=*/true);      NumBytes -= FixedObject; @@ -764,12 +783,11 @@ void AArch64PrologueEmitter::emitPrologue() {    if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))      emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding); -  auto [PPR, ZPR] = getSVEStackFrameSizes(); -  StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;    StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes); +  SVEAllocs.AfterZPRs += NonSVELocalsSize; +    StackOffset CFAOffset =        StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize; -    MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;    // Allocate space for the callee saves and PPR locals (if any).    
if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) { @@ -780,31 +798,23 @@ void AArch64PrologueEmitter::emitPrologue() {      if (EmitAsyncCFI)        emitCalleeSavedSVELocations(AfterSVESavesI); -    StackOffset AllocateBeforePPRs = SVECalleeSavesSize; -    StackOffset AllocateAfterPPRs = PPR.LocalsSize; -    if (SVELayout == SVEStackLayout::Split) { -      AllocateBeforePPRs = PPR.CalleeSavesSize; -      AllocateAfterPPRs = PPR.LocalsSize + ZPR.CalleeSavesSize; -    } -    allocateStackSpace(PPRRange.Begin, 0, AllocateBeforePPRs, +    allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs,                         EmitAsyncCFI && !HasFP, CFAOffset, -                       MFI.hasVarSizedObjects() || AllocateAfterPPRs || -                           ZPR.LocalsSize || NonSVELocalsSize); -    CFAOffset += AllocateBeforePPRs; +                       MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs || +                           SVEAllocs.AfterZPRs); +    CFAOffset += SVEAllocs.BeforePPRs;      assert(PPRRange.End == ZPRRange.Begin &&             "Expected ZPR callee saves after PPR locals"); -    allocateStackSpace(PPRRange.End, RealignmentPadding, AllocateAfterPPRs, +    allocateStackSpace(PPRRange.End, RealignmentPadding, SVEAllocs.AfterPPRs,                         EmitAsyncCFI && !HasFP, CFAOffset, -                       MFI.hasVarSizedObjects() || ZPR.LocalsSize || -                           NonSVELocalsSize); -    CFAOffset += AllocateAfterPPRs; +                       MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs); +    CFAOffset += SVEAllocs.AfterPPRs;    } else {      assert(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord); -    // Note: With CalleeSavesAboveFrameRecord, the SVE CS have already been -    // allocated (and separate PPR locals are not supported, all SVE locals, -    // both PPR and ZPR, are within the ZPR locals area). -    assert(!PPR.LocalsSize && "Unexpected PPR locals!"); -    CFAOffset += SVECalleeSavesSize; +    // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have +    // already been allocated. PPR locals (included in AfterPPRs) are not +    // supported (note: this is asserted above). +    CFAOffset += SVEAllocs.BeforePPRs;    }    // Allocate space for the rest of the frame including ZPR locals. Align the @@ -815,9 +825,9 @@ void AArch64PrologueEmitter::emitPrologue() {      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the      // correct value here, as NumBytes also includes padding bytes, which      // shouldn't be counted here. -    allocateStackSpace( -        AfterSVESavesI, RealignmentPadding, ZPR.LocalsSize + NonSVELocalsSize, -        EmitAsyncCFI && !HasFP, CFAOffset, MFI.hasVarSizedObjects()); +    allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs, +                       EmitAsyncCFI && !HasFP, CFAOffset, +                       MFI.hasVarSizedObjects());    }    // If we need a base pointer, set it up here. It's whatever the value of the @@ -1472,27 +1482,26 @@ void AArch64EpilogueEmitter::emitEpilogue() {    assert(NumBytes >= 0 && "Negative stack allocation size!?");    StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize; -  StackOffset SVEStackSize = -      SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize; +  SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});    MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin; -  MachineBasicBlock::iterator RestoreEnd = PPRRange.End;    // Deallocate the SVE area.    
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { -    StackOffset SVELocalsSize = ZPR.LocalsSize + PPR.LocalsSize; +    assert(!SVEAllocs.AfterPPRs && +           "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");      // If the callee-save area is before FP, restoring the FP implicitly -    // deallocates non-callee-save SVE allocations.  Otherwise, deallocate them +    // deallocates non-callee-save SVE allocations. Otherwise, deallocate them      // explicitly.      if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {        emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP, -                      SVELocalsSize, TII, MachineInstr::FrameDestroy, false, -                      NeedsWinCFI, &HasWinCFI); +                      SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy, +                      false, NeedsWinCFI, &HasWinCFI);      }      // Deallocate callee-save SVE registers. -    emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, -                    SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false, -                    NeedsWinCFI, &HasWinCFI); +    emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP, +                    SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy, +                    false, NeedsWinCFI, &HasWinCFI);    } else if (AFI->hasSVEStackSize()) {      // If we have stack realignment or variable-sized objects we must use the FP      // to restore SVE callee saves (as there is an unknown amount of @@ -1524,46 +1533,33 @@ void AArch64EpilogueEmitter::emitEpilogue() {        emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,                        -SVECalleeSavesSize, TII, MachineInstr::FrameDestroy);      } else if (BaseForSVEDealloc == AArch64::SP) { -      auto CFAOffset = -          SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize); - -      if (SVECalleeSavesSize) { -        // Deallocate the non-SVE locals first before we can deallocate (and -        // restore callee saves) from the SVE area. -        auto NonSVELocals = StackOffset::getFixed(NumBytes); -        emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP, -                        NonSVELocals, TII, MachineInstr::FrameDestroy, false, -                        NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset); -        CFAOffset -= NonSVELocals; -        NumBytes = 0; -      } - -      if (ZPR.LocalsSize) { -        emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP, -                        ZPR.LocalsSize, TII, MachineInstr::FrameDestroy, false, -                        NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset); -        CFAOffset -= ZPR.LocalsSize; +      auto NonSVELocals = StackOffset::getFixed(NumBytes); +      auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) + +                       SVEAllocs.totalSize(); + +      if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) { +        // Deallocate non-SVE locals now. This is needed to reach the SVE callee +        // saves, but may also allow combining stack hazard bumps for split SVE. 
+        SVEAllocs.AfterZPRs += NonSVELocals; +        NumBytes -= NonSVELocals.getFixed();        } - -      StackOffset SVECalleeSavesToDealloc = SVECalleeSavesSize; -      if (SVELayout == SVEStackLayout::Split && -          (PPR.LocalsSize || ZPR.CalleeSavesSize)) { -        assert(PPRRange.Begin == ZPRRange.End && -               "Expected PPR restores after ZPR"); -        emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP, -                        PPR.LocalsSize + ZPR.CalleeSavesSize, TII, -                        MachineInstr::FrameDestroy, false, NeedsWinCFI, -                        &HasWinCFI, EmitCFI && !HasFP, CFAOffset); -        CFAOffset -= PPR.LocalsSize + ZPR.CalleeSavesSize; -        SVECalleeSavesToDealloc -= ZPR.CalleeSavesSize; -      } - -      // If split SVE is on, this dealloc PPRs, otherwise, deallocs ZPRs + PPRs: -      if (SVECalleeSavesToDealloc) -        emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP, -                        SVECalleeSavesToDealloc, TII, -                        MachineInstr::FrameDestroy, false, NeedsWinCFI, -                        &HasWinCFI, EmitCFI && !HasFP, CFAOffset); +      // To deallocate the SVE stack adjust by the allocations in reverse. +      emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP, +                      SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy, +                      false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, +                      CFAOffset); +      CFAOffset -= SVEAllocs.AfterZPRs; +      assert(PPRRange.Begin == ZPRRange.End && +             "Expected PPR restores after ZPR"); +      emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP, +                      SVEAllocs.AfterPPRs, TII, MachineInstr::FrameDestroy, +                      false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, +                      CFAOffset); +      CFAOffset -= SVEAllocs.AfterPPRs; +      emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP, +                      SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy, +                      false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, +                      CFAOffset);      }      if (EmitCFI) diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h index bccadda..6e0e283 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h @@ -33,6 +33,11 @@ struct SVEFrameSizes {    } PPR, ZPR;  }; +struct SVEStackAllocations { +  StackOffset BeforePPRs, AfterPPRs, AfterZPRs; +  StackOffset totalSize() const { return BeforePPRs + AfterPPRs + AfterZPRs; } +}; +  class AArch64PrologueEpilogueCommon {  public:    AArch64PrologueEpilogueCommon(MachineFunction &MF, MachineBasicBlock &MBB, @@ -66,6 +71,7 @@ protected:    bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const;    SVEFrameSizes getSVEStackFrameSizes() const; +  SVEStackAllocations getSVEStackAllocations(SVEFrameSizes const &);    MachineFunction &MF;    MachineBasicBlock &MBB; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 47c1ac4..5b5565a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -308,9 +308,9 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,    return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;  } 
-bool AArch64TTIImpl::areTypesABICompatible( -    const Function *Caller, const Function *Callee, -    const ArrayRef<Type *> &Types) const { +bool AArch64TTIImpl::areTypesABICompatible(const Function *Caller, +                                           const Function *Callee, +                                           ArrayRef<Type *> Types) const {    if (!BaseT::areTypesABICompatible(Caller, Callee, Types))      return false; @@ -2227,7 +2227,7 @@ static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,    return std::nullopt;  } -template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc> +template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>  static std::optional<Instruction *>  instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II,                                    bool MergeIntoAddendOp) { @@ -6657,10 +6657,15 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(            Ops.push_back(&Ext->getOperandUse(0));          Ops.push_back(&Op); -        if (isa<SExtInst>(Ext)) +        if (isa<SExtInst>(Ext)) {            NumSExts++; -        else +        } else {            NumZExts++; +          // A zext(a) is also a sext(zext(a)), if we take more than 2 steps. +          if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 < +              I->getType()->getScalarSizeInBits()) +            NumSExts++; +        }          continue;        } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index fe2e849..b39546a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -84,7 +84,7 @@ public:                             const Function *Callee) const override;    bool areTypesABICompatible(const Function *Caller, const Function *Callee, -                             const ArrayRef<Type *> &Types) const override; +                             ArrayRef<Type *> Types) const override;    unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,                                  unsigned DefaultCallPenalty) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index cd8b249..67042b7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -69,7 +69,7 @@ FunctionPass *createAMDGPUPreloadKernArgPrologLegacyPass();  ModulePass *createAMDGPUPreloadKernelArgumentsLegacyPass(const TargetMachine *);  struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { -  AMDGPUSimplifyLibCallsPass() {} +  AMDGPUSimplifyLibCallsPass() = default;    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);  }; @@ -371,13 +371,13 @@ public:  class AMDGPUAnnotateUniformValuesPass      : public PassInfoMixin<AMDGPUAnnotateUniformValuesPass> {  public: -  AMDGPUAnnotateUniformValuesPass() {} +  AMDGPUAnnotateUniformValuesPass() = default;    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);  };  class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> {  public: -  SIModeRegisterPass() {} +  SIModeRegisterPass() = default;    PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM);  }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h index 1064e57..dad94b8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h @@ -96,7 +96,7 @@ inline raw_ostream 
&operator<<(raw_ostream &OS, const ArgDescriptor &Arg) {  }  struct KernArgPreloadDescriptor : public ArgDescriptor { -  KernArgPreloadDescriptor() {} +  KernArgPreloadDescriptor() = default;    SmallVector<MCRegister> Regs;  }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 9907c88f..8669978 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1555,7 +1555,7 @@ private:    AMDGPU::ClusterDimsAttr Attr; -  static constexpr const char AttrName[] = "amdgpu-cluster-dims"; +  static constexpr char AttrName[] = "amdgpu-cluster-dims";  };  AAAMDGPUClusterDims & diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h index cf2ab825..a3be0f5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h @@ -48,7 +48,7 @@ private:    FuncInfoMap FIM;  public: -  AMDGPUPerfHintAnalysis() {} +  AMDGPUPerfHintAnalysis() = default;    // OldPM    bool runOnSCC(const GCNTargetMachine &TM, CallGraphSCC &SCC); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 103cdec..1e5885a2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -202,7 +202,7 @@ bool PredicateMapping::match(const MachineInstr &MI,    return true;  } -SetOfRulesForOpcode::SetOfRulesForOpcode() {} +SetOfRulesForOpcode::SetOfRulesForOpcode() = default;  SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)      : FastTypes(FastTypes) {} @@ -913,6 +913,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,    addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}}); +  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}}); +    addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)        .Uni(S64, {{Sgpr64}, {}}); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index b28c50e..b87b54f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -816,7 +816,7 @@ parseAMDGPUAtomicOptimizerStrategy(StringRef Params) {    Params.consume_front("strategy=");    auto Result = StringSwitch<std::optional<ScanOptions>>(Params)                      .Case("dpp", ScanOptions::DPP) -                    .Cases("iterative", "", ScanOptions::Iterative) +                    .Cases({"iterative", ""}, ScanOptions::Iterative)                      .Case("none", ScanOptions::None)                      .Default(std::nullopt);    if (Result) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp index 733c5d5..fe81a5e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -181,14 +181,52 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(    return NewRetBlock;  } +static BasicBlock * +createDummyReturnBlock(Function &F, +                       SmallVector<BasicBlock *, 4> &ReturningBlocks) { +  BasicBlock *DummyReturnBB = +      BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F); +  Type *RetTy = F.getReturnType(); +  Value *RetVal = RetTy->isVoidTy() ? 
nullptr : PoisonValue::get(RetTy); +  ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB); +  ReturningBlocks.push_back(DummyReturnBB); +  return DummyReturnBB; +} + +/// Handle conditional branch instructions (-> 2 targets) and callbr +/// instructions with N targets. +static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI, +                          BasicBlock *DummyReturnBB, +                          std::vector<DominatorTree::UpdateType> &Updates) { +  SmallVector<BasicBlock *, 2> Successors(successors(BB)); + +  // Create a new transition block to hold the conditional branch. +  BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock"); + +  Updates.reserve(Updates.size() + 2 * Successors.size() + 2); + +  // 'Successors' become successors of TransitionBB instead of BB, +  // and TransitionBB becomes a single successor of BB. +  Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB); +  for (BasicBlock *Successor : Successors) { +    Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor); +    Updates.emplace_back(DominatorTree::Delete, BB, Successor); +  } + +  // Create a branch that will always branch to the transition block and +  // references DummyReturnBB. +  BB->getTerminator()->eraseFromParent(); +  BranchInst::Create(TransitionBB, DummyReturnBB, +                     ConstantInt::getTrue(F.getContext()), BB); +  Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB); +} +  bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,                                              const PostDominatorTree &PDT,                                              const UniformityInfo &UA) { -  assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator."); -    if (PDT.root_size() == 0 ||        (PDT.root_size() == 1 && -       !isa<BranchInst>(PDT.getRoot()->getTerminator()))) +       !isa<BranchInst, CallBrInst>(PDT.getRoot()->getTerminator())))      return false;    // Loop over all of the blocks in a function, tracking all of the blocks that @@ -222,46 +260,28 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,        if (HasDivergentExitBlock)          UnreachableBlocks.push_back(BB);      } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { - -      ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext()); -      if (DummyReturnBB == nullptr) { -        DummyReturnBB = BasicBlock::Create(F.getContext(), -                                           "DummyReturnBlock", &F); -        Type *RetTy = F.getReturnType(); -        Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy); -        ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB); -        ReturningBlocks.push_back(DummyReturnBB); -      } +      if (!DummyReturnBB) +        DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);        if (BI->isUnconditional()) {          BasicBlock *LoopHeaderBB = BI->getSuccessor(0);          BI->eraseFromParent(); // Delete the unconditional branch.          // Add a new conditional branch with a dummy edge to the return block. -        BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB); -        Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB); -      } else { // Conditional branch. -        SmallVector<BasicBlock *, 2> Successors(successors(BB)); - -        // Create a new transition block to hold the conditional branch. 
-        BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock"); - -        Updates.reserve(Updates.size() + 2 * Successors.size() + 2); - -        // 'Successors' become successors of TransitionBB instead of BB, -        // and TransitionBB becomes a single successor of BB. -        Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB); -        for (BasicBlock *Successor : Successors) { -          Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor); -          Updates.emplace_back(DominatorTree::Delete, BB, Successor); -        } - -        // Create a branch that will always branch to the transition block and -        // references DummyReturnBB. -        BB->getTerminator()->eraseFromParent(); -        BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB); +        BranchInst::Create(LoopHeaderBB, DummyReturnBB, +                           ConstantInt::getTrue(F.getContext()), BB);          Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB); +      } else { +        handleNBranch(F, BB, BI, DummyReturnBB, Updates);        }        Changed = true; +    } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) { +      if (!DummyReturnBB) +        DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks); + +      handleNBranch(F, BB, CBI, DummyReturnBB, Updates); +      Changed = true; +    } else { +      llvm_unreachable("unsupported block terminator");      }    } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp index 61c5dcd..ded2f5a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp @@ -54,7 +54,7 @@ public:    bool CullSGPRHazardsAtMemWait;    unsigned CullSGPRHazardsMemWaitThreshold; -  AMDGPUWaitSGPRHazards() {} +  AMDGPUWaitSGPRHazards() = default;    // Return the numeric ID 0-127 for a given SGPR.    static std::optional<unsigned> sgprNumber(Register Reg, diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 975781f..f357981 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -183,7 +183,7 @@ class ScheduleMetrics {    unsigned BubbleCycles;  public: -  ScheduleMetrics() {} +  ScheduleMetrics() = default;    ScheduleMetrics(unsigned L, unsigned BC)        : ScheduleLength(L), BubbleCycles(BC) {}    unsigned getLength() const { return ScheduleLength; } @@ -217,7 +217,7 @@ class RegionPressureMap {    bool IsLiveOut;  public: -  RegionPressureMap() {} +  RegionPressureMap() = default;    RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)        : DAG(GCNDAG), IsLiveOut(LiveOut) {}    // Build the Instr->LiveReg and RegionIdx->Instr maps diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d9f76c9..45f5919 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6153,7 +6153,7 @@ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,    // information.    
if (AMDGPU::isPackedFP32Inst(MI.getOpcode()) && AMDGPU::isGFX12Plus(ST) &&        MO.isReg() && RI.isSGPRReg(MRI, MO.getReg())) { -    constexpr const AMDGPU::OpName OpNames[] = { +    constexpr AMDGPU::OpName OpNames[] = {          AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};      for (auto [I, OpName] : enumerate(OpNames)) { @@ -6215,8 +6215,8 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,  bool SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand(      const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,      const MachineOperand *MO) const { -  constexpr const unsigned NumOps = 3; -  constexpr const AMDGPU::OpName OpNames[NumOps * 2] = { +  constexpr unsigned NumOps = 3; +  constexpr AMDGPU::OpName OpNames[NumOps * 2] = {        AMDGPU::OpName::src0,           AMDGPU::OpName::src1,        AMDGPU::OpName::src2,           AMDGPU::OpName::src0_modifiers,        AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers}; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 6b06534..3a00267 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -9869,32 +9869,12 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {    assert(Subtarget->isTargetDarwin());    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); -  auto PtrVT = getPointerTy(DAG.getDataLayout()); - -  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();    // Pair of floats / doubles used to pass the result.    Type *RetTy = StructType::get(ArgTy, ArgTy);    auto &DL = DAG.getDataLayout();    ArgListTy Args; -  bool ShouldUseSRet = getTM().isAPCS_ABI(); -  SDValue SRet; -  if (ShouldUseSRet) { -    // Create stack object for sret. -    const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); -    const Align StackAlign = DL.getPrefTypeAlign(RetTy); -    int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); -    SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL)); - -    ArgListEntry Entry(SRet, PointerType::getUnqual(RetTy->getContext())); -    Entry.IsSExt = false; -    Entry.IsZExt = false; -    Entry.IsSRet = true; -    Args.push_back(Entry); -    RetTy = Type::getVoidTy(*DAG.getContext()); -  } -    Args.emplace_back(Arg, ArgTy);    StringRef LibcallName = getLibcallImplName(SincosStret); @@ -9904,25 +9884,10 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {    TargetLowering::CallLoweringInfo CLI(DAG);    CLI.setDebugLoc(dl)        .setChain(DAG.getEntryNode()) -      .setCallee(CC, RetTy, Callee, std::move(Args)) -      .setDiscardResult(ShouldUseSRet); +      .setCallee(CC, RetTy, Callee, std::move(Args));    std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); -  if (!ShouldUseSRet) -    return CallResult.first; - -  SDValue LoadSin = -      DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo()); - -  // Address of cos field. 
-  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet, -                            DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl)); -  SDValue LoadCos = -      DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo()); - -  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); -  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, -                     LoadSin.getValue(0), LoadCos.getValue(0)); +  return CallResult.first;  }  SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp index 77dc4a7..b2a8204 100644 --- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp @@ -88,6 +88,16 @@ bool BPFAsmPrinter::doFinalization(Module &M) {      }    } +  for (GlobalObject &GO : M.global_objects()) { +    if (!GO.hasExternalWeakLinkage()) +      continue; + +    if (!SawTrapCall && GO.getName() == BPF_TRAP) { +      GO.eraseFromParent(); +      break; +    } +  } +    return AsmPrinter::doFinalization(M);  } @@ -160,6 +170,20 @@ bool BPFAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,  }  void BPFAsmPrinter::emitInstruction(const MachineInstr *MI) { +  if (MI->isCall()) { +    for (const MachineOperand &Op : MI->operands()) { +      if (Op.isGlobal()) { +        if (const GlobalValue *GV = Op.getGlobal()) +          if (GV->getName() == BPF_TRAP) +            SawTrapCall = true; +      } else if (Op.isSymbol()) { +        if (const MCSymbol *Sym = Op.getMCSymbol()) +          if (Sym->getName() == BPF_TRAP) +            SawTrapCall = true; +      } +    } +  } +    BPF_MC::verifyInstructionPredicates(MI->getOpcode(),                                        getSubtargetInfo().getFeatureBits()); diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.h b/llvm/lib/Target/BPF/BPFAsmPrinter.h index 90ef207..75a1d7e 100644 --- a/llvm/lib/Target/BPF/BPFAsmPrinter.h +++ b/llvm/lib/Target/BPF/BPFAsmPrinter.h @@ -39,6 +39,7 @@ public:  private:    BTFDebug *BTF;    TargetMachine &TM; +  bool SawTrapCall = false;    const BPFTargetMachine &getBTM() const;  }; diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp index ab4ee55..08f196b 100644 --- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp +++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp @@ -884,13 +884,13 @@ CSKYTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,                                 .Case("{t4}", CSKY::R20)                                 .Case("{t5}", CSKY::R21)                                 .Case("{t6}", CSKY::R22) -                               .Cases("{t7}", "{fp}", CSKY::R23) -                               .Cases("{t8}", "{top}", CSKY::R24) -                               .Cases("{t9}", "{bsp}", CSKY::R25) +                               .Cases({"{t7}", "{fp}"}, CSKY::R23) +                               .Cases({"{t8}", "{top}"}, CSKY::R24) +                               .Cases({"{t9}", "{bsp}"}, CSKY::R25)                                 .Case("{r26}", CSKY::R26)                                 .Case("{r27}", CSKY::R27)                                 .Cases({"{gb}", "{rgb}", "{rdb}"}, CSKY::R28) -                               .Cases("{tb}", "{rtb}", CSKY::R29) +                               .Cases({"{tb}", "{rtb}"}, CSKY::R29)                                 .Case("{svbr}", CSKY::R30)                                 .Case("{tls}", CSKY::R31)                                 .Default(CSKY::NoRegister); @@ -907,38 +907,38 @@ 
CSKYTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,    // use the ABI names in register constraint lists.    if (Subtarget.useHardFloat()) {      unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) -                        .Cases("{fr0}", "{vr0}", CSKY::F0_32) -                        .Cases("{fr1}", "{vr1}", CSKY::F1_32) -                        .Cases("{fr2}", "{vr2}", CSKY::F2_32) -                        .Cases("{fr3}", "{vr3}", CSKY::F3_32) -                        .Cases("{fr4}", "{vr4}", CSKY::F4_32) -                        .Cases("{fr5}", "{vr5}", CSKY::F5_32) -                        .Cases("{fr6}", "{vr6}", CSKY::F6_32) -                        .Cases("{fr7}", "{vr7}", CSKY::F7_32) -                        .Cases("{fr8}", "{vr8}", CSKY::F8_32) -                        .Cases("{fr9}", "{vr9}", CSKY::F9_32) -                        .Cases("{fr10}", "{vr10}", CSKY::F10_32) -                        .Cases("{fr11}", "{vr11}", CSKY::F11_32) -                        .Cases("{fr12}", "{vr12}", CSKY::F12_32) -                        .Cases("{fr13}", "{vr13}", CSKY::F13_32) -                        .Cases("{fr14}", "{vr14}", CSKY::F14_32) -                        .Cases("{fr15}", "{vr15}", CSKY::F15_32) -                        .Cases("{fr16}", "{vr16}", CSKY::F16_32) -                        .Cases("{fr17}", "{vr17}", CSKY::F17_32) -                        .Cases("{fr18}", "{vr18}", CSKY::F18_32) -                        .Cases("{fr19}", "{vr19}", CSKY::F19_32) -                        .Cases("{fr20}", "{vr20}", CSKY::F20_32) -                        .Cases("{fr21}", "{vr21}", CSKY::F21_32) -                        .Cases("{fr22}", "{vr22}", CSKY::F22_32) -                        .Cases("{fr23}", "{vr23}", CSKY::F23_32) -                        .Cases("{fr24}", "{vr24}", CSKY::F24_32) -                        .Cases("{fr25}", "{vr25}", CSKY::F25_32) -                        .Cases("{fr26}", "{vr26}", CSKY::F26_32) -                        .Cases("{fr27}", "{vr27}", CSKY::F27_32) -                        .Cases("{fr28}", "{vr28}", CSKY::F28_32) -                        .Cases("{fr29}", "{vr29}", CSKY::F29_32) -                        .Cases("{fr30}", "{vr30}", CSKY::F30_32) -                        .Cases("{fr31}", "{vr31}", CSKY::F31_32) +                        .Cases({"{fr0}", "{vr0}"}, CSKY::F0_32) +                        .Cases({"{fr1}", "{vr1}"}, CSKY::F1_32) +                        .Cases({"{fr2}", "{vr2}"}, CSKY::F2_32) +                        .Cases({"{fr3}", "{vr3}"}, CSKY::F3_32) +                        .Cases({"{fr4}", "{vr4}"}, CSKY::F4_32) +                        .Cases({"{fr5}", "{vr5}"}, CSKY::F5_32) +                        .Cases({"{fr6}", "{vr6}"}, CSKY::F6_32) +                        .Cases({"{fr7}", "{vr7}"}, CSKY::F7_32) +                        .Cases({"{fr8}", "{vr8}"}, CSKY::F8_32) +                        .Cases({"{fr9}", "{vr9}"}, CSKY::F9_32) +                        .Cases({"{fr10}", "{vr10}"}, CSKY::F10_32) +                        .Cases({"{fr11}", "{vr11}"}, CSKY::F11_32) +                        .Cases({"{fr12}", "{vr12}"}, CSKY::F12_32) +                        .Cases({"{fr13}", "{vr13}"}, CSKY::F13_32) +                        .Cases({"{fr14}", "{vr14}"}, CSKY::F14_32) +                        .Cases({"{fr15}", "{vr15}"}, CSKY::F15_32) +                        .Cases({"{fr16}", "{vr16}"}, CSKY::F16_32) +                        .Cases({"{fr17}", "{vr17}"}, CSKY::F17_32) +                        .Cases({"{fr18}", 
"{vr18}"}, CSKY::F18_32) +                        .Cases({"{fr19}", "{vr19}"}, CSKY::F19_32) +                        .Cases({"{fr20}", "{vr20}"}, CSKY::F20_32) +                        .Cases({"{fr21}", "{vr21}"}, CSKY::F21_32) +                        .Cases({"{fr22}", "{vr22}"}, CSKY::F22_32) +                        .Cases({"{fr23}", "{vr23}"}, CSKY::F23_32) +                        .Cases({"{fr24}", "{vr24}"}, CSKY::F24_32) +                        .Cases({"{fr25}", "{vr25}"}, CSKY::F25_32) +                        .Cases({"{fr26}", "{vr26}"}, CSKY::F26_32) +                        .Cases({"{fr27}", "{vr27}"}, CSKY::F27_32) +                        .Cases({"{fr28}", "{vr28}"}, CSKY::F28_32) +                        .Cases({"{fr29}", "{vr29}"}, CSKY::F29_32) +                        .Cases({"{fr30}", "{vr30}"}, CSKY::F30_32) +                        .Cases({"{fr31}", "{vr31}"}, CSKY::F31_32)                          .Default(CSKY::NoRegister);      if (FReg != CSKY::NoRegister) {        assert(CSKY::F0_32 <= FReg && FReg <= CSKY::F31_32 && "Unknown fp-reg"); diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index 8ace2d2..eb4c884 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -194,9 +194,10 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) {          dxbc::PSV::v2::ResourceBindInfo BindInfo;          BindInfo.Type = Type;          BindInfo.LowerBound = Binding.LowerBound; -        assert(Binding.Size == UINT32_MAX || -               (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX && -                   "Resource range is too large"); +        assert( +            (Binding.Size == UINT32_MAX || +             (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX) && +            "Resource range is too large");          BindInfo.UpperBound = (Binding.Size == UINT32_MAX)                                    ? 
UINT32_MAX                                    : Binding.LowerBound + Binding.Size - 1; diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td index f4e36fa7..e661c94 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td @@ -26,6 +26,7 @@ def tc_20a4bbec : InstrItinClass;  def tc_227864f7 : InstrItinClass;  def tc_257f6f7c : InstrItinClass;  def tc_26a377fe : InstrItinClass; +def tc_2a698a03 : InstrItinClass;  def tc_2b4c548e : InstrItinClass;  def tc_2c745bb8 : InstrItinClass;  def tc_2d4051cd : InstrItinClass; @@ -52,6 +53,7 @@ def tc_561aaa58 : InstrItinClass;  def tc_56c4f9fe : InstrItinClass;  def tc_56e64202 : InstrItinClass;  def tc_58d21193 : InstrItinClass; +def tc_57a4709c : InstrItinClass;  def tc_5bf8afbb : InstrItinClass;  def tc_5cdf8c84 : InstrItinClass;  def tc_61bf7c03 : InstrItinClass; @@ -220,6 +222,11 @@ class DepHVXItinV55 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -356,6 +363,11 @@ class DepHVXItinV55 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -812,6 +824,11 @@ class DepHVXItinV60 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -948,6 +965,11 @@ class DepHVXItinV60 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -1404,6 +1426,11 @@ class DepHVXItinV62 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -1540,6 +1567,11 @@ class DepHVXItinV62 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], 
[7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -1996,6 +2028,11 @@ class DepHVXItinV65 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -2132,6 +2169,11 @@ class DepHVXItinV65 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -2588,6 +2630,11 @@ class DepHVXItinV66 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -2724,6 +2771,11 @@ class DepHVXItinV66 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -3180,6 +3232,11 @@ class DepHVXItinV67 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -3316,6 +3373,11 @@ class DepHVXItinV67 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 
2], @@ -3772,6 +3834,11 @@ class DepHVXItinV68 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -3908,6 +3975,11 @@ class DepHVXItinV68 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -4364,6 +4436,11 @@ class DepHVXItinV69 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -4500,6 +4577,11 @@ class DepHVXItinV69 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -4956,6 +5038,11 @@ class DepHVXItinV71 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -5092,6 +5179,11 @@ class DepHVXItinV71 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -5548,6 +5640,11 @@ class DepHVXItinV73 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -5684,6 +5781,11 @@ class DepHVXItinV73 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -6140,6 +6242,11 @@ class DepHVXItinV75 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -6276,6 +6383,11 @@ class DepHVXItinV75 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -6732,6 +6844,11 @@ class DepHVXItinV79 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -6868,6 +6985,11 @@ class DepHVXItinV79 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -7324,6 +7446,11 @@ class DepHVXItinV81 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -7460,6 +7587,11 @@ class DepHVXItinV81 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td index f8f1c2a..b188134 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -29939,6 +29939,58 @@ let opNewValue = 0;  let isCVI = 1;  let DecoderNamespace = "EXT_mmvec";  } +def V6_vabs_qf16_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = vabs($Vu32.hf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabs_qf16_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = vabs($Vu32.qf16)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabs_qf32_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = vabs($Vu32.qf32)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabs_qf32_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = vabs($Vu32.sf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +}  def V6_vabs_sf : HInst<  (outs HvxVR:$Vd32),  (ins HvxVR:$Vu32), @@ -31302,6 +31354,21 @@ let isPseudo = 1;  let isCodeGenOnly = 1;  let DecoderNamespace = "EXT_mmvec";  } +def V6_valign4 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), +"$Vd32 = valign4($Vu32,$Vv32,$Rt8)", +tc_57a4709c, TypeCVI_VA>, Enc_a30110, Requires<[UseHVXV81]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b0; +let Inst{31-24} = 0b00011000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +}  def V6_valignb : HInst<  (outs HvxVR:$Vd32),  (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), @@ -32583,6 +32650,32 @@ let isCVI = 1;  let hasHvxTmp = 1;  let DecoderNamespace = "EXT_mmvec";  } +def V6_vconv_bf_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxWR:$Vuu32), +"$Vd32.bf = $Vuu32.qf32", +tc_2a698a03, TypeCVI_VS>, Enc_a33d04, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_f8_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.f8 = $Vu32.qf16", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +}  def V6_vconv_h_hf : HInst<  (outs HvxVR:$Vd32),  (ins HvxVR:$Vu32), @@ -32596,6 
+32689,19 @@ let opNewValue = 0;  let isCVI = 1;  let DecoderNamespace = "EXT_mmvec";  } +def V6_vconv_h_hf_rnd : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.h = $Vu32.hf:rnd", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +}  def V6_vconv_hf_h : HInst<  (outs HvxVR:$Vd32),  (ins HvxVR:$Vu32), @@ -32635,6 +32741,71 @@ let opNewValue = 0;  let isCVI = 1;  let DecoderNamespace = "EXT_mmvec";  } +def V6_vconv_qf16_f8 : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32), +"$Vdd32.qf16 = $Vu32.f8", +tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_qf16_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = $Vu32.hf", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_qf16_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = $Vu32.qf16", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_qf32_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = $Vu32.qf32", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_qf32_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = $Vu32.sf", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +}  def V6_vconv_sf_qf32 : HInst<  (outs HvxVR:$Vd32),  (ins HvxVR:$Vu32), @@ -33720,6 +33891,122 @@ let isHVXALU2SRC = 1;  let DecoderNamespace = "EXT_mmvec";  let Constraints = "$Qx4 = $Qx4in";  } +def V6_veqhf : HInst< +(outs HvxQR:$Qd4), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Qd4 = vcmp.eq($Vu32.hf,$Vv32.hf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b000111; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_veqhf_and : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 &= vcmp.eq($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b000111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqhf_or : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), 
+"$Qx4 |= vcmp.eq($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b010111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isAccumulator = 1; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqhf_xor : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 ^= vcmp.eq($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b100111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqsf : HInst< +(outs HvxQR:$Qd4), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Qd4 = vcmp.eq($Vu32.sf,$Vv32.sf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b000011; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_veqsf_and : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 &= vcmp.eq($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b000011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqsf_or : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 |= vcmp.eq($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b010011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isAccumulator = 1; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqsf_xor : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 ^= vcmp.eq($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b100011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +}  def V6_veqw : HInst<  (outs HvxQR:$Qd4),  (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -34538,6 +34825,58 @@ let Inst{31-24} = 0b00011110;  let isCVI = 1;  let DecoderNamespace = "EXT_mmvec";  } +def V6_vilog2_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = vilog2($Vu32.hf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vilog2_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = vilog2($Vu32.qf16)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vilog2_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = vilog2($Vu32.qf32)", +tc_2a698a03, TypeCVI_VS>, 
Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vilog2_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = vilog2($Vu32.sf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +}  def V6_vinsertwr : HInst<  (outs HvxVR:$Vx32),  (ins HvxVR:$Vx32in, IntRegs:$Rt32), @@ -37170,6 +37509,58 @@ let isCVI = 1;  let isHVXALU = 1;  let DecoderNamespace = "EXT_mmvec";  } +def V6_vneg_qf16_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = vneg($Vu32.hf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vneg_qf16_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = vneg($Vu32.qf16)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vneg_qf32_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = vneg($Vu32.qf32)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vneg_qf32_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = vneg($Vu32.sf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +}  def V6_vnormamth : HInst<  (outs HvxVR:$Vd32),  (ins HvxVR:$Vu32), diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td index 23f4b3a..c11483b 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td +++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td @@ -3830,6 +3830,122 @@ def: Pat<(int_hexagon_V6_vsub_hf_f8_128B HvxVR:$src1, HvxVR:$src2),  // V81 HVX Instructions. 
+def: Pat<(int_hexagon_V6_vabs_qf16_hf HvxVR:$src1), +         (V6_vabs_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf16_hf_128B HvxVR:$src1), +         (V6_vabs_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf16_qf16 HvxVR:$src1), +         (V6_vabs_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf16_qf16_128B HvxVR:$src1), +         (V6_vabs_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf32_qf32 HvxVR:$src1), +         (V6_vabs_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf32_qf32_128B HvxVR:$src1), +         (V6_vabs_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf32_sf HvxVR:$src1), +         (V6_vabs_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf32_sf_128B HvxVR:$src1), +         (V6_vabs_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), +         (V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[UseHVXV81, UseHVX64B]>; +def: Pat<(int_hexagon_V6_valign4_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), +         (V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[UseHVXV81, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_bf_qf32 HvxWR:$src1), +         (V6_vconv_bf_qf32 HvxWR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_bf_qf32_128B HvxWR:$src1), +         (V6_vconv_bf_qf32 HvxWR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_f8_qf16 HvxVR:$src1), +         (V6_vconv_f8_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_f8_qf16_128B HvxVR:$src1), +         (V6_vconv_f8_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_h_hf_rnd HvxVR:$src1), +         (V6_vconv_h_hf_rnd HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vconv_h_hf_rnd_128B HvxVR:$src1), +         (V6_vconv_h_hf_rnd HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_qf16_f8 HvxVR:$src1), +         (V6_vconv_qf16_f8 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_f8_128B HvxVR:$src1), +         (V6_vconv_qf16_f8 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_hf HvxVR:$src1), +         (V6_vconv_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_hf_128B HvxVR:$src1), +         (V6_vconv_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_qf16 HvxVR:$src1), +         (V6_vconv_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_qf16_128B HvxVR:$src1), +         (V6_vconv_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf32_qf32 HvxVR:$src1), +         (V6_vconv_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf32_qf32_128B HvxVR:$src1), +         
(V6_vconv_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf32_sf HvxVR:$src1), +         (V6_vconv_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf32_sf_128B HvxVR:$src1), +         (V6_vconv_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf HvxVR:$src1, HvxVR:$src2), +         (V6_veqhf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_128B HvxVR:$src1, HvxVR:$src2), +         (V6_veqhf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf HvxVR:$src1, HvxVR:$src2), +         (V6_veqsf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_128B HvxVR:$src1, HvxVR:$src2), +         (V6_veqsf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_hf HvxVR:$src1), +         (V6_vilog2_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_hf_128B HvxVR:$src1), +         (V6_vilog2_hf HvxVR:$src1)>, 
Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_qf16 HvxVR:$src1), +         (V6_vilog2_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_qf16_128B HvxVR:$src1), +         (V6_vilog2_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_qf32 HvxVR:$src1), +         (V6_vilog2_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_qf32_128B HvxVR:$src1), +         (V6_vilog2_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_sf HvxVR:$src1), +         (V6_vilog2_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_sf_128B HvxVR:$src1), +         (V6_vilog2_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf16_hf HvxVR:$src1), +         (V6_vneg_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf16_hf_128B HvxVR:$src1), +         (V6_vneg_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf16_qf16 HvxVR:$src1), +         (V6_vneg_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf16_qf16_128B HvxVR:$src1), +         (V6_vneg_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf32_qf32 HvxVR:$src1), +         (V6_vneg_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf32_qf32_128B HvxVR:$src1), +         (V6_vneg_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf32_sf HvxVR:$src1), +         (V6_vneg_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf32_sf_128B HvxVR:$src1), +         (V6_vneg_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;  def: Pat<(int_hexagon_V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2),           (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;  def: Pat<(int_hexagon_V6_vsub_hf_mix_128B HvxVR:$src1, HvxVR:$src2), diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 7ee280d..eadf020 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -1815,7 +1815,7 @@ struct WeightedLeaf {    int Weight;    int InsertionOrder; -  WeightedLeaf() {} +  WeightedLeaf() = default;    WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :      Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 904aabed..fe700e1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -375,6 +375,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,        setOperationAction(ISD::FFLOOR, VT, Legal);        setOperationAction(ISD::FTRUNC, VT, Legal);        setOperationAction(ISD::FROUNDEVEN, VT, Legal); +      setOperationAction(ISD::FMINNUM, VT, Legal); +      setOperationAction(ISD::FMAXNUM, VT, Legal);      }      setOperationAction(ISD::CTPOP, GRLenVT, Legal);      
setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal); @@ -461,6 +463,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,        setOperationAction(ISD::FFLOOR, VT, Legal);        setOperationAction(ISD::FTRUNC, VT, Legal);        setOperationAction(ISD::FROUNDEVEN, VT, Legal); +      setOperationAction(ISD::FMINNUM, VT, Legal); +      setOperationAction(ISD::FMAXNUM, VT, Legal);      }    } diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 610ba05..b502b056 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1558,6 +1558,10 @@ defm : PatXrXrF<fmul, "XVFMUL">;  // XVFDIV_{S/D}  defm : PatXrXrF<fdiv, "XVFDIV">; +// XVFMAX_{S/D}, XVFMIN_{S/D} +defm : PatXrXrF<fmaxnum, "XVFMAX">; +defm : PatXrXrF<fminnum, "XVFMIN">; +  // XVFMADD_{S/D}  def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),            (XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 6470842..6b74a4b 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1760,6 +1760,10 @@ defm : PatVrVrF<fmul, "VFMUL">;  // VFDIV_{S/D}  defm : PatVrVrF<fdiv, "VFDIV">; +// VFMAX_{S/D}, VFMIN_{S/D} +defm : PatVrVrF<fmaxnum, "VFMAX">; +defm : PatVrVrF<fminnum, "VFMIN">; +  // VFMADD_{S/D}  def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va),            (VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp index 7d54565..6d69af5 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp @@ -39,7 +39,7 @@ LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit)      : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH,                                /*HasRelocationAddend=*/true) {} -LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {} +LoongArchELFObjectWriter::~LoongArchELFObjectWriter() = default;  unsigned LoongArchELFObjectWriter::getRelocType(const MCFixup &Fixup,                                                  const MCValue &Target, diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp index f0e2bc4..08fa51d 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -38,7 +38,7 @@ public:    LoongArchMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)        : Ctx(ctx), MCII(MCII) {} -  ~LoongArchMCCodeEmitter() override {} +  ~LoongArchMCCodeEmitter() override = default;    void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,                           SmallVectorImpl<MCFixup> &Fixups, diff --git a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp index e37f3a66..fb5cd5c2 100644 --- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp +++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp @@ -690,9 +690,9 @@ bool M68kAsmParser::parseRegisterName(MCRegister &RegNo, SMLoc Loc,      } else {        // Floating point control register.        
RegNo = StringSwitch<unsigned>(RegisterNameLower) -                  .Cases("fpc", "fpcr", M68k::FPC) -                  .Cases("fps", "fpsr", M68k::FPS) -                  .Cases("fpi", "fpiar", M68k::FPIAR) +                  .Cases({"fpc", "fpcr"}, M68k::FPC) +                  .Cases({"fps", "fpsr"}, M68k::FPS) +                  .Cases({"fpi", "fpiar"}, M68k::FPIAR)                    .Default(M68k::NoRegister);        assert(RegNo != M68k::NoRegister &&               "Unrecognized FP control register name"); diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 97379d7..f588e56 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -6176,7 +6176,7 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {    CC = StringSwitch<unsigned>(Name)             .Case("zero", 0) -           .Cases("at", "AT", 1) +           .Cases({"at", "AT"}, 1)             .Case("a0", 4)             .Case("a1", 5)             .Case("a2", 6) diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h index caef8fe7..b832b82 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h +++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h @@ -20,7 +20,7 @@ class MemoryLocation;  class NVPTXAAResult : public AAResultBase {  public: -  NVPTXAAResult() {} +  NVPTXAAResult() = default;    NVPTXAAResult(NVPTXAAResult &&Arg) : AAResultBase(std::move(Arg)) {}    /// Handle invalidation events from the new pass manager. diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index c667a09..996d653 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1836,7 +1836,7 @@ bool NVPTXDAGToDAGISel::tryFence(SDNode *N) {    return true;  } -NVPTXScopes::NVPTXScopes(LLVMContext &C) { +NVPTXScopes::NVPTXScopes(LLVMContext &C) : Context(&C) {    Scopes[C.getOrInsertSyncScopeID("singlethread")] = NVPTX::Scope::Thread;    Scopes[C.getOrInsertSyncScopeID("")] = NVPTX::Scope::System;    Scopes[C.getOrInsertSyncScopeID("block")] = NVPTX::Scope::Block; @@ -1851,11 +1851,21 @@ NVPTX::Scope NVPTXScopes::operator[](SyncScope::ID ID) const {    auto S = Scopes.find(ID);    if (S == Scopes.end()) { -    // TODO: -    // - Add API to LLVMContext to get the name of a single scope. -    // - Use that API here to print an error containing the name -    //   of this Unknown ID. -    report_fatal_error(formatv("Could not find scope ID={}.", int(ID))); +    auto scopeName = Context->getSyncScopeName(ID); +    assert(scopeName.has_value() && "Scope name must exist."); + +    // Build list of supported syncscopes programmatically +    SmallVector<StringRef> supportedScopes; +    for (const auto &Entry : Scopes) { +      if (auto name = Context->getSyncScopeName(Entry.first)) +        supportedScopes.push_back(name->empty() ? 
"<empty string>" : *name); +    } + +    reportFatalUsageError( +        formatv("NVPTX backend does not support syncscope \"{0}\" (ID={1}).\n" +                "Supported syncscopes are: {2}.", +                scopeName.value(), int(ID), +                make_range(supportedScopes.begin(), supportedScopes.end())));    }    return S->second;  } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 1cb579b..d525531 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -35,6 +35,7 @@ struct NVPTXScopes {  private:    SmallMapVector<SyncScope::ID, NVPTX::Scope, 8> Scopes{}; +  LLVMContext *Context = nullptr;  };  class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index bcb3f50..780e124 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -2702,7 +2702,7 @@ static bool isSpecialLLVMGlobalArrayToSkip(const GlobalVariable *GV) {  static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) {    return StringSwitch<bool>(GV->getName()) -      .Cases("llvm.global_ctors", "llvm.global_dtors", true) +      .Cases({"llvm.global_ctors", "llvm.global_dtors"}, true)        .Default(false);  } diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td index da3efdc..0c2e44e 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -360,6 +360,10 @@ let Predicates = [HasVSX, IsISAFuture] in {      def LXVPRLL : XForm_XTp5_RAB5<31, 621, (outs vsrprc:$XTp),                                    (ins (memr $RA):$addr, g8rc:$RB),                                    "lxvprll $XTp, $addr, $RB", IIC_LdStLFD, []>; +    def LXVPB32X +        : XForm_XTp5_RAB5<31, 877, (outs vsrprc:$XTp), +                          (ins (memr $RA):$addr, g8rc:$RB), +                          "lxvpb32x $XTp, $addr, $RB", IIC_LdStLFD, []>;    }    let mayStore = 1 in { @@ -376,6 +380,10 @@ let Predicates = [HasVSX, IsISAFuture] in {          : XForm_XTp5_RAB5<31, 749, (outs),                            (ins vsrprc:$XTp, (memr $RA):$addr, g8rc:$RB),                            "stxvprll $XTp, $addr, $RB", IIC_LdStLFD, []>; +    def STXVPB32X +        : XForm_XTp5_RAB5<31, 1005, (outs), +                          (ins vsrprc:$XTp, (memr $RA):$addr, g8rc:$RB), +                          "stxvpb32x $XTp, $addr, $RB", IIC_LdStLFD, []>;    }    def VUPKHSNTOB : VXForm_VRTB5<387, 0, (outs vrrc:$VRT), (ins vrrc:$VRB), diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 2fba090..b04e887 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -912,7 +912,7 @@ bool PPCTTIImpl::areInlineCompatible(const Function *Caller,  bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,                                         const Function *Callee, -                                       const ArrayRef<Type *> &Types) const { +                                       ArrayRef<Type *> Types) const {    // We need to ensure that argument promotion does not    // attempt to promote pointers to MMA types (__vector_pair diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 
475472a..8d7f255 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -147,7 +147,7 @@ public:    bool areInlineCompatible(const Function *Caller,                             const Function *Callee) const override;    bool areTypesABICompatible(const Function *Caller, const Function *Callee, -                             const ArrayRef<Type *> &Types) const override; +                             ArrayRef<Type *> Types) const override;    bool supportsTailCallFor(const CallBase *CB) const override;  private: diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index e75dfe3..5b8cfb2 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -407,7 +407,6 @@ enum OperandType : unsigned {    OPERAND_SIMM5_PLUS1,    OPERAND_SIMM6,    OPERAND_SIMM6_NONZERO, -  OPERAND_SIMM8,    OPERAND_SIMM8_UNSIGNED,    OPERAND_SIMM10,    OPERAND_SIMM10_LSB0000_NONZERO, diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index b25a054..9078335 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -371,8 +371,8 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,    RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);    unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());    if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { -    report_fatal_error("The V extension does not support EEW=64 for index " -                       "values when XLEN=32"); +    reportFatalUsageError("The V extension does not support EEW=64 for index " +                          "values when XLEN=32");    }    const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(        NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), @@ -444,8 +444,8 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,    RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);    unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());    if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { -    report_fatal_error("The V extension does not support EEW=64 for index " -                       "values when XLEN=32"); +    reportFatalUsageError("The V extension does not support EEW=64 for index " +                          "values when XLEN=32");    }    const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(        NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), @@ -2223,8 +2223,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {        RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);        unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());        if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { -        report_fatal_error("The V extension does not support EEW=64 for index " -                           "values when XLEN=32"); +        reportFatalUsageError("The V extension does not support EEW=64 for " +                              "index values when XLEN=32");        }        const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(            IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), @@ -2457,8 +2457,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {        RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);        unsigned IndexLog2EEW = 
Log2_32(IndexVT.getScalarSizeInBits());        if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { -        report_fatal_error("The V extension does not support EEW=64 for index " -                           "values when XLEN=32"); +        reportFatalUsageError("The V extension does not support EEW=64 for " +                              "index values when XLEN=32");        }        const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(            IsMasked, IsOrdered, IndexLog2EEW, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index c6a8b84..e0cf739 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -23946,7 +23946,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,                                 .Case("{t0}", RISCV::X5)                                 .Case("{t1}", RISCV::X6)                                 .Case("{t2}", RISCV::X7) -                               .Cases("{s0}", "{fp}", RISCV::X8) +                               .Cases({"{s0}", "{fp}"}, RISCV::X8)                                 .Case("{s1}", RISCV::X9)                                 .Case("{a0}", RISCV::X10)                                 .Case("{a1}", RISCV::X11) @@ -23983,38 +23983,38 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,    // use the ABI names in register constraint lists.    if (Subtarget.hasStdExtF()) {      unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) -                        .Cases("{f0}", "{ft0}", RISCV::F0_F) -                        .Cases("{f1}", "{ft1}", RISCV::F1_F) -                        .Cases("{f2}", "{ft2}", RISCV::F2_F) -                        .Cases("{f3}", "{ft3}", RISCV::F3_F) -                        .Cases("{f4}", "{ft4}", RISCV::F4_F) -                        .Cases("{f5}", "{ft5}", RISCV::F5_F) -                        .Cases("{f6}", "{ft6}", RISCV::F6_F) -                        .Cases("{f7}", "{ft7}", RISCV::F7_F) -                        .Cases("{f8}", "{fs0}", RISCV::F8_F) -                        .Cases("{f9}", "{fs1}", RISCV::F9_F) -                        .Cases("{f10}", "{fa0}", RISCV::F10_F) -                        .Cases("{f11}", "{fa1}", RISCV::F11_F) -                        .Cases("{f12}", "{fa2}", RISCV::F12_F) -                        .Cases("{f13}", "{fa3}", RISCV::F13_F) -                        .Cases("{f14}", "{fa4}", RISCV::F14_F) -                        .Cases("{f15}", "{fa5}", RISCV::F15_F) -                        .Cases("{f16}", "{fa6}", RISCV::F16_F) -                        .Cases("{f17}", "{fa7}", RISCV::F17_F) -                        .Cases("{f18}", "{fs2}", RISCV::F18_F) -                        .Cases("{f19}", "{fs3}", RISCV::F19_F) -                        .Cases("{f20}", "{fs4}", RISCV::F20_F) -                        .Cases("{f21}", "{fs5}", RISCV::F21_F) -                        .Cases("{f22}", "{fs6}", RISCV::F22_F) -                        .Cases("{f23}", "{fs7}", RISCV::F23_F) -                        .Cases("{f24}", "{fs8}", RISCV::F24_F) -                        .Cases("{f25}", "{fs9}", RISCV::F25_F) -                        .Cases("{f26}", "{fs10}", RISCV::F26_F) -                        .Cases("{f27}", "{fs11}", RISCV::F27_F) -                        .Cases("{f28}", "{ft8}", RISCV::F28_F) -                        .Cases("{f29}", "{ft9}", RISCV::F29_F) -                        .Cases("{f30}", "{ft10}", RISCV::F30_F) -                        
.Cases("{f31}", "{ft11}", RISCV::F31_F) +                        .Cases({"{f0}", "{ft0}"}, RISCV::F0_F) +                        .Cases({"{f1}", "{ft1}"}, RISCV::F1_F) +                        .Cases({"{f2}", "{ft2}"}, RISCV::F2_F) +                        .Cases({"{f3}", "{ft3}"}, RISCV::F3_F) +                        .Cases({"{f4}", "{ft4}"}, RISCV::F4_F) +                        .Cases({"{f5}", "{ft5}"}, RISCV::F5_F) +                        .Cases({"{f6}", "{ft6}"}, RISCV::F6_F) +                        .Cases({"{f7}", "{ft7}"}, RISCV::F7_F) +                        .Cases({"{f8}", "{fs0}"}, RISCV::F8_F) +                        .Cases({"{f9}", "{fs1}"}, RISCV::F9_F) +                        .Cases({"{f10}", "{fa0}"}, RISCV::F10_F) +                        .Cases({"{f11}", "{fa1}"}, RISCV::F11_F) +                        .Cases({"{f12}", "{fa2}"}, RISCV::F12_F) +                        .Cases({"{f13}", "{fa3}"}, RISCV::F13_F) +                        .Cases({"{f14}", "{fa4}"}, RISCV::F14_F) +                        .Cases({"{f15}", "{fa5}"}, RISCV::F15_F) +                        .Cases({"{f16}", "{fa6}"}, RISCV::F16_F) +                        .Cases({"{f17}", "{fa7}"}, RISCV::F17_F) +                        .Cases({"{f18}", "{fs2}"}, RISCV::F18_F) +                        .Cases({"{f19}", "{fs3}"}, RISCV::F19_F) +                        .Cases({"{f20}", "{fs4}"}, RISCV::F20_F) +                        .Cases({"{f21}", "{fs5}"}, RISCV::F21_F) +                        .Cases({"{f22}", "{fs6}"}, RISCV::F22_F) +                        .Cases({"{f23}", "{fs7}"}, RISCV::F23_F) +                        .Cases({"{f24}", "{fs8}"}, RISCV::F24_F) +                        .Cases({"{f25}", "{fs9}"}, RISCV::F25_F) +                        .Cases({"{f26}", "{fs10}"}, RISCV::F26_F) +                        .Cases({"{f27}", "{fs11}"}, RISCV::F27_F) +                        .Cases({"{f28}", "{ft8}"}, RISCV::F28_F) +                        .Cases({"{f29}", "{ft9}"}, RISCV::F29_F) +                        .Cases({"{f30}", "{ft10}"}, RISCV::F30_F) +                        .Cases({"{f31}", "{ft11}"}, RISCV::F31_F)                          .Default(RISCV::NoRegister);      if (FReg != RISCV::NoRegister) {        assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); diff --git a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp index a1c8e23..c58a5c0 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp @@ -48,7 +48,7 @@ class VXRMInfo {    } State = Uninitialized;  public: -  VXRMInfo() {} +  VXRMInfo() = default;    static VXRMInfo getUnknown() {      VXRMInfo Info; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index c31713e..1c6a5af 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -90,6 +90,7 @@ defvar ZfhminDExts = [ZfhminDExt, ZhinxminZdinxExt, ZhinxminZdinx32Ext];  //===----------------------------------------------------------------------===//  let Predicates = [HasHalfFPLoadStoreMove] in { +let canFoldAsLoad = 1 in  def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>;  // Operands for stores are in the order srcreg, base, offset rather than diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp index 0a318e0..ed6d355 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp 
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp @@ -15,4 +15,4 @@  using namespace llvm;  SPIRVTargetStreamer::SPIRVTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} -SPIRVTargetStreamer::~SPIRVTargetStreamer() {} +SPIRVTargetStreamer::~SPIRVTargetStreamer() = default; diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index 9e11c3a..dd57b74 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -149,23 +149,23 @@ static FunctionType *getOriginalFunctionType(const Function &F) {          return isa<MDString>(N->getOperand(0)) &&                 cast<MDString>(N->getOperand(0))->getString() == F.getName();        }); -  // TODO: probably one function can have numerous type mutations, -  // so we should support this.    if (ThisFuncMDIt != NamedMD->op_end()) {      auto *ThisFuncMD = *ThisFuncMDIt; -    MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(1)); -    assert(MD && "MDNode operand is expected"); -    ConstantInt *Const = getConstInt(MD, 0); -    if (Const) { -      auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1)); -      assert(CMeta && "ConstantAsMetadata operand is expected"); -      assert(Const->getSExtValue() >= -1); -      // Currently -1 indicates return value, greater values mean -      // argument numbers. -      if (Const->getSExtValue() == -1) -        RetTy = CMeta->getType(); -      else -        ArgTypes[Const->getSExtValue()] = CMeta->getType(); +    for (unsigned I = 1; I != ThisFuncMD->getNumOperands(); ++I) { +      MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(I)); +      assert(MD && "MDNode operand is expected"); +      ConstantInt *Const = getConstInt(MD, 0); +      if (Const) { +        auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1)); +        assert(CMeta && "ConstantAsMetadata operand is expected"); +        assert(Const->getSExtValue() >= -1); +        // Currently -1 indicates return value, greater values mean +        // argument numbers. +        if (Const->getSExtValue() == -1) +          RetTy = CMeta->getType(); +        else +          ArgTypes[Const->getSExtValue()] = CMeta->getType(); +      }      }    } diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h index 2d19f6de..44b6c66 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h @@ -81,7 +81,7 @@ private:    void initAvailableCapabilitiesForVulkan(const SPIRVSubtarget &ST);  public: -  RequirementHandler() {} +  RequirementHandler() = default;    void clear() {      MinimalCaps.clear();      AllCaps.clear(); diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 7dd0b95..5ba0356 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -69,7 +69,7 @@ static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {  }  // Pin SPIRVTargetObjectFile's vtables to this file. 
-SPIRVTargetObjectFile::~SPIRVTargetObjectFile() {} +SPIRVTargetObjectFile::~SPIRVTargetObjectFile() = default;  SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT,                                         StringRef CPU, StringRef FS, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h index 9d0adbb..87ec256 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h @@ -16,7 +16,7 @@ namespace llvm {  /// This implementation is used for SystemZ ELF targets.  class SystemZELFTargetObjectFile : public TargetLoweringObjectFileELF {  public: -  SystemZELFTargetObjectFile() {} +  SystemZELFTargetObjectFile() = default;    /// Describe a TLS variable address within debug info.    const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h index 7845cdf..1bfc61f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h @@ -76,7 +76,7 @@ public:      BlockSet.insert(MBB);    }    ArrayRef<MachineBasicBlock *> getBlocks() const { return Blocks; } -  using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator; +  using block_iterator = ArrayRef<MachineBasicBlock *>::const_iterator;    block_iterator block_begin() const { return getBlocks().begin(); }    block_iterator block_end() const { return getBlocks().end(); }    inline iterator_range<block_iterator> blocks() const { @@ -96,7 +96,7 @@ public:    void addSubException(std::unique_ptr<WebAssemblyException> E) {      SubExceptions.push_back(std::move(E));    } -  using iterator = typename decltype(SubExceptions)::const_iterator; +  using iterator = decltype(SubExceptions)::const_iterator;    iterator begin() const { return SubExceptions.begin(); }    iterator end() const { return SubExceptions.end(); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h index e92bf17..96b8a4e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h @@ -35,7 +35,7 @@ public:    virtual MachineBasicBlock *getHeader() const = 0;    virtual bool contains(const MachineBasicBlock *MBB) const = 0;    virtual unsigned getNumBlocks() const = 0; -  using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator; +  using block_iterator = ArrayRef<MachineBasicBlock *>::const_iterator;    virtual iterator_range<block_iterator> blocks() const = 0;    virtual bool isLoop() const = 0;  }; diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index b7ea672..bac3692 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2470,10 +2470,10 @@ bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,  // Report back its kind, or IOK_INVALID if does not evaluated as a known one  unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {    return StringSwitch<unsigned>(Name) -    .Cases("TYPE","type",IOK_TYPE) -    .Cases("SIZE","size",IOK_SIZE) -    .Cases("LENGTH","length",IOK_LENGTH) -    .Default(IOK_INVALID); +      .Cases({"TYPE", "type"}, IOK_TYPE) +      .Cases({"SIZE", "size"}, IOK_SIZE) +      .Cases({"LENGTH", 
"length"}, IOK_LENGTH) +      .Default(IOK_INVALID);  }  /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators.  The LENGTH operator @@ -2516,8 +2516,8 @@ unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {  unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {    return StringSwitch<unsigned>(Name.lower())        .Case("type", MOK_TYPE) -      .Cases("size", "sizeof", MOK_SIZEOF) -      .Cases("length", "lengthof", MOK_LENGTHOF) +      .Cases({"size", "sizeof"}, MOK_SIZEOF) +      .Cases({"length", "lengthof"}, MOK_LENGTHOF)        .Default(MOK_INVALID);  } @@ -2581,21 +2581,21 @@ bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {  bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size,                                                 StringRef *SizeStr) {    Size = StringSwitch<unsigned>(getTok().getString()) -    .Cases("BYTE", "byte", 8) -    .Cases("WORD", "word", 16) -    .Cases("DWORD", "dword", 32) -    .Cases("FLOAT", "float", 32) -    .Cases("LONG", "long", 32) -    .Cases("FWORD", "fword", 48) -    .Cases("DOUBLE", "double", 64) -    .Cases("QWORD", "qword", 64) -    .Cases("MMWORD","mmword", 64) -    .Cases("XWORD", "xword", 80) -    .Cases("TBYTE", "tbyte", 80) -    .Cases("XMMWORD", "xmmword", 128) -    .Cases("YMMWORD", "ymmword", 256) -    .Cases("ZMMWORD", "zmmword", 512) -    .Default(0); +             .Cases({"BYTE", "byte"}, 8) +             .Cases({"WORD", "word"}, 16) +             .Cases({"DWORD", "dword"}, 32) +             .Cases({"FLOAT", "float"}, 32) +             .Cases({"LONG", "long"}, 32) +             .Cases({"FWORD", "fword"}, 48) +             .Cases({"DOUBLE", "double"}, 64) +             .Cases({"QWORD", "qword"}, 64) +             .Cases({"MMWORD", "mmword"}, 64) +             .Cases({"XWORD", "xword"}, 80) +             .Cases({"TBYTE", "tbyte"}, 80) +             .Cases({"XMMWORD", "xmmword"}, 128) +             .Cases({"YMMWORD", "ymmword"}, 256) +             .Cases({"ZMMWORD", "zmmword"}, 512) +             .Default(0);    if (Size) {      if (SizeStr)        *SizeStr = getTok().getString(); @@ -2886,22 +2886,22 @@ bool X86AsmParser::parseATTOperand(OperandVector &Operands) {  // otherwise the EFLAGS Condition Code enumerator.  
X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {    return StringSwitch<X86::CondCode>(CC) -      .Case("o", X86::COND_O)          // Overflow -      .Case("no", X86::COND_NO)        // No Overflow -      .Cases("b", "nae", X86::COND_B)  // Below/Neither Above nor Equal -      .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below -      .Cases("e", "z", X86::COND_E)    // Equal/Zero -      .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero -      .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above -      .Cases("a", "nbe", X86::COND_A)  // Above/Neither Below nor Equal -      .Case("s", X86::COND_S)          // Sign -      .Case("ns", X86::COND_NS)        // No Sign -      .Cases("p", "pe", X86::COND_P)   // Parity/Parity Even -      .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd -      .Cases("l", "nge", X86::COND_L)  // Less/Neither Greater nor Equal -      .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less -      .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater -      .Cases("g", "nle", X86::COND_G)  // Greater/Neither Less nor Equal +      .Case("o", X86::COND_O)            // Overflow +      .Case("no", X86::COND_NO)          // No Overflow +      .Cases({"b", "nae"}, X86::COND_B)  // Below/Neither Above nor Equal +      .Cases({"ae", "nb"}, X86::COND_AE) // Above or Equal/Not Below +      .Cases({"e", "z"}, X86::COND_E)    // Equal/Zero +      .Cases({"ne", "nz"}, X86::COND_NE) // Not Equal/Not Zero +      .Cases({"be", "na"}, X86::COND_BE) // Below or Equal/Not Above +      .Cases({"a", "nbe"}, X86::COND_A)  // Above/Neither Below nor Equal +      .Case("s", X86::COND_S)            // Sign +      .Case("ns", X86::COND_NS)          // No Sign +      .Cases({"p", "pe"}, X86::COND_P)   // Parity/Parity Even +      .Cases({"np", "po"}, X86::COND_NP) // No Parity/Parity Odd +      .Cases({"l", "nge"}, X86::COND_L)  // Less/Neither Greater nor Equal +      .Cases({"ge", "nl"}, X86::COND_GE) // Greater or Equal/Not Less +      .Cases({"le", "ng"}, X86::COND_LE) // Less or Equal/Not Greater +      .Cases({"g", "nle"}, X86::COND_G)  // Greater/Neither Less nor Equal        .Default(X86::COND_INVALID);  } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 133406b..b97b508 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33034,12 +33034,13 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,        DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));    Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy) -                      : (Type *)FixedVectorType::get(ArgTy, 4); +                      : (Type *)FixedVectorType::get(ArgTy, 2);    TargetLowering::CallLoweringInfo CLI(DAG);    CLI.setDebugLoc(dl)        .setChain(DAG.getEntryNode()) -      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args)); +      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args)) +      .setIsPostTypeLegalization();    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); @@ -53347,6 +53348,80 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,    return SDValue();  } +// Look for a RMW operation that only touches one bit of a larger than legal +// type and fold it to a BTC/BTR/BTS pattern acting on a single i32 sub value. 
+static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, +                              SelectionDAG &DAG, +                              const X86Subtarget &Subtarget) { +  using namespace SDPatternMatch; + +  // Only handle normal stores and its chain was a matching normal load. +  auto *Ld = dyn_cast<LoadSDNode>(St->getChain()); +  if (!ISD::isNormalStore(St) || !St->isSimple() || !Ld || +      !ISD::isNormalLoad(Ld) || !Ld->isSimple() || +      Ld->getBasePtr() != St->getBasePtr() || +      Ld->getOffset() != St->getOffset()) +    return SDValue(); + +  SDValue LoadVal(Ld, 0); +  SDValue StoredVal = St->getValue(); +  EVT VT = StoredVal.getValueType(); + +  // Only narrow larger than legal scalar integers. +  if (!VT.isScalarInteger() || +      VT.getSizeInBits() <= (Subtarget.is64Bit() ? 64 : 32)) +    return SDValue(); + +  // BTR: X & ~(1 << ShAmt) +  // BTS: X | (1 << ShAmt) +  // BTC: X ^ (1 << ShAmt) +  SDValue ShAmt; +  if (!StoredVal.hasOneUse() || +      !(sd_match(StoredVal, m_And(m_Specific(LoadVal), +                                  m_Not(m_Shl(m_One(), m_Value(ShAmt))))) || +        sd_match(StoredVal, +                 m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) || +        sd_match(StoredVal, +                 m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))))) +    return SDValue(); + +  // Ensure the shift amount is in bounds. +  KnownBits KnownAmt = DAG.computeKnownBits(ShAmt); +  if (KnownAmt.getMaxValue().uge(VT.getSizeInBits())) +    return SDValue(); + +  // Split the shift into an alignment shift that moves the active i32 block to +  // the bottom bits for truncation and a modulo shift that can act on the i32. +  EVT AmtVT = ShAmt.getValueType(); +  SDValue AlignAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, +                                 DAG.getSignedConstant(-32LL, DL, AmtVT)); +  SDValue ModuloAmt = +      DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT)); + +  // Compute the byte offset for the i32 block that is changed by the RMW. +  // combineTruncate will adjust the load for us in a similar way. +  EVT PtrVT = St->getBasePtr().getValueType(); +  SDValue PtrBitOfs = DAG.getZExtOrTrunc(AlignAmt, DL, PtrVT); +  SDValue PtrByteOfs = DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs, +                                   DAG.getShiftAmountConstant(3, PtrVT, DL)); +  SDValue NewPtr = DAG.getMemBasePlusOffset(St->getBasePtr(), PtrByteOfs, DL, +                                            SDNodeFlags::NoUnsignedWrap); + +  // Reconstruct the BTC/BTR/BTS pattern for the i32 block and store. 
+  SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt);
+  X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
+
+  SDValue Mask =
+      DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
+                  DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
+  if (StoredVal.getOpcode() == ISD::AND)
+    Mask = DAG.getNOT(DL, Mask, MVT::i32);
+
+  SDValue Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
+  return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
+                      Align(), St->getMemOperand()->getFlags());
+}
+
 static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
@@ -53573,6 +53648,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget))
+    return R;
+
   // Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)
   //         store(cmov(x, load(p), CC), p) to cstore(x, p, InvertCC)
   if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
@@ -54505,8 +54583,9 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
   // truncation, see if we can convert the shift into a pointer offset instead.
   // Limit this to normal (non-ext) scalar integer loads.
   if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL &&
-      Src.hasOneUse() && Src.getOperand(0).hasOneUse() &&
-      ISD::isNormalLoad(Src.getOperand(0).getNode())) {
+      Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) &&
+      (Src.getOperand(0).hasOneUse() ||
+       !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, SrcVT))) {
     auto *Ld = cast<LoadSDNode>(Src.getOperand(0));
     if (Ld->isSimple() && VT.isByteSized() &&
         isPowerOf2_64(VT.getSizeInBits())) {
@@ -54529,8 +54608,7 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
         SDValue NewLoad =
             DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(),
                         Align(), Ld->getMemOperand()->getFlags());
-        DAG.ReplaceAllUsesOfValueWith(Src.getOperand(0).getValue(1),
-                                      NewLoad.getValue(1));
+        DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
         return NewLoad;
       }
     }
@@ -56306,6 +56384,7 @@ static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC,
 static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
+  using namespace SDPatternMatch;
   const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
   const SDValue LHS = N->getOperand(0);
   const SDValue RHS = N->getOperand(1);
@@ -56364,6 +56443,37 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
       if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
         return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
 
+      // If we're performing a bit test on a larger than legal type, attempt
+      // to (aligned) shift down the value to the bottom 32-bits and then
+      // perform the bittest on the i32 value.
+      // ICMP_ZERO(AND(X,SHL(1,IDX)))
+      // --> ICMP_ZERO(AND(TRUNC(SRL(X,AND(IDX,-32))),SHL(1,AND(IDX,31))))
+      if (isNullConstant(RHS) &&
+          OpVT.getScalarSizeInBits() > (Subtarget.is64Bit() ? 64 : 32)) {
+        SDValue X, ShAmt;
+        if (sd_match(LHS, m_OneUse(m_And(m_Value(X),
+                                         m_Shl(m_One(), m_Value(ShAmt)))))) {
+          // Only attempt this if the shift amount is known to be in bounds.
+          KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
+          if (KnownAmt.getMaxValue().ult(OpVT.getScalarSizeInBits())) {
+            EVT AmtVT = ShAmt.getValueType();
+            SDValue AlignAmt =
+                DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
+                            DAG.getSignedConstant(-32LL, DL, AmtVT));
+            SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
+                                            DAG.getConstant(31, DL, AmtVT));
+            SDValue Mask = DAG.getNode(
+                ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
+                DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
+            X = DAG.getNode(ISD::SRL, DL, OpVT, X, AlignAmt);
+            X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
+            X = DAG.getNode(ISD::AND, DL, MVT::i32, X, Mask);
+            return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, MVT::i32),
+                                CC);
+          }
+        }
+      }
+
       // cmpeq(trunc(x),C) --> cmpeq(x,C)
       // cmpne(trunc(x),C) --> cmpne(x,C)
       // iff x upper bits are zero.
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
index 090060e..3b96e70 100644
--- a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -115,9 +115,9 @@ struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> {
   static constexpr MachineInstr *const ArgNodeSentinel = nullptr;
 
   using GraphT = ImmutableGraph<MachineInstr *, int>;
-  using Node = typename GraphT::Node;
-  using Edge = typename GraphT::Edge;
-  using size_type = typename GraphT::size_type;
+  using Node = GraphT::Node;
+  using Edge = GraphT::Edge;
+  using size_type = GraphT::size_type;
   MachineGadgetGraph(std::unique_ptr<Node[]> Nodes,
                      std::unique_ptr<Edge[]> Edges, size_type NodesSize,
                      size_type EdgesSize, int NumFences = 0, int NumGadgets = 0)
@@ -191,10 +191,10 @@ template <>
 struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {
   using GraphType = MachineGadgetGraph;
   using Traits = llvm::GraphTraits<GraphType *>;
-  using NodeRef = typename Traits::NodeRef;
-  using EdgeRef = typename Traits::EdgeRef;
-  using ChildIteratorType = typename Traits::ChildIteratorType;
-  using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType;
+  using NodeRef = Traits::NodeRef;
+  using EdgeRef = Traits::EdgeRef;
+  using ChildIteratorType = Traits::ChildIteratorType;
+  using ChildEdgeIteratorType = Traits::ChildEdgeIteratorType;
 
   DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
 
@@ -227,9 +227,6 @@ struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {
 
 } // end namespace llvm
 
-constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel;
-constexpr int MachineGadgetGraph::GadgetEdgeSentinel;
-
 char X86LoadValueInjectionLoadHardeningPass::ID = 0;
 
 void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage(
@@ -335,7 +332,7 @@ X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
   L.computePhiInfo();
 
   GraphBuilder Builder;
-  using GraphIter = typename GraphBuilder::BuilderNodeRef;
+  using GraphIter = GraphBuilder::BuilderNodeRef;
   DenseMap<MachineInstr *, GraphIter> NodeMap;
   int FenceCount = 0, GadgetCount = 0;
   auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) {
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 3d8d0a23..0b1430e 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -6562,7 +6562,7 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
 
 bool X86TTIImpl::areTypesABICompatible(const Function *Caller,
                                        const Function *Callee,
-                                       const ArrayRef<Type *> &Types) const {
+                                       ArrayRef<Type *> Types) const {
   if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
     return false;
 
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 133b366..de5e1c2 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -296,7 +296,7 @@ public:
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const override;
   bool areTypesABICompatible(const Function *Caller, const Function *Callee,
-                             const ArrayRef<Type *> &Type) const override;
+                             ArrayRef<Type *> Type) const override;
 
   uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
     return ST->getMaxInlineSizeThreshold();
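For reference, a minimal C++ sketch of the store pattern the new narrowBitOpRMW combine targets. The function names and the use of unsigned __int128 here are illustrative assumptions, not part of the patch, and the combine only applies when the bit index is provably in range and the value is wider than the legal GPR width:

// BTS-style read-modify-write on a wider-than-legal integer held in memory.
// Without the combine this lowers to a full 128-bit load, or, and store;
// with it, only the 32-bit word containing the bit is loaded and stored.
void set_bit128(unsigned __int128 *p, unsigned idx) {
  *p |= (unsigned __int128)1 << idx; // idx assumed to be provably < 128
}

// Hand-written equivalent of the narrowed form (little-endian layout),
// matching the byte offset the combine derives from (idx & -32) / 8.
void set_bit128_by_word(unsigned *words, unsigned idx) {
  words[idx / 32] |= 1u << (idx % 32); // touch only the affected i32 word
}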

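Similarly, a sketch of the bit-test narrowing added to combineSetCC, following the rewrite documented in the patch comment, ICMP_ZERO(AND(X,SHL(1,IDX))) --> ICMP_ZERO(AND(TRUNC(SRL(X,AND(IDX,-32))),SHL(1,AND(IDX,31)))). Names are illustrative and the index is assumed known to be in bounds:

// Bit test on a wider-than-legal integer.
bool test_bit128(unsigned __int128 x, unsigned idx) {
  return (x & ((unsigned __int128)1 << idx)) != 0; // idx assumed < 128
}

// Equivalent narrowed form: an aligned shift brings the containing 32-bit
// word to the bottom, truncate, then test the bit modulo 32.
bool test_bit128_narrowed(unsigned __int128 x, unsigned idx) {
  unsigned word = (unsigned)(x >> (idx & ~31u)); // SRL by (idx & -32), TRUNC
  return (word & (1u << (idx & 31u))) != 0;      // AND with SHL(1, idx & 31)
}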