diff options
Diffstat (limited to 'llvm/lib')
69 files changed, 1099 insertions, 902 deletions
diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp index fb3a6f8..1b7277d 100644 --- a/llvm/lib/Analysis/AssumptionCache.cpp +++ b/llvm/lib/Analysis/AssumptionCache.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" @@ -77,9 +78,15 @@ findAffectedValues(CallBase *CI, TargetTransformInfo *TTI, }; for (unsigned Idx = 0; Idx != CI->getNumOperandBundles(); Idx++) { - if (CI->getOperandBundleAt(Idx).Inputs.size() > ABA_WasOn && - CI->getOperandBundleAt(Idx).getTagName() != IgnoreBundleTag) - AddAffected(CI->getOperandBundleAt(Idx).Inputs[ABA_WasOn], Idx); + OperandBundleUse Bundle = CI->getOperandBundleAt(Idx); + if (Bundle.getTagName() == "separate_storage") { + assert(Bundle.Inputs.size() == 2 && + "separate_storage must have two args"); + AddAffected(getUnderlyingObject(Bundle.Inputs[0]), Idx); + AddAffected(getUnderlyingObject(Bundle.Inputs[1]), Idx); + } else if (Bundle.Inputs.size() > ABA_WasOn && + Bundle.getTagName() != IgnoreBundleTag) + AddAffected(Bundle.Inputs[ABA_WasOn], Idx); } Value *Cond = CI->getArgOperand(0), *A, *B; diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 3de1473..97f60d2 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -69,7 +69,7 @@ static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden, cl::init(true)); static cl::opt<bool> EnableSeparateStorageAnalysis("basic-aa-separate-storage", - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); /// SearchLimitReached / SearchTimes shows how often the limit of /// to decompose GEPs is reached. It will affect the precision @@ -1544,28 +1544,25 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, return AliasResult::NoAlias; if (CtxI && EnableSeparateStorageAnalysis) { - for (auto &AssumeVH : AC.assumptions()) { - if (!AssumeVH) + for (AssumptionCache::ResultElem &Elem : AC.assumptionsFor(O1)) { + if (!Elem || Elem.Index == AssumptionCache::ExprResultIdx) continue; - AssumeInst *Assume = cast<AssumeInst>(AssumeVH); - - for (unsigned Idx = 0; Idx < Assume->getNumOperandBundles(); Idx++) { - OperandBundleUse OBU = Assume->getOperandBundleAt(Idx); - if (OBU.getTagName() == "separate_storage") { - assert(OBU.Inputs.size() == 2); - const Value *Hint1 = OBU.Inputs[0].get(); - const Value *Hint2 = OBU.Inputs[1].get(); - // This is often a no-op; instcombine rewrites this for us. No-op - // getUnderlyingObject calls are fast, though. - const Value *HintO1 = getUnderlyingObject(Hint1); - const Value *HintO2 = getUnderlyingObject(Hint2); - - if (((O1 == HintO1 && O2 == HintO2) || - (O1 == HintO2 && O2 == HintO1)) && - isValidAssumeForContext(Assume, CtxI, DT)) - return AliasResult::NoAlias; - } + AssumeInst *Assume = cast<AssumeInst>(Elem); + OperandBundleUse OBU = Assume->getOperandBundleAt(Elem.Index); + if (OBU.getTagName() == "separate_storage") { + assert(OBU.Inputs.size() == 2); + const Value *Hint1 = OBU.Inputs[0].get(); + const Value *Hint2 = OBU.Inputs[1].get(); + // This is often a no-op; instcombine rewrites this for us. No-op + // getUnderlyingObject calls are fast, though. + const Value *HintO1 = getUnderlyingObject(Hint1); + const Value *HintO2 = getUnderlyingObject(Hint2); + + if (((O1 == HintO1 && O2 == HintO2) || + (O1 == HintO2 && O2 == HintO1)) && + isValidAssumeForContext(Assume, CtxI, DT)) + return AliasResult::NoAlias; } } } diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp index 35bdd86..1a9c7c21 100644 --- a/llvm/lib/Analysis/ConstraintSystem.cpp +++ b/llvm/lib/Analysis/ConstraintSystem.cpp @@ -95,14 +95,14 @@ bool ConstraintSystem::eliminateUsingFM() { IdxUpper++; } - if (MulOverflow(UpperV, ((-1) * LowerLast), M1)) + if (MulOverflow(UpperV, -1 * LowerLast, M1)) return false; if (IdxLower < LowerRow.size() && LowerRow[IdxLower].Id == CurrentId) { LowerV = LowerRow[IdxLower].Coefficient; IdxLower++; } - if (MulOverflow(LowerV, (UpperLast), M2)) + if (MulOverflow(LowerV, UpperLast, M2)) return false; if (AddOverflow(M1, M2, N)) return false; diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 78a8334..241bdd8 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2204,6 +2204,13 @@ static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op1, m_c_Xor(m_Specific(Or), m_Specific(Y)))) return Constant::getNullValue(Op0->getType()); + const APInt *C1; + Value *A; + // (A ^ C) & (A ^ ~C) -> 0 + if (match(Op0, m_Xor(m_Value(A), m_APInt(C1))) && + match(Op1, m_Xor(m_Specific(A), m_SpecificInt(~*C1)))) + return Constant::getNullValue(Op0->getType()); + if (Op0->getType()->isIntOrIntVectorTy(1)) { if (std::optional<bool> Implied = isImpliedCondition(Op0, Op1, Q.DL)) { // If Op0 is true implies Op1 is true, then Op0 is a subset of Op1. @@ -2473,6 +2480,11 @@ static Value *simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (Value *V = threadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; + // (A ^ C) | (A ^ ~C) -> -1, i.e. all bits set to one. + if (match(Op0, m_Xor(m_Value(A), m_APInt(C1))) && + match(Op1, m_Xor(m_Specific(A), m_SpecificInt(~*C1)))) + return Constant::getAllOnesValue(Op0->getType()); + if (Op0->getType()->isIntOrIntVectorTy(1)) { if (std::optional<bool> Implied = isImpliedCondition(Op0, Op1, Q.DL, false)) { diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 87ddfe3..59c96a3 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -969,7 +969,9 @@ LoopInfo LoopAnalysis::run(Function &F, FunctionAnalysisManager &AM) { PreservedAnalyses LoopPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { - AM.getResult<LoopAnalysis>(F).print(OS); + auto &LI = AM.getResult<LoopAnalysis>(F); + OS << "Loop info for function '" << F.getName() << "':\n"; + LI.print(OS); return PreservedAnalyses::all(); } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 37e7153..7522353 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -149,7 +149,8 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI, return moreElementsVector(MI, Step.TypeIdx, Step.NewType); case Custom: LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); - return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize; + return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized + : UnableToLegalize; default: LLVM_DEBUG(dbgs() << ".. Unable to legalize\n"); return UnableToLegalize; @@ -567,7 +568,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { /// True if an instruction is in tail position in its caller. Intended for /// legalizing libcalls as tail calls when possible. -static bool isLibCallInTailPosition(MachineInstr &MI, +static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, + MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI) { MachineBasicBlock &MBB = *MI.getParent(); @@ -596,17 +598,12 @@ static bool isLibCallInTailPosition(MachineInstr &MI, // RET_ReallyLR implicit $x0 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end()); if (Next != MBB.instr_end() && Next->isCopy()) { - switch (MI.getOpcode()) { - default: - llvm_unreachable("unsupported opcode"); - case TargetOpcode::G_BZERO: + if (MI.getOpcode() == TargetOpcode::G_BZERO) return false; - case TargetOpcode::G_MEMCPY: - case TargetOpcode::G_MEMMOVE: - case TargetOpcode::G_MEMSET: - break; - } + // For MEMCPY/MOMMOVE/MEMSET these will be the first use (the dst), as the + // mempy/etc routines return the same parameter. For other it will be the + // returned value. Register VReg = MI.getOperand(0).getReg(); if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg()) return false; @@ -622,7 +619,7 @@ static bool isLibCallInTailPosition(MachineInstr &MI, if (Ret->getNumImplicitOperands() != 1) return false; - if (PReg != Ret->getOperand(0).getReg()) + if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg()) return false; // Skip over the COPY that we just validated. @@ -639,34 +636,64 @@ LegalizerHelper::LegalizeResult llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef<CallLowering::ArgInfo> Args, - const CallingConv::ID CC) { + const CallingConv::ID CC, LostDebugLocObserver &LocObserver, + MachineInstr *MI) { auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); CallLowering::CallLoweringInfo Info; Info.CallConv = CC; Info.Callee = MachineOperand::CreateES(Name); Info.OrigRet = Result; + if (MI) + Info.IsTailCall = + (Result.Ty->isVoidTy() || + Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) && + isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(), + *MIRBuilder.getMRI()); + std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); if (!CLI.lowerCall(MIRBuilder, Info)) return LegalizerHelper::UnableToLegalize; + if (MI && Info.LoweredTailCall) { + assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?"); + + // Check debug locations before removing the return. + LocObserver.checkpoint(true); + + // We must have a return following the call (or debug insts) to get past + // isLibCallInTailPosition. + do { + MachineInstr *Next = MI->getNextNode(); + assert(Next && + (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) && + "Expected instr following MI to be return or debug inst?"); + // We lowered a tail call, so the call is now the return from the block. + // Delete the old return. + Next->eraseFromParent(); + } while (MI->getNextNode()); + + // We expect to lose the debug location from the return. + LocObserver.checkpoint(false); + } return LegalizerHelper::Legalized; } LegalizerHelper::LegalizeResult llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, - ArrayRef<CallLowering::ArgInfo> Args) { + ArrayRef<CallLowering::ArgInfo> Args, + LostDebugLocObserver &LocObserver, MachineInstr *MI) { auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); const char *Name = TLI.getLibcallName(Libcall); const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall); - return createLibcall(MIRBuilder, Name, Result, Args, CC); + return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI); } // Useful for libcalls where all operands have the same type. static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, - Type *OpType) { + Type *OpType, LostDebugLocObserver &LocObserver) { auto Libcall = getRTLibDesc(MI.getOpcode(), Size); // FIXME: What does the original arg index mean here? @@ -674,7 +701,8 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) Args.push_back({MO.getReg(), OpType, 0}); return createLibcall(MIRBuilder, Libcall, - {MI.getOperand(0).getReg(), OpType, 0}, Args); + {MI.getOperand(0).getReg(), OpType, 0}, Args, + LocObserver, &MI); } LegalizerHelper::LegalizeResult @@ -733,8 +761,9 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, Info.CallConv = TLI.getLibcallCallingConv(RTLibcall); Info.Callee = MachineOperand::CreateES(Name); Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0); - Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() && - isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI); + Info.IsTailCall = + MI.getOperand(MI.getNumOperands() - 1).getImm() && + isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI); std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); if (!CLI.lowerCall(MIRBuilder, Info)) @@ -789,11 +818,11 @@ static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, - Type *FromType) { + Type *FromType, LostDebugLocObserver &LocObserver) { RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType); - return createLibcall(MIRBuilder, Libcall, - {MI.getOperand(0).getReg(), ToType, 0}, - {{MI.getOperand(1).getReg(), FromType, 0}}); + return createLibcall( + MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0}, + {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI); } static RTLIB::Libcall @@ -829,7 +858,8 @@ getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) { // LegalizerHelper::LegalizeResult LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI) { + MachineInstr &MI, + LostDebugLocObserver &LocObserver) { const DataLayout &DL = MIRBuilder.getDataLayout(); auto &MF = MIRBuilder.getMF(); auto &MRI = *MIRBuilder.getMRI(); @@ -850,7 +880,8 @@ LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder, auto Res = createLibcall(MIRBuilder, RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0), - CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0})); + CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), + LocObserver, nullptr); if (Res != LegalizerHelper::Legalized) return Res; @@ -867,7 +898,8 @@ LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder, // content of memory region. LegalizerHelper::LegalizeResult LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI) { + MachineInstr &MI, + LostDebugLocObserver &LocObserver) { const DataLayout &DL = MIRBuilder.getDataLayout(); auto &MF = MIRBuilder.getMF(); auto &MRI = *MIRBuilder.getMRI(); @@ -892,7 +924,8 @@ LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI); return createLibcall(MIRBuilder, RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0), - CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0})); + CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), + LocObserver, nullptr); } // The function is used to legalize operations that set default environment @@ -902,7 +935,8 @@ LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder, // it is not true, the target must provide custom lowering. LegalizerHelper::LegalizeResult LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI) { + MachineInstr &MI, + LostDebugLocObserver &LocObserver) { const DataLayout &DL = MIRBuilder.getDataLayout(); auto &MF = MIRBuilder.getMF(); auto &Ctx = MF.getFunction().getContext(); @@ -919,7 +953,8 @@ LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI); return createLibcall(MIRBuilder, RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0), - CallLowering::ArgInfo({ Dest.getReg(), StatePtrTy, 0})); + CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), + LocObserver, &MI); } LegalizerHelper::LegalizeResult @@ -938,7 +973,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); unsigned Size = LLTy.getSizeInBits(); Type *HLTy = IntegerType::get(Ctx, Size); - auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver); if (Status != Legalized) return Status; break; @@ -974,7 +1009,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); return UnableToLegalize; } - auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver); if (Status != Legalized) return Status; break; @@ -985,7 +1020,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg())); if (!FromTy || !ToTy) return UnableToLegalize; - LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy ); + LegalizeResult Status = + conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver); if (Status != Legalized) return Status; break; @@ -1000,7 +1036,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { LegalizeResult Status = conversionLibcall( MI, MIRBuilder, ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx), - FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx)); + FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), + LocObserver); if (Status != Legalized) return Status; break; @@ -1015,7 +1052,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { LegalizeResult Status = conversionLibcall( MI, MIRBuilder, ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), - FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx)); + FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx), + LocObserver); if (Status != Legalized) return Status; break; @@ -1032,19 +1070,20 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { return Result; } case TargetOpcode::G_GET_FPMODE: { - LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI); + LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver); if (Result != Legalized) return Result; break; } case TargetOpcode::G_SET_FPMODE: { - LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI); + LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver); if (Result != Legalized) return Result; break; } case TargetOpcode::G_RESET_FPMODE: { - LegalizeResult Result = createResetStateLibcall(MIRBuilder, MI); + LegalizeResult Result = + createResetStateLibcall(MIRBuilder, MI, LocObserver); if (Result != Legalized) return Result; break; @@ -2831,6 +2870,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; } case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_FMUL: case TargetOpcode::G_VECREDUCE_FMIN: case TargetOpcode::G_VECREDUCE_FMAX: case TargetOpcode::G_VECREDUCE_FMINIMUM: diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 3fbb937..cbb1a74 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -305,11 +305,7 @@ namespace { /// number if it is not zero. If DstReg is a physical register and the /// existing subregister number of the def / use being updated is not zero, /// make sure to set it to the correct physical subregister. - /// - /// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG - /// SrcReg. This introduces an implicit-def of DstReg on coalesced users. - void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx, - bool IsSubregToReg); + void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx); /// If the given machine operand reads only undefined lanes add an undef /// flag. @@ -1347,7 +1343,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, if (DstReg.isPhysical()) { Register NewDstReg = DstReg; - unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), DefSubIdx); + unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), + DefMI->getOperand(0).getSubReg()); if (NewDstIdx) NewDstReg = TRI->getSubReg(DstReg, NewDstIdx); @@ -1496,7 +1493,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, MRI->setRegClass(DstReg, NewRC); // Update machine operands and add flags. - updateRegDefsUses(DstReg, DstReg, DstIdx, false); + updateRegDefsUses(DstReg, DstReg, DstIdx); NewMI.getOperand(0).setSubReg(NewIdx); // updateRegDefUses can add an "undef" flag to the definition, since // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make @@ -1816,7 +1813,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, } void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, - unsigned SubIdx, bool IsSubregToReg) { + unsigned SubIdx) { bool DstIsPhys = DstReg.isPhysical(); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); @@ -1856,8 +1853,6 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr()) Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); - bool FullDef = true; - // Replace SrcReg with DstReg in all UseMI operands. for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = UseMI->getOperand(Ops[i]); @@ -1865,13 +1860,9 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, // Adjust <undef> flags in case of sub-register joins. We don't want to // turn a full def into a read-modify-write sub-register def and vice // versa. - if (SubIdx && MO.isDef()) { + if (SubIdx && MO.isDef()) MO.setIsUndef(!Reads); - if (!Reads) - FullDef = false; - } - // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. if (MO.isUse() && !DstIsPhys) { @@ -1903,25 +1894,6 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, MO.substVirtReg(DstReg, SubIdx, *TRI); } - if (IsSubregToReg && !FullDef) { - // If the coalesed instruction doesn't fully define the register, we need - // to preserve the original super register liveness for SUBREG_TO_REG. - // - // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes, - // but it introduces liveness for other subregisters. Downstream users may - // have been relying on those bits, so we need to ensure their liveness is - // captured with a def of other lanes. - - // FIXME: Need to add new subrange if tracking subranges. We could also - // skip adding this if we knew the other lanes are dead, and only for - // other lanes. - - assert(!MRI->shouldTrackSubRegLiveness(DstReg) && - "this should update subranges"); - MachineInstrBuilder MIB(*MF, UseMI); - MIB.addReg(DstReg, RegState::ImplicitDefine); - } - LLVM_DEBUG({ dbgs() << "\t\tupdated: "; if (!UseMI->isDebugInstr()) @@ -2121,8 +2093,6 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { }); } - const bool IsSubregToReg = CopyMI->isSubregToReg(); - ShrinkMask = LaneBitmask::getNone(); ShrinkMainRange = false; @@ -2190,12 +2160,9 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // Rewrite all SrcReg operands to DstReg. // Also update DstReg operands to include DstIdx if it is set. - if (CP.getDstIdx()) { - assert(!IsSubregToReg && "can this happen?"); - updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx(), false); - } - updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(), - IsSubregToReg); + if (CP.getDstIdx()) + updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); + updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); // Shrink subregister ranges if necessary. if (ShrinkMask.any()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index eafa95c..464e1be 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7987,7 +7987,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // If OR can be rewritten into ADD, try combines based on ADD. if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) && - DAG.haveNoCommonBitsSet(N0, N1)) + DAG.isADDLike(SDValue(N, 0))) if (SDValue Combined = visitADDLike(N)) return Combined; @@ -10055,7 +10055,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) { SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorklist(Shl0.getNode()); - return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1); + SDNodeFlags Flags; + // Preserve the disjoint flag for Or. + if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint()) + Flags.setDisjoint(true); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1, Flags); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0e17bba..4151964 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5022,7 +5022,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::CONCAT_VECTORS: case ISD::INSERT_SUBVECTOR: case ISD::AND: - case ISD::OR: case ISD::XOR: case ISD::ROTL: case ISD::ROTR: @@ -5062,6 +5061,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap()); + // Matches hasPoisonGeneratingFlags(). + case ISD::OR: + return ConsiderFlags && Op->getFlags().hasDisjoint(); + case ISD::INSERT_VECTOR_ELT:{ // Ensure that the element index is in bounds. EVT VecVT = Op.getOperand(0).getValueType(); @@ -5085,7 +5088,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool SelectionDAG::isADDLike(SDValue Op) const { unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::OR) - return haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1)); + return Op->getFlags().hasDisjoint() || + haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1)); if (Opcode == ISD::XOR) return isMinSignedConstant(Op.getOperand(1)); return false; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3c4b285..192f7bc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3354,6 +3354,8 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { } if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) Flags.setExact(ExactOp->isExact()); + if (auto *DisjointOp = dyn_cast<PossiblyDisjointInst>(&I)) + Flags.setDisjoint(DisjointOp->isDisjoint()); if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) Flags.copyFMF(*FPOp); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 78cc600..4ae3000 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -597,6 +597,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasExact()) OS << " exact"; + if (getFlags().hasDisjoint()) + OS << " disjoint"; + if (getFlags().hasNonNeg()) OS << " nneg"; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index c597754..66cdd75 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1064,10 +1064,9 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG, SDLoc DL(Op); SDValue ResultAVG = - DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA), - DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB)); - return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, - ResultAVG); + DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT), + DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT)); + return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT); } /// Look at Op. At this point, we know that only the OriginalDemandedBits of the @@ -1468,14 +1467,24 @@ bool TargetLowering::SimplifyDemandedBits( case ISD::OR: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); - + SDNodeFlags Flags = Op.getNode()->getFlags(); if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, - Depth + 1)) + Depth + 1)) { + if (Flags.hasDisjoint()) { + Flags.setDisjoint(false); + Op->setFlags(Flags); + } return true; + } assert(!Known.hasConflict() && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, - Known2, TLO, Depth + 1)) + Known2, TLO, Depth + 1)) { + if (Flags.hasDisjoint()) { + Flags.setDisjoint(false); + Op->setFlags(Flags); + } return true; + } assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. diff --git a/llvm/lib/Demangle/Demangle.cpp b/llvm/lib/Demangle/Demangle.cpp index 117b849..83f3cdc 100644 --- a/llvm/lib/Demangle/Demangle.cpp +++ b/llvm/lib/Demangle/Demangle.cpp @@ -47,8 +47,7 @@ static bool isRustEncoding(std::string_view S) { return starts_with(S, "_R"); } static bool isDLangEncoding(std::string_view S) { return starts_with(S, "_D"); } bool llvm::nonMicrosoftDemangle(std::string_view MangledName, - std::string &Result, bool CanHaveLeadingDot, - bool ParseParams) { + std::string &Result, bool CanHaveLeadingDot) { char *Demangled = nullptr; // Do not consider the dot prefix as part of the demangled symbol name. @@ -58,7 +57,7 @@ bool llvm::nonMicrosoftDemangle(std::string_view MangledName, } if (isItaniumEncoding(MangledName)) - Demangled = itaniumDemangle(MangledName, ParseParams); + Demangled = itaniumDemangle(MangledName); else if (isRustEncoding(MangledName)) Demangled = rustDemangle(MangledName); else if (isDLangEncoding(MangledName)) diff --git a/llvm/lib/Demangle/ItaniumDemangle.cpp b/llvm/lib/Demangle/ItaniumDemangle.cpp index 5c21b06..e3f208f 100644 --- a/llvm/lib/Demangle/ItaniumDemangle.cpp +++ b/llvm/lib/Demangle/ItaniumDemangle.cpp @@ -366,13 +366,13 @@ public: using Demangler = itanium_demangle::ManglingParser<DefaultAllocator>; -char *llvm::itaniumDemangle(std::string_view MangledName, bool ParseParams) { +char *llvm::itaniumDemangle(std::string_view MangledName) { if (MangledName.empty()) return nullptr; Demangler Parser(MangledName.data(), MangledName.data() + MangledName.length()); - Node *AST = Parser.parse(ParseParams); + Node *AST = Parser.parse(); if (!AST) return nullptr; diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp index 8eca874..8a4145a 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp @@ -21,31 +21,8 @@ // First version as landed in August 2009 static constexpr uint32_t JitDescriptorVersion = 1; -// Keep in sync with gdb/gdb/jit.h extern "C" { -typedef enum { - JIT_NOACTION = 0, - JIT_REGISTER_FN, - JIT_UNREGISTER_FN -} jit_actions_t; - -struct jit_code_entry { - struct jit_code_entry *next_entry; - struct jit_code_entry *prev_entry; - const char *symfile_addr; - uint64_t symfile_size; -}; - -struct jit_descriptor { - uint32_t version; - // This should be jit_actions_t, but we want to be specific about the - // bit-width. - uint32_t action_flag; - struct jit_code_entry *relevant_entry; - struct jit_code_entry *first_entry; -}; - // We put information about the JITed function in this global, which the // debugger reads. Make sure to specify the version statically, because the // debugger checks the version before we can set it during runtime. diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index eab05ee..c6dc42e 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -2115,6 +2115,10 @@ bool AssignmentTrackingPass::runOnFunction(Function &F) { if (F.hasFnAttribute(Attribute::OptimizeNone)) return /*Changed*/ false; + // FIXME: https://github.com/llvm/llvm-project/issues/76545 + if (F.hasFnAttribute(Attribute::SanitizeHWAddress)) + return /*Changed*/ false; + bool Changed = false; auto *DL = &F.getParent()->getDataLayout(); // Collect a map of {backing storage : dbg.declares} (currently "backing diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 05836fd..6a1e53b 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -592,7 +592,9 @@ LTO::LTO(Config Conf, ThinBackend Backend, unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode) : Conf(std::move(Conf)), RegularLTO(ParallelCodeGenParallelismLevel, this->Conf), - ThinLTO(std::move(Backend)), LTOMode(LTOMode) {} + ThinLTO(std::move(Backend)), + GlobalResolutions(std::make_optional<StringMap<GlobalResolution>>()), + LTOMode(LTOMode) {} // Requires a destructor for MapVector<BitcodeModule>. LTO::~LTO() = default; @@ -610,7 +612,7 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms, assert(ResI != ResE); SymbolResolution Res = *ResI++; - auto &GlobalRes = GlobalResolutions[Sym.getName()]; + auto &GlobalRes = (*GlobalResolutions)[Sym.getName()]; GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr(); if (Res.Prevailing) { assert(!GlobalRes.Prevailing && @@ -1125,7 +1127,7 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) { // Compute "dead" symbols, we don't want to import/export these! DenseSet<GlobalValue::GUID> GUIDPreservedSymbols; DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions; - for (auto &Res : GlobalResolutions) { + for (auto &Res : *GlobalResolutions) { // Normally resolution have IR name of symbol. We can do nothing here // otherwise. See comments in GlobalResolution struct for more details. if (Res.second.IRName.empty()) @@ -1169,6 +1171,8 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) { Error Result = runRegularLTO(AddStream); if (!Result) + // This will reset the GlobalResolutions optional once done with it to + // reduce peak memory before importing. Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols); if (StatsFile) @@ -1273,8 +1277,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { // This returns true when the name is local or not defined. Locals are // expected to be handled separately. auto IsVisibleToRegularObj = [&](StringRef name) { - auto It = GlobalResolutions.find(name); - return (It == GlobalResolutions.end() || It->second.VisibleOutsideSummary); + auto It = GlobalResolutions->find(name); + return (It == GlobalResolutions->end() || It->second.VisibleOutsideSummary); }; // If allowed, upgrade public vcall visibility metadata to linkage unit @@ -1291,7 +1295,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); if (!Conf.CodeGenOnly) { - for (const auto &R : GlobalResolutions) { + for (const auto &R : *GlobalResolutions) { GlobalValue *GV = RegularLTO.CombinedModule->getNamedValue(R.second.IRName); if (!R.second.isPrevailingIRSymbol()) @@ -1708,8 +1712,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, // This returns true when the name is local or not defined. Locals are // expected to be handled separately. auto IsVisibleToRegularObj = [&](StringRef name) { - auto It = GlobalResolutions.find(name); - return (It == GlobalResolutions.end() || + auto It = GlobalResolutions->find(name); + return (It == GlobalResolutions->end() || It->second.VisibleOutsideSummary); }; @@ -1739,15 +1743,11 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, ContextDisambiguation.run(ThinLTO.CombinedIndex, isPrevailing); } - if (Conf.OptLevel > 0) - ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - isPrevailing, ImportLists, ExportLists); - // Figure out which symbols need to be internalized. This also needs to happen // at -O0 because summary-based DCE is implemented using internalization, and // we must apply DCE consistently with the full LTO module in order to avoid // undefined references during the final link. - for (auto &Res : GlobalResolutions) { + for (auto &Res : *GlobalResolutions) { // If the symbol does not have external references or it is not prevailing, // then not need to mark it as exported from a ThinLTO partition. if (Res.second.Partition != GlobalResolution::External || @@ -1760,6 +1760,16 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, ExportedGUIDs.insert(GUID); } + // Reset the GlobalResolutions to deallocate the associated memory, as there + // are no further accesses. We specifically want to do this before computing + // cross module importing, which adds to peak memory via the computed import + // and export lists. + GlobalResolutions.reset(); + + if (Conf.OptLevel > 0) + ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, + isPrevailing, ImportLists, ExportLists); + // Any functions referenced by the jump table in the regular LTO object must // be exported. for (auto &Def : ThinLTO.CombinedIndex.cfiFunctionDefs()) diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index a85182a..9dae026 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -704,8 +704,14 @@ static void AttemptToFoldSymbolOffsetDifference( } int64_t Num; + unsigned Count; if (DF) { Displacement += DF->getContents().size(); + } else if (auto *AF = dyn_cast<MCAlignFragment>(FI); + AF && Layout && + !Asm->getBackend().shouldInsertExtraNopBytesForCodeAlign( + *AF, Count)) { + Displacement += Asm->computeFragmentSize(*Layout, *AF); } else if (auto *FF = dyn_cast<MCFillFragment>(FI); FF && FF->getNumValues().evaluateAsAbsolute(Num)) { Displacement += Num * FF->getValueSize(); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index f94bd42..439f749 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -452,9 +452,10 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO, std::optional<PGOOptions> PGOOpt, PassInstrumentationCallbacks *PIC) : TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) { + bool ShouldPopulateClassToPassNames = PIC && shouldPopulateClassToPassNames(); if (TM) - TM->registerPassBuilderCallbacks(*this); - if (PIC && shouldPopulateClassToPassNames()) { + TM->registerPassBuilderCallbacks(*this, ShouldPopulateClassToPassNames); + if (ShouldPopulateClassToPassNames) { #define MODULE_PASS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 9b8162c..1ea63a5 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -1231,15 +1231,6 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, // Only extend the RHS within the instruction if there is a valid extend type. if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && isValueAvailable(RHS)) { - if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) - if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) - if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { - Register RHSReg = getRegForValue(SI->getOperand(0)); - if (!RHSReg) - return 0; - return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, - C->getZExtValue(), SetFlags, WantResult); - } Register RHSReg = getRegForValue(RHS); if (!RHSReg) return 0; diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index cb63d87..10ad5b1 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -12586,6 +12586,7 @@ def : TokenAlias<".4S", ".4s">; def : TokenAlias<".2D", ".2d">; def : TokenAlias<".1Q", ".1q">; def : TokenAlias<".2H", ".2h">; +def : TokenAlias<".2B", ".2b">; def : TokenAlias<".B", ".b">; def : TokenAlias<".H", ".h">; def : TokenAlias<".S", ".s">; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 738a52e..380f6e1f 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -810,7 +810,7 @@ defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>; defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>; } -let Predicates = [HasSME2p1, HasB16B16] in { +let Predicates = [HasSME2, HasB16B16] in { defm BFADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfadd", 0b1100, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 1d0e8be..bb5f1f6 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -989,6 +989,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampMaxNumElements(1, s16, 8) .lower(); + // For fmul reductions we need to split up into individual operations. We + // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of + // smaller types, followed by scalarizing what remains. + getActionDefinitionsBuilder(G_VECREDUCE_FMUL) + .minScalarOrElt(0, MinFPScalar) + .clampMaxNumElements(1, s64, 2) + .clampMaxNumElements(1, s32, 4) + .clampMaxNumElements(1, s16, 8) + .clampMaxNumElements(1, s32, 2) + .clampMaxNumElements(1, s16, 4) + .scalarize(1) + .lower(); + getActionDefinitionsBuilder(G_VECREDUCE_ADD) .legalFor({{s8, v16s8}, {s8, v8s8}, @@ -1137,8 +1150,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) verify(*ST.getInstrInfo()); } -bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool AArch64LegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); GISelChangeObserver &Observer = Helper.Observer; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index 19f77ba..e96ec6d 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -28,7 +28,8 @@ class AArch64LegalizerInfo : public LegalizerInfo { public: AArch64LegalizerInfo(const AArch64Subtarget &ST); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index b755254..789ec81 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -10082,6 +10082,12 @@ multiclass sve2p1_vector_to_pred<string mnemonic, SDPatternOperator Op_lane, SDP def : InstAlias<mnemonic # "\t$Pd, $Zn", (!cast<Instruction>(NAME # _B) PPR8:$Pd, ZPRAny:$Zn, 0), 1>; + def : InstAlias<mnemonic # "\t$Pd, $Zn", + (!cast<Instruction>(NAME # _H) PPR16:$Pd, ZPRAny:$Zn, 0), 0>; + def : InstAlias<mnemonic # "\t$Pd, $Zn", + (!cast<Instruction>(NAME # _S) PPR32:$Pd, ZPRAny:$Zn, 0), 0>; + def : InstAlias<mnemonic # "\t$Pd, $Zn", + (!cast<Instruction>(NAME # _D) PPR64:$Pd, ZPRAny:$Zn, 0), 0>; // any_lane def : Pat<(nxv16i1 (Op_lane (nxv16i8 ZPRAny:$Zn), (i32 timm32_0_0:$Idx))), @@ -10143,6 +10149,12 @@ multiclass sve2p1_pred_to_vector<string mnemonic, SDPatternOperator MergeOp, def : InstAlias<mnemonic # "\t$Zd, $Pn", (!cast<Instruction>(NAME # _B) ZPRAny:$Zd, 0, PPR8:$Pn), 1>; + def : InstAlias<mnemonic # "\t$Zd, $Pn", + (!cast<Instruction>(NAME # _H) ZPRAny:$Zd, 0, PPR16:$Pn), 0>; + def : InstAlias<mnemonic # "\t$Zd, $Pn", + (!cast<Instruction>(NAME # _S) ZPRAny:$Zd, 0, PPR32:$Pn), 0>; + def : InstAlias<mnemonic # "\t$Zd, $Pn", + (!cast<Instruction>(NAME # _D) ZPRAny:$Zd, 0, PPR64:$Pn), 0>; // Merge def : Pat<(nxv8i16 (MergeOp (nxv8i16 ZPRAny:$Zd), (nxv8i1 PPR16:$Pn), (i32 timm32_1_1:$Idx))), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 88ef4b5..ad8dcda 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2764,7 +2764,9 @@ static bool isConstant(const MachineInstr &MI) { void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const { - const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg()); + unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1; + const MachineInstr *PtrMI = + MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg()); assert(PtrMI); @@ -2817,6 +2819,10 @@ bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const { if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) return true; + if (MI.getOpcode() == AMDGPU::G_PREFETCH) + return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() == + AMDGPU::SGPRRegBankID; + const Instruction *I = dyn_cast<Instruction>(Ptr); return I && I->getMetadata("amdgpu.uniform"); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index fbee288..dfbe5c7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1996,8 +1996,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, verify(*ST.getInstrInfo()); } -bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool AMDGPULegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { MachineIRBuilder &B = Helper.MIRBuilder; MachineRegisterInfo &MRI = *B.getMRI(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 855fa0d..1fa0648 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -35,7 +35,8 @@ public: AMDGPULegalizerInfo(const GCNSubtarget &ST, const GCNTargetMachine &TM); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; Register getSegmentAperture(unsigned AddrSpace, MachineRegisterInfo &MRI, diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index fba0604..92182ec 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3263,17 +3263,19 @@ void AMDGPURegisterBankInfo::applyMappingImpl( MI.eraseFromParent(); return; } - unsigned PtrBank = - getRegBankID(MI.getOperand(0).getReg(), MRI, AMDGPU::SGPRRegBankID); + Register PtrReg = MI.getOperand(0).getReg(); + unsigned PtrBank = getRegBankID(PtrReg, MRI, AMDGPU::SGPRRegBankID); if (PtrBank == AMDGPU::VGPRRegBankID) { MI.eraseFromParent(); return; } - // FIXME: There is currently no support for prefetch in global isel. - // There is no node equivalence and what's worse there is no MMO produced - // for a prefetch on global isel path. - // Prefetch does not affect execution so erase it for now. - MI.eraseFromParent(); + unsigned AS = MRI.getType(PtrReg).getAddressSpace(); + if (!AMDGPU::isFlatGlobalAddrSpace(AS) && + AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT) { + MI.eraseFromParent(); + return; + } + applyDefaultMapping(OpdMapper); return; } default: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index fdc2077..0f3bb3e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -620,7 +620,8 @@ void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) { AAM.registerFunctionAnalysis<AMDGPUAA>(); } -void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void AMDGPUTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerPipelineParsingCallback( [this](StringRef PassName, ModulePassManager &PM, ArrayRef<PassBuilder::PipelineElement>) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 9051a61..99c9db3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -51,7 +51,8 @@ public: return TLOF.get(); } - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; void registerDefaultAliasAnalyses(AAManager &) override; /// Get the integer value of a null pointer in the given address space. diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index abd7e91..5f2b7c0 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -166,6 +166,8 @@ public: ImmTyEndpgm, ImmTyWaitVDST, ImmTyWaitEXP, + ImmTyWaitVAVDst, + ImmTyWaitVMVSrc, }; // Immediate operand kind. @@ -909,6 +911,8 @@ public: bool isEndpgm() const; bool isWaitVDST() const; bool isWaitEXP() const; + bool isWaitVAVDst() const; + bool isWaitVMVSrc() const; auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const { return std::bind(P, *this); @@ -1029,6 +1033,7 @@ public: } static void printImmTy(raw_ostream& OS, ImmTy Type) { + // clang-format off switch (Type) { case ImmTyNone: OS << "None"; break; case ImmTyGDS: OS << "GDS"; break; @@ -1086,7 +1091,10 @@ public: case ImmTyEndpgm: OS << "Endpgm"; break; case ImmTyWaitVDST: OS << "WaitVDST"; break; case ImmTyWaitEXP: OS << "WaitEXP"; break; + case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break; + case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break; } + // clang-format on } void print(raw_ostream &OS) const override { @@ -9192,6 +9200,14 @@ bool AMDGPUOperand::isWaitVDST() const { return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm()); } +bool AMDGPUOperand::isWaitVAVDst() const { + return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm()); +} + +bool AMDGPUOperand::isWaitVMVSrc() const { + return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm()); +} + //===----------------------------------------------------------------------===// // VINTERP //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/DSDIRInstructions.td b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td new file mode 100644 index 0000000..54ef785 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td @@ -0,0 +1,191 @@ +//===-- DSDIRInstructions.td - LDS/VDS Direct Instruction Definitions -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LDSDIR/VDSDIR encoding (LDSDIR is gfx11, VDSDIR is gfx12+) +//===----------------------------------------------------------------------===// + +class LDSDIRe<bits<2> op, bit is_direct> : Enc32 { + // encoding fields + bits<2> attrchan; + bits<6> attr; + bits<4> waitvdst; + bits<8> vdst; + + // encoding + let Inst{31-24} = 0xce; // encoding + let Inst{23-22} = 0x0; // reserved + let Inst{21-20} = op; + let Inst{19-16} = waitvdst; + let Inst{15-10} = !if(is_direct, ?, attr); + let Inst{9-8} = !if(is_direct, ?, attrchan); + let Inst{7-0} = vdst; +} + +class VDSDIRe<bits<2> op, bit is_direct> : Enc32 { + // encoding fields + bits<2> attrchan; + bits<6> attr; + bits<4> waitvdst; + bits<8> vdst; + bits<1> waitvsrc; + + // encoding + let Inst{31-24} = 0xce; // encoding + let Inst{23} = waitvsrc; + let Inst{22} = 0x0; // reserved + let Inst{21-20} = op; + let Inst{19-16} = waitvdst; + let Inst{15-10} = !if(is_direct, ?, attr); + let Inst{9-8} = !if(is_direct, ?, attrchan); + let Inst{7-0} = vdst; +} + +//===----------------------------------------------------------------------===// +// LDSDIR/VDSDIR Classes +//===----------------------------------------------------------------------===// + +class LDSDIR_getIns<bit direct> { + dag ret = !if(direct, + (ins wait_vdst:$waitvdst), + (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_vdst:$waitvdst) + ); +} + +class VDSDIR_getIns<bit direct> { + dag ret = !if(direct, + (ins wait_va_vdst:$waitvdst, wait_va_vsrc:$waitvsrc), + (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_va_vdst:$waitvdst, + wait_va_vsrc:$waitvsrc) + ); +} + +class DSDIR_Common<string opName, string asm = "", dag ins, bit direct> : + InstSI<(outs VGPR_32:$vdst), ins, asm> { + let LDSDIR = 1; + let EXP_CNT = 1; + + let hasSideEffects = 0; + let mayLoad = 1; + let mayStore = 0; + + string Mnemonic = opName; + let UseNamedOperandTable = 1; + + let Uses = [M0, EXEC]; + let DisableWQM = 0; + let SchedRW = [WriteLDS]; + + bit is_direct; + let is_direct = direct; +} + +class DSDIR_Pseudo<string opName, dag ins, bit direct> : + DSDIR_Common<opName, "", ins, direct>, + SIMCInstr<opName, SIEncodingFamily.NONE> { + let isPseudo = 1; + let isCodeGenOnly = 1; +} + +class LDSDIR_getAsm<bit direct> { + string ret = !if(direct, + " $vdst$waitvdst", + " $vdst, $attr$attrchan$waitvdst" + ); +} + +class VDSDIR_getAsm<bit direct> { + string ret = !if(direct, + " $vdst$waitvdst$waitvsrc", + " $vdst, $attr$attrchan$waitvdst$waitvsrc" + ); +} + +class DSDIR_Real<DSDIR_Pseudo lds, dag ins, string asm, int subtarget> : + DSDIR_Common<lds.Mnemonic, + lds.Mnemonic # asm, + ins, + lds.is_direct>, + SIMCInstr <lds.Mnemonic, subtarget> { + let isPseudo = 0; + let isCodeGenOnly = 0; +} + +//===----------------------------------------------------------------------===// +// LDS/VDS Direct Instructions +//===----------------------------------------------------------------------===// + +let SubtargetPredicate = isGFX11Only in { + +def LDS_DIRECT_LOAD : DSDIR_Pseudo<"lds_direct_load", LDSDIR_getIns<1>.ret, 1>; +def LDS_PARAM_LOAD : DSDIR_Pseudo<"lds_param_load", LDSDIR_getIns<0>.ret, 0>; + +def : GCNPat < + (f32 (int_amdgcn_lds_direct_load M0)), + (LDS_DIRECT_LOAD 0) +>; + +def : GCNPat < + (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)), + (LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0) +>; + +} // End SubtargetPredicate = isGFX11Only + +let SubtargetPredicate = isGFX12Plus in { + +def DS_DIRECT_LOAD : DSDIR_Pseudo<"ds_direct_load", VDSDIR_getIns<1>.ret, 1>; +def DS_PARAM_LOAD : DSDIR_Pseudo<"ds_param_load", VDSDIR_getIns<0>.ret, 0>; + +def : GCNPat < + (f32 (int_amdgcn_lds_direct_load M0)), + (DS_DIRECT_LOAD 0, 1) +>; + +def : GCNPat < + (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)), + (DS_PARAM_LOAD timm:$attr, timm:$attrchan, 0, 1) +>; + +} // End SubtargetPredicate = isGFX12Only + +//===----------------------------------------------------------------------===// +// GFX11 +//===----------------------------------------------------------------------===// + +multiclass DSDIR_Real_gfx11<bits<2> op, + DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> { + def _gfx11 : DSDIR_Real<lds, lds.InOperandList, + LDSDIR_getAsm<lds.is_direct>.ret, + SIEncodingFamily.GFX11>, + LDSDIRe<op, lds.is_direct> { + let AssemblerPredicate = isGFX11Only; + let DecoderNamespace = "GFX11"; + } +} + +defm LDS_PARAM_LOAD : DSDIR_Real_gfx11<0x0>; +defm LDS_DIRECT_LOAD : DSDIR_Real_gfx11<0x1>; + +//===----------------------------------------------------------------------===// +// GFX12+ +//===----------------------------------------------------------------------===// + +multiclass DSDIR_Real_gfx12<bits<2> op, + DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> { + def _gfx12 : DSDIR_Real<lds, lds.InOperandList, + VDSDIR_getAsm<lds.is_direct>.ret, + SIEncodingFamily.GFX12>, + VDSDIRe<op, lds.is_direct> { + let AssemblerPredicate = isGFX12Plus; + let DecoderNamespace = "GFX12"; + } +} + +defm DS_PARAM_LOAD : DSDIR_Real_gfx12<0x0>; +defm DS_DIRECT_LOAD : DSDIR_Real_gfx12<0x1>; diff --git a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td deleted file mode 100644 index 4956a15..0000000 --- a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td +++ /dev/null @@ -1,116 +0,0 @@ -//===-- LDSDIRInstructions.td - LDS Direct Instruction Definitions --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// LDSDIR encoding -//===----------------------------------------------------------------------===// - -class LDSDIRe<bits<2> op, bit is_direct> : Enc32 { - // encoding fields - bits<2> attrchan; - bits<6> attr; - bits<4> waitvdst; - bits<8> vdst; - - // encoding - let Inst{31-24} = 0xce; // encoding - let Inst{23-22} = 0x0; // reserved - let Inst{21-20} = op; - let Inst{19-16} = waitvdst; - let Inst{15-10} = !if(is_direct, ?, attr); - let Inst{9-8} = !if(is_direct, ?, attrchan); - let Inst{7-0} = vdst; -} - -//===----------------------------------------------------------------------===// -// LDSDIR Classes -//===----------------------------------------------------------------------===// - -class LDSDIR_getIns<bit direct> { - dag ret = !if(direct, - (ins wait_vdst:$waitvdst), - (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_vdst:$waitvdst) - ); -} - -class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI< - (outs VGPR_32:$vdst), - LDSDIR_getIns<direct>.ret, - asm> { - let LDSDIR = 1; - let EXP_CNT = 1; - - let hasSideEffects = 0; - let mayLoad = 1; - let mayStore = 0; - - string Mnemonic = opName; - let UseNamedOperandTable = 1; - - let Uses = [M0, EXEC]; - let DisableWQM = 0; - let SchedRW = [WriteLDS]; - - bit is_direct; - let is_direct = direct; -} - -class LDSDIR_Pseudo<string opName, bit direct> : - LDSDIR_Common<opName, "", direct>, - SIMCInstr<opName, SIEncodingFamily.NONE> { - let isPseudo = 1; - let isCodeGenOnly = 1; -} - -class LDSDIR_getAsm<bit direct> { - string ret = !if(direct, - " $vdst$waitvdst", - " $vdst, $attr$attrchan$waitvdst" - ); -} - -class LDSDIR_Real<bits<2> op, LDSDIR_Pseudo lds, int subtarget> : - LDSDIR_Common<lds.Mnemonic, - lds.Mnemonic # LDSDIR_getAsm<lds.is_direct>.ret, - lds.is_direct>, - SIMCInstr <lds.Mnemonic, subtarget>, - LDSDIRe<op, lds.is_direct> { - let isPseudo = 0; - let isCodeGenOnly = 0; -} - -//===----------------------------------------------------------------------===// -// LDS Direct Instructions -//===----------------------------------------------------------------------===// - -def LDS_DIRECT_LOAD : LDSDIR_Pseudo<"lds_direct_load", 1>; -def LDS_PARAM_LOAD : LDSDIR_Pseudo<"lds_param_load", 0>; - -def : GCNPat < - (f32 (int_amdgcn_lds_direct_load M0)), - (LDS_DIRECT_LOAD 0) ->; - -def : GCNPat < - (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)), - (LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0) ->; - -//===----------------------------------------------------------------------===// -// GFX11+ -//===----------------------------------------------------------------------===// - -multiclass LDSDIR_Real_gfx11<bits<2> op, LDSDIR_Pseudo lds = !cast<LDSDIR_Pseudo>(NAME)> { - def _gfx11 : LDSDIR_Real<op, lds, SIEncodingFamily.GFX11> { - let AssemblerPredicate = isGFX11Plus; - let DecoderNamespace = "GFX11"; - } -} - -defm LDS_PARAM_LOAD : LDSDIR_Real_gfx11<0x0>; -defm LDS_DIRECT_LOAD : LDSDIR_Real_gfx11<0x1>; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index edc244d..ef1b85f 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -639,6 +639,20 @@ void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo, printU4ImmDecOperand(MI, OpNo, O); } +void AMDGPUInstPrinter::printWaitVAVDst(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + O << " wait_va_vdst:"; + printU4ImmDecOperand(MI, OpNo, O); +} + +void AMDGPUInstPrinter::printWaitVMVSrc(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + O << " wait_vm_vsrc:"; + printU4ImmDecOperand(MI, OpNo, O); +} + void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index 95c26de..f2f985f 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -161,6 +161,10 @@ private: raw_ostream &O); void printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printWaitVAVDst(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printWaitVMVSrc(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printExpSrcN(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O, unsigned N); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 173c877..50724fd 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1144,6 +1144,8 @@ def exp_tgt : CustomOperand<i32, 0, "ExpTgt">; def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">; def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">; +def wait_va_vdst : NamedIntOperand<i8, "wait_va_vdst", "WaitVAVDst">; +def wait_va_vsrc : NamedIntOperand<i8, "wait_vm_vsrc", "WaitVMVSrc">; class KImmFPOperand<ValueType vt> : ImmOperand<vt> { let OperandNamespace = "AMDGPU"; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 8310c6b..0f12727 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -30,7 +30,7 @@ include "SMInstructions.td" include "FLATInstructions.td" include "BUFInstructions.td" include "EXPInstructions.td" -include "LDSDIRInstructions.td" +include "DSDIRInstructions.td" include "VINTERPInstructions.td" //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 59d6ccf..5e6c349 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -553,7 +553,9 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, } continue; } else if (Opcode == AMDGPU::LDS_PARAM_LOAD || - Opcode == AMDGPU::LDS_DIRECT_LOAD) { + Opcode == AMDGPU::DS_PARAM_LOAD || + Opcode == AMDGPU::LDS_DIRECT_LOAD || + Opcode == AMDGPU::DS_DIRECT_LOAD) { // Mark these STRICTWQM, but only for the instruction, not its operands. // This avoid unnecessarily marking M0 as requiring WQM. InstrInfo &II = Instructions[&MI]; diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index 3297847..be21cf0 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -977,20 +977,35 @@ def : GCNPat < } } // let OtherPredicates = [HasShaderCyclesRegister] -multiclass SMPrefetchPat<string type, int cache_type> { +def i32imm_zero : TImmLeaf <i32, [{ + return Imm == 0; +}]>; + +def i32imm_one : TImmLeaf <i32, [{ + return Imm == 1; +}]>; + +multiclass SMPrefetchPat<string type, TImmLeaf cache_type> { def : GCNPat < - (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, (i32 cache_type)), + (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, cache_type), (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, $offset, (i32 SGPR_NULL), (i8 0)) >; def : GCNPat < - (smrd_prefetch (i64 SReg_64:$sbase), timm, timm, (i32 cache_type)), + (smrd_prefetch (i64 SReg_64:$sbase), timm, timm, cache_type), (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, 0, (i32 SGPR_NULL), (i8 0)) >; + + def : GCNPat < + (smrd_prefetch (i32 SReg_32:$sbase), timm, timm, cache_type), + (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) + (i64 (REG_SEQUENCE SReg_64, $sbase, sub0, (i32 (S_MOV_B32 (i32 0))), sub1)), + 0, (i32 SGPR_NULL), (i8 0)) + >; } -defm : SMPrefetchPat<"INST", 0>; -defm : SMPrefetchPat<"DATA", 1>; +defm : SMPrefetchPat<"INST", i32imm_zero>; +defm : SMPrefetchPat<"DATA", i32imm_one>; //===----------------------------------------------------------------------===// // GFX10. diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index 3ffde86..abea0fe 100644 --- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -362,8 +362,8 @@ ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate, llvm_unreachable("Unsupported size for FCmp predicate"); } -bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { using namespace TargetOpcode; MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; @@ -392,7 +392,8 @@ bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, OriginalResult}; auto Status = createLibcall(MIRBuilder, Libcall, {RetRegs, RetTy, 0}, {{MI.getOperand(1).getReg(), ArgTy, 0}, - {MI.getOperand(2).getReg(), ArgTy, 0}}); + {MI.getOperand(2).getReg(), ArgTy, 0}}, + LocObserver, &MI); if (Status != LegalizerHelper::Legalized) return false; break; @@ -428,7 +429,8 @@ bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, auto Status = createLibcall(MIRBuilder, Libcall.LibcallID, {LibcallResult, RetTy, 0}, {{MI.getOperand(2).getReg(), ArgTy, 0}, - {MI.getOperand(3).getReg(), ArgTy, 0}}); + {MI.getOperand(3).getReg(), ArgTy, 0}}, + LocObserver, &MI); if (Status != LegalizerHelper::Legalized) return false; diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/llvm/lib/Target/ARM/ARMLegalizerInfo.h index f1c2e9c..3636cc6 100644 --- a/llvm/lib/Target/ARM/ARMLegalizerInfo.h +++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.h @@ -28,7 +28,8 @@ class ARMLegalizerInfo : public LegalizerInfo { public: ARMLegalizerInfo(const ARMSubtarget &ST); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; private: void setFCmpLibcallsGNU(); diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp index ab0db576..8973684 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -108,7 +108,8 @@ TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) { return new BPFPassConfig(*this, PM); } -void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void BPFTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerPipelineParsingCallback( [](StringRef PassName, FunctionPassManager &FPM, ArrayRef<PassBuilder::PipelineElement>) { diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.h b/llvm/lib/Target/BPF/BPFTargetMachine.h index 4e6adc7..0a28394 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.h +++ b/llvm/lib/Target/BPF/BPFTargetMachine.h @@ -42,7 +42,8 @@ public: return TLOF.get(); } - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; }; } diff --git a/llvm/lib/Target/DirectX/DXILResourceAnalysis.h b/llvm/lib/Target/DirectX/DXILResourceAnalysis.h index 8ffa1d7..bce4116 100644 --- a/llvm/lib/Target/DirectX/DXILResourceAnalysis.h +++ b/llvm/lib/Target/DirectX/DXILResourceAnalysis.h @@ -36,6 +36,7 @@ class DXILResourcePrinterPass : public PassInfoMixin<DXILResourcePrinterPass> { public: explicit DXILResourcePrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; /// The legacy pass manager's analysis pass to compute DXIL resource diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index d5cb488..06938f8 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -100,7 +100,8 @@ DirectXTargetMachine::DirectXTargetMachine(const Target &T, const Triple &TT, DirectXTargetMachine::~DirectXTargetMachine() {} -void DirectXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void DirectXTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerPipelineParsingCallback( [](StringRef PassName, ModulePassManager &PM, ArrayRef<PassBuilder::PipelineElement>) { diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.h b/llvm/lib/Target/DirectX/DirectXTargetMachine.h index d04c375..428beaf 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.h +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.h @@ -47,7 +47,8 @@ public: } TargetTransformInfo getTargetTransformInfo(const Function &F) const override; - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; }; } // namespace llvm diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 590e464..e7a692d 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -274,7 +274,8 @@ HexagonTargetMachine::getSubtargetImpl(const Function &F) const { return I.get(); } -void HexagonTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void HexagonTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerLateLoopOptimizationsEPCallback( [=](LoopPassManager &LPM, OptimizationLevel Level) { LPM.addPass(HexagonLoopIdiomRecognitionPass()); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h index dddd79a..c5fed0c 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h @@ -34,7 +34,8 @@ public: ~HexagonTargetMachine() override; const HexagonSubtarget *getSubtargetImpl(const Function &F) const override; - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; TargetPassConfig *createPassConfig(PassManagerBase &PM) override; TargetTransformInfo getTargetTransformInfo(const Function &F) const override; diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp index 14f2620..f5e9423 100644 --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -330,8 +330,9 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { verify(*ST.getInstrInfo()); } -bool MipsLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool MipsLegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { using namespace TargetOpcode; MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.h b/llvm/lib/Target/Mips/MipsLegalizerInfo.h index 05027b7..63daebf 100644 --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.h +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.h @@ -25,7 +25,8 @@ class MipsLegalizerInfo : public LegalizerInfo { public: MipsLegalizerInfo(const MipsSubtarget &ST); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 8d89576..fad69f5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -225,7 +225,8 @@ void NVPTXTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) { AAM.registerFunctionAnalysis<NVPTXAA>(); } -void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void NVPTXTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerPipelineParsingCallback( [](StringRef PassName, FunctionPassManager &PM, ArrayRef<PassBuilder::PipelineElement>) { diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h index cfdd8da..9e6bf92 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -69,7 +69,8 @@ public: void registerDefaultAliasAnalyses(AAManager &AAM) override; - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; TargetTransformInfo getTargetTransformInfo(const Function &F) const override; diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 079906d..61bae58 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -411,8 +411,9 @@ bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI, return true; } -bool RISCVLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool RISCVLegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; GISelChangeObserver &Observer = Helper.Observer; switch (MI.getOpcode()) { diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h index 48c3697..4335bd0 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h @@ -30,7 +30,8 @@ class RISCVLegalizerInfo : public LegalizerInfo { public: RISCVLegalizerInfo(const RISCVSubtarget &ST); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 03a59f8..27bb69d 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1374,8 +1374,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, - ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL, - ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); + ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND, + ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); if (Subtarget.is64Bit()) setTargetDAGCombine(ISD::SRA); @@ -12850,9 +12850,9 @@ struct CombineResult; /// Helper class for folding sign/zero extensions. /// In particular, this class is used for the following combines: -/// add | add_vl -> vwadd(u) | vwadd(u)_w -/// sub | sub_vl -> vwsub(u) | vwsub(u)_w -/// mul | mul_vl -> vwmul(u) | vwmul_su +/// add_vl -> vwadd(u) | vwadd(u)_w +/// sub_vl -> vwsub(u) | vwsub(u)_w +/// mul_vl -> vwmul(u) | vwmul_su /// /// An object of this class represents an operand of the operation we want to /// combine. @@ -12897,8 +12897,6 @@ struct NodeExtensionHelper { /// E.g., for zext(a), this would return a. SDValue getSource() const { switch (OrigOperand.getOpcode()) { - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: case RISCVISD::VSEXT_VL: case RISCVISD::VZEXT_VL: return OrigOperand.getOperand(0); @@ -12915,8 +12913,7 @@ struct NodeExtensionHelper { /// Get or create a value that can feed \p Root with the given extension \p /// SExt. If \p SExt is std::nullopt, this returns the source of this operand. /// \see ::getSource(). - SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget, + SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG, std::optional<bool> SExt) const { if (!SExt.has_value()) return OrigOperand; @@ -12931,10 +12928,8 @@ struct NodeExtensionHelper { // If we need an extension, we should be changing the type. SDLoc DL(Root); - auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); + auto [Mask, VL] = getMaskAndVL(Root); switch (OrigOperand.getOpcode()) { - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: case RISCVISD::VSEXT_VL: case RISCVISD::VZEXT_VL: return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL); @@ -12974,15 +12969,12 @@ struct NodeExtensionHelper { /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()). static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) { switch (Opcode) { - case ISD::ADD: case RISCVISD::ADD_VL: case RISCVISD::VWADD_W_VL: case RISCVISD::VWADDU_W_VL: return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL; - case ISD::MUL: case RISCVISD::MUL_VL: return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; - case ISD::SUB: case RISCVISD::SUB_VL: case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: @@ -12995,8 +12987,7 @@ struct NodeExtensionHelper { /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> /// newOpcode(a, b). static unsigned getSUOpcode(unsigned Opcode) { - assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) && - "SU is only supported for MUL"); + assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL"); return RISCVISD::VWMULSU_VL; } @@ -13004,10 +12995,8 @@ struct NodeExtensionHelper { /// newOpcode(a, b). static unsigned getWOpcode(unsigned Opcode, bool IsSExt) { switch (Opcode) { - case ISD::ADD: case RISCVISD::ADD_VL: return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL; - case ISD::SUB: case RISCVISD::SUB_VL: return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL; default: @@ -13017,33 +13006,19 @@ struct NodeExtensionHelper { using CombineToTry = std::function<std::optional<CombineResult>( SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, - const NodeExtensionHelper & /*RHS*/, SelectionDAG &, - const RISCVSubtarget &)>; + const NodeExtensionHelper & /*RHS*/)>; /// Check if this node needs to be fully folded or extended for all users. bool needToPromoteOtherUsers() const { return EnforceOneUse; } /// Helper method to set the various fields of this struct based on the /// type of \p Root. - void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) { SupportsZExt = false; SupportsSExt = false; EnforceOneUse = true; CheckMask = true; - unsigned Opc = OrigOperand.getOpcode(); - switch (Opc) { - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: { - if (OrigOperand.getValueType().isVector()) { - SupportsZExt = Opc == ISD::ZERO_EXTEND; - SupportsSExt = Opc == ISD::SIGN_EXTEND; - SDLoc DL(Root); - MVT VT = Root->getSimpleValueType(0); - std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget); - } - break; - } + switch (OrigOperand.getOpcode()) { case RISCVISD::VZEXT_VL: SupportsZExt = true; Mask = OrigOperand.getOperand(1); @@ -13099,16 +13074,8 @@ struct NodeExtensionHelper { } /// Check if \p Root supports any extension folding combines. - static bool isSupportedRoot(const SDNode *Root, const SelectionDAG &DAG) { + static bool isSupportedRoot(const SDNode *Root) { switch (Root->getOpcode()) { - case ISD::ADD: - case ISD::SUB: - case ISD::MUL: { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!TLI.isTypeLegal(Root->getValueType(0))) - return false; - return Root->getValueType(0).isScalableVector(); - } case RISCVISD::ADD_VL: case RISCVISD::MUL_VL: case RISCVISD::VWADD_W_VL: @@ -13123,10 +13090,9 @@ struct NodeExtensionHelper { } /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). - NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - assert(isSupportedRoot(Root, DAG) && "Trying to build an helper with an " - "unsupported root"); + NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) { + assert(isSupportedRoot(Root) && "Trying to build an helper with an " + "unsupported root"); assert(OperandIdx < 2 && "Requesting something else than LHS or RHS"); OrigOperand = Root->getOperand(OperandIdx); @@ -13142,7 +13108,7 @@ struct NodeExtensionHelper { SupportsZExt = Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; SupportsSExt = !SupportsZExt; - std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget); + std::tie(Mask, VL) = getMaskAndVL(Root); CheckMask = true; // There's no existing extension here, so we don't have to worry about // making sure it gets removed. @@ -13151,7 +13117,7 @@ struct NodeExtensionHelper { } [[fallthrough]]; default: - fillUpExtensionSupport(Root, DAG, Subtarget); + fillUpExtensionSupport(Root, DAG); break; } } @@ -13167,27 +13133,14 @@ struct NodeExtensionHelper { } /// Helper function to get the Mask and VL from \p Root. - static std::pair<SDValue, SDValue> - getMaskAndVL(const SDNode *Root, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - assert(isSupportedRoot(Root, DAG) && "Unexpected root"); - switch (Root->getOpcode()) { - case ISD::ADD: - case ISD::SUB: - case ISD::MUL: { - SDLoc DL(Root); - MVT VT = Root->getSimpleValueType(0); - return getDefaultScalableVLOps(VT, DL, DAG, Subtarget); - } - default: - return std::make_pair(Root->getOperand(3), Root->getOperand(4)); - } + static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) { + assert(isSupportedRoot(Root) && "Unexpected root"); + return std::make_pair(Root->getOperand(3), Root->getOperand(4)); } /// Check if the Mask and VL of this operand are compatible with \p Root. - bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) const { - auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); + bool areVLAndMaskCompatible(const SDNode *Root) const { + auto [Mask, VL] = getMaskAndVL(Root); return isMaskCompatible(Mask) && isVLCompatible(VL); } @@ -13195,14 +13148,11 @@ struct NodeExtensionHelper { /// foldings that are supported by this class. static bool isCommutative(const SDNode *N) { switch (N->getOpcode()) { - case ISD::ADD: - case ISD::MUL: case RISCVISD::ADD_VL: case RISCVISD::MUL_VL: case RISCVISD::VWADD_W_VL: case RISCVISD::VWADDU_W_VL: return true; - case ISD::SUB: case RISCVISD::SUB_VL: case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: @@ -13247,25 +13197,14 @@ struct CombineResult { /// Return a value that uses TargetOpcode and that can be used to replace /// Root. /// The actual replacement is *not* done in that method. - SDValue materialize(SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) const { + SDValue materialize(SelectionDAG &DAG) const { SDValue Mask, VL, Merge; - std::tie(Mask, VL) = - NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget); - switch (Root->getOpcode()) { - default: - Merge = Root->getOperand(2); - break; - case ISD::ADD: - case ISD::SUB: - case ISD::MUL: - Merge = DAG.getUNDEF(Root->getValueType(0)); - break; - } + std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root); + Merge = Root->getOperand(2); return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0), - LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtLHS), - RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtRHS), - Merge, Mask, VL); + LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS), + RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge, + Mask, VL); } }; @@ -13282,16 +13221,15 @@ struct CombineResult { static std::optional<CombineResult> canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, const NodeExtensionHelper &RHS, bool AllowSExt, - bool AllowZExt, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + bool AllowZExt) { assert((AllowSExt || AllowZExt) && "Forgot to set what you want?"); - if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || - !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) + if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) return std::nullopt; if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/false), - Root, LHS, /*SExtLHS=*/false, RHS, /*SExtRHS=*/false); + Root, LHS, /*SExtLHS=*/false, RHS, + /*SExtRHS=*/false); if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/true), @@ -13308,10 +13246,9 @@ canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + const NodeExtensionHelper &RHS) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, - /*AllowZExt=*/true, DAG, Subtarget); + /*AllowZExt=*/true); } /// Check if \p Root follows a pattern Root(LHS, ext(RHS)) @@ -13320,9 +13257,8 @@ canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) + const NodeExtensionHelper &RHS) { + if (!RHS.areVLAndMaskCompatible(Root)) return std::nullopt; // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar @@ -13346,10 +13282,9 @@ canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + const NodeExtensionHelper &RHS) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, - /*AllowZExt=*/false, DAG, Subtarget); + /*AllowZExt=*/false); } /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) @@ -13358,10 +13293,9 @@ canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + const NodeExtensionHelper &RHS) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false, - /*AllowZExt=*/true, DAG, Subtarget); + /*AllowZExt=*/true); } /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) @@ -13370,13 +13304,10 @@ canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - + const NodeExtensionHelper &RHS) { if (!LHS.SupportsSExt || !RHS.SupportsZExt) return std::nullopt; - if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || - !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) + if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) return std::nullopt; return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()), Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false); @@ -13386,8 +13317,6 @@ SmallVector<NodeExtensionHelper::CombineToTry> NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { SmallVector<CombineToTry> Strategies; switch (Root->getOpcode()) { - case ISD::ADD: - case ISD::SUB: case RISCVISD::ADD_VL: case RISCVISD::SUB_VL: // add|sub -> vwadd(u)|vwsub(u) @@ -13395,7 +13324,6 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { // add|sub -> vwadd(u)_w|vwsub(u)_w Strategies.push_back(canFoldToVW_W); break; - case ISD::MUL: case RISCVISD::MUL_VL: // mul -> vwmul(u) Strategies.push_back(canFoldToVWWithSameExtension); @@ -13426,14 +13354,12 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { /// mul_vl -> vwmul(u) | vwmul_su /// vwadd_w(u) -> vwadd(u) /// vwub_w(u) -> vwadd(u) -static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const RISCVSubtarget &Subtarget) { +static SDValue +combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; - if (!NodeExtensionHelper::isSupportedRoot(N, DAG)) - return SDValue(); - + assert(NodeExtensionHelper::isSupportedRoot(N) && + "Shouldn't have called this method"); SmallVector<SDNode *> Worklist; SmallSet<SDNode *, 8> Inserted; Worklist.push_back(N); @@ -13442,11 +13368,11 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, while (!Worklist.empty()) { SDNode *Root = Worklist.pop_back_val(); - if (!NodeExtensionHelper::isSupportedRoot(Root, DAG)) + if (!NodeExtensionHelper::isSupportedRoot(Root)) return SDValue(); - NodeExtensionHelper LHS(N, 0, DAG, Subtarget); - NodeExtensionHelper RHS(N, 1, DAG, Subtarget); + NodeExtensionHelper LHS(N, 0, DAG); + NodeExtensionHelper RHS(N, 1, DAG); auto AppendUsersIfNeeded = [&Worklist, &Inserted](const NodeExtensionHelper &Op) { if (Op.needToPromoteOtherUsers()) { @@ -13473,8 +13399,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, for (NodeExtensionHelper::CombineToTry FoldingStrategy : FoldingStrategies) { - std::optional<CombineResult> Res = - FoldingStrategy(N, LHS, RHS, DAG, Subtarget); + std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS); if (Res) { Matched = true; CombinesToApply.push_back(*Res); @@ -13503,7 +13428,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; ValuesToReplace.reserve(CombinesToApply.size()); for (CombineResult Res : CombinesToApply) { - SDValue NewValue = Res.materialize(DAG, Subtarget); + SDValue NewValue = Res.materialize(DAG); if (!InputRootReplacement) { assert(Res.Root == N && "First element is expected to be the current node"); @@ -14775,20 +14700,13 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { - - assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD); - - if (N->getValueType(0).isFixedLengthVector()) - return SDValue(); - + assert(N->getOpcode() == RISCVISD::ADD_VL); SDValue Addend = N->getOperand(0); SDValue MulOp = N->getOperand(1); + SDValue AddMergeOp = N->getOperand(2); - if (N->getOpcode() == RISCVISD::ADD_VL) { - SDValue AddMergeOp = N->getOperand(2); - if (!AddMergeOp.isUndef()) - return SDValue(); - } + if (!AddMergeOp.isUndef()) + return SDValue(); auto IsVWMulOpc = [](unsigned Opc) { switch (Opc) { @@ -14812,16 +14730,8 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, if (!MulMergeOp.isUndef()) return SDValue(); - auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - if (N->getOpcode() == ISD::ADD) { - SDLoc DL(N); - return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG, - Subtarget); - } - return std::make_pair(N->getOperand(3), N->getOperand(4)); - }(N, DAG, Subtarget); - + SDValue AddMask = N->getOperand(3); + SDValue AddVL = N->getOperand(4); SDValue MulMask = MulOp.getOperand(3); SDValue MulVL = MulOp.getOperand(4); @@ -15087,18 +14997,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getNode(ISD::AND, DL, VT, NewFMV, DAG.getConstant(~SignBit, DL, VT)); } - case ISD::ADD: { - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) - return V; - if (SDValue V = combineToVWMACC(N, DAG, Subtarget)) - return V; + case ISD::ADD: return performADDCombine(N, DAG, Subtarget); - } - case ISD::SUB: { - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) - return V; + case ISD::SUB: return performSUBCombine(N, DAG, Subtarget); - } case ISD::AND: return performANDCombine(N, DCI, Subtarget); case ISD::OR: @@ -15106,8 +15008,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return performXORCombine(N, DAG, Subtarget); case ISD::MUL: - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) - return V; return performMULCombine(N, DAG); case ISD::FADD: case ISD::UMAX: @@ -15584,7 +15484,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; } case RISCVISD::ADD_VL: - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI)) return V; return combineToVWMACC(N, DAG, Subtarget); case RISCVISD::SUB_VL: @@ -15593,7 +15493,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: case RISCVISD::MUL_VL: - return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); + return combineBinOp_VLToVWBinOp_VL(N, DCI); case RISCVISD::VFMADD_VL: case RISCVISD::VFNMADD_VL: case RISCVISD::VFMSUB_VL: diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp index faaf7f0..061bc96 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp @@ -289,8 +289,9 @@ static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpirvType, return ConvReg; } -bool SPIRVLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool SPIRVLegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { auto Opc = MI.getOpcode(); MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); if (!isTypeFoldingSupported(Opc)) { diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h index 2541ff2..f18b15b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h @@ -29,7 +29,8 @@ class SPIRVLegalizerInfo : public LegalizerInfo { SPIRVGlobalRegistry *GR; public: - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; SPIRVLegalizerInfo(const SPIRVSubtarget &ST); }; } // namespace llvm diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1e4b136..cd56529 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -40221,6 +40221,34 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, } return SDValue(); } + case X86ISD::SHUF128: { + // If we're permuting the upper 256-bits subvectors of a concatenation, then + // see if we can peek through and access the subvector directly. + if (VT.is512BitVector()) { + // 512-bit mask uses 4 x i2 indices - if the msb is always set then only the + // upper subvector is used. + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + uint64_t Mask = N->getConstantOperandVal(2); + SmallVector<SDValue> LHSOps, RHSOps; + SDValue NewLHS, NewRHS; + if ((Mask & 0x0A) == 0x0A && + collectConcatOps(LHS.getNode(), LHSOps, DAG) && LHSOps.size() == 2) { + NewLHS = widenSubVector(LHSOps[1], false, Subtarget, DAG, DL, 512); + Mask &= ~0x0A; + } + if ((Mask & 0xA0) == 0xA0 && + collectConcatOps(RHS.getNode(), RHSOps, DAG) && RHSOps.size() == 2) { + NewRHS = widenSubVector(RHSOps[1], false, Subtarget, DAG, DL, 512); + Mask &= ~0xA0; + } + if (NewLHS || NewRHS) + return DAG.getNode(X86ISD::SHUF128, DL, VT, NewLHS ? NewLHS : LHS, + NewRHS ? NewRHS : RHS, + DAG.getTargetConstant(Mask, DL, MVT::i8)); + } + return SDValue(); + } case X86ISD::VPERM2X128: { // Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)). SDValue LHS = N->getOperand(0); @@ -54572,6 +54600,14 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, Op0.getValueType() == cast<MemSDNode>(SrcVec)->getMemoryVT()) return Op0.getOperand(0); } + + // concat_vectors(permq(x),permq(x)) -> permq(concat_vectors(x,x)) + if (Op0.getOpcode() == X86ISD::VPERMI && Subtarget.useAVX512Regs() && + !X86::mayFoldLoad(Op0.getOperand(0), Subtarget)) + return DAG.getNode(Op0.getOpcode(), DL, VT, + DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, + Op0.getOperand(0), Op0.getOperand(0)), + Op0.getOperand(1)); } // concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128. @@ -54979,6 +55015,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2)); } break; + case X86ISD::BLENDI: + if (NumOps == 2 && VT.is512BitVector() && Subtarget.useBWIRegs()) { + uint64_t Mask0 = Ops[0].getConstantOperandVal(2); + uint64_t Mask1 = Ops[1].getConstantOperandVal(2); + uint64_t Mask = (Mask1 << (VT.getVectorNumElements() / 2)) | Mask0; + MVT MaskSVT = MVT::getIntegerVT(VT.getVectorNumElements()); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue Sel = + DAG.getBitcast(MaskVT, DAG.getConstant(Mask, DL, MaskSVT)); + return DAG.getSelect(DL, VT, Sel, ConcatSubOperand(VT, Ops, 1), + ConcatSubOperand(VT, Ops, 0)); + } + break; case ISD::VSELECT: if (!IsSplat && Subtarget.hasAVX512() && (VT.is256BitVector() || diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index 6b0c1b8..08f5a88 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -350,212 +350,212 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, let isCommutable = CommutableRR, isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in { let Predicates = [NoNDD] in { - def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; - def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16; - def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32; - def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; + def 8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; + def 16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16; + def 32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32; + def 64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag, 1>; - def NAME#16rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag, 1>, PD; - def NAME#32rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag, 1>; - def NAME#64rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag, 1>; - def NAME#8rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi8, 1>, EVEX_NF; - def NAME#16rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi16, 1>, EVEX_NF, PD; - def NAME#32rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi32, 1>, EVEX_NF; - def NAME#64rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi64, 1>, EVEX_NF; + def 8rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag, 1>; + def 16rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag, 1>, PD; + def 32rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag, 1>; + def 64rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag, 1>; + def 8rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi8, 1>, EVEX_NF; + def 16rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi16, 1>, EVEX_NF, PD; + def 32rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi32, 1>, EVEX_NF; + def 64rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi64, 1>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#8rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi8>, NF; - def NAME#16rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi16>, NF, PD; - def NAME#32rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi32>, NF; - def NAME#64rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi64>, NF; - def NAME#8rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi8>, NF; + def 16rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi16>, NF, PD; + def 32rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi32>, NF; + def 64rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi64>, NF; + def 8rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } } - def NAME#8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; - def NAME#32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; - def NAME#64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>; + def 8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>; + def 16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def 32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def 64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>; let Predicates = [In64BitMode] in { - def NAME#8rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; - def NAME#16rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; - def NAME#32rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; - def NAME#64rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; - def NAME#8rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; - def NAME#16rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; - def NAME#32rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; - def NAME#64rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; - def NAME#8rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8>, NF; - def NAME#16rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16>, NF, PD; - def NAME#32rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32>, NF; - def NAME#64rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64>, NF; - def NAME#8rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; - def NAME#16rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; - def NAME#32rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; - def NAME#64rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; + def 8rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; + def 16rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; + def 32rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; + def 64rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; + def 8rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; + def 16rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; + def 32rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; + def 64rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; + def 8rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8>, NF; + def 16rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16>, NF, PD; + def 32rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32>, NF; + def 64rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64>, NF; + def 8rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; + def 16rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; + def 32rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; + def 64rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; } let Predicates = [NoNDD] in { - def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; - def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16; - def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32; - def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; + def 8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; + def 16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16; + def 32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32; + def 64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag, 1>; - def NAME#16rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag, 1>, PD; - def NAME#32rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag, 1>; - def NAME#64rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag, 1>; - def NAME#8rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; - def NAME#16rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; - def NAME#32rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; - def NAME#64rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; + def 8rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag, 1>; + def 16rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag, 1>, PD; + def 32rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag, 1>; + def 64rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag, 1>; + def 8rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; + def 16rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; + def 32rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; + def 64rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#8rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi8>, NF; - def NAME#16rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi16>, NF, PD; - def NAME#32rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi32>, NF; - def NAME#64rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi64>, NF; - def NAME#8rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , null_frag>, PL; - def NAME#16rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, null_frag>, PL; - def NAME#64rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, null_frag>, PL; + def 8rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi8>, NF; + def 16rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi16>, NF, PD; + def 32rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi32>, NF; + def 64rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi64>, NF; + def 8rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , null_frag>, PL; + def 16rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, null_frag>, PL, PD; + def 32rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, null_frag>, PL; + def 64rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, null_frag>, PL; } let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { let Predicates = [NoNDD] in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def NAME#16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>; - def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; - def NAME#16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16; - def NAME#32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>; + def 16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def 32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def 64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>; + def 8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; + def 16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16; + def 32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32; + def 64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#16ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; - def NAME#32ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM, 1>; - def NAME#64ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM, 1>; - def NAME#8ri_ND : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM, 1>; - def NAME#16ri_ND : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM, 1>, PD; - def NAME#32ri_ND : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM, 1>; - def NAME#64ri32_ND: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM, 1>; - def NAME#16ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; - def NAME#32ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; - def NAME#64ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; - def NAME#8ri_NF_ND : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM, 1>, EVEX_NF; - def NAME#16ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; - def NAME#32ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; - def NAME#64ri32_NF_ND : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; + def 16ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; + def 32ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM, 1>; + def 64ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM, 1>; + def 8ri_ND : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM, 1>; + def 16ri_ND : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM, 1>, PD; + def 32ri_ND : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM, 1>; + def 64ri32_ND: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM, 1>; + def 16ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; + def 32ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; + def 64ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; + def 8ri_NF_ND : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM, 1>, EVEX_NF; + def 16ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; + def 32ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; + def 64ri32_NF_ND : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#16ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM>, NF, PD; - def NAME#32ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM>, NF; - def NAME#64ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM>, NF; - def NAME#8ri_NF : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM>, NF; - def NAME#16ri_NF : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM>, NF, PD; - def NAME#32ri_NF : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM>, NF; - def NAME#64ri32_NF : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM>, NF; - def NAME#16ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; - def NAME#32ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, PL; - def NAME#64ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>, PL; - def NAME#8ri_EVEX : BinOpRI_RF<0x80, mnemonic, Xi8 , null_frag, RegMRM>, PL; - def NAME#16ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi16, null_frag, RegMRM>, PL, PD; - def NAME#32ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi32, null_frag, RegMRM>, PL; - def NAME#64ri32_EVEX: BinOpRI_RF<0x81, mnemonic, Xi64, null_frag, RegMRM>, PL; + def 16ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM>, NF, PD; + def 32ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM>, NF; + def 64ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM>, NF; + def 8ri_NF : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM>, NF; + def 16ri_NF : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM>, NF, PD; + def 32ri_NF : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM>, NF; + def 64ri32_NF : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM>, NF; + def 16ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; + def 32ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, PL; + def 64ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>, PL; + def 8ri_EVEX : BinOpRI_RF<0x80, mnemonic, Xi8 , null_frag, RegMRM>, PL; + def 16ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi16, null_frag, RegMRM>, PL, PD; + def 32ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi32, null_frag, RegMRM>, PL; + def 64ri32_EVEX: BinOpRI_RF<0x81, mnemonic, Xi64, null_frag, RegMRM>, PL; } } - def NAME#8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#8mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; - def NAME#32mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi32, opnode>; - def NAME#64mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi64, opnode>; - def NAME#8mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi8>, EVEX_NF; - def NAME#16mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi16>, EVEX_NF, PD; - def NAME#32mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi32>, EVEX_NF; - def NAME#64mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi64>, EVEX_NF; + def 8mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; + def 32mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi32, opnode>; + def 64mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi8>, EVEX_NF; + def 16mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi16>, EVEX_NF, PD; + def 32mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi32>, EVEX_NF; + def 64mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi64>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#8mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi8>, NF; - def NAME#16mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi16>, NF, PD; - def NAME#32mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi32>, NF; - def NAME#64mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi64>, NF; - def NAME#8mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi8>, NF; + def 16mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi16>, NF, PD; + def 32mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi32>, NF; + def 64mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi64>, NF; + def 8mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. - def NAME#16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16; - def NAME#32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32; + def 16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16; + def 32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>; - def NAME#8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; - def NAME#32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + def 64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>; + def 8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def 32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#16mi8_ND : BinOpMI8_RF<mnemonic, Xi16, MemMRM>, PD; - def NAME#32mi8_ND : BinOpMI8_RF<mnemonic, Xi32, MemMRM>; - def NAME#64mi8_ND : BinOpMI8_RF<mnemonic, Xi64, MemMRM>; - def NAME#8mi_ND : BinOpMI_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi_ND : BinOpMI_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; - def NAME#32mi_ND : BinOpMI_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; - def NAME#64mi32_ND : BinOpMI_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; - def NAME#16mi8_NF_ND : BinOpMI8_R<mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi8_NF_ND : BinOpMI8_R<mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi8_NF_ND : BinOpMI8_R<mnemonic, Xi64, MemMRM>, NF; - def NAME#8mi_NF_ND : BinOpMI_R<0x80, mnemonic, Xi8, MemMRM>, NF; - def NAME#16mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi32_NF_ND : BinOpMI_R<0x81, mnemonic, Xi64, MemMRM>, NF; + def 16mi8_ND : BinOpMI8_RF<mnemonic, Xi16, MemMRM>, PD; + def 32mi8_ND : BinOpMI8_RF<mnemonic, Xi32, MemMRM>; + def 64mi8_ND : BinOpMI8_RF<mnemonic, Xi64, MemMRM>; + def 8mi_ND : BinOpMI_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi_ND : BinOpMI_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; + def 32mi_ND : BinOpMI_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; + def 64mi32_ND : BinOpMI_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 16mi8_NF_ND : BinOpMI8_R<mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi8_NF_ND : BinOpMI8_R<mnemonic, Xi32, MemMRM>, NF; + def 64mi8_NF_ND : BinOpMI8_R<mnemonic, Xi64, MemMRM>, NF; + def 8mi_NF_ND : BinOpMI_R<0x80, mnemonic, Xi8, MemMRM>, NF; + def 16mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi32, MemMRM>, NF; + def 64mi32_NF_ND : BinOpMI_R<0x81, mnemonic, Xi64, MemMRM>, NF; } let Predicates = [In64BitMode] in { - def NAME#16mi8_NF : BinOpMI8_M<mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi8_NF : BinOpMI8_M<mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi8_NF : BinOpMI8_M<mnemonic, Xi64, MemMRM>, NF; - def NAME#8mi_NF : BinOpMI_M<0x80, mnemonic, Xi8, MemMRM>, NF; - def NAME#16mi_NF : BinOpMI_M<0x81, mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi_NF : BinOpMI_M<0x81, mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi32_NF : BinOpMI_M<0x81, mnemonic, Xi64, MemMRM>, NF; - def NAME#16mi8_EVEX : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, PL, PD; - def NAME#32mi8_EVEX : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, PL; - def NAME#64mi8_EVEX : BinOpMI8_MF<mnemonic, Xi64, MemMRM>, PL; - def NAME#8mi_EVEX : BinOpMI_MF<0x80, mnemonic, Xi8 , null_frag, MemMRM>, PL; - def NAME#16mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi16, null_frag, MemMRM>, PL, PD; - def NAME#32mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi32, null_frag, MemMRM>, PL; - def NAME#64mi32_EVEX : BinOpMI_MF<0x81, mnemonic, Xi64, null_frag, MemMRM>, PL; + def 16mi8_NF : BinOpMI8_M<mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi8_NF : BinOpMI8_M<mnemonic, Xi32, MemMRM>, NF; + def 64mi8_NF : BinOpMI8_M<mnemonic, Xi64, MemMRM>, NF; + def 8mi_NF : BinOpMI_M<0x80, mnemonic, Xi8, MemMRM>, NF; + def 16mi_NF : BinOpMI_M<0x81, mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi_NF : BinOpMI_M<0x81, mnemonic, Xi32, MemMRM>, NF; + def 64mi32_NF : BinOpMI_M<0x81, mnemonic, Xi64, MemMRM>, NF; + def 16mi8_EVEX : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, PL, PD; + def 32mi8_EVEX : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, PL; + def 64mi8_EVEX : BinOpMI8_MF<mnemonic, Xi64, MemMRM>, PL; + def 8mi_EVEX : BinOpMI_MF<0x80, mnemonic, Xi8 , null_frag, MemMRM>, PL; + def 16mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi16, null_frag, MemMRM>, PL, PD; + def 32mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi32, null_frag, MemMRM>, PL; + def 64mi32_EVEX : BinOpMI_MF<0x81, mnemonic, Xi64, null_frag, MemMRM>, PL; } // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. let Predicates = [Not64BitMode] in { - def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; - def NAME#8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; + def 8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + def 8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; } - def NAME#8i8 : BinOpAI_AF<BaseOpc4, mnemonic, Xi8 , AL, + def 8i8 : BinOpAI_AF<BaseOpc4, mnemonic, Xi8 , AL, "{$src, %al|al, $src}">; - def NAME#16i16 : BinOpAI_AF<BaseOpc4, mnemonic, Xi16, AX, + def 16i16 : BinOpAI_AF<BaseOpc4, mnemonic, Xi16, AX, "{$src, %ax|ax, $src}">, OpSize16; - def NAME#32i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi32, EAX, + def 32i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi32, EAX, "{$src, %eax|eax, $src}">, OpSize32; - def NAME#64i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi64, RAX, + def 64i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi64, RAX, "{$src, %rax|rax, $src}">; } @@ -571,162 +571,162 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, bit ConvertibleToThreeAddress> { let isCommutable = CommutableRR in { let Predicates = [NoNDD] in { - def NAME#8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def 8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>; + def 16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>; } } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode, 1>; + def 8rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode, 1>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode, 1>, PD; - def NAME#32rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode, 1>; - def NAME#64rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode, 1>; + def 16rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode, 1>, PD; + def 32rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode, 1>; + def 64rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode, 1>; } } } // isCommutable let Predicates = [In64BitMode] in { - def NAME#8rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } - def NAME#8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; - def NAME#32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; - def NAME#64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>; + def 8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>; + def 16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def 32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def 64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>; let Predicates = [In64BitMode] in { - def NAME#8rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; - def NAME#16rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; - def NAME#32rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; - def NAME#64rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; - def NAME#8rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; - def NAME#16rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; - def NAME#32rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; - def NAME#64rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; + def 8rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; + def 16rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; + def 32rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; + def 64rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; + def 8rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; + def 16rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; + def 32rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; + def 64rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; } let Predicates = [NoNDD] in { - def NAME#8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>; - def NAME#16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>; + def 8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>; + def 16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; + def 32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; + def 64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode, 1>; - def NAME#16rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode, 1>, PD; - def NAME#32rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode, 1>; - def NAME#64rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode, 1>; + def 8rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode, 1>; + def 16rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode, 1>, PD; + def 32rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode, 1>; + def 64rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode, 1>; } let Predicates = [In64BitMode] in { - def NAME#8rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>, PL; - def NAME#16rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, PL, PD; - def NAME#32rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, PL; - def NAME#64rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>, PL; + def 8rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>, PL; + def 16rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, PL, PD; + def 32rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, PL; + def 64rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>, PL; } let Predicates = [NoNDD] in { - def NAME#8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def 8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def NAME#16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>; + def 16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def 32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def 64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>; - def NAME#16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; - def NAME#32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>; + def 16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; + def 32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; + def 64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>; } } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8ri_ND : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM, 1>; + def 8ri_ND : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM, 1>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; - def NAME#32ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM, 1>; - def NAME#64ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM, 1>; - def NAME#16ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM, 1>, PD; - def NAME#32ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM, 1>; - def NAME#64ri32_ND: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM, 1>; + def 16ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; + def 32ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM, 1>; + def 64ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM, 1>; + def 16ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM, 1>, PD; + def 32ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM, 1>; + def 64ri32_ND: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM, 1>; } } let Predicates = [In64BitMode] in { - def NAME#8ri_EVEX : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>, PL; - def NAME#16ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; - def NAME#32ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, PL; - def NAME#64ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>, PL; - def NAME#16ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, PL, PD; - def NAME#32ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, PL; - def NAME#64ri32_EVEX: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>, PL; + def 8ri_EVEX : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>, PL; + def 16ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; + def 32ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, PL; + def 64ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>, PL; + def 16ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, PL, PD; + def 32ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, PL; + def 64ri32_EVEX: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>, PL; } - def NAME#8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, opnode>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#8mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; - def NAME#32mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi32, opnode>; - def NAME#64mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; + def 32mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi32, opnode>; + def 64mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi64, opnode>; } let Predicates = [In64BitMode] in { - def NAME#8mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. - def NAME#8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16; - def NAME#32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32; + def 8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16; + def 32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>; - def NAME#16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; - def NAME#32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + def 64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>; + def 16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def 32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#8mi_ND : BinOpMIF_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi8_ND : BinOpMI8F_RF<mnemonic, Xi16, MemMRM>, PD; - def NAME#32mi8_ND : BinOpMI8F_RF<mnemonic, Xi32, MemMRM>; - def NAME#64mi8_ND : BinOpMI8F_RF<mnemonic, Xi64, MemMRM>; - def NAME#16mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; - def NAME#32mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; - def NAME#64mi32_ND : BinOpMIF_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 8mi_ND : BinOpMIF_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi8_ND : BinOpMI8F_RF<mnemonic, Xi16, MemMRM>, PD; + def 32mi8_ND : BinOpMI8F_RF<mnemonic, Xi32, MemMRM>; + def 64mi8_ND : BinOpMI8F_RF<mnemonic, Xi64, MemMRM>; + def 16mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; + def 32mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; + def 64mi32_ND : BinOpMIF_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; } let Predicates = [In64BitMode] in { - def NAME#8mi_EVEX : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>, PL; - def NAME#16mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, PL, PD; - def NAME#32mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, PL; - def NAME#64mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>, PL; - def NAME#16mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, PL, PD; - def NAME#32mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, PL; - def NAME#64mi32_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>, PL; + def 8mi_EVEX : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>, PL; + def 16mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, PL, PD; + def 32mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, PL; + def 64mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>, PL; + def 16mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, PL, PD; + def 32mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, PL; + def 64mi32_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>, PL; } // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. let Predicates = [Not64BitMode] in { - def NAME#8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; - def NAME#8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; + def 8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + def 8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; } - def NAME#8i8 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi8 , AL, + def 8i8 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi8 , AL, "{$src, %al|al, $src}">; - def NAME#16i16 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi16, AX, + def 16i16 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi16, AX, "{$src, %ax|ax, $src}">, OpSize16; - def NAME#32i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi32, EAX, + def 32i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi32, EAX, "{$src, %eax|eax, $src}">, OpSize32; - def NAME#64i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi64, RAX, + def 64i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi64, RAX, "{$src, %rax|rax, $src}">; } @@ -739,71 +739,71 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, SDNode opnode, bit CommutableRR, bit ConvertibleToThreeAddress> { let isCommutable = CommutableRR in { - def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def 8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; + def 16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; } // isConvertibleToThreeAddress } // isCommutable - def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; - def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; - def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>; + def 8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>; + def 16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def 32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def 64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>; - def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; - def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>; + def 8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; + def 16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; + def 32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; + def 64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>; - def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def 8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def NAME#16ri8 : BinOpRI8_F<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8_F<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8_F<0x83, mnemonic, Xi64, RegMRM>; + def 16ri8 : BinOpRI8_F<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def 32ri8 : BinOpRI8_F<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def 64ri8 : BinOpRI8_F<0x83, mnemonic, Xi64, RegMRM>; - def NAME#16ri : BinOpRI_F<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; - def NAME#32ri : BinOpRI_F<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRI_F<0x81, mnemonic, Xi64, opnode, RegMRM>; + def 16ri : BinOpRI_F<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; + def 32ri : BinOpRI_F<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; + def 64ri32: BinOpRI_F<0x81, mnemonic, Xi64, opnode, RegMRM>; } - def NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>; // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. - def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>, OpSize16; - def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>, OpSize32; + def 16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>, OpSize16; + def 32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>; + def 64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>; - def NAME#8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi : BinOpMI_F<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; - def NAME#32mi : BinOpMI_F<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + def 8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi : BinOpMI_F<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def 32mi : BinOpMI_F<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMI_F<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 64mi32 : BinOpMI_F<0x81, mnemonic, Xi64, opnode, MemMRM>; // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. let Predicates = [Not64BitMode] in { - def NAME#8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + def 8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; let mayLoad = 1 in - def NAME#8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>; + def 8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>; } - def NAME#8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL, + def 8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL, "{$src, %al|al, $src}">; - def NAME#16i16 : BinOpAI_F<BaseOpc4, mnemonic, Xi16, AX, + def 16i16 : BinOpAI_F<BaseOpc4, mnemonic, Xi16, AX, "{$src, %ax|ax, $src}">, OpSize16; - def NAME#32i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi32, EAX, + def 32i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi32, EAX, "{$src, %eax|eax, $src}">, OpSize32; - def NAME#64i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi64, RAX, + def 64i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi64, RAX, "{$src, %rax|rax, $src}">; } diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td index 305bd74..772ed2a 100644 --- a/llvm/lib/Target/X86/X86InstrMisc.td +++ b/llvm/lib/Target/X86/X86InstrMisc.td @@ -1212,36 +1212,33 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in { (implicit EFLAGS)]>, TB, XS, Sched<[WriteTZCNTLd]>; } -multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM, - RegisterClass RC, X86MemOperand x86memop, - X86FoldableSchedWrite sched, string Suffix = ""> { -let hasSideEffects = 0 in { - def rr#Suffix : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src), - !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8, VEX, VVVV, Sched<[sched]>; - let mayLoad = 1 in - def rm#Suffix : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src), - !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8, VEX, VVVV, Sched<[sched.Folded]>; -} +multiclass Bls<string m, Format RegMRM, Format MemMRM, X86TypeInfo t, string Suffix = ""> { + let SchedRW = [WriteBLS] in { + def rr#Suffix : UnaryOpR<0xF3, RegMRM, m, unaryop_ndd_args, t, + (outs t.RegClass:$dst), []>, T8, VVVV; + } + + let SchedRW = [WriteBLS.Folded] in + def rm#Suffix : UnaryOpM<0xF3, MemMRM, m, unaryop_ndd_args, t, + (outs t.RegClass:$dst), []>, T8, VVVV; } -let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in { - defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>; - defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, REX_W; - defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS>; - defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS>, REX_W; - defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS>; - defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, REX_W; +let Predicates = [HasBMI], Defs = [EFLAGS] in { + defm BLSR32 : Bls<"blsr", MRM1r, MRM1m, Xi32>, VEX; + defm BLSR64 : Bls<"blsr", MRM1r, MRM1m, Xi64>, VEX; + defm BLSMSK32 : Bls<"blsmsk", MRM2r, MRM2m, Xi32>, VEX; + defm BLSMSK64 : Bls<"blsmsk", MRM2r, MRM2m, Xi64>, VEX; + defm BLSI32 : Bls<"blsi", MRM3r, MRM3m, Xi32>, VEX; + defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64>, VEX; } -let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in { - defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; - defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; - defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; - defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; - defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; - defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; +let Predicates = [HasBMI, In64BitMode], Defs = [EFLAGS] in { + defm BLSR32 : Bls<"blsr", MRM1r, MRM1m, Xi32, "_EVEX">, EVEX; + defm BLSR64 : Bls<"blsr", MRM1r, MRM1m, Xi64, "_EVEX">, EVEX; + defm BLSMSK32 : Bls<"blsmsk", MRM2r, MRM2m, Xi32, "_EVEX">, EVEX; + defm BLSMSK64 : Bls<"blsmsk", MRM2r, MRM2m, Xi64, "_EVEX">, EVEX; + defm BLSI32 : Bls<"blsi", MRM3r, MRM3m, Xi32, "_EVEX">, EVEX; + defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64, "_EVEX">, EVEX; } let Predicates = [HasBMI] in { @@ -1281,50 +1278,35 @@ let Predicates = [HasBMI] in { (BLSI64rr GR64:$src)>; } -multiclass bmi4VOp3_base<bits<8> opc, string mnemonic, RegisterClass RC, - X86MemOperand x86memop, SDPatternOperator OpNode, - PatFrag ld_frag, X86FoldableSchedWrite Sched, - string Suffix = ""> { - def rr#Suffix : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>, - T8, VEX, Sched<[Sched]>; -let mayLoad = 1 in - def rm#Suffix : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)), - (implicit EFLAGS)]>, T8, VEX, - Sched<[Sched.Folded, - // x86memop:$src1 - ReadDefault, ReadDefault, ReadDefault, ReadDefault, - ReadDefault, - // RC:$src2 - Sched.ReadAfterFold]>; +multiclass Bmi4VOp3<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, + X86FoldableSchedWrite sched, string Suffix = ""> { + let SchedRW = [sched], Form = MRMSrcReg4VOp3 in + def rr#Suffix : BinOpRR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.RegClass:$src2))]>, T8; + let SchedRW = [sched.Folded, + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + sched.ReadAfterFold], Form = MRMSrcMem4VOp3 in + def rm#Suffix : BinOpMR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1), + t.RegClass:$src2))]>, T8; } let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in { - defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem, - X86bextr, loadi32, WriteBEXTR>; - defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem, - X86bextr, loadi64, WriteBEXTR>, REX_W; + defm BEXTR32 : Bmi4VOp3<0xF7, "bextr", Xi32, X86bextr, WriteBEXTR>, VEX; + defm BEXTR64 : Bmi4VOp3<0xF7, "bextr", Xi64, X86bextr, WriteBEXTR>, VEX; } let Predicates = [HasBMI2, NoEGPR], Defs = [EFLAGS] in { - defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem, - X86bzhi, loadi32, WriteBZHI>; - defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem, - X86bzhi, loadi64, WriteBZHI>, REX_W; -} -let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in { - defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem, - X86bextr, loadi32, WriteBEXTR, "_EVEX">, EVEX; - defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem, - X86bextr, loadi64, WriteBEXTR, "_EVEX">, EVEX, REX_W; -} -let Predicates = [HasBMI2, HasEGPR], Defs = [EFLAGS] in { - defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem, - X86bzhi, loadi32, WriteBZHI, "_EVEX">, EVEX; - defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem, - X86bzhi, loadi64, WriteBZHI, "_EVEX">, EVEX, REX_W; + defm BZHI32 : Bmi4VOp3<0xF5, "bzhi", Xi32, X86bzhi, WriteBZHI>, VEX; + defm BZHI64 : Bmi4VOp3<0xF5, "bzhi", Xi64, X86bzhi, WriteBZHI>, VEX; +} +let Predicates = [HasBMI, HasEGPR, In64BitMode], Defs = [EFLAGS] in { + defm BEXTR32 : Bmi4VOp3<0xF7, "bextr", Xi32, X86bextr, WriteBEXTR, "_EVEX">, EVEX; + defm BEXTR64 : Bmi4VOp3<0xF7, "bextr", Xi64, X86bextr, WriteBEXTR, "_EVEX">, EVEX; +} +let Predicates = [HasBMI2, HasEGPR, In64BitMode], Defs = [EFLAGS] in { + defm BZHI32 : Bmi4VOp3<0xF5, "bzhi", Xi32, X86bzhi, WriteBZHI, "_EVEX">, EVEX; + defm BZHI64 : Bmi4VOp3<0xF5, "bzhi", Xi64, X86bzhi, WriteBZHI, "_EVEX">, EVEX; } def CountTrailingOnes : SDNodeXForm<imm, [{ diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 2e08c7b..32941c0 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1524,7 +1524,8 @@ StringRef sys::getHostCPUName() { // Use processor id to detect cpu name. uint32_t processor_id; __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); - switch (processor_id & 0xff00) { + // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h. + switch (processor_id & 0xf000) { case 0xc000: // Loongson 64bit, 4-issue return "la464"; // TODO: Others. diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 529f730..89a1ad2 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -2953,6 +2953,9 @@ void coro::salvageDebugInfo( std::optional<BasicBlock::iterator> InsertPt; if (auto *I = dyn_cast<Instruction>(Storage)) { InsertPt = I->getInsertionPointAfterDef(); + // Update DILocation only in O0 since it is easy to get out of sync in + // optimizations. See https://github.com/llvm/llvm-project/pull/75104 for + // an example. if (!OptimizeFrame && I->getDebugLoc()) DVI.setDebugLoc(I->getDebugLoc()); } else if (isa<Argument>(Storage)) @@ -2988,9 +2991,14 @@ void coro::salvageDebugInfo( // dbg.declare does. if (DPV.getType() == DPValue::LocationType::Declare) { std::optional<BasicBlock::iterator> InsertPt; - if (auto *I = dyn_cast<Instruction>(Storage)) + if (auto *I = dyn_cast<Instruction>(Storage)) { InsertPt = I->getInsertionPointAfterDef(); - else if (isa<Argument>(Storage)) + // Update DILocation only in O0 since it is easy to get out of sync in + // optimizations. See https://github.com/llvm/llvm-project/pull/75104 for + // an example. + if (!OptimizeFrame && I->getDebugLoc()) + DPV.setDebugLoc(I->getDebugLoc()); + } else if (isa<Argument>(Storage)) InsertPt = F->getEntryBlock().begin(); if (InsertPt) { DPV.removeFromParent(); diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 49ac1e9..cc93c86 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -366,6 +366,13 @@ struct Decomposition { append_range(Vars, Other.Vars); } + void sub(const Decomposition &Other) { + Decomposition Tmp = Other; + Tmp.mul(-1); + add(Tmp.Offset); + append_range(Vars, Tmp.Vars); + } + void mul(int64_t Factor) { Offset = multiplyWithOverflow(Offset, Factor); for (auto &Var : Vars) @@ -569,10 +576,12 @@ static Decomposition decompose(Value *V, return Result; } - if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))) && canUseSExt(CI)) - return {-1 * CI->getSExtValue(), {{1, Op0}}}; - if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) - return {0, {{1, Op0}, {-1, Op1}}}; + if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) { + auto ResA = decompose(Op0, Preconditions, IsSigned, DL); + auto ResB = decompose(Op1, Preconditions, IsSigned, DL); + ResA.sub(ResB); + return ResA; + } return {V, IsKnownNonNegative}; } @@ -635,7 +644,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1, // First try to look up \p V in Value2Index and NewVariables. Otherwise add a // new entry to NewVariables. - DenseMap<Value *, unsigned> NewIndexMap; + SmallDenseMap<Value *, unsigned> NewIndexMap; auto GetOrAddIndex = [&Value2Index, &NewVariables, &NewIndexMap](Value *V) -> unsigned { auto V2I = Value2Index.find(V); @@ -659,7 +668,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1, IsSigned, IsEq, IsNe); // Collect variables that are known to be positive in all uses in the // constraint. - DenseMap<Value *, bool> KnownNonNegativeVariables; + SmallDenseMap<Value *, bool> KnownNonNegativeVariables; auto &R = Res.Coefficients; for (const auto &KV : VariablesA) { R[GetOrAddIndex(KV.Variable)] += KV.Coefficient; diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp index 0990c75..ea31356 100644 --- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp +++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp @@ -33,37 +33,37 @@ STATISTIC(NumVFDeclAdded, STATISTIC(NumCompUsedAdded, "Number of `@llvm.compiler.used` operands that have been added."); -/// A helper function that adds the vector function declaration that -/// vectorizes the CallInst CI with a vectorization factor of VF -/// lanes. The TLI assumes that all parameters and the return type of -/// CI (other than void) need to be widened to a VectorType of VF -/// lanes. +/// A helper function that adds the vector variant declaration for vectorizing +/// the CallInst \p CI with a vectorization factor of \p VF lanes. For each +/// mapping, TLI provides a VABI prefix, which contains all information required +/// to create vector function declaration. static void addVariantDeclaration(CallInst &CI, const ElementCount &VF, - bool Predicate, const StringRef VFName) { + const VecDesc *VD) { Module *M = CI.getModule(); + FunctionType *ScalarFTy = CI.getFunctionType(); - // Add function declaration. - Type *RetTy = ToVectorTy(CI.getType(), VF); - SmallVector<Type *, 4> Tys; - for (Value *ArgOperand : CI.args()) - Tys.push_back(ToVectorTy(ArgOperand->getType(), VF)); - assert(!CI.getFunctionType()->isVarArg() && - "VarArg functions are not supported."); - if (Predicate) - Tys.push_back(ToVectorTy(Type::getInt1Ty(RetTy->getContext()), VF)); - FunctionType *FTy = FunctionType::get(RetTy, Tys, /*isVarArg=*/false); - Function *VectorF = - Function::Create(FTy, Function::ExternalLinkage, VFName, M); - VectorF->copyAttributesFrom(CI.getCalledFunction()); + assert(!ScalarFTy->isVarArg() && "VarArg functions are not supported."); + + const std::optional<VFInfo> Info = VFABI::tryDemangleForVFABI( + VD->getVectorFunctionABIVariantString(), ScalarFTy); + + assert(Info && "Failed to demangle vector variant"); + assert(Info->Shape.VF == VF && "Mangled name does not match VF"); + + const StringRef VFName = VD->getVectorFnName(); + FunctionType *VectorFTy = VFABI::createFunctionType(*Info, ScalarFTy); + Function *VecFunc = + Function::Create(VectorFTy, Function::ExternalLinkage, VFName, M); + VecFunc->copyAttributesFrom(CI.getCalledFunction()); ++NumVFDeclAdded; LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added to the module: `" << VFName - << "` of type " << *(VectorF->getType()) << "\n"); + << "` of type " << *VectorFTy << "\n"); // Make function declaration (without a body) "sticky" in the IR by // listing it in the @llvm.compiler.used intrinsic. - assert(!VectorF->size() && "VFABI attribute requires `@llvm.compiler.used` " + assert(!VecFunc->size() && "VFABI attribute requires `@llvm.compiler.used` " "only on declarations."); - appendToCompilerUsed(*M, {VectorF}); + appendToCompilerUsed(*M, {VecFunc}); LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << VFName << "` to `@llvm.compiler.used`.\n"); ++NumCompUsedAdded; @@ -100,7 +100,7 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { } Function *VariantF = M->getFunction(VD->getVectorFnName()); if (!VariantF) - addVariantDeclaration(CI, VF, Predicate, VD->getVectorFnName()); + addVariantDeclaration(CI, VF, VD); } }; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 55e3756..61d891d 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5414,11 +5414,13 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { } static void createUnreachableSwitchDefault(SwitchInst *Switch, - DomTreeUpdater *DTU) { + DomTreeUpdater *DTU, + bool RemoveOrigDefaultBlock = true) { LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); auto *BB = Switch->getParent(); auto *OrigDefaultBlock = Switch->getDefaultDest(); - OrigDefaultBlock->removePredecessor(BB); + if (RemoveOrigDefaultBlock) + OrigDefaultBlock->removePredecessor(BB); BasicBlock *NewDefaultBlock = BasicBlock::Create( BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(), OrigDefaultBlock); @@ -5427,7 +5429,8 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch, if (DTU) { SmallVector<DominatorTree::UpdateType, 2> Updates; Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock}); - if (!is_contained(successors(BB), OrigDefaultBlock)) + if (RemoveOrigDefaultBlock && + !is_contained(successors(BB), OrigDefaultBlock)) Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock}); DTU->applyUpdates(Updates); } @@ -5609,10 +5612,28 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, Known.getBitWidth() - (Known.Zero | Known.One).popcount(); assert(NumUnknownBits <= Known.getBitWidth()); if (HasDefault && DeadCases.empty() && - NumUnknownBits < 64 /* avoid overflow */ && - SI->getNumCases() == (1ULL << NumUnknownBits)) { - createUnreachableSwitchDefault(SI, DTU); - return true; + NumUnknownBits < 64 /* avoid overflow */) { + uint64_t AllNumCases = 1ULL << NumUnknownBits; + if (SI->getNumCases() == AllNumCases) { + createUnreachableSwitchDefault(SI, DTU); + return true; + } + // When only one case value is missing, replace default with that case. + // Eliminating the default branch will provide more opportunities for + // optimization, such as lookup tables. + if (SI->getNumCases() == AllNumCases - 1) { + assert(NumUnknownBits > 1 && "Should be canonicalized to a branch"); + uint64_t MissingCaseVal = 0; + for (const auto &Case : SI->cases()) + MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue(); + auto *MissingCase = + cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal)); + SwitchInstProfUpdateWrapper SIW(*SI); + SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0)); + createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false); + SIW.setSuccessorWeight(0, 0); + return true; + } } if (DeadCases.empty()) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 577ce80..150ab30 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -167,9 +167,14 @@ public: } VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, - DebugLoc DL, const Twine &Name = "") { - return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL, - Name); + DebugLoc DL, const Twine &Name = "", + std::optional<FastMathFlags> FMFs = std::nullopt) { + auto *Select = + FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, + *FMFs, DL, Name) + : new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, + DL, Name); + return tryInsertInstruction(Select); } /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 8e135d8..f5f0461 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9141,7 +9141,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( continue; const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - auto *Result = PhiR->getBackedgeValue()->getDefiningRecipe(); + auto *NewExitingVPV = PhiR->getBackedgeValue(); // If tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the beginning of the // dedicated latch block. @@ -9151,21 +9151,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( VPValue *Red = PhiR->getBackedgeValue(); assert(Red->getDefiningRecipe()->getParent() != LatchVPBB && "reduction recipe must be defined before latch"); - FastMathFlags FMFs = RdxDesc.getFastMathFlags(); Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType(); - Result = + std::optional<FastMathFlags> FMFs = PhiTy->isFloatingPointTy() - ? new VPInstruction(Instruction::Select, {Cond, Red, PhiR}, FMFs) - : new VPInstruction(Instruction::Select, {Cond, Red, PhiR}); - Result->insertBefore(&*Builder.getInsertPoint()); - Red->replaceUsesWithIf( - Result->getVPSingleValue(), - [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); }); + ? std::make_optional(RdxDesc.getFastMathFlags()) + : std::nullopt; + NewExitingVPV = Builder.createSelect(Cond, Red, PhiR, {}, "", FMFs); + Red->replaceUsesWithIf(NewExitingVPV, [](VPUser &U, unsigned) { + return isa<VPLiveOut>(&U); + }); if (PreferPredicatedReductionSelect || TTI.preferPredicatedReductionSelect( PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy, TargetTransformInfo::ReductionFlags())) - PhiR->setOperand(1, Result->getVPSingleValue()); + PhiR->setOperand(1, NewExitingVPV); } // If the vector reduction can be performed in a smaller type, we truncate // then extend the loop exit value to enable InstCombine to evaluate the @@ -9174,17 +9173,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); Type *RdxTy = RdxDesc.getRecurrenceType(); - auto *Trunc = new VPWidenCastRecipe(Instruction::Trunc, - Result->getVPSingleValue(), RdxTy); + auto *Trunc = + new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy); auto *Extnd = RdxDesc.isSigned() ? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy) : new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy); - Trunc->insertAfter(Result); + Trunc->insertAfter(NewExitingVPV->getDefiningRecipe()); Extnd->insertAfter(Trunc); - Result->getVPSingleValue()->replaceAllUsesWith(Extnd); - Trunc->setOperand(0, Result->getVPSingleValue()); + NewExitingVPV->replaceAllUsesWith(Extnd); + Trunc->setOperand(0, NewExitingVPV); } } diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 30499152..bd89ec0 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11139,6 +11139,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { case Instruction::ExtractElement: { Value *V = E->getSingleOperand(0); + if (const TreeEntry *TE = getTreeEntry(V)) + V = TE->VectorizedValue; setInsertPointAfterBundle(E); V = FinalShuffle(V, E, VecTy, IsSigned); E->VectorizedValue = V; |