author    Andrés Villegas <andresvi@google.com>  2024-01-03 23:05:20 +0000
committer Andrés Villegas <andresvi@google.com>  2024-01-03 23:05:20 +0000
commit    e99fdd060baf7ea196f9b9e531b58e5d8489f5fd (patch)
tree      56305609013119524612245bd222dac094de4f68 /llvm/lib
parent    d242f164d69ec606db9418c02c9588bffa429928 (diff)
parent    51113244836be55b3d2f181c0f88043b5967eb61 (diff)
Created using spr 1.3.5
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/AssumptionCache.cpp | 13
-rw-r--r--  llvm/lib/Analysis/BasicAliasAnalysis.cpp | 39
-rw-r--r--  llvm/lib/Analysis/ConstraintSystem.cpp | 4
-rw-r--r--  llvm/lib/Analysis/InstructionSimplify.cpp | 12
-rw-r--r--  llvm/lib/Analysis/LoopInfo.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 114
-rw-r--r--  llvm/lib/CodeGen/RegisterCoalescer.cpp | 51
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 23
-rw-r--r--  llvm/lib/Demangle/Demangle.cpp | 5
-rw-r--r--  llvm/lib/Demangle/ItaniumDemangle.cpp | 4
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp | 23
-rw-r--r--  llvm/lib/IR/DebugInfo.cpp | 4
-rw-r--r--  llvm/lib/LTO/LTO.cpp | 36
-rw-r--r--  llvm/lib/MC/MCExpr.cpp | 6
-rw-r--r--  llvm/lib/Passes/PassBuilder.cpp | 5
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FastISel.cpp | 9
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrFormats.td | 1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td | 2
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 18
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h | 3
-rw-r--r--  llvm/lib/Target/AArch64/SVEInstrFormats.td | 12
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 8
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 16
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 16
-rw-r--r--  llvm/lib/Target/AMDGPU/DSDIRInstructions.td | 191
-rw-r--r--  llvm/lib/Target/AMDGPU/LDSDIRInstructions.td | 116
-rw-r--r--  llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/SMInstructions.td | 25
-rw-r--r--  llvm/lib/Target/ARM/ARMLegalizerInfo.cpp | 10
-rw-r--r--  llvm/lib/Target/ARM/ARMLegalizerInfo.h | 3
-rw-r--r--  llvm/lib/Target/BPF/BPFTargetMachine.cpp | 3
-rw-r--r--  llvm/lib/Target/BPF/BPFTargetMachine.h | 3
-rw-r--r--  llvm/lib/Target/DirectX/DXILResourceAnalysis.h | 1
-rw-r--r--  llvm/lib/Target/DirectX/DirectXTargetMachine.cpp | 3
-rw-r--r--  llvm/lib/Target/DirectX/DirectXTargetMachine.h | 3
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp | 3
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonTargetMachine.h | 3
-rw-r--r--  llvm/lib/Target/Mips/MipsLegalizerInfo.cpp | 5
-rw-r--r--  llvm/lib/Target/Mips/MipsLegalizerInfo.h | 3
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 3
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXTargetMachine.h | 3
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 5
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 222
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp | 5
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h | 3
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 49
-rw-r--r--  llvm/lib/Target/X86/X86InstrArithmetic.td | 576
-rw-r--r--  llvm/lib/Target/X86/X86InstrMisc.td | 112
-rw-r--r--  llvm/lib/TargetParser/Host.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 12
-rw-r--r--  llvm/lib/Transforms/Scalar/ConstraintElimination.cpp | 21
-rw-r--r--  llvm/lib/Transforms/Utils/InjectTLIMappings.cpp | 46
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 35
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h | 11
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 29
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2
69 files changed, 1099 insertions(+), 902 deletions(-)
diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp
index fb3a6f8..1b7277d 100644
--- a/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/llvm/lib/Analysis/AssumptionCache.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -77,9 +78,15 @@ findAffectedValues(CallBase *CI, TargetTransformInfo *TTI,
};
for (unsigned Idx = 0; Idx != CI->getNumOperandBundles(); Idx++) {
- if (CI->getOperandBundleAt(Idx).Inputs.size() > ABA_WasOn &&
- CI->getOperandBundleAt(Idx).getTagName() != IgnoreBundleTag)
- AddAffected(CI->getOperandBundleAt(Idx).Inputs[ABA_WasOn], Idx);
+ OperandBundleUse Bundle = CI->getOperandBundleAt(Idx);
+ if (Bundle.getTagName() == "separate_storage") {
+ assert(Bundle.Inputs.size() == 2 &&
+ "separate_storage must have two args");
+ AddAffected(getUnderlyingObject(Bundle.Inputs[0]), Idx);
+ AddAffected(getUnderlyingObject(Bundle.Inputs[1]), Idx);
+ } else if (Bundle.Inputs.size() > ABA_WasOn &&
+ Bundle.getTagName() != IgnoreBundleTag)
+ AddAffected(Bundle.Inputs[ABA_WasOn], Idx);
}
Value *Cond = CI->getArgOperand(0), *A, *B;
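The new branch indexes "separate_storage" assumptions by the underlying objects of both pointer hints, so BasicAA can later find them via assumptionsFor() (next hunk). As a hedged sketch of what produces such an assumption — the helper name is hypothetical; CreateAssumption and OperandBundleDef are the standard IRBuilder/IR APIs:

```cpp
#include "llvm/IR/IRBuilder.h"
#include <vector>

// Emits: call void @llvm.assume(i1 true) ["separate_storage"(ptr %p, ptr %q)]
static void emitSeparateStorageHint(llvm::IRBuilder<> &B, llvm::Value *P,
                                    llvm::Value *Q) {
  B.CreateAssumption(B.getTrue(),
                     {llvm::OperandBundleDef(
                         "separate_storage", std::vector<llvm::Value *>{P, Q})});
}
```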
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 3de1473..97f60d2 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -69,7 +69,7 @@ static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden,
cl::init(true));
static cl::opt<bool> EnableSeparateStorageAnalysis("basic-aa-separate-storage",
- cl::Hidden, cl::init(false));
+ cl::Hidden, cl::init(true));
/// SearchLimitReached / SearchTimes shows how often the limit on the number
/// of GEP decompositions is reached. It will affect the precision
@@ -1544,28 +1544,25 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
return AliasResult::NoAlias;
if (CtxI && EnableSeparateStorageAnalysis) {
- for (auto &AssumeVH : AC.assumptions()) {
- if (!AssumeVH)
+ for (AssumptionCache::ResultElem &Elem : AC.assumptionsFor(O1)) {
+ if (!Elem || Elem.Index == AssumptionCache::ExprResultIdx)
continue;
- AssumeInst *Assume = cast<AssumeInst>(AssumeVH);
-
- for (unsigned Idx = 0; Idx < Assume->getNumOperandBundles(); Idx++) {
- OperandBundleUse OBU = Assume->getOperandBundleAt(Idx);
- if (OBU.getTagName() == "separate_storage") {
- assert(OBU.Inputs.size() == 2);
- const Value *Hint1 = OBU.Inputs[0].get();
- const Value *Hint2 = OBU.Inputs[1].get();
- // This is often a no-op; instcombine rewrites this for us. No-op
- // getUnderlyingObject calls are fast, though.
- const Value *HintO1 = getUnderlyingObject(Hint1);
- const Value *HintO2 = getUnderlyingObject(Hint2);
-
- if (((O1 == HintO1 && O2 == HintO2) ||
- (O1 == HintO2 && O2 == HintO1)) &&
- isValidAssumeForContext(Assume, CtxI, DT))
- return AliasResult::NoAlias;
- }
+ AssumeInst *Assume = cast<AssumeInst>(Elem);
+ OperandBundleUse OBU = Assume->getOperandBundleAt(Elem.Index);
+ if (OBU.getTagName() == "separate_storage") {
+ assert(OBU.Inputs.size() == 2);
+ const Value *Hint1 = OBU.Inputs[0].get();
+ const Value *Hint2 = OBU.Inputs[1].get();
+ // This is often a no-op; instcombine rewrites this for us. No-op
+ // getUnderlyingObject calls are fast, though.
+ const Value *HintO1 = getUnderlyingObject(Hint1);
+ const Value *HintO2 = getUnderlyingObject(Hint2);
+
+ if (((O1 == HintO1 && O2 == HintO2) ||
+ (O1 == HintO2 && O2 == HintO1)) &&
+ isValidAssumeForContext(Assume, CtxI, DT))
+ return AliasResult::NoAlias;
}
}
}
diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp
index 35bdd86..1a9c7c21 100644
--- a/llvm/lib/Analysis/ConstraintSystem.cpp
+++ b/llvm/lib/Analysis/ConstraintSystem.cpp
@@ -95,14 +95,14 @@ bool ConstraintSystem::eliminateUsingFM() {
IdxUpper++;
}
- if (MulOverflow(UpperV, ((-1) * LowerLast), M1))
+ if (MulOverflow(UpperV, -1 * LowerLast, M1))
return false;
if (IdxLower < LowerRow.size() && LowerRow[IdxLower].Id == CurrentId) {
LowerV = LowerRow[IdxLower].Coefficient;
IdxLower++;
}
- if (MulOverflow(LowerV, (UpperLast), M2))
+ if (MulOverflow(LowerV, UpperLast, M2))
return false;
if (AddOverflow(M1, M2, N))
return false;
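For context, llvm::MulOverflow and llvm::AddOverflow (llvm/Support/MathExtras.h) write the result through their out-parameter and return true iff the operation overflowed, which is why each call guards a `return false`. A plain-C++ sketch of the same checked-arithmetic pattern using the compiler builtins they are equivalent to:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  int64_t M1 = 0;
  // Analogous to: if (MulOverflow(UpperV, -1 * LowerLast, M1)) return false;
  if (__builtin_mul_overflow(INT64_MAX, int64_t{2}, &M1))
    std::puts("product overflows int64_t: eliminateUsingFM() would bail out");
  return 0;
}
```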
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 78a8334..241bdd8 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -2204,6 +2204,13 @@ static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op1, m_c_Xor(m_Specific(Or), m_Specific(Y))))
return Constant::getNullValue(Op0->getType());
+ const APInt *C1;
+ Value *A;
+ // (A ^ C) & (A ^ ~C) -> 0
+ if (match(Op0, m_Xor(m_Value(A), m_APInt(C1))) &&
+ match(Op1, m_Xor(m_Specific(A), m_SpecificInt(~*C1))))
+ return Constant::getNullValue(Op0->getType());
+
if (Op0->getType()->isIntOrIntVectorTy(1)) {
if (std::optional<bool> Implied = isImpliedCondition(Op0, Op1, Q.DL)) {
// If Op0 is true implies Op1 is true, then Op0 is a subset of Op1.
@@ -2473,6 +2480,11 @@ static Value *simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (Value *V = threadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
return V;
+ // (A ^ C) | (A ^ ~C) -> -1, i.e. all bits set to one.
+ if (match(Op0, m_Xor(m_Value(A), m_APInt(C1))) &&
+ match(Op1, m_Xor(m_Specific(A), m_SpecificInt(~*C1))))
+ return Constant::getAllOnesValue(Op0->getType());
+
if (Op0->getType()->isIntOrIntVectorTy(1)) {
if (std::optional<bool> Implied =
isImpliedCondition(Op0, Op1, Q.DL, false)) {
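Both folds rest on the same fact: X ^ C and X ^ ~C are bitwise complements, so AND-ing them yields 0 and OR-ing them yields all ones. A quick plain-C++ check of the identities on concrete values (illustrative only):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t A = 0xDEADBEEFu, C = 0x12345678u;
  assert(((A ^ C) & (A ^ ~C)) == 0u);  // (A ^ C) & (A ^ ~C) -> 0
  assert(((A ^ C) | (A ^ ~C)) == ~0u); // (A ^ C) | (A ^ ~C) -> -1
  return 0;
}
```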
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 87ddfe3..59c96a3 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -969,7 +969,9 @@ LoopInfo LoopAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
PreservedAnalyses LoopPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
- AM.getResult<LoopAnalysis>(F).print(OS);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ OS << "Loop info for function '" << F.getName() << "':\n";
+ LI.print(OS);
return PreservedAnalyses::all();
}
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 37e7153..7522353 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -149,7 +149,8 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
case Custom:
LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
- return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
+ return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
+ : UnableToLegalize;
default:
LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
return UnableToLegalize;
@@ -567,7 +568,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
-static bool isLibCallInTailPosition(MachineInstr &MI,
+static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
+ MachineInstr &MI,
const TargetInstrInfo &TII,
MachineRegisterInfo &MRI) {
MachineBasicBlock &MBB = *MI.getParent();
@@ -596,17 +598,12 @@ static bool isLibCallInTailPosition(MachineInstr &MI,
// RET_ReallyLR implicit $x0
auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
if (Next != MBB.instr_end() && Next->isCopy()) {
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("unsupported opcode");
- case TargetOpcode::G_BZERO:
+ if (MI.getOpcode() == TargetOpcode::G_BZERO)
return false;
- case TargetOpcode::G_MEMCPY:
- case TargetOpcode::G_MEMMOVE:
- case TargetOpcode::G_MEMSET:
- break;
- }
+ // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
+ // memcpy/etc routines return the same parameter. For others it will be the
+ // returned value.
Register VReg = MI.getOperand(0).getReg();
if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
return false;
@@ -622,7 +619,7 @@ static bool isLibCallInTailPosition(MachineInstr &MI,
if (Ret->getNumImplicitOperands() != 1)
return false;
- if (PReg != Ret->getOperand(0).getReg())
+ if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
return false;
// Skip over the COPY that we just validated.
@@ -639,34 +636,64 @@ LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
const CallLowering::ArgInfo &Result,
ArrayRef<CallLowering::ArgInfo> Args,
- const CallingConv::ID CC) {
+ const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
+ MachineInstr *MI) {
auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
CallLowering::CallLoweringInfo Info;
Info.CallConv = CC;
Info.Callee = MachineOperand::CreateES(Name);
Info.OrigRet = Result;
+ if (MI)
+ Info.IsTailCall =
+ (Result.Ty->isVoidTy() ||
+ Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
+ isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
+ *MIRBuilder.getMRI());
+
std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
if (!CLI.lowerCall(MIRBuilder, Info))
return LegalizerHelper::UnableToLegalize;
+ if (MI && Info.LoweredTailCall) {
+ assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
+
+ // Check debug locations before removing the return.
+ LocObserver.checkpoint(true);
+
+ // We must have a return following the call (or debug insts) to get past
+ // isLibCallInTailPosition.
+ do {
+ MachineInstr *Next = MI->getNextNode();
+ assert(Next &&
+ (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
+ "Expected instr following MI to be return or debug inst?");
+ // We lowered a tail call, so the call is now the return from the block.
+ // Delete the old return.
+ Next->eraseFromParent();
+ } while (MI->getNextNode());
+
+ // We expect to lose the debug location from the return.
+ LocObserver.checkpoint(false);
+ }
return LegalizerHelper::Legalized;
}
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
const CallLowering::ArgInfo &Result,
- ArrayRef<CallLowering::ArgInfo> Args) {
+ ArrayRef<CallLowering::ArgInfo> Args,
+ LostDebugLocObserver &LocObserver, MachineInstr *MI) {
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
const char *Name = TLI.getLibcallName(Libcall);
const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
- return createLibcall(MIRBuilder, Name, Result, Args, CC);
+ return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
}
// Useful for libcalls where all operands have the same type.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
- Type *OpType) {
+ Type *OpType, LostDebugLocObserver &LocObserver) {
auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
// FIXME: What does the original arg index mean here?
@@ -674,7 +701,8 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
Args.push_back({MO.getReg(), OpType, 0});
return createLibcall(MIRBuilder, Libcall,
- {MI.getOperand(0).getReg(), OpType, 0}, Args);
+ {MI.getOperand(0).getReg(), OpType, 0}, Args,
+ LocObserver, &MI);
}
LegalizerHelper::LegalizeResult
@@ -733,8 +761,9 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
Info.Callee = MachineOperand::CreateES(Name);
Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
- Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
- isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);
+ Info.IsTailCall =
+ MI.getOperand(MI.getNumOperands() - 1).getImm() &&
+ isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
if (!CLI.lowerCall(MIRBuilder, Info))
@@ -789,11 +818,11 @@ static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
- Type *FromType) {
+ Type *FromType, LostDebugLocObserver &LocObserver) {
RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
- return createLibcall(MIRBuilder, Libcall,
- {MI.getOperand(0).getReg(), ToType, 0},
- {{MI.getOperand(1).getReg(), FromType, 0}});
+ return createLibcall(
+ MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0},
+ {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI);
}
static RTLIB::Libcall
@@ -829,7 +858,8 @@ getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
//
LegalizerHelper::LegalizeResult
LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
- MachineInstr &MI) {
+ MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) {
const DataLayout &DL = MIRBuilder.getDataLayout();
auto &MF = MIRBuilder.getMF();
auto &MRI = *MIRBuilder.getMRI();
@@ -850,7 +880,8 @@ LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
auto Res =
createLibcall(MIRBuilder, RTLibcall,
CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
- CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}));
+ CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
+ LocObserver, nullptr);
if (Res != LegalizerHelper::Legalized)
return Res;
@@ -867,7 +898,8 @@ LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
// content of memory region.
LegalizerHelper::LegalizeResult
LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
- MachineInstr &MI) {
+ MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) {
const DataLayout &DL = MIRBuilder.getDataLayout();
auto &MF = MIRBuilder.getMF();
auto &MRI = *MIRBuilder.getMRI();
@@ -892,7 +924,8 @@ LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
return createLibcall(MIRBuilder, RTLibcall,
CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
- CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}));
+ CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
+ LocObserver, nullptr);
}
// The function is used to legalize operations that set default environment
@@ -902,7 +935,8 @@ LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
// it is not true, the target must provide custom lowering.
LegalizerHelper::LegalizeResult
LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
- MachineInstr &MI) {
+ MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) {
const DataLayout &DL = MIRBuilder.getDataLayout();
auto &MF = MIRBuilder.getMF();
auto &Ctx = MF.getFunction().getContext();
@@ -919,7 +953,8 @@ LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
return createLibcall(MIRBuilder, RTLibcall,
CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
- CallLowering::ArgInfo({ Dest.getReg(), StatePtrTy, 0}));
+ CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
+ LocObserver, &MI);
}
LegalizerHelper::LegalizeResult
@@ -938,7 +973,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
unsigned Size = LLTy.getSizeInBits();
Type *HLTy = IntegerType::get(Ctx, Size);
- auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
+ auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
if (Status != Legalized)
return Status;
break;
@@ -974,7 +1009,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
return UnableToLegalize;
}
- auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
+ auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
if (Status != Legalized)
return Status;
break;
@@ -985,7 +1020,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
if (!FromTy || !ToTy)
return UnableToLegalize;
- LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy );
+ LegalizeResult Status =
+ conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver);
if (Status != Legalized)
return Status;
break;
@@ -1000,7 +1036,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
LegalizeResult Status = conversionLibcall(
MI, MIRBuilder,
ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
- FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
+ FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
+ LocObserver);
if (Status != Legalized)
return Status;
break;
@@ -1015,7 +1052,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
LegalizeResult Status = conversionLibcall(
MI, MIRBuilder,
ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
- FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
+ FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
+ LocObserver);
if (Status != Legalized)
return Status;
break;
@@ -1032,19 +1070,20 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
return Result;
}
case TargetOpcode::G_GET_FPMODE: {
- LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI);
+ LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
if (Result != Legalized)
return Result;
break;
}
case TargetOpcode::G_SET_FPMODE: {
- LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI);
+ LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
if (Result != Legalized)
return Result;
break;
}
case TargetOpcode::G_RESET_FPMODE: {
- LegalizeResult Result = createResetStateLibcall(MIRBuilder, MI);
+ LegalizeResult Result =
+ createResetStateLibcall(MIRBuilder, MI, LocObserver);
if (Result != Legalized)
return Result;
break;
@@ -2831,6 +2870,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
}
case TargetOpcode::G_VECREDUCE_FADD:
+ case TargetOpcode::G_VECREDUCE_FMUL:
case TargetOpcode::G_VECREDUCE_FMIN:
case TargetOpcode::G_VECREDUCE_FMAX:
case TargetOpcode::G_VECREDUCE_FMINIMUM:
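The thread running through this file's hunks: every createLibcall/simpleLibcall/conversionLibcall path now carries a LostDebugLocObserver, and passing a MachineInstr lets createLibcall lower the call as a tail call and erase the trailing return. A hedged caller-side sketch using only the signature introduced above (all names stand for values a real legalizer already has in scope):

```cpp
// Sketch, not part of the patch: lower MI via a runtime call that may
// become a tail call; LocObserver tracks any debug locations that are lost.
LegalizerHelper::LegalizeResult
lowerViaLibcall(MachineIRBuilder &MIRBuilder, LostDebugLocObserver &LocObserver,
                MachineInstr &MI, Register Dst, Register Src0, Register Src1,
                Type *I32Ty) {
  return llvm::createLibcall(MIRBuilder, RTLIB::SDIV_I32,
                             {Dst, I32Ty, 0},                    // result
                             {{Src0, I32Ty, 0}, {Src1, I32Ty, 0}}, // args
                             LocObserver, &MI); // enables tail-call folding
}
```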
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 3fbb937..cbb1a74 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -305,11 +305,7 @@ namespace {
/// number if it is not zero. If DstReg is a physical register and the
/// existing subregister number of the def / use being updated is not zero,
/// make sure to set it to the correct physical subregister.
- ///
- /// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG
- /// SrcReg. This introduces an implicit-def of DstReg on coalesced users.
- void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
- bool IsSubregToReg);
+ void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
/// If the given machine operand reads only undefined lanes add an undef
/// flag.
@@ -1347,7 +1343,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (DstReg.isPhysical()) {
Register NewDstReg = DstReg;
- unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), DefSubIdx);
+ unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
+ DefMI->getOperand(0).getSubReg());
if (NewDstIdx)
NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
@@ -1496,7 +1493,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
MRI->setRegClass(DstReg, NewRC);
// Update machine operands and add flags.
- updateRegDefsUses(DstReg, DstReg, DstIdx, false);
+ updateRegDefsUses(DstReg, DstReg, DstIdx);
NewMI.getOperand(0).setSubReg(NewIdx);
// updateRegDefUses can add an "undef" flag to the definition, since
// it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
@@ -1816,7 +1813,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
- unsigned SubIdx, bool IsSubregToReg) {
+ unsigned SubIdx) {
bool DstIsPhys = DstReg.isPhysical();
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
@@ -1856,8 +1853,6 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
- bool FullDef = true;
-
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = UseMI->getOperand(Ops[i]);
@@ -1865,13 +1860,9 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
// Adjust <undef> flags in case of sub-register joins. We don't want to
// turn a full def into a read-modify-write sub-register def and vice
// versa.
- if (SubIdx && MO.isDef()) {
+ if (SubIdx && MO.isDef())
MO.setIsUndef(!Reads);
- if (!Reads)
- FullDef = false;
- }
-
// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
if (MO.isUse() && !DstIsPhys) {
@@ -1903,25 +1894,6 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
MO.substVirtReg(DstReg, SubIdx, *TRI);
}
- if (IsSubregToReg && !FullDef) {
- // If the coalesced instruction doesn't fully define the register, we need
- // to preserve the original super register liveness for SUBREG_TO_REG.
- //
- // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
- // but it introduces liveness for other subregisters. Downstream users may
- // have been relying on those bits, so we need to ensure their liveness is
- // captured with a def of other lanes.
-
- // FIXME: Need to add new subrange if tracking subranges. We could also
- // skip adding this if we knew the other lanes are dead, and only for
- // other lanes.
-
- assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
- "this should update subranges");
- MachineInstrBuilder MIB(*MF, UseMI);
- MIB.addReg(DstReg, RegState::ImplicitDefine);
- }
-
LLVM_DEBUG({
dbgs() << "\t\tupdated: ";
if (!UseMI->isDebugInstr())
@@ -2121,8 +2093,6 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
});
}
- const bool IsSubregToReg = CopyMI->isSubregToReg();
-
ShrinkMask = LaneBitmask::getNone();
ShrinkMainRange = false;
@@ -2190,12 +2160,9 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
// Rewrite all SrcReg operands to DstReg.
// Also update DstReg operands to include DstIdx if it is set.
- if (CP.getDstIdx()) {
- assert(!IsSubregToReg && "can this happen?");
- updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx(), false);
- }
- updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
- IsSubregToReg);
+ if (CP.getDstIdx())
+ updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
// Shrink subregister ranges if necessary.
if (ShrinkMask.any()) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index eafa95c..464e1be 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7987,7 +7987,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// If OR can be rewritten into ADD, try combines based on ADD.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
- DAG.haveNoCommonBitsSet(N0, N1))
+ DAG.isADDLike(SDValue(N, 0)))
if (SDValue Combined = visitADDLike(N))
return Combined;
@@ -10055,7 +10055,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
AddToWorklist(Shl0.getNode());
- return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
+ SDNodeFlags Flags;
+ // Preserve the disjoint flag for Or.
+ if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
+ Flags.setDisjoint(true);
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1, Flags);
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0e17bba..4151964 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5022,7 +5022,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::CONCAT_VECTORS:
case ISD::INSERT_SUBVECTOR:
case ISD::AND:
- case ISD::OR:
case ISD::XOR:
case ISD::ROTL:
case ISD::ROTR:
@@ -5062,6 +5061,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() ||
Op->getFlags().hasNoUnsignedWrap());
+ // Matches hasPoisonGeneratingFlags().
+ case ISD::OR:
+ return ConsiderFlags && Op->getFlags().hasDisjoint();
+
case ISD::INSERT_VECTOR_ELT:{
// Ensure that the element index is in bounds.
EVT VecVT = Op.getOperand(0).getValueType();
@@ -5085,7 +5088,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
bool SelectionDAG::isADDLike(SDValue Op) const {
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::OR)
- return haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1));
+ return Op->getFlags().hasDisjoint() ||
+ haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1));
if (Opcode == ISD::XOR)
return isMinSignedConstant(Op.getOperand(1));
return false;
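Why the disjoint flag can stand in for a haveNoCommonBitsSet() query: when two values share no set bits, adding them produces no carries, so OR and ADD compute the same result. A plain-C++ illustration of the invariant the flag records:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t A = 0xF0F00000u, B = 0x00000F0Fu;
  assert((A & B) == 0u);    // the "disjoint" condition
  assert((A | B) == A + B); // hence this OR is ADD-like
  return 0;
}
```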
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3c4b285..192f7bc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3354,6 +3354,8 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
}
if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
Flags.setExact(ExactOp->isExact());
+ if (auto *DisjointOp = dyn_cast<PossiblyDisjointInst>(&I))
+ Flags.setDisjoint(DisjointOp->isDisjoint());
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 78cc600..4ae3000 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -597,6 +597,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getFlags().hasExact())
OS << " exact";
+ if (getFlags().hasDisjoint())
+ OS << " disjoint";
+
if (getFlags().hasNonNeg())
OS << " nneg";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c597754..66cdd75 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1064,10 +1064,9 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
SDValue ResultAVG =
- DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
- DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
- return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
- ResultAVG);
+ DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
+ DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
+ return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
@@ -1468,14 +1467,24 @@ bool TargetLowering::SimplifyDemandedBits(
case ISD::OR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
-
+ SDNodeFlags Flags = Op.getNode()->getFlags();
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
- Depth + 1))
+ Depth + 1)) {
+ if (Flags.hasDisjoint()) {
+ Flags.setDisjoint(false);
+ Op->setFlags(Flags);
+ }
return true;
+ }
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
- Known2, TLO, Depth + 1))
+ Known2, TLO, Depth + 1)) {
+ if (Flags.hasDisjoint()) {
+ Flags.setDisjoint(false);
+ Op->setFlags(Flags);
+ }
return true;
+ }
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
diff --git a/llvm/lib/Demangle/Demangle.cpp b/llvm/lib/Demangle/Demangle.cpp
index 117b849..83f3cdc 100644
--- a/llvm/lib/Demangle/Demangle.cpp
+++ b/llvm/lib/Demangle/Demangle.cpp
@@ -47,8 +47,7 @@ static bool isRustEncoding(std::string_view S) { return starts_with(S, "_R"); }
static bool isDLangEncoding(std::string_view S) { return starts_with(S, "_D"); }
bool llvm::nonMicrosoftDemangle(std::string_view MangledName,
- std::string &Result, bool CanHaveLeadingDot,
- bool ParseParams) {
+ std::string &Result, bool CanHaveLeadingDot) {
char *Demangled = nullptr;
// Do not consider the dot prefix as part of the demangled symbol name.
@@ -58,7 +57,7 @@ bool llvm::nonMicrosoftDemangle(std::string_view MangledName,
}
if (isItaniumEncoding(MangledName))
- Demangled = itaniumDemangle(MangledName, ParseParams);
+ Demangled = itaniumDemangle(MangledName);
else if (isRustEncoding(MangledName))
Demangled = rustDemangle(MangledName);
else if (isDLangEncoding(MangledName))
diff --git a/llvm/lib/Demangle/ItaniumDemangle.cpp b/llvm/lib/Demangle/ItaniumDemangle.cpp
index 5c21b06..e3f208f 100644
--- a/llvm/lib/Demangle/ItaniumDemangle.cpp
+++ b/llvm/lib/Demangle/ItaniumDemangle.cpp
@@ -366,13 +366,13 @@ public:
using Demangler = itanium_demangle::ManglingParser<DefaultAllocator>;
-char *llvm::itaniumDemangle(std::string_view MangledName, bool ParseParams) {
+char *llvm::itaniumDemangle(std::string_view MangledName) {
if (MangledName.empty())
return nullptr;
Demangler Parser(MangledName.data(),
MangledName.data() + MangledName.length());
- Node *AST = Parser.parse(ParseParams);
+ Node *AST = Parser.parse();
if (!AST)
return nullptr;
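With ParseParams gone, itaniumDemangle() always parses the full encoding. A minimal usage sketch (the mangled name is only an example; the returned buffer is heap-allocated and owned by the caller):

```cpp
#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

int main() {
  if (char *Demangled = llvm::itaniumDemangle("_Z3addii")) {
    std::puts(Demangled); // prints: add(int, int)
    std::free(Demangled); // caller frees the malloc'd string
  }
  return 0;
}
```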
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
index 8eca874..8a4145a 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
@@ -21,31 +21,8 @@
// First version as landed in August 2009
static constexpr uint32_t JitDescriptorVersion = 1;
-// Keep in sync with gdb/gdb/jit.h
extern "C" {
-typedef enum {
- JIT_NOACTION = 0,
- JIT_REGISTER_FN,
- JIT_UNREGISTER_FN
-} jit_actions_t;
-
-struct jit_code_entry {
- struct jit_code_entry *next_entry;
- struct jit_code_entry *prev_entry;
- const char *symfile_addr;
- uint64_t symfile_size;
-};
-
-struct jit_descriptor {
- uint32_t version;
- // This should be jit_actions_t, but we want to be specific about the
- // bit-width.
- uint32_t action_flag;
- struct jit_code_entry *relevant_entry;
- struct jit_code_entry *first_entry;
-};
-
// We put information about the JITed function in this global, which the
// debugger reads. Make sure to specify the version statically, because the
// debugger checks the version before we can set it during runtime.
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index eab05ee..c6dc42e 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -2115,6 +2115,10 @@ bool AssignmentTrackingPass::runOnFunction(Function &F) {
if (F.hasFnAttribute(Attribute::OptimizeNone))
return /*Changed*/ false;
+ // FIXME: https://github.com/llvm/llvm-project/issues/76545
+ if (F.hasFnAttribute(Attribute::SanitizeHWAddress))
+ return /*Changed*/ false;
+
bool Changed = false;
auto *DL = &F.getParent()->getDataLayout();
// Collect a map of {backing storage : dbg.declares} (currently "backing
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 05836fd..6a1e53b 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -592,7 +592,9 @@ LTO::LTO(Config Conf, ThinBackend Backend,
unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode)
: Conf(std::move(Conf)),
RegularLTO(ParallelCodeGenParallelismLevel, this->Conf),
- ThinLTO(std::move(Backend)), LTOMode(LTOMode) {}
+ ThinLTO(std::move(Backend)),
+ GlobalResolutions(std::make_optional<StringMap<GlobalResolution>>()),
+ LTOMode(LTOMode) {}
// Requires a destructor for MapVector<BitcodeModule>.
LTO::~LTO() = default;
@@ -610,7 +612,7 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
- auto &GlobalRes = GlobalResolutions[Sym.getName()];
+ auto &GlobalRes = (*GlobalResolutions)[Sym.getName()];
GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
if (Res.Prevailing) {
assert(!GlobalRes.Prevailing &&
@@ -1125,7 +1127,7 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
// Compute "dead" symbols, we don't want to import/export these!
DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;
- for (auto &Res : GlobalResolutions) {
+ for (auto &Res : *GlobalResolutions) {
// Normally resolutions have the IR name of the symbol. We can do nothing here
// otherwise. See comments in GlobalResolution struct for more details.
if (Res.second.IRName.empty())
@@ -1169,6 +1171,8 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
Error Result = runRegularLTO(AddStream);
if (!Result)
+ // This will reset the GlobalResolutions optional once done with it to
+ // reduce peak memory before importing.
Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols);
if (StatsFile)
@@ -1273,8 +1277,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
// This returns true when the name is local or not defined. Locals are
// expected to be handled separately.
auto IsVisibleToRegularObj = [&](StringRef name) {
- auto It = GlobalResolutions.find(name);
- return (It == GlobalResolutions.end() || It->second.VisibleOutsideSummary);
+ auto It = GlobalResolutions->find(name);
+ return (It == GlobalResolutions->end() || It->second.VisibleOutsideSummary);
};
// If allowed, upgrade public vcall visibility metadata to linkage unit
@@ -1291,7 +1295,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
if (!Conf.CodeGenOnly) {
- for (const auto &R : GlobalResolutions) {
+ for (const auto &R : *GlobalResolutions) {
GlobalValue *GV =
RegularLTO.CombinedModule->getNamedValue(R.second.IRName);
if (!R.second.isPrevailingIRSymbol())
@@ -1708,8 +1712,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
// This returns true when the name is local or not defined. Locals are
// expected to be handled separately.
auto IsVisibleToRegularObj = [&](StringRef name) {
- auto It = GlobalResolutions.find(name);
- return (It == GlobalResolutions.end() ||
+ auto It = GlobalResolutions->find(name);
+ return (It == GlobalResolutions->end() ||
It->second.VisibleOutsideSummary);
};
@@ -1739,15 +1743,11 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
ContextDisambiguation.run(ThinLTO.CombinedIndex, isPrevailing);
}
- if (Conf.OptLevel > 0)
- ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
- isPrevailing, ImportLists, ExportLists);
-
// Figure out which symbols need to be internalized. This also needs to happen
// at -O0 because summary-based DCE is implemented using internalization, and
// we must apply DCE consistently with the full LTO module in order to avoid
// undefined references during the final link.
- for (auto &Res : GlobalResolutions) {
+ for (auto &Res : *GlobalResolutions) {
// If the symbol does not have external references or it is not prevailing,
// then there is no need to mark it as exported from a ThinLTO partition.
if (Res.second.Partition != GlobalResolution::External ||
@@ -1760,6 +1760,16 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
ExportedGUIDs.insert(GUID);
}
+ // Reset the GlobalResolutions to deallocate the associated memory, as there
+ // are no further accesses. We specifically want to do this before computing
+ // cross module importing, which adds to peak memory via the computed import
+ // and export lists.
+ GlobalResolutions.reset();
+
+ if (Conf.OptLevel > 0)
+ ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
+ isPrevailing, ImportLists, ExportLists);
+
// Any functions referenced by the jump table in the regular LTO object must
// be exported.
for (auto &Def : ThinLTO.CombinedIndex.cfiFunctionDefs())
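The memory saving hinges on wrapping GlobalResolutions in std::optional so the map is destroyed at a chosen point rather than at the end of LTO::run(). The same pattern in isolation, as a sketch with hypothetical names:

```cpp
#include <map>
#include <optional>
#include <string>

struct LinkSession {
  // Wrapped in optional purely so it can be released early.
  std::optional<std::map<std::string, int>> Resolutions{std::in_place};

  void run() {
    (*Resolutions)["sym"] = 1; // ...last accesses happen here...
    Resolutions.reset();       // deallocate now, before the peak-memory phase
    // ...cross-module import/export computation would run here...
  }
};
```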
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index a85182a..9dae026 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -704,8 +704,14 @@ static void AttemptToFoldSymbolOffsetDifference(
}
int64_t Num;
+ unsigned Count;
if (DF) {
Displacement += DF->getContents().size();
+ } else if (auto *AF = dyn_cast<MCAlignFragment>(FI);
+ AF && Layout &&
+ !Asm->getBackend().shouldInsertExtraNopBytesForCodeAlign(
+ *AF, Count)) {
+ Displacement += Asm->computeFragmentSize(*Layout, *AF);
} else if (auto *FF = dyn_cast<MCFillFragment>(FI);
FF && FF->getNumValues().evaluateAsAbsolute(Num)) {
Displacement += Num * FF->getValueSize();
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index f94bd42..439f749 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -452,9 +452,10 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO,
std::optional<PGOOptions> PGOOpt,
PassInstrumentationCallbacks *PIC)
: TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) {
+ bool ShouldPopulateClassToPassNames = PIC && shouldPopulateClassToPassNames();
if (TM)
- TM->registerPassBuilderCallbacks(*this);
- if (PIC && shouldPopulateClassToPassNames()) {
+ TM->registerPassBuilderCallbacks(*this, ShouldPopulateClassToPassNames);
+ if (ShouldPopulateClassToPassNames) {
#define MODULE_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
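Every TargetMachine that overrides registerPassBuilderCallbacks() has to adopt the widened signature (the target hunks later in this diff show the mechanical change). A hedged sketch for a hypothetical out-of-tree target:

```cpp
// Hypothetical target; only the signature is prescribed by this patch.
void MyTargetMachine::registerPassBuilderCallbacks(
    PassBuilder &PB, bool PopulateClassToPassNames) {
  // Register pipeline-parsing/instrumentation callbacks as before; the flag
  // reports whether PassBuilder is populating class-to-pass-name tables.
  (void)PB;
  (void)PopulateClassToPassNames;
}
```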
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 9b8162c..1ea63a5 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -1231,15 +1231,6 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
// Only extend the RHS within the instruction if there is a valid extend type.
if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
isValueAvailable(RHS)) {
- if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
- if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
- if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
- Register RHSReg = getRegForValue(SI->getOperand(0));
- if (!RHSReg)
- return 0;
- return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
- C->getZExtValue(), SetFlags, WantResult);
- }
Register RHSReg = getRegForValue(RHS);
if (!RHSReg)
return 0;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index cb63d87..10ad5b1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -12586,6 +12586,7 @@ def : TokenAlias<".4S", ".4s">;
def : TokenAlias<".2D", ".2d">;
def : TokenAlias<".1Q", ".1q">;
def : TokenAlias<".2H", ".2h">;
+def : TokenAlias<".2B", ".2b">;
def : TokenAlias<".B", ".b">;
def : TokenAlias<".H", ".h">;
def : TokenAlias<".S", ".s">;
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 738a52e..380f6e1f 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -810,7 +810,7 @@ defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>;
defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>;
}
-let Predicates = [HasSME2p1, HasB16B16] in {
+let Predicates = [HasSME2, HasB16B16] in {
defm BFADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfadd", 0b1100, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>;
defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>;
defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1d0e8be..bb5f1f6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -989,6 +989,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(1, s16, 8)
.lower();
+ // For fmul reductions we need to split up into individual operations. We
+ // clamp to 128-bit vectors and then to 64-bit vectors to produce a cascade
+ // of smaller types, followed by scalarizing what remains.
+ getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
+ .minScalarOrElt(0, MinFPScalar)
+ .clampMaxNumElements(1, s64, 2)
+ .clampMaxNumElements(1, s32, 4)
+ .clampMaxNumElements(1, s16, 8)
+ .clampMaxNumElements(1, s32, 2)
+ .clampMaxNumElements(1, s16, 4)
+ .scalarize(1)
+ .lower();
+
getActionDefinitionsBuilder(G_VECREDUCE_ADD)
.legalFor({{s8, v16s8},
{s8, v8s8},
@@ -1137,8 +1150,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
verify(*ST.getInstrInfo());
}
-bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
- MachineInstr &MI) const {
+bool AArch64LegalizerInfo::legalizeCustom(
+ LegalizerHelper &Helper, MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) const {
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
GISelChangeObserver &Observer = Helper.Observer;
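Tracing the G_VECREDUCE_FMUL clamps above on v8s32: clampMaxNumElements first splits to two v4s32 (128-bit), then to v2s32 (64-bit), and scalarize(1) finishes the job. A plain-C++ sketch of one such reassociating decomposition (illustrative only; it ignores FP operation order, which the reduction is permitted to relax):

```cpp
// Halve, multiply element-wise, repeat, then scalarize what remains.
float fmulReduce8(const float (&V)[8]) {
  float H[4];
  for (int I = 0; I != 4; ++I)
    H[I] = V[I] * V[I + 4];                 // v8 -> v4
  float Q0 = H[0] * H[2], Q1 = H[1] * H[3]; // v4 -> v2
  return Q0 * Q1;                           // v2 -> scalar
}
```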
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 19f77ba..e96ec6d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -28,7 +28,8 @@ class AArch64LegalizerInfo : public LegalizerInfo {
public:
AArch64LegalizerInfo(const AArch64Subtarget &ST);
- bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;
+ bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) const override;
bool legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const override;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index b755254..789ec81 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10082,6 +10082,12 @@ multiclass sve2p1_vector_to_pred<string mnemonic, SDPatternOperator Op_lane, SDP
def : InstAlias<mnemonic # "\t$Pd, $Zn",
(!cast<Instruction>(NAME # _B) PPR8:$Pd, ZPRAny:$Zn, 0), 1>;
+ def : InstAlias<mnemonic # "\t$Pd, $Zn",
+ (!cast<Instruction>(NAME # _H) PPR16:$Pd, ZPRAny:$Zn, 0), 0>;
+ def : InstAlias<mnemonic # "\t$Pd, $Zn",
+ (!cast<Instruction>(NAME # _S) PPR32:$Pd, ZPRAny:$Zn, 0), 0>;
+ def : InstAlias<mnemonic # "\t$Pd, $Zn",
+ (!cast<Instruction>(NAME # _D) PPR64:$Pd, ZPRAny:$Zn, 0), 0>;
// any_lane
def : Pat<(nxv16i1 (Op_lane (nxv16i8 ZPRAny:$Zn), (i32 timm32_0_0:$Idx))),
@@ -10143,6 +10149,12 @@ multiclass sve2p1_pred_to_vector<string mnemonic, SDPatternOperator MergeOp,
def : InstAlias<mnemonic # "\t$Zd, $Pn",
(!cast<Instruction>(NAME # _B) ZPRAny:$Zd, 0, PPR8:$Pn), 1>;
+ def : InstAlias<mnemonic # "\t$Zd, $Pn",
+ (!cast<Instruction>(NAME # _H) ZPRAny:$Zd, 0, PPR16:$Pn), 0>;
+ def : InstAlias<mnemonic # "\t$Zd, $Pn",
+ (!cast<Instruction>(NAME # _S) ZPRAny:$Zd, 0, PPR32:$Pn), 0>;
+ def : InstAlias<mnemonic # "\t$Zd, $Pn",
+ (!cast<Instruction>(NAME # _D) ZPRAny:$Zd, 0, PPR64:$Pn), 0>;
// Merge
def : Pat<(nxv8i16 (MergeOp (nxv8i16 ZPRAny:$Zd), (nxv8i1 PPR16:$Pn), (i32 timm32_1_1:$Idx))),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 88ef4b5..ad8dcda 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2764,7 +2764,9 @@ static bool isConstant(const MachineInstr &MI) {
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
- const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());
+ unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
+ const MachineInstr *PtrMI =
+ MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());
assert(PtrMI);
@@ -2817,6 +2819,10 @@ bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return true;
+ if (MI.getOpcode() == AMDGPU::G_PREFETCH)
+ return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
+ AMDGPU::SGPRRegBankID;
+
const Instruction *I = dyn_cast<Instruction>(Ptr);
return I && I->getMetadata("amdgpu.uniform");
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index fbee288..dfbe5c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1996,8 +1996,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
verify(*ST.getInstrInfo());
}
-bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
- MachineInstr &MI) const {
+bool AMDGPULegalizerInfo::legalizeCustom(
+ LegalizerHelper &Helper, MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) const {
MachineIRBuilder &B = Helper.MIRBuilder;
MachineRegisterInfo &MRI = *B.getMRI();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 855fa0d..1fa0648 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -35,7 +35,8 @@ public:
AMDGPULegalizerInfo(const GCNSubtarget &ST,
const GCNTargetMachine &TM);
- bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;
+ bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) const override;
Register getSegmentAperture(unsigned AddrSpace,
MachineRegisterInfo &MRI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index fba0604..92182ec 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3263,17 +3263,19 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
MI.eraseFromParent();
return;
}
- unsigned PtrBank =
- getRegBankID(MI.getOperand(0).getReg(), MRI, AMDGPU::SGPRRegBankID);
+ Register PtrReg = MI.getOperand(0).getReg();
+ unsigned PtrBank = getRegBankID(PtrReg, MRI, AMDGPU::SGPRRegBankID);
if (PtrBank == AMDGPU::VGPRRegBankID) {
MI.eraseFromParent();
return;
}
- // FIXME: There is currently no support for prefetch in global isel.
- // There is no node equivalence and what's worse there is no MMO produced
- // for a prefetch on global isel path.
- // Prefetch does not affect execution so erase it for now.
- MI.eraseFromParent();
+ unsigned AS = MRI.getType(PtrReg).getAddressSpace();
+ if (!AMDGPU::isFlatGlobalAddrSpace(AS) &&
+ AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
+ MI.eraseFromParent();
+ return;
+ }
+ applyDefaultMapping(OpdMapper);
return;
}
default:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index fdc2077..0f3bb3e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -620,7 +620,8 @@ void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
AAM.registerFunctionAnalysis<AMDGPUAA>();
}
-void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
+void AMDGPUTargetMachine::registerPassBuilderCallbacks(
+ PassBuilder &PB, bool PopulateClassToPassNames) {
PB.registerPipelineParsingCallback(
[this](StringRef PassName, ModulePassManager &PM,
ArrayRef<PassBuilder::PipelineElement>) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 9051a61..99c9db3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -51,7 +51,8 @@ public:
return TLOF.get();
}
- void registerPassBuilderCallbacks(PassBuilder &PB) override;
+ void registerPassBuilderCallbacks(PassBuilder &PB,
+ bool PopulateClassToPassNames) override;
void registerDefaultAliasAnalyses(AAManager &) override;
/// Get the integer value of a null pointer in the given address space.
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index abd7e91..5f2b7c0 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -166,6 +166,8 @@ public:
ImmTyEndpgm,
ImmTyWaitVDST,
ImmTyWaitEXP,
+ ImmTyWaitVAVDst,
+ ImmTyWaitVMVSrc,
};
// Immediate operand kind.
@@ -909,6 +911,8 @@ public:
bool isEndpgm() const;
bool isWaitVDST() const;
bool isWaitEXP() const;
+ bool isWaitVAVDst() const;
+ bool isWaitVMVSrc() const;
auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
return std::bind(P, *this);
@@ -1029,6 +1033,7 @@ public:
}
static void printImmTy(raw_ostream& OS, ImmTy Type) {
+ // clang-format off
switch (Type) {
case ImmTyNone: OS << "None"; break;
case ImmTyGDS: OS << "GDS"; break;
@@ -1086,7 +1091,10 @@ public:
case ImmTyEndpgm: OS << "Endpgm"; break;
case ImmTyWaitVDST: OS << "WaitVDST"; break;
case ImmTyWaitEXP: OS << "WaitEXP"; break;
+ case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
+ case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
}
+ // clang-format on
}
void print(raw_ostream &OS) const override {
@@ -9192,6 +9200,14 @@ bool AMDGPUOperand::isWaitVDST() const {
return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}
+bool AMDGPUOperand::isWaitVAVDst() const {
+ return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
+}
+
+bool AMDGPUOperand::isWaitVMVSrc() const {
+ return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
+}
+
//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/DSDIRInstructions.td b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td
new file mode 100644
index 0000000..54ef785
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td
@@ -0,0 +1,191 @@
+//===-- DSDIRInstructions.td - LDS/VDS Direct Instruction Definitions -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LDSDIR/VDSDIR encoding (LDSDIR is gfx11, VDSDIR is gfx12+)
+//===----------------------------------------------------------------------===//
+
+class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
+ // encoding fields
+ bits<2> attrchan;
+ bits<6> attr;
+ bits<4> waitvdst;
+ bits<8> vdst;
+
+ // encoding
+ let Inst{31-24} = 0xce; // encoding
+ let Inst{23-22} = 0x0; // reserved
+ let Inst{21-20} = op;
+ let Inst{19-16} = waitvdst;
+ let Inst{15-10} = !if(is_direct, ?, attr);
+ let Inst{9-8} = !if(is_direct, ?, attrchan);
+ let Inst{7-0} = vdst;
+}
+
+class VDSDIRe<bits<2> op, bit is_direct> : Enc32 {
+ // encoding fields
+ bits<2> attrchan;
+ bits<6> attr;
+ bits<4> waitvdst;
+ bits<8> vdst;
+ bits<1> waitvsrc;
+
+ // encoding
+ let Inst{31-24} = 0xce; // encoding
+ let Inst{23} = waitvsrc;
+ let Inst{22} = 0x0; // reserved
+ let Inst{21-20} = op;
+ let Inst{19-16} = waitvdst;
+ let Inst{15-10} = !if(is_direct, ?, attr);
+ let Inst{9-8} = !if(is_direct, ?, attrchan);
+ let Inst{7-0} = vdst;
+}
+
+//===----------------------------------------------------------------------===//
+// LDSDIR/VDSDIR Classes
+//===----------------------------------------------------------------------===//
+
+class LDSDIR_getIns<bit direct> {
+ dag ret = !if(direct,
+ (ins wait_vdst:$waitvdst),
+ (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_vdst:$waitvdst)
+ );
+}
+
+class VDSDIR_getIns<bit direct> {
+ dag ret = !if(direct,
+ (ins wait_va_vdst:$waitvdst, wait_va_vsrc:$waitvsrc),
+ (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_va_vdst:$waitvdst,
+ wait_va_vsrc:$waitvsrc)
+ );
+}
+
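+// Base class holding the properties shared by the gfx11 (LDSDIR) and gfx12+
+// (VDSDIR) forms of these loads.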
+class DSDIR_Common<string opName, string asm = "", dag ins, bit direct> :
+ InstSI<(outs VGPR_32:$vdst), ins, asm> {
+ let LDSDIR = 1;
+ let EXP_CNT = 1;
+
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+
+ string Mnemonic = opName;
+ let UseNamedOperandTable = 1;
+
+ let Uses = [M0, EXEC];
+ let DisableWQM = 0;
+ let SchedRW = [WriteLDS];
+
+ bit is_direct;
+ let is_direct = direct;
+}
+
+class DSDIR_Pseudo<string opName, dag ins, bit direct> :
+ DSDIR_Common<opName, "", ins, direct>,
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+class LDSDIR_getAsm<bit direct> {
+ string ret = !if(direct,
+ " $vdst$waitvdst",
+ " $vdst, $attr$attrchan$waitvdst"
+ );
+}
+
+class VDSDIR_getAsm<bit direct> {
+ string ret = !if(direct,
+ " $vdst$waitvdst$waitvsrc",
+ " $vdst, $attr$attrchan$waitvdst$waitvsrc"
+ );
+}
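+// For reference, these formats yield assembly along the lines of
+// "lds_param_load v1, attr0.x wait_vdst:2" on gfx11 and
+// "ds_param_load v1, attr0.x wait_va_vdst:2 wait_vm_vsrc:0" on gfx12.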
+
+class DSDIR_Real<DSDIR_Pseudo lds, dag ins, string asm, int subtarget> :
+ DSDIR_Common<lds.Mnemonic,
+ lds.Mnemonic # asm,
+ ins,
+ lds.is_direct>,
+ SIMCInstr <lds.Mnemonic, subtarget> {
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// LDS/VDS Direct Instructions
+//===----------------------------------------------------------------------===//
+
+let SubtargetPredicate = isGFX11Only in {
+
+def LDS_DIRECT_LOAD : DSDIR_Pseudo<"lds_direct_load", LDSDIR_getIns<1>.ret, 1>;
+def LDS_PARAM_LOAD : DSDIR_Pseudo<"lds_param_load", LDSDIR_getIns<0>.ret, 0>;
+
+def : GCNPat <
+ (f32 (int_amdgcn_lds_direct_load M0)),
+ (LDS_DIRECT_LOAD 0)
+>;
+
+def : GCNPat <
+ (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)),
+ (LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0)
+>;
+
+} // End SubtargetPredicate = isGFX11Only
+
+let SubtargetPredicate = isGFX12Plus in {
+
+def DS_DIRECT_LOAD : DSDIR_Pseudo<"ds_direct_load", VDSDIR_getIns<1>.ret, 1>;
+def DS_PARAM_LOAD : DSDIR_Pseudo<"ds_param_load", VDSDIR_getIns<0>.ret, 0>;
+
+def : GCNPat <
+ (f32 (int_amdgcn_lds_direct_load M0)),
+ (DS_DIRECT_LOAD 0, 1)
+>;
+
+def : GCNPat <
+ (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)),
+ (DS_PARAM_LOAD timm:$attr, timm:$attrchan, 0, 1)
+>;
+
+} // End SubtargetPredicate = isGFX12Plus
+
+//===----------------------------------------------------------------------===//
+// GFX11
+//===----------------------------------------------------------------------===//
+
+multiclass DSDIR_Real_gfx11<bits<2> op,
+ DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> {
+ def _gfx11 : DSDIR_Real<lds, lds.InOperandList,
+ LDSDIR_getAsm<lds.is_direct>.ret,
+ SIEncodingFamily.GFX11>,
+ LDSDIRe<op, lds.is_direct> {
+ let AssemblerPredicate = isGFX11Only;
+ let DecoderNamespace = "GFX11";
+ }
+}
+
+defm LDS_PARAM_LOAD : DSDIR_Real_gfx11<0x0>;
+defm LDS_DIRECT_LOAD : DSDIR_Real_gfx11<0x1>;
+
+//===----------------------------------------------------------------------===//
+// GFX12+
+//===----------------------------------------------------------------------===//
+
+multiclass DSDIR_Real_gfx12<bits<2> op,
+ DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> {
+ def _gfx12 : DSDIR_Real<lds, lds.InOperandList,
+ VDSDIR_getAsm<lds.is_direct>.ret,
+ SIEncodingFamily.GFX12>,
+ VDSDIRe<op, lds.is_direct> {
+ let AssemblerPredicate = isGFX12Plus;
+ let DecoderNamespace = "GFX12";
+ }
+}
+
+defm DS_PARAM_LOAD : DSDIR_Real_gfx12<0x0>;
+defm DS_DIRECT_LOAD : DSDIR_Real_gfx12<0x1>;
diff --git a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
deleted file mode 100644
index 4956a15..0000000
--- a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
+++ /dev/null
@@ -1,116 +0,0 @@
-//===-- LDSDIRInstructions.td - LDS Direct Instruction Definitions --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// LDSDIR encoding
-//===----------------------------------------------------------------------===//
-
-class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
- // encoding fields
- bits<2> attrchan;
- bits<6> attr;
- bits<4> waitvdst;
- bits<8> vdst;
-
- // encoding
- let Inst{31-24} = 0xce; // encoding
- let Inst{23-22} = 0x0; // reserved
- let Inst{21-20} = op;
- let Inst{19-16} = waitvdst;
- let Inst{15-10} = !if(is_direct, ?, attr);
- let Inst{9-8} = !if(is_direct, ?, attrchan);
- let Inst{7-0} = vdst;
-}
-
-//===----------------------------------------------------------------------===//
-// LDSDIR Classes
-//===----------------------------------------------------------------------===//
-
-class LDSDIR_getIns<bit direct> {
- dag ret = !if(direct,
- (ins wait_vdst:$waitvdst),
- (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_vdst:$waitvdst)
- );
-}
-
-class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI<
- (outs VGPR_32:$vdst),
- LDSDIR_getIns<direct>.ret,
- asm> {
- let LDSDIR = 1;
- let EXP_CNT = 1;
-
- let hasSideEffects = 0;
- let mayLoad = 1;
- let mayStore = 0;
-
- string Mnemonic = opName;
- let UseNamedOperandTable = 1;
-
- let Uses = [M0, EXEC];
- let DisableWQM = 0;
- let SchedRW = [WriteLDS];
-
- bit is_direct;
- let is_direct = direct;
-}
-
-class LDSDIR_Pseudo<string opName, bit direct> :
- LDSDIR_Common<opName, "", direct>,
- SIMCInstr<opName, SIEncodingFamily.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class LDSDIR_getAsm<bit direct> {
- string ret = !if(direct,
- " $vdst$waitvdst",
- " $vdst, $attr$attrchan$waitvdst"
- );
-}
-
-class LDSDIR_Real<bits<2> op, LDSDIR_Pseudo lds, int subtarget> :
- LDSDIR_Common<lds.Mnemonic,
- lds.Mnemonic # LDSDIR_getAsm<lds.is_direct>.ret,
- lds.is_direct>,
- SIMCInstr <lds.Mnemonic, subtarget>,
- LDSDIRe<op, lds.is_direct> {
- let isPseudo = 0;
- let isCodeGenOnly = 0;
-}
-
-//===----------------------------------------------------------------------===//
-// LDS Direct Instructions
-//===----------------------------------------------------------------------===//
-
-def LDS_DIRECT_LOAD : LDSDIR_Pseudo<"lds_direct_load", 1>;
-def LDS_PARAM_LOAD : LDSDIR_Pseudo<"lds_param_load", 0>;
-
-def : GCNPat <
- (f32 (int_amdgcn_lds_direct_load M0)),
- (LDS_DIRECT_LOAD 0)
->;
-
-def : GCNPat <
- (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)),
- (LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0)
->;
-
-//===----------------------------------------------------------------------===//
-// GFX11+
-//===----------------------------------------------------------------------===//
-
-multiclass LDSDIR_Real_gfx11<bits<2> op, LDSDIR_Pseudo lds = !cast<LDSDIR_Pseudo>(NAME)> {
- def _gfx11 : LDSDIR_Real<op, lds, SIEncodingFamily.GFX11> {
- let AssemblerPredicate = isGFX11Plus;
- let DecoderNamespace = "GFX11";
- }
-}
-
-defm LDS_PARAM_LOAD : LDSDIR_Real_gfx11<0x0>;
-defm LDS_DIRECT_LOAD : LDSDIR_Real_gfx11<0x1>;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index edc244d..ef1b85f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -639,6 +639,20 @@ void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
printU4ImmDecOperand(MI, OpNo, O);
}
+void AMDGPUInstPrinter::printWaitVAVDst(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << " wait_va_vdst:";
+ printU4ImmDecOperand(MI, OpNo, O);
+}
+
+void AMDGPUInstPrinter::printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << " wait_vm_vsrc:";
+ printU4ImmDecOperand(MI, OpNo, O);
+}
+
void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 95c26de..f2f985f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -161,6 +161,10 @@ private:
raw_ostream &O);
void printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printWaitVAVDst(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printExpSrcN(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O, unsigned N);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 173c877..50724fd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1144,6 +1144,8 @@ def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;
def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">;
def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">;
+def wait_va_vdst : NamedIntOperand<i8, "wait_va_vdst", "WaitVAVDst">;
+def wait_va_vsrc : NamedIntOperand<i8, "wait_vm_vsrc", "WaitVMVSrc">;
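+// Despite its name, wait_va_vsrc is parsed and printed with the
+// "wait_vm_vsrc" assembly syntax.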
class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
let OperandNamespace = "AMDGPU";
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 8310c6b..0f12727 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -30,7 +30,7 @@ include "SMInstructions.td"
include "FLATInstructions.td"
include "BUFInstructions.td"
include "EXPInstructions.td"
-include "LDSDIRInstructions.td"
+include "DSDIRInstructions.td"
include "VINTERPInstructions.td"
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 59d6ccf..5e6c349 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -553,7 +553,9 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
}
continue;
} else if (Opcode == AMDGPU::LDS_PARAM_LOAD ||
- Opcode == AMDGPU::LDS_DIRECT_LOAD) {
+ Opcode == AMDGPU::DS_PARAM_LOAD ||
+ Opcode == AMDGPU::LDS_DIRECT_LOAD ||
+ Opcode == AMDGPU::DS_DIRECT_LOAD) {
// Mark these STRICTWQM, but only for the instruction, not its operands.
      // This avoids unnecessarily marking M0 as requiring WQM.
InstrInfo &II = Instructions[&MI];
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 3297847..be21cf0 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -977,20 +977,35 @@ def : GCNPat <
}
} // let OtherPredicates = [HasShaderCyclesRegister]
-multiclass SMPrefetchPat<string type, int cache_type> {
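+// Leaf predicates used to select on the prefetch cache-type immediate.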
+def i32imm_zero : TImmLeaf <i32, [{
+ return Imm == 0;
+}]>;
+
+def i32imm_one : TImmLeaf <i32, [{
+ return Imm == 1;
+}]>;
+
+multiclass SMPrefetchPat<string type, TImmLeaf cache_type> {
def : GCNPat <
- (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, (i32 cache_type)),
+ (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, cache_type),
(!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, $offset, (i32 SGPR_NULL), (i8 0))
>;
def : GCNPat <
- (smrd_prefetch (i64 SReg_64:$sbase), timm, timm, (i32 cache_type)),
+ (smrd_prefetch (i64 SReg_64:$sbase), timm, timm, cache_type),
(!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, 0, (i32 SGPR_NULL), (i8 0))
>;
+
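+  // A 32-bit base address is widened to 64 bits by pairing it with a zeroed
+  // high half.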
+ def : GCNPat <
+ (smrd_prefetch (i32 SReg_32:$sbase), timm, timm, cache_type),
+ (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type)
+ (i64 (REG_SEQUENCE SReg_64, $sbase, sub0, (i32 (S_MOV_B32 (i32 0))), sub1)),
+ 0, (i32 SGPR_NULL), (i8 0))
+ >;
}
-defm : SMPrefetchPat<"INST", 0>;
-defm : SMPrefetchPat<"DATA", 1>;
+defm : SMPrefetchPat<"INST", i32imm_zero>;
+defm : SMPrefetchPat<"DATA", i32imm_one>;
//===----------------------------------------------------------------------===//
// GFX10.
diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index 3ffde86..abea0fe 100644
--- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -362,8 +362,8 @@ ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate,
llvm_unreachable("Unsupported size for FCmp predicate");
}
-bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
- MachineInstr &MI) const {
+bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) const {
using namespace TargetOpcode;
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
@@ -392,7 +392,8 @@ bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
OriginalResult};
auto Status = createLibcall(MIRBuilder, Libcall, {RetRegs, RetTy, 0},
{{MI.getOperand(1).getReg(), ArgTy, 0},
- {MI.getOperand(2).getReg(), ArgTy, 0}});
+ {MI.getOperand(2).getReg(), ArgTy, 0}},
+ LocObserver, &MI);
if (Status != LegalizerHelper::Legalized)
return false;
break;
@@ -428,7 +429,8 @@ bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
auto Status = createLibcall(MIRBuilder, Libcall.LibcallID,
{LibcallResult, RetTy, 0},
{{MI.getOperand(2).getReg(), ArgTy, 0},
- {MI.getOperand(3).getReg(), ArgTy, 0}});
+ {MI.getOperand(3).getReg(), ArgTy, 0}},
+ LocObserver, &MI);
if (Status != LegalizerHelper::Legalized)
return false;
diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/llvm/lib/Target/ARM/ARMLegalizerInfo.h
index f1c2e9c..3636cc6 100644
--- a/llvm/lib/Target/ARM/ARMLegalizerInfo.h
+++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.h
@@ -28,7 +28,8 @@ class ARMLegalizerInfo : public LegalizerInfo {
public:
ARMLegalizerInfo(const ARMSubtarget &ST);
- bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;
+ bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) const override;
private:
void setFCmpLibcallsGNU();
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index ab0db576..8973684 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -108,7 +108,8 @@ TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) {
return new BPFPassConfig(*this, PM);
}
-void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
+void BPFTargetMachine::registerPassBuilderCallbacks(
+ PassBuilder &PB, bool PopulateClassToPassNames) {
PB.registerPipelineParsingCallback(
[](StringRef PassName, FunctionPassManager &FPM,
ArrayRef<PassBuilder::PipelineElement>) {
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.h b/llvm/lib/Target/BPF/BPFTargetMachine.h
index 4e6adc7..0a28394 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.h
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.h
@@ -42,7 +42,8 @@ public:
return TLOF.get();
}
- void registerPassBuilderCallbacks(PassBuilder &PB) override;
+ void registerPassBuilderCallbacks(PassBuilder &PB,
+ bool PopulateClassToPassNames) override;
};
}
diff --git a/llvm/lib/Target/DirectX/DXILResourceAnalysis.h b/llvm/lib/Target/DirectX/DXILResourceAnalysis.h
index 8ffa1d7..bce4116 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAnalysis.h
+++ b/llvm/lib/Target/DirectX/DXILResourceAnalysis.h
@@ -36,6 +36,7 @@ class DXILResourcePrinterPass : public PassInfoMixin<DXILResourcePrinterPass> {
public:
explicit DXILResourcePrinterPass(raw_ostream &OS) : OS(OS) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
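+  // Mark the printer as required so the pass manager cannot skip it.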
+ static bool isRequired() { return true; }
};
/// The legacy pass manager's analysis pass to compute DXIL resource
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index d5cb488..06938f8 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -100,7 +100,8 @@ DirectXTargetMachine::DirectXTargetMachine(const Target &T, const Triple &TT,
DirectXTargetMachine::~DirectXTargetMachine() {}
-void DirectXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
+void DirectXTargetMachine::registerPassBuilderCallbacks(
+ PassBuilder &PB, bool PopulateClassToPassNames) {
PB.registerPipelineParsingCallback(
[](StringRef PassName, ModulePassManager &PM,
ArrayRef<PassBuilder::PipelineElement>) {
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.h b/llvm/lib/Target/DirectX/DirectXTargetMachine.h
index d04c375..428beaf 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.h
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.h
@@ -47,7 +47,8 @@ public:
}
TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
- void registerPassBuilderCallbacks(PassBuilder &PB) override;
+ void registerPassBuilderCallbacks(PassBuilder &PB,
+ bool PopulateClassToPassNames) override;
};
} // namespace llvm
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 590e464..e7a692d 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -274,7 +274,8 @@ HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
return I.get();
}
-void HexagonTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
+void HexagonTargetMachine::registerPassBuilderCallbacks(
+ PassBuilder &PB, bool PopulateClassToPassNames) {
PB.registerLateLoopOptimizationsEPCallback(
[=](LoopPassManager &LPM, OptimizationLevel Level) {
LPM.addPass(HexagonLoopIdiomRecognitionPass());
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
index dddd79a..c5fed0c 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -34,7 +34,8 @@ public:
~HexagonTargetMachine() override;
const HexagonSubtarget *getSubtargetImpl(const Function &F) const override;
- void registerPassBuilderCallbacks(PassBuilder &PB) override;
+ void registerPassBuilderCallbacks(PassBuilder &PB,
+ bool PopulateClassToPassNames) override;
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index 14f2620..f5e9423 100644
--- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -330,8 +330,9 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
verify(*ST.getInstrInfo());
}
-bool MipsLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
- MachineInstr &MI) const {
+bool MipsLegalizerInfo::legalizeCustom(
+ LegalizerHelper &Helper, MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) const {
using namespace TargetOpcode;
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.h b/llvm/lib/Target/Mips/MipsLegalizerInfo.h
index 05027b7..63daebf 100644
--- a/llvm/lib/Target/Mips/MipsLegalizerInfo.h
+++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.h
@@ -25,7 +25,8 @@ class MipsLegalizerInfo : public LegalizerInfo {
public:
MipsLegalizerInfo(const MipsSubtarget &ST);
- bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;
+ bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) const override;
bool legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const override;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 8d89576..fad69f5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -225,7 +225,8 @@ void NVPTXTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
AAM.registerFunctionAnalysis<NVPTXAA>();
}
-void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
+void NVPTXTargetMachine::registerPassBuilderCallbacks(
+ PassBuilder &PB, bool PopulateClassToPassNames) {
PB.registerPipelineParsingCallback(
[](StringRef PassName, FunctionPassManager &PM,
ArrayRef<PassBuilder::PipelineElement>) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index cfdd8da..9e6bf92 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -69,7 +69,8 @@ public:
void registerDefaultAliasAnalyses(AAManager &AAM) override;
- void registerPassBuilderCallbacks(PassBuilder &PB) override;
+ void registerPassBuilderCallbacks(PassBuilder &PB,
+ bool PopulateClassToPassNames) override;
TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 079906d..61bae58 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -411,8 +411,9 @@ bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
return true;
}
-bool RISCVLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
- MachineInstr &MI) const {
+bool RISCVLegalizerInfo::legalizeCustom(
+ LegalizerHelper &Helper, MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) const {
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
GISelChangeObserver &Observer = Helper.Observer;
switch (MI.getOpcode()) {
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
index 48c3697..4335bd0 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
@@ -30,7 +30,8 @@ class RISCVLegalizerInfo : public LegalizerInfo {
public:
RISCVLegalizerInfo(const RISCVSubtarget &ST);
- bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;
+ bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) const override;
bool legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 03a59f8..27bb69d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1374,8 +1374,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
- ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL,
- ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
+ ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
+ ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
if (Subtarget.is64Bit())
setTargetDAGCombine(ISD::SRA);
@@ -12850,9 +12850,9 @@ struct CombineResult;
/// Helper class for folding sign/zero extensions.
/// In particular, this class is used for the following combines:
-/// add | add_vl -> vwadd(u) | vwadd(u)_w
-/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
-/// mul | mul_vl -> vwmul(u) | vwmul_su
+/// add_vl -> vwadd(u) | vwadd(u)_w
+/// sub_vl -> vwsub(u) | vwsub(u)_w
+/// mul_vl -> vwmul(u) | vwmul_su
///
/// An object of this class represents an operand of the operation we want to
/// combine.
@@ -12897,8 +12897,6 @@ struct NodeExtensionHelper {
/// E.g., for zext(a), this would return a.
SDValue getSource() const {
switch (OrigOperand.getOpcode()) {
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND:
case RISCVISD::VSEXT_VL:
case RISCVISD::VZEXT_VL:
return OrigOperand.getOperand(0);
@@ -12915,8 +12913,7 @@ struct NodeExtensionHelper {
/// Get or create a value that can feed \p Root with the given extension \p
/// SExt. If \p SExt is std::nullopt, this returns the source of this operand.
/// \see ::getSource().
- SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget,
+ SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG,
std::optional<bool> SExt) const {
if (!SExt.has_value())
return OrigOperand;
@@ -12931,10 +12928,8 @@ struct NodeExtensionHelper {
// If we need an extension, we should be changing the type.
SDLoc DL(Root);
- auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
+ auto [Mask, VL] = getMaskAndVL(Root);
switch (OrigOperand.getOpcode()) {
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND:
case RISCVISD::VSEXT_VL:
case RISCVISD::VZEXT_VL:
return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
@@ -12974,15 +12969,12 @@ struct NodeExtensionHelper {
/// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()).
static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) {
switch (Opcode) {
- case ISD::ADD:
case RISCVISD::ADD_VL:
case RISCVISD::VWADD_W_VL:
case RISCVISD::VWADDU_W_VL:
return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL;
- case ISD::MUL:
case RISCVISD::MUL_VL:
return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
- case ISD::SUB:
case RISCVISD::SUB_VL:
case RISCVISD::VWSUB_W_VL:
case RISCVISD::VWSUBU_W_VL:
@@ -12995,8 +12987,7 @@ struct NodeExtensionHelper {
/// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
/// newOpcode(a, b).
static unsigned getSUOpcode(unsigned Opcode) {
- assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
- "SU is only supported for MUL");
+ assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL");
return RISCVISD::VWMULSU_VL;
}
@@ -13004,10 +12995,8 @@ struct NodeExtensionHelper {
/// newOpcode(a, b).
static unsigned getWOpcode(unsigned Opcode, bool IsSExt) {
switch (Opcode) {
- case ISD::ADD:
case RISCVISD::ADD_VL:
return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL;
- case ISD::SUB:
case RISCVISD::SUB_VL:
return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL;
default:
@@ -13017,33 +13006,19 @@ struct NodeExtensionHelper {
using CombineToTry = std::function<std::optional<CombineResult>(
SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
- const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
- const RISCVSubtarget &)>;
+ const NodeExtensionHelper & /*RHS*/)>;
/// Check if this node needs to be fully folded or extended for all users.
bool needToPromoteOtherUsers() const { return EnforceOneUse; }
/// Helper method to set the various fields of this struct based on the
/// type of \p Root.
- void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+ void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) {
SupportsZExt = false;
SupportsSExt = false;
EnforceOneUse = true;
CheckMask = true;
- unsigned Opc = OrigOperand.getOpcode();
- switch (Opc) {
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND: {
- if (OrigOperand.getValueType().isVector()) {
- SupportsZExt = Opc == ISD::ZERO_EXTEND;
- SupportsSExt = Opc == ISD::SIGN_EXTEND;
- SDLoc DL(Root);
- MVT VT = Root->getSimpleValueType(0);
- std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
- }
- break;
- }
+ switch (OrigOperand.getOpcode()) {
case RISCVISD::VZEXT_VL:
SupportsZExt = true;
Mask = OrigOperand.getOperand(1);
@@ -13099,16 +13074,8 @@ struct NodeExtensionHelper {
}
/// Check if \p Root supports any extension folding combines.
- static bool isSupportedRoot(const SDNode *Root, const SelectionDAG &DAG) {
+ static bool isSupportedRoot(const SDNode *Root) {
switch (Root->getOpcode()) {
- case ISD::ADD:
- case ISD::SUB:
- case ISD::MUL: {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!TLI.isTypeLegal(Root->getValueType(0)))
- return false;
- return Root->getValueType(0).isScalableVector();
- }
case RISCVISD::ADD_VL:
case RISCVISD::MUL_VL:
case RISCVISD::VWADD_W_VL:
@@ -13123,10 +13090,9 @@ struct NodeExtensionHelper {
}
/// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
- NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
- assert(isSupportedRoot(Root, DAG) && "Trying to build an helper with an "
- "unsupported root");
+ NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) {
+    assert(isSupportedRoot(Root) && "Trying to build a helper with an "
+                                    "unsupported root");
assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
OrigOperand = Root->getOperand(OperandIdx);
@@ -13142,7 +13108,7 @@ struct NodeExtensionHelper {
SupportsZExt =
Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
SupportsSExt = !SupportsZExt;
- std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget);
+ std::tie(Mask, VL) = getMaskAndVL(Root);
CheckMask = true;
// There's no existing extension here, so we don't have to worry about
// making sure it gets removed.
@@ -13151,7 +13117,7 @@ struct NodeExtensionHelper {
}
[[fallthrough]];
default:
- fillUpExtensionSupport(Root, DAG, Subtarget);
+ fillUpExtensionSupport(Root, DAG);
break;
}
}
@@ -13167,27 +13133,14 @@ struct NodeExtensionHelper {
}
/// Helper function to get the Mask and VL from \p Root.
- static std::pair<SDValue, SDValue>
- getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
- assert(isSupportedRoot(Root, DAG) && "Unexpected root");
- switch (Root->getOpcode()) {
- case ISD::ADD:
- case ISD::SUB:
- case ISD::MUL: {
- SDLoc DL(Root);
- MVT VT = Root->getSimpleValueType(0);
- return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
- }
- default:
- return std::make_pair(Root->getOperand(3), Root->getOperand(4));
- }
+ static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) {
+ assert(isSupportedRoot(Root) && "Unexpected root");
+ return std::make_pair(Root->getOperand(3), Root->getOperand(4));
}
/// Check if the Mask and VL of this operand are compatible with \p Root.
- bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) const {
- auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
+ bool areVLAndMaskCompatible(const SDNode *Root) const {
+ auto [Mask, VL] = getMaskAndVL(Root);
return isMaskCompatible(Mask) && isVLCompatible(VL);
}
@@ -13195,14 +13148,11 @@ struct NodeExtensionHelper {
/// foldings that are supported by this class.
static bool isCommutative(const SDNode *N) {
switch (N->getOpcode()) {
- case ISD::ADD:
- case ISD::MUL:
case RISCVISD::ADD_VL:
case RISCVISD::MUL_VL:
case RISCVISD::VWADD_W_VL:
case RISCVISD::VWADDU_W_VL:
return true;
- case ISD::SUB:
case RISCVISD::SUB_VL:
case RISCVISD::VWSUB_W_VL:
case RISCVISD::VWSUBU_W_VL:
@@ -13247,25 +13197,14 @@ struct CombineResult {
/// Return a value that uses TargetOpcode and that can be used to replace
/// Root.
/// The actual replacement is *not* done in that method.
- SDValue materialize(SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) const {
+ SDValue materialize(SelectionDAG &DAG) const {
SDValue Mask, VL, Merge;
- std::tie(Mask, VL) =
- NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
- switch (Root->getOpcode()) {
- default:
- Merge = Root->getOperand(2);
- break;
- case ISD::ADD:
- case ISD::SUB:
- case ISD::MUL:
- Merge = DAG.getUNDEF(Root->getValueType(0));
- break;
- }
+ std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root);
+ Merge = Root->getOperand(2);
return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
- LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtLHS),
- RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtRHS),
- Merge, Mask, VL);
+ LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS),
+ RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge,
+ Mask, VL);
}
};
@@ -13282,16 +13221,15 @@ struct CombineResult {
static std::optional<CombineResult>
canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
const NodeExtensionHelper &RHS, bool AllowSExt,
- bool AllowZExt, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+ bool AllowZExt) {
assert((AllowSExt || AllowZExt) && "Forgot to set what you want?");
- if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
- !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
+ if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
return std::nullopt;
if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt)
return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
Root->getOpcode(), /*IsSExt=*/false),
- Root, LHS, /*SExtLHS=*/false, RHS, /*SExtRHS=*/false);
+ Root, LHS, /*SExtLHS=*/false, RHS,
+ /*SExtRHS=*/false);
if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt)
return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
Root->getOpcode(), /*IsSExt=*/true),
@@ -13308,10 +13246,9 @@ canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+ const NodeExtensionHelper &RHS) {
return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
- /*AllowZExt=*/true, DAG, Subtarget);
+ /*AllowZExt=*/true);
}
/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
@@ -13320,9 +13257,8 @@ canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
- if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
+ const NodeExtensionHelper &RHS) {
+ if (!RHS.areVLAndMaskCompatible(Root))
return std::nullopt;
// FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
@@ -13346,10 +13282,9 @@ canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+ const NodeExtensionHelper &RHS) {
return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
- /*AllowZExt=*/false, DAG, Subtarget);
+ /*AllowZExt=*/false);
}
/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
@@ -13358,10 +13293,9 @@ canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+ const NodeExtensionHelper &RHS) {
return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false,
- /*AllowZExt=*/true, DAG, Subtarget);
+ /*AllowZExt=*/true);
}
/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
@@ -13370,13 +13304,10 @@ canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
-
+ const NodeExtensionHelper &RHS) {
if (!LHS.SupportsSExt || !RHS.SupportsZExt)
return std::nullopt;
- if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
- !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
+ if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
return std::nullopt;
return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
@@ -13386,8 +13317,6 @@ SmallVector<NodeExtensionHelper::CombineToTry>
NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
SmallVector<CombineToTry> Strategies;
switch (Root->getOpcode()) {
- case ISD::ADD:
- case ISD::SUB:
case RISCVISD::ADD_VL:
case RISCVISD::SUB_VL:
// add|sub -> vwadd(u)|vwsub(u)
@@ -13395,7 +13324,6 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
// add|sub -> vwadd(u)_w|vwsub(u)_w
Strategies.push_back(canFoldToVW_W);
break;
- case ISD::MUL:
case RISCVISD::MUL_VL:
// mul -> vwmul(u)
Strategies.push_back(canFoldToVWWithSameExtension);
@@ -13426,14 +13354,12 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
/// mul_vl -> vwmul(u) | vwmul_su
/// vwadd_w(u) -> vwadd(u)
/// vwsub_w(u) -> vwsub(u)
-static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const RISCVSubtarget &Subtarget) {
+static SDValue
+combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
- if (!NodeExtensionHelper::isSupportedRoot(N, DAG))
- return SDValue();
-
+ assert(NodeExtensionHelper::isSupportedRoot(N) &&
+ "Shouldn't have called this method");
SmallVector<SDNode *> Worklist;
SmallSet<SDNode *, 8> Inserted;
Worklist.push_back(N);
@@ -13442,11 +13368,11 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
while (!Worklist.empty()) {
SDNode *Root = Worklist.pop_back_val();
- if (!NodeExtensionHelper::isSupportedRoot(Root, DAG))
+ if (!NodeExtensionHelper::isSupportedRoot(Root))
return SDValue();
- NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
- NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
+ NodeExtensionHelper LHS(N, 0, DAG);
+ NodeExtensionHelper RHS(N, 1, DAG);
auto AppendUsersIfNeeded = [&Worklist,
&Inserted](const NodeExtensionHelper &Op) {
if (Op.needToPromoteOtherUsers()) {
@@ -13473,8 +13399,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
for (NodeExtensionHelper::CombineToTry FoldingStrategy :
FoldingStrategies) {
- std::optional<CombineResult> Res =
- FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
+ std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS);
if (Res) {
Matched = true;
CombinesToApply.push_back(*Res);
@@ -13503,7 +13428,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
ValuesToReplace.reserve(CombinesToApply.size());
for (CombineResult Res : CombinesToApply) {
- SDValue NewValue = Res.materialize(DAG, Subtarget);
+ SDValue NewValue = Res.materialize(DAG);
if (!InputRootReplacement) {
assert(Res.Root == N &&
"First element is expected to be the current node");
@@ -14775,20 +14700,13 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
-
- assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
-
- if (N->getValueType(0).isFixedLengthVector())
- return SDValue();
-
+ assert(N->getOpcode() == RISCVISD::ADD_VL);
SDValue Addend = N->getOperand(0);
SDValue MulOp = N->getOperand(1);
+ SDValue AddMergeOp = N->getOperand(2);
- if (N->getOpcode() == RISCVISD::ADD_VL) {
- SDValue AddMergeOp = N->getOperand(2);
- if (!AddMergeOp.isUndef())
- return SDValue();
- }
+ if (!AddMergeOp.isUndef())
+ return SDValue();
auto IsVWMulOpc = [](unsigned Opc) {
switch (Opc) {
@@ -14812,16 +14730,8 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
if (!MulMergeOp.isUndef())
return SDValue();
- auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
- if (N->getOpcode() == ISD::ADD) {
- SDLoc DL(N);
- return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
- Subtarget);
- }
- return std::make_pair(N->getOperand(3), N->getOperand(4));
- }(N, DAG, Subtarget);
-
+ SDValue AddMask = N->getOperand(3);
+ SDValue AddVL = N->getOperand(4);
SDValue MulMask = MulOp.getOperand(3);
SDValue MulVL = MulOp.getOperand(4);
@@ -15087,18 +14997,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getNode(ISD::AND, DL, VT, NewFMV,
DAG.getConstant(~SignBit, DL, VT));
}
- case ISD::ADD: {
- if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
- return V;
- if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
- return V;
+ case ISD::ADD:
return performADDCombine(N, DAG, Subtarget);
- }
- case ISD::SUB: {
- if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
- return V;
+ case ISD::SUB:
return performSUBCombine(N, DAG, Subtarget);
- }
case ISD::AND:
return performANDCombine(N, DCI, Subtarget);
case ISD::OR:
@@ -15106,8 +15008,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::XOR:
return performXORCombine(N, DAG, Subtarget);
case ISD::MUL:
- if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
- return V;
return performMULCombine(N, DAG);
case ISD::FADD:
case ISD::UMAX:
@@ -15584,7 +15484,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
case RISCVISD::ADD_VL:
- if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
+ if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI))
return V;
return combineToVWMACC(N, DAG, Subtarget);
case RISCVISD::SUB_VL:
@@ -15593,7 +15493,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case RISCVISD::VWSUB_W_VL:
case RISCVISD::VWSUBU_W_VL:
case RISCVISD::MUL_VL:
- return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
+ return combineBinOp_VLToVWBinOp_VL(N, DCI);
case RISCVISD::VFMADD_VL:
case RISCVISD::VFNMADD_VL:
case RISCVISD::VFMSUB_VL:
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index faaf7f0..061bc96 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -289,8 +289,9 @@ static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpirvType,
return ConvReg;
}
-bool SPIRVLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
- MachineInstr &MI) const {
+bool SPIRVLegalizerInfo::legalizeCustom(
+ LegalizerHelper &Helper, MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) const {
auto Opc = MI.getOpcode();
MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
if (!isTypeFoldingSupported(Opc)) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h
index 2541ff2..f18b15b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h
@@ -29,7 +29,8 @@ class SPIRVLegalizerInfo : public LegalizerInfo {
SPIRVGlobalRegistry *GR;
public:
- bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;
+ bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) const override;
SPIRVLegalizerInfo(const SPIRVSubtarget &ST);
};
} // namespace llvm
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1e4b136..cd56529 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40221,6 +40221,34 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
}
return SDValue();
}
+ case X86ISD::SHUF128: {
+    // If we're permuting the upper 256-bit subvectors of a concatenation, see
+    // if we can peek through and access the subvector directly.
+ if (VT.is512BitVector()) {
+      // The 512-bit mask uses 4 x i2 indices - if the MSB of each index
+      // selecting from an operand is set, only that operand's upper
+      // subvector is used.
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ uint64_t Mask = N->getConstantOperandVal(2);
+ SmallVector<SDValue> LHSOps, RHSOps;
+ SDValue NewLHS, NewRHS;
+ if ((Mask & 0x0A) == 0x0A &&
+ collectConcatOps(LHS.getNode(), LHSOps, DAG) && LHSOps.size() == 2) {
+ NewLHS = widenSubVector(LHSOps[1], false, Subtarget, DAG, DL, 512);
+ Mask &= ~0x0A;
+ }
+ if ((Mask & 0xA0) == 0xA0 &&
+ collectConcatOps(RHS.getNode(), RHSOps, DAG) && RHSOps.size() == 2) {
+ NewRHS = widenSubVector(RHSOps[1], false, Subtarget, DAG, DL, 512);
+ Mask &= ~0xA0;
+ }
+ if (NewLHS || NewRHS)
+ return DAG.getNode(X86ISD::SHUF128, DL, VT, NewLHS ? NewLHS : LHS,
+ NewRHS ? NewRHS : RHS,
+ DAG.getTargetConstant(Mask, DL, MVT::i8));
+ }
+ return SDValue();
+ }
case X86ISD::VPERM2X128: {
// Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)).
SDValue LHS = N->getOperand(0);
@@ -54572,6 +54600,14 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
Op0.getValueType() == cast<MemSDNode>(SrcVec)->getMemoryVT())
return Op0.getOperand(0);
}
+
+ // concat_vectors(permq(x),permq(x)) -> permq(concat_vectors(x,x))
+ if (Op0.getOpcode() == X86ISD::VPERMI && Subtarget.useAVX512Regs() &&
+ !X86::mayFoldLoad(Op0.getOperand(0), Subtarget))
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
+ Op0.getOperand(0), Op0.getOperand(0)),
+ Op0.getOperand(1));
}
// concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128.
@@ -54979,6 +55015,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
}
break;
+ case X86ISD::BLENDI:
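+      // Concatenate the two 256-bit blend masks into one 512-bit mask and
+      // lower the pair as a single predicated vector select.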
+ if (NumOps == 2 && VT.is512BitVector() && Subtarget.useBWIRegs()) {
+ uint64_t Mask0 = Ops[0].getConstantOperandVal(2);
+ uint64_t Mask1 = Ops[1].getConstantOperandVal(2);
+ uint64_t Mask = (Mask1 << (VT.getVectorNumElements() / 2)) | Mask0;
+ MVT MaskSVT = MVT::getIntegerVT(VT.getVectorNumElements());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ SDValue Sel =
+ DAG.getBitcast(MaskVT, DAG.getConstant(Mask, DL, MaskSVT));
+ return DAG.getSelect(DL, VT, Sel, ConcatSubOperand(VT, Ops, 1),
+ ConcatSubOperand(VT, Ops, 0));
+ }
+ break;
case ISD::VSELECT:
if (!IsSplat && Subtarget.hasAVX512() &&
(VT.is256BitVector() ||
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 6b0c1b8..08f5a88 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -350,212 +350,212 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
let isCommutable = CommutableRR,
isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in {
let Predicates = [NoNDD] in {
- def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
- def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16;
- def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32;
- def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+ def 8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
+ def 16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16;
+ def 32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32;
+ def 64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
}
let Predicates = [HasNDD, In64BitMode] in {
- def NAME#8rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag, 1>;
- def NAME#16rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag, 1>, PD;
- def NAME#32rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag, 1>;
- def NAME#64rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag, 1>;
- def NAME#8rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi8, 1>, EVEX_NF;
- def NAME#16rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi16, 1>, EVEX_NF, PD;
- def NAME#32rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi32, 1>, EVEX_NF;
- def NAME#64rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi64, 1>, EVEX_NF;
+ def 8rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag, 1>;
+ def 16rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag, 1>, PD;
+ def 32rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag, 1>;
+ def 64rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag, 1>;
+ def 8rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi8, 1>, EVEX_NF;
+ def 16rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi16, 1>, EVEX_NF, PD;
+ def 32rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi32, 1>, EVEX_NF;
+ def 64rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi64, 1>, EVEX_NF;
}
let Predicates = [In64BitMode] in {
- def NAME#8rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi8>, NF;
- def NAME#16rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi16>, NF, PD;
- def NAME#32rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi32>, NF;
- def NAME#64rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi64>, NF;
- def NAME#8rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL;
- def NAME#16rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD;
- def NAME#32rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL;
- def NAME#64rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL;
+ def 8rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi8>, NF;
+ def 16rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi16>, NF, PD;
+ def 32rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi32>, NF;
+ def 64rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi64>, NF;
+ def 8rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL;
+ def 16rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD;
+ def 32rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL;
+ def 64rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL;
}
}
- def NAME#8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>;
- def NAME#16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16;
- def NAME#32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32;
- def NAME#64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>;
+ def 8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>;
+ def 16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16;
+ def 32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32;
+ def 64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>;
let Predicates = [In64BitMode] in {
- def NAME#8rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL;
- def NAME#16rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD;
- def NAME#32rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL;
- def NAME#64rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL;
- def NAME#8rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>;
- def NAME#16rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD;
- def NAME#32rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>;
- def NAME#64rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>;
- def NAME#8rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8>, NF;
- def NAME#16rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16>, NF, PD;
- def NAME#32rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32>, NF;
- def NAME#64rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64>, NF;
- def NAME#8rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF;
- def NAME#16rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD;
- def NAME#32rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF;
- def NAME#64rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF;
+ def 8rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL;
+ def 16rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD;
+ def 32rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL;
+ def 64rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL;
+ def 8rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>;
+ def 16rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD;
+ def 32rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>;
+ def 64rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>;
+ def 8rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8>, NF;
+ def 16rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16>, NF, PD;
+ def 32rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32>, NF;
+ def 64rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64>, NF;
+ def 8rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF;
+ def 16rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD;
+ def 32rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF;
+ def 64rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF;
}
let Predicates = [NoNDD] in {
- def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
- def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16;
- def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32;
- def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+ def 8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
+ def 16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16;
+ def 32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32;
+ def 64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
}
let Predicates = [HasNDD, In64BitMode] in {
- def NAME#8rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag, 1>;
- def NAME#16rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag, 1>, PD;
- def NAME#32rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag, 1>;
- def NAME#64rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag, 1>;
- def NAME#8rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF;
- def NAME#16rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD;
- def NAME#32rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF;
- def NAME#64rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF;
+ def 8rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag, 1>;
+ def 16rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag, 1>, PD;
+ def 32rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag, 1>;
+ def 64rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag, 1>;
+ def 8rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF;
+ def 16rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD;
+ def 32rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF;
+ def 64rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF;
}
let Predicates = [In64BitMode] in {
- def NAME#8rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi8>, NF;
- def NAME#16rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi16>, NF, PD;
- def NAME#32rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi32>, NF;
- def NAME#64rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi64>, NF;
- def NAME#8rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , null_frag>, PL;
- def NAME#16rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, null_frag>, PL, PD;
- def NAME#32rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, null_frag>, PL;
- def NAME#64rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, null_frag>, PL;
+ def 8rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi8>, NF;
+ def 16rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi16>, NF, PD;
+ def 32rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi32>, NF;
+ def 64rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi64>, NF;
+ def 8rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , null_frag>, PL;
+ def 16rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, null_frag>, PL, PD;
+ def 32rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, null_frag>, PL;
+ def 64rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, null_frag>, PL;
}
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
let Predicates = [NoNDD] in {
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def NAME#16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16;
- def NAME#32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32;
- def NAME#64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>;
- def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
- def NAME#16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16;
- def NAME#32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32;
- def NAME#64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>;
+ def 16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16;
+ def 32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32;
+ def 64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>;
+ def 8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+ def 16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16;
+ def 32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32;
+ def 64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>;
}
let Predicates = [HasNDD, In64BitMode] in {
- def NAME#16ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD;
- def NAME#32ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM, 1>;
- def NAME#64ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM, 1>;
- def NAME#8ri_ND : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM, 1>;
- def NAME#16ri_ND : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM, 1>, PD;
- def NAME#32ri_ND : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM, 1>;
- def NAME#64ri32_ND: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM, 1>;
- def NAME#16ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD;
- def NAME#32ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM, 1>, EVEX_NF;
- def NAME#64ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM, 1>, EVEX_NF;
- def NAME#8ri_NF_ND : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM, 1>, EVEX_NF;
- def NAME#16ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD;
- def NAME#32ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM, 1>, EVEX_NF;
- def NAME#64ri32_NF_ND : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM, 1>, EVEX_NF;
+ def 16ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD;
+ def 32ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM, 1>;
+ def 64ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM, 1>;
+ def 8ri_ND : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM, 1>;
+ def 16ri_ND : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM, 1>, PD;
+ def 32ri_ND : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM, 1>;
+ def 64ri32_ND: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM, 1>;
+ def 16ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD;
+ def 32ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM, 1>, EVEX_NF;
+ def 64ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM, 1>, EVEX_NF;
+ def 8ri_NF_ND : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM, 1>, EVEX_NF;
+ def 16ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD;
+ def 32ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM, 1>, EVEX_NF;
+ def 64ri32_NF_ND : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM, 1>, EVEX_NF;
}
let Predicates = [In64BitMode] in {
- def NAME#16ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM>, NF, PD;
- def NAME#32ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM>, NF;
- def NAME#64ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM>, NF;
- def NAME#8ri_NF : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM>, NF;
- def NAME#16ri_NF : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM>, NF, PD;
- def NAME#32ri_NF : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM>, NF;
- def NAME#64ri32_NF : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM>, NF;
- def NAME#16ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD;
- def NAME#32ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, PL;
- def NAME#64ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>, PL;
- def NAME#8ri_EVEX : BinOpRI_RF<0x80, mnemonic, Xi8 , null_frag, RegMRM>, PL;
- def NAME#16ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi16, null_frag, RegMRM>, PL, PD;
- def NAME#32ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi32, null_frag, RegMRM>, PL;
- def NAME#64ri32_EVEX: BinOpRI_RF<0x81, mnemonic, Xi64, null_frag, RegMRM>, PL;
+ def 16ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM>, NF, PD;
+ def 32ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM>, NF;
+ def 64ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM>, NF;
+ def 8ri_NF : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM>, NF;
+ def 16ri_NF : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM>, NF, PD;
+ def 32ri_NF : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM>, NF;
+ def 64ri32_NF : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM>, NF;
+ def 16ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD;
+ def 32ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, PL;
+ def 64ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>, PL;
+ def 8ri_EVEX : BinOpRI_RF<0x80, mnemonic, Xi8 , null_frag, RegMRM>, PL;
+ def 16ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi16, null_frag, RegMRM>, PL, PD;
+ def 32ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi32, null_frag, RegMRM>, PL;
+ def 64ri32_EVEX: BinOpRI_RF<0x81, mnemonic, Xi64, null_frag, RegMRM>, PL;
}
}
- def NAME#8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>;
- def NAME#16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
- def NAME#32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
- def NAME#64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>;
+ def 8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def 16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
+ def 32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
+ def 64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>;
let Predicates = [HasNDD, In64BitMode] in {
- def NAME#8mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi8 , opnode>;
- def NAME#16mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi16, opnode>, PD;
- def NAME#32mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi32, opnode>;
- def NAME#64mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi64, opnode>;
- def NAME#8mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi8>, EVEX_NF;
- def NAME#16mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi16>, EVEX_NF, PD;
- def NAME#32mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi32>, EVEX_NF;
- def NAME#64mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi64>, EVEX_NF;
+ def 8mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def 16mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi16, opnode>, PD;
+ def 32mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi32, opnode>;
+ def 64mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi64, opnode>;
+ def 8mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi8>, EVEX_NF;
+ def 16mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi16>, EVEX_NF, PD;
+ def 32mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi32>, EVEX_NF;
+ def 64mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi64>, EVEX_NF;
}
let Predicates = [In64BitMode] in {
- def NAME#8mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi8>, NF;
- def NAME#16mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi16>, NF, PD;
- def NAME#32mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi32>, NF;
- def NAME#64mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi64>, NF;
- def NAME#8mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL;
- def NAME#16mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD;
- def NAME#32mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL;
- def NAME#64mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL;
+ def 8mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi8>, NF;
+ def 16mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi16>, NF, PD;
+ def 32mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi32>, NF;
+ def 64mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi64>, NF;
+ def 8mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL;
+ def 16mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD;
+ def 32mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL;
+ def 64mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL;
}
// NOTE: These are order-specific; we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def NAME#16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16;
- def NAME#32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32;
+ def 16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16;
+ def 32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32;
let Predicates = [In64BitMode] in
- def NAME#64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>;
- def NAME#8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
- def NAME#16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16;
- def NAME#32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32;
+ def 64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>;
+ def 8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def 16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16;
+ def 32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32;
let Predicates = [In64BitMode] in
- def NAME#64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>;
+ def 64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>;
let Predicates = [HasNDD, In64BitMode] in {
- def NAME#16mi8_ND : BinOpMI8_RF<mnemonic, Xi16, MemMRM>, PD;
- def NAME#32mi8_ND : BinOpMI8_RF<mnemonic, Xi32, MemMRM>;
- def NAME#64mi8_ND : BinOpMI8_RF<mnemonic, Xi64, MemMRM>;
- def NAME#8mi_ND : BinOpMI_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
- def NAME#16mi_ND : BinOpMI_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD;
- def NAME#32mi_ND : BinOpMI_RF<0x81, mnemonic, Xi32, opnode, MemMRM>;
- def NAME#64mi32_ND : BinOpMI_RF<0x81, mnemonic, Xi64, opnode, MemMRM>;
- def NAME#16mi8_NF_ND : BinOpMI8_R<mnemonic, Xi16, MemMRM>, NF, PD;
- def NAME#32mi8_NF_ND : BinOpMI8_R<mnemonic, Xi32, MemMRM>, NF;
- def NAME#64mi8_NF_ND : BinOpMI8_R<mnemonic, Xi64, MemMRM>, NF;
- def NAME#8mi_NF_ND : BinOpMI_R<0x80, mnemonic, Xi8, MemMRM>, NF;
- def NAME#16mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi16, MemMRM>, NF, PD;
- def NAME#32mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi32, MemMRM>, NF;
- def NAME#64mi32_NF_ND : BinOpMI_R<0x81, mnemonic, Xi64, MemMRM>, NF;
+ def 16mi8_ND : BinOpMI8_RF<mnemonic, Xi16, MemMRM>, PD;
+ def 32mi8_ND : BinOpMI8_RF<mnemonic, Xi32, MemMRM>;
+ def 64mi8_ND : BinOpMI8_RF<mnemonic, Xi64, MemMRM>;
+ def 8mi_ND : BinOpMI_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def 16mi_ND : BinOpMI_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD;
+ def 32mi_ND : BinOpMI_RF<0x81, mnemonic, Xi32, opnode, MemMRM>;
+ def 64mi32_ND : BinOpMI_RF<0x81, mnemonic, Xi64, opnode, MemMRM>;
+ def 16mi8_NF_ND : BinOpMI8_R<mnemonic, Xi16, MemMRM>, NF, PD;
+ def 32mi8_NF_ND : BinOpMI8_R<mnemonic, Xi32, MemMRM>, NF;
+ def 64mi8_NF_ND : BinOpMI8_R<mnemonic, Xi64, MemMRM>, NF;
+ def 8mi_NF_ND : BinOpMI_R<0x80, mnemonic, Xi8, MemMRM>, NF;
+ def 16mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi16, MemMRM>, NF, PD;
+ def 32mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi32, MemMRM>, NF;
+ def 64mi32_NF_ND : BinOpMI_R<0x81, mnemonic, Xi64, MemMRM>, NF;
}
let Predicates = [In64BitMode] in {
- def NAME#16mi8_NF : BinOpMI8_M<mnemonic, Xi16, MemMRM>, NF, PD;
- def NAME#32mi8_NF : BinOpMI8_M<mnemonic, Xi32, MemMRM>, NF;
- def NAME#64mi8_NF : BinOpMI8_M<mnemonic, Xi64, MemMRM>, NF;
- def NAME#8mi_NF : BinOpMI_M<0x80, mnemonic, Xi8, MemMRM>, NF;
- def NAME#16mi_NF : BinOpMI_M<0x81, mnemonic, Xi16, MemMRM>, NF, PD;
- def NAME#32mi_NF : BinOpMI_M<0x81, mnemonic, Xi32, MemMRM>, NF;
- def NAME#64mi32_NF : BinOpMI_M<0x81, mnemonic, Xi64, MemMRM>, NF;
- def NAME#16mi8_EVEX : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, PL, PD;
- def NAME#32mi8_EVEX : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, PL;
- def NAME#64mi8_EVEX : BinOpMI8_MF<mnemonic, Xi64, MemMRM>, PL;
- def NAME#8mi_EVEX : BinOpMI_MF<0x80, mnemonic, Xi8 , null_frag, MemMRM>, PL;
- def NAME#16mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi16, null_frag, MemMRM>, PL, PD;
- def NAME#32mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi32, null_frag, MemMRM>, PL;
- def NAME#64mi32_EVEX : BinOpMI_MF<0x81, mnemonic, Xi64, null_frag, MemMRM>, PL;
+ def 16mi8_NF : BinOpMI8_M<mnemonic, Xi16, MemMRM>, NF, PD;
+ def 32mi8_NF : BinOpMI8_M<mnemonic, Xi32, MemMRM>, NF;
+ def 64mi8_NF : BinOpMI8_M<mnemonic, Xi64, MemMRM>, NF;
+ def 8mi_NF : BinOpMI_M<0x80, mnemonic, Xi8, MemMRM>, NF;
+ def 16mi_NF : BinOpMI_M<0x81, mnemonic, Xi16, MemMRM>, NF, PD;
+ def 32mi_NF : BinOpMI_M<0x81, mnemonic, Xi32, MemMRM>, NF;
+ def 64mi32_NF : BinOpMI_M<0x81, mnemonic, Xi64, MemMRM>, NF;
+ def 16mi8_EVEX : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, PL, PD;
+ def 32mi8_EVEX : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, PL;
+ def 64mi8_EVEX : BinOpMI8_MF<mnemonic, Xi64, MemMRM>, PL;
+ def 8mi_EVEX : BinOpMI_MF<0x80, mnemonic, Xi8 , null_frag, MemMRM>, PL;
+ def 16mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi16, null_frag, MemMRM>, PL, PD;
+ def 32mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi32, null_frag, MemMRM>, PL;
+ def 64mi32_EVEX : BinOpMI_MF<0x81, mnemonic, Xi64, null_frag, MemMRM>, PL;
}
// These are for the disassembler, since the 0x82 opcode behaves like 0x80,
// but not in 64-bit mode.
let Predicates = [Not64BitMode] in {
- def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly;
- def NAME#8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly;
+ def 8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly;
+ def 8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly;
}
- def NAME#8i8 : BinOpAI_AF<BaseOpc4, mnemonic, Xi8 , AL,
+ def 8i8 : BinOpAI_AF<BaseOpc4, mnemonic, Xi8 , AL,
"{$src, %al|al, $src}">;
- def NAME#16i16 : BinOpAI_AF<BaseOpc4, mnemonic, Xi16, AX,
+ def 16i16 : BinOpAI_AF<BaseOpc4, mnemonic, Xi16, AX,
"{$src, %ax|ax, $src}">, OpSize16;
- def NAME#32i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi32, EAX,
+ def 32i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi32, EAX,
"{$src, %eax|eax, $src}">, OpSize32;
- def NAME#64i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi64, RAX,
+ def 64i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi64, RAX,
"{$src, %rax|rax, $src}">;
}
@@ -571,162 +571,162 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
bit ConvertibleToThreeAddress> {
let isCommutable = CommutableRR in {
let Predicates = [NoNDD] in {
- def NAME#8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def 8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def NAME#16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
- def NAME#32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
- def NAME#64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>;
+ def 16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
+ def 32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
+ def 64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>;
}
}
let Predicates = [HasNDD, In64BitMode] in {
- def NAME#8rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode, 1>;
+ def 8rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode, 1>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def NAME#16rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode, 1>, PD;
- def NAME#32rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode, 1>;
- def NAME#64rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode, 1>;
+ def 16rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode, 1>, PD;
+ def 32rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode, 1>;
+ def 64rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode, 1>;
}
}
} // isCommutable
let Predicates = [In64BitMode] in {
- def NAME#8rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL;
- def NAME#16rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD;
- def NAME#32rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL;
- def NAME#64rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL;
+ def 8rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL;
+ def 16rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD;
+ def 32rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL;
+ def 64rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL;
}
- def NAME#8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>;
- def NAME#16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16;
- def NAME#32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32;
- def NAME#64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>;
+ def 8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>;
+ def 16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16;
+ def 32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32;
+ def 64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>;
let Predicates = [In64BitMode] in {
- def NAME#8rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>;
- def NAME#16rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD;
- def NAME#32rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>;
- def NAME#64rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>;
- def NAME#8rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL;
- def NAME#16rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD;
- def NAME#32rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL;
- def NAME#64rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL;
+ def 8rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>;
+ def 16rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD;
+ def 32rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>;
+ def 64rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>;
+ def 8rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL;
+ def 16rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD;
+ def 32rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL;
+ def 64rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL;
}
let Predicates = [NoNDD] in {
- def NAME#8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>;
- def NAME#16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16;
- def NAME#32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32;
- def NAME#64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>;
+ def 8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def 16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16;
+ def 32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32;
+ def 64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>;
}
let Predicates = [HasNDD, In64BitMode] in {
- def NAME#8rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode, 1>;
- def NAME#16rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode, 1>, PD;
- def NAME#32rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode, 1>;
- def NAME#64rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode, 1>;
+ def 8rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode, 1>;
+ def 16rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode, 1>, PD;
+ def 32rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode, 1>;
+ def 64rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode, 1>;
}
let Predicates = [In64BitMode] in {
- def NAME#8rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>, PL;
- def NAME#16rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, PL, PD;
- def NAME#32rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, PL;
- def NAME#64rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>, PL;
+ def 8rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>, PL;
+ def 16rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, PL, PD;
+ def 32rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, PL;
+ def 64rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>, PL;
}
let Predicates = [NoNDD] in {
- def NAME#8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def 8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order-specific; we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def NAME#16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16;
- def NAME#32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32;
- def NAME#64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>;
+ def 16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16;
+ def 32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32;
+ def 64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>;
- def NAME#16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16;
- def NAME#32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32;
- def NAME#64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>;
+ def 16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16;
+ def 32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32;
+ def 64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>;
}
}
let Predicates = [HasNDD, In64BitMode] in {
- def NAME#8ri_ND : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM, 1>;
+ def 8ri_ND : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM, 1>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def NAME#16ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD;
- def NAME#32ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM, 1>;
- def NAME#64ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM, 1>;
- def NAME#16ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM, 1>, PD;
- def NAME#32ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM, 1>;
- def NAME#64ri32_ND: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM, 1>;
+ def 16ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD;
+ def 32ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM, 1>;
+ def 64ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM, 1>;
+ def 16ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM, 1>, PD;
+ def 32ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM, 1>;
+ def 64ri32_ND: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM, 1>;
}
}
let Predicates = [In64BitMode] in {
- def NAME#8ri_EVEX : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>, PL;
- def NAME#16ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD;
- def NAME#32ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, PL;
- def NAME#64ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>, PL;
- def NAME#16ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, PL, PD;
- def NAME#32ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, PL;
- def NAME#64ri32_EVEX: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>, PL;
+ def 8ri_EVEX : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>, PL;
+ def 16ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD;
+ def 32ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, PL;
+ def 64ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>, PL;
+ def 16ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, PL, PD;
+ def 32ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, PL;
+ def 64ri32_EVEX: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>, PL;
}
- def NAME#8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>;
- def NAME#16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
- def NAME#32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
- def NAME#64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, opnode>;
+ def 8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def 16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
+ def 32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
+ def 64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, opnode>;
let Predicates = [HasNDD, In64BitMode] in {
- def NAME#8mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi8 , opnode>;
- def NAME#16mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi16, opnode>, PD;
- def NAME#32mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi32, opnode>;
- def NAME#64mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi64, opnode>;
+ def 8mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def 16mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi16, opnode>, PD;
+ def 32mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi32, opnode>;
+ def 64mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi64, opnode>;
}
let Predicates = [In64BitMode] in {
- def NAME#8mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL;
- def NAME#16mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD;
- def NAME#32mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL;
- def NAME#64mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL;
+ def 8mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL;
+ def 16mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD;
+ def 32mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL;
+ def 64mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL;
}
// NOTE: These are order-specific; we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def NAME#8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
- def NAME#16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16;
- def NAME#32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32;
+ def 8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def 16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16;
+ def 32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32;
let Predicates = [In64BitMode] in
- def NAME#64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>;
- def NAME#16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16;
- def NAME#32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32;
+ def 64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>;
+ def 16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16;
+ def 32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32;
let Predicates = [In64BitMode] in
- def NAME#64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>;
+ def 64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>;
let Predicates = [HasNDD, In64BitMode] in {
- def NAME#8mi_ND : BinOpMIF_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
- def NAME#16mi8_ND : BinOpMI8F_RF<mnemonic, Xi16, MemMRM>, PD;
- def NAME#32mi8_ND : BinOpMI8F_RF<mnemonic, Xi32, MemMRM>;
- def NAME#64mi8_ND : BinOpMI8F_RF<mnemonic, Xi64, MemMRM>;
- def NAME#16mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD;
- def NAME#32mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi32, opnode, MemMRM>;
- def NAME#64mi32_ND : BinOpMIF_RF<0x81, mnemonic, Xi64, opnode, MemMRM>;
+ def 8mi_ND : BinOpMIF_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def 16mi8_ND : BinOpMI8F_RF<mnemonic, Xi16, MemMRM>, PD;
+ def 32mi8_ND : BinOpMI8F_RF<mnemonic, Xi32, MemMRM>;
+ def 64mi8_ND : BinOpMI8F_RF<mnemonic, Xi64, MemMRM>;
+ def 16mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD;
+ def 32mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi32, opnode, MemMRM>;
+ def 64mi32_ND : BinOpMIF_RF<0x81, mnemonic, Xi64, opnode, MemMRM>;
}
let Predicates = [In64BitMode] in {
- def NAME#8mi_EVEX : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>, PL;
- def NAME#16mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, PL, PD;
- def NAME#32mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, PL;
- def NAME#64mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>, PL;
- def NAME#16mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, PL, PD;
- def NAME#32mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, PL;
- def NAME#64mi32_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>, PL;
+ def 8mi_EVEX : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>, PL;
+ def 16mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, PL, PD;
+ def 32mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, PL;
+ def 64mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>, PL;
+ def 16mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, PL, PD;
+ def 32mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, PL;
+ def 64mi32_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>, PL;
}
// These are for the disassembler, since the 0x82 opcode behaves like 0x80,
// but not in 64-bit mode.
let Predicates = [Not64BitMode] in {
- def NAME#8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly;
- def NAME#8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly;
+ def 8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly;
+ def 8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly;
}
- def NAME#8i8 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi8 , AL,
+ def 8i8 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi8 , AL,
"{$src, %al|al, $src}">;
- def NAME#16i16 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi16, AX,
+ def 16i16 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi16, AX,
"{$src, %ax|ax, $src}">, OpSize16;
- def NAME#32i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi32, EAX,
+ def 32i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi32, EAX,
"{$src, %eax|eax, $src}">, OpSize32;
- def NAME#64i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi64, RAX,
+ def 64i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi64, RAX,
"{$src, %rax|rax, $src}">;
}
@@ -739,71 +739,71 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
SDNode opnode, bit CommutableRR,
bit ConvertibleToThreeAddress> {
let isCommutable = CommutableRR in {
- def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def 8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
- def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
- def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ def 16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
+ def 32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
+ def 64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
} // isConvertibleToThreeAddress
} // isCommutable
- def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
- def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16;
- def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32;
- def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
+ def 8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
+ def 16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16;
+ def 32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32;
+ def 64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
- def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
- def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16;
- def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32;
- def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
+ def 8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def 16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16;
+ def 32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32;
+ def 64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
- def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def 8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order-specific; we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def NAME#16ri8 : BinOpRI8_F<0x83, mnemonic, Xi16, RegMRM>, OpSize16;
- def NAME#32ri8 : BinOpRI8_F<0x83, mnemonic, Xi32, RegMRM>, OpSize32;
- def NAME#64ri8 : BinOpRI8_F<0x83, mnemonic, Xi64, RegMRM>;
+ def 16ri8 : BinOpRI8_F<0x83, mnemonic, Xi16, RegMRM>, OpSize16;
+ def 32ri8 : BinOpRI8_F<0x83, mnemonic, Xi32, RegMRM>, OpSize32;
+ def 64ri8 : BinOpRI8_F<0x83, mnemonic, Xi64, RegMRM>;
- def NAME#16ri : BinOpRI_F<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16;
- def NAME#32ri : BinOpRI_F<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32;
- def NAME#64ri32: BinOpRI_F<0x81, mnemonic, Xi64, opnode, RegMRM>;
+ def 16ri : BinOpRI_F<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16;
+ def 32ri : BinOpRI_F<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32;
+ def 64ri32: BinOpRI_F<0x81, mnemonic, Xi64, opnode, RegMRM>;
}
- def NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
- def NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
- def NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
- def NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ def 8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def 16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
+ def 32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
+ def 64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
// NOTE: These are order-specific; we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>, OpSize16;
- def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>, OpSize32;
+ def 16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>, OpSize16;
+ def 32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>, OpSize32;
let Predicates = [In64BitMode] in
- def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>;
+ def 64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>;
- def NAME#8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>;
- def NAME#16mi : BinOpMI_F<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16;
- def NAME#32mi : BinOpMI_F<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32;
+ def 8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def 16mi : BinOpMI_F<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16;
+ def 32mi : BinOpMI_F<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32;
let Predicates = [In64BitMode] in
- def NAME#64mi32 : BinOpMI_F<0x81, mnemonic, Xi64, opnode, MemMRM>;
+ def 64mi32 : BinOpMI_F<0x81, mnemonic, Xi64, opnode, MemMRM>;
// These are for the disassembler, since the 0x82 opcode behaves like 0x80,
// but not in 64-bit mode.
let Predicates = [Not64BitMode] in {
- def NAME#8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly;
+ def 8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly;
let mayLoad = 1 in
- def NAME#8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>;
+ def 8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>;
}
- def NAME#8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL,
+ def 8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL,
"{$src, %al|al, $src}">;
- def NAME#16i16 : BinOpAI_F<BaseOpc4, mnemonic, Xi16, AX,
+ def 16i16 : BinOpAI_F<BaseOpc4, mnemonic, Xi16, AX,
"{$src, %ax|ax, $src}">, OpSize16;
- def NAME#32i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi32, EAX,
+ def 32i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi32, EAX,
"{$src, %eax|eax, $src}">, OpSize32;
- def NAME#64i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi64, RAX,
+ def 64i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi64, RAX,
"{$src, %rax|rax, $src}">;
}
diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 305bd74..772ed2a 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1212,36 +1212,33 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
(implicit EFLAGS)]>, TB, XS, Sched<[WriteTZCNTLd]>;
}
-multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
- RegisterClass RC, X86MemOperand x86memop,
- X86FoldableSchedWrite sched, string Suffix = ""> {
-let hasSideEffects = 0 in {
- def rr#Suffix : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
- !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8, VEX, VVVV, Sched<[sched]>;
- let mayLoad = 1 in
- def rm#Suffix : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8, VEX, VVVV, Sched<[sched.Folded]>;
-}
+multiclass Bls<string m, Format RegMRM, Format MemMRM, X86TypeInfo t, string Suffix = ""> {
+ let SchedRW = [WriteBLS] in {
+ def rr#Suffix : UnaryOpR<0xF3, RegMRM, m, unaryop_ndd_args, t,
+ (outs t.RegClass:$dst), []>, T8, VVVV;
+ }
+
+ let SchedRW = [WriteBLS.Folded] in
+ def rm#Suffix : UnaryOpM<0xF3, MemMRM, m, unaryop_ndd_args, t,
+ (outs t.RegClass:$dst), []>, T8, VVVV;
}
-let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in {
- defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>;
- defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, REX_W;
- defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS>;
- defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS>, REX_W;
- defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS>;
- defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, REX_W;
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ defm BLSR32 : Bls<"blsr", MRM1r, MRM1m, Xi32>, VEX;
+ defm BLSR64 : Bls<"blsr", MRM1r, MRM1m, Xi64>, VEX;
+ defm BLSMSK32 : Bls<"blsmsk", MRM2r, MRM2m, Xi32>, VEX;
+ defm BLSMSK64 : Bls<"blsmsk", MRM2r, MRM2m, Xi64>, VEX;
+ defm BLSI32 : Bls<"blsi", MRM3r, MRM3m, Xi32>, VEX;
+ defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64>, VEX;
}
-let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in {
- defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
- defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
- defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
- defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
- defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
- defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
+let Predicates = [HasBMI, In64BitMode], Defs = [EFLAGS] in {
+ defm BLSR32 : Bls<"blsr", MRM1r, MRM1m, Xi32, "_EVEX">, EVEX;
+ defm BLSR64 : Bls<"blsr", MRM1r, MRM1m, Xi64, "_EVEX">, EVEX;
+ defm BLSMSK32 : Bls<"blsmsk", MRM2r, MRM2m, Xi32, "_EVEX">, EVEX;
+ defm BLSMSK64 : Bls<"blsmsk", MRM2r, MRM2m, Xi64, "_EVEX">, EVEX;
+ defm BLSI32 : Bls<"blsi", MRM3r, MRM3m, Xi32, "_EVEX">, EVEX;
+ defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64, "_EVEX">, EVEX;
}
let Predicates = [HasBMI] in {
@@ -1281,50 +1278,35 @@ let Predicates = [HasBMI] in {
(BLSI64rr GR64:$src)>;
}
-multiclass bmi4VOp3_base<bits<8> opc, string mnemonic, RegisterClass RC,
- X86MemOperand x86memop, SDPatternOperator OpNode,
- PatFrag ld_frag, X86FoldableSchedWrite Sched,
- string Suffix = ""> {
- def rr#Suffix : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
- T8, VEX, Sched<[Sched]>;
-let mayLoad = 1 in
- def rm#Suffix : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)),
- (implicit EFLAGS)]>, T8, VEX,
- Sched<[Sched.Folded,
- // x86memop:$src1
- ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault,
- // RC:$src2
- Sched.ReadAfterFold]>;
+multiclass Bmi4VOp3<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
+ X86FoldableSchedWrite sched, string Suffix = ""> {
+ let SchedRW = [sched], Form = MRMSrcReg4VOp3 in
+ def rr#Suffix : BinOpRR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS,
+ (node t.RegClass:$src1, t.RegClass:$src2))]>, T8;
+ let SchedRW = [sched.Folded,
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ sched.ReadAfterFold], Form = MRMSrcMem4VOp3 in
+ def rm#Suffix : BinOpMR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1),
+ t.RegClass:$src2))]>, T8;
}
let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in {
- defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem,
- X86bextr, loadi32, WriteBEXTR>;
- defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem,
- X86bextr, loadi64, WriteBEXTR>, REX_W;
+ defm BEXTR32 : Bmi4VOp3<0xF7, "bextr", Xi32, X86bextr, WriteBEXTR>, VEX;
+ defm BEXTR64 : Bmi4VOp3<0xF7, "bextr", Xi64, X86bextr, WriteBEXTR>, VEX;
}
let Predicates = [HasBMI2, NoEGPR], Defs = [EFLAGS] in {
- defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem,
- X86bzhi, loadi32, WriteBZHI>;
- defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem,
- X86bzhi, loadi64, WriteBZHI>, REX_W;
-}
-let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in {
- defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem,
- X86bextr, loadi32, WriteBEXTR, "_EVEX">, EVEX;
- defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem,
- X86bextr, loadi64, WriteBEXTR, "_EVEX">, EVEX, REX_W;
-}
-let Predicates = [HasBMI2, HasEGPR], Defs = [EFLAGS] in {
- defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem,
- X86bzhi, loadi32, WriteBZHI, "_EVEX">, EVEX;
- defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem,
- X86bzhi, loadi64, WriteBZHI, "_EVEX">, EVEX, REX_W;
+ defm BZHI32 : Bmi4VOp3<0xF5, "bzhi", Xi32, X86bzhi, WriteBZHI>, VEX;
+ defm BZHI64 : Bmi4VOp3<0xF5, "bzhi", Xi64, X86bzhi, WriteBZHI>, VEX;
+}
+let Predicates = [HasBMI, HasEGPR, In64BitMode], Defs = [EFLAGS] in {
+ defm BEXTR32 : Bmi4VOp3<0xF7, "bextr", Xi32, X86bextr, WriteBEXTR, "_EVEX">, EVEX;
+ defm BEXTR64 : Bmi4VOp3<0xF7, "bextr", Xi64, X86bextr, WriteBEXTR, "_EVEX">, EVEX;
+}
+let Predicates = [HasBMI2, HasEGPR, In64BitMode], Defs = [EFLAGS] in {
+ defm BZHI32 : Bmi4VOp3<0xF5, "bzhi", Xi32, X86bzhi, WriteBZHI, "_EVEX">, EVEX;
+ defm BZHI64 : Bmi4VOp3<0xF5, "bzhi", Xi64, X86bzhi, WriteBZHI, "_EVEX">, EVEX;
}
def CountTrailingOnes : SDNodeXForm<imm, [{
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 2e08c7b..32941c0 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1524,7 +1524,8 @@ StringRef sys::getHostCPUName() {
// Use the processor ID to detect the CPU name.
uint32_t processor_id;
__asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id));
- switch (processor_id & 0xff00) {
+ // Refer to PRID_SERIES_MASK in the Linux kernel: arch/loongarch/include/asm/cpu.h.
+ switch (processor_id & 0xf000) {
case 0xc000: // Loongson 64bit, 4-issue
return "la464";
// TODO: Others.
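The widened mask matters because the PRID model/revision bits live below bit 12; only the series nibble in bits [15:12] identifies the core family. A minimal sketch of the difference, assuming a hypothetical processor id of 0xc1a0 (the 0xc000 series value comes from the case above):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t processor_id = 0xc1a0; // hypothetical LA464-series PRID
  // The old mask kept model bits, so this id missed "case 0xc000":
  printf("& 0xff00 -> %#x\n", processor_id & 0xff00); // prints 0xc100
  // The new mask keeps only the series nibble and matches:
  printf("& 0xf000 -> %#x\n", processor_id & 0xf000); // prints 0xc000
  return 0;
}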
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 529f730..89a1ad2 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -2953,6 +2953,9 @@ void coro::salvageDebugInfo(
std::optional<BasicBlock::iterator> InsertPt;
if (auto *I = dyn_cast<Instruction>(Storage)) {
InsertPt = I->getInsertionPointAfterDef();
+ // Update the DILocation only at O0, since it easily gets out of sync
+ // during optimizations. See https://github.com/llvm/llvm-project/pull/75104
+ // for an example.
if (!OptimizeFrame && I->getDebugLoc())
DVI.setDebugLoc(I->getDebugLoc());
} else if (isa<Argument>(Storage))
@@ -2988,9 +2991,14 @@ void coro::salvageDebugInfo(
// dbg.declare does.
if (DPV.getType() == DPValue::LocationType::Declare) {
std::optional<BasicBlock::iterator> InsertPt;
- if (auto *I = dyn_cast<Instruction>(Storage))
+ if (auto *I = dyn_cast<Instruction>(Storage)) {
InsertPt = I->getInsertionPointAfterDef();
- else if (isa<Argument>(Storage))
+ // Update the DILocation only at O0, since it easily gets out of sync
+ // during optimizations. See https://github.com/llvm/llvm-project/pull/75104
+ // for an example.
+ if (!OptimizeFrame && I->getDebugLoc())
+ DPV.setDebugLoc(I->getDebugLoc());
+ } else if (isa<Argument>(Storage))
InsertPt = F->getEntryBlock().begin();
if (InsertPt) {
DPV.removeFromParent();
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 49ac1e9..cc93c86 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -366,6 +366,13 @@ struct Decomposition {
append_range(Vars, Other.Vars);
}
+ void sub(const Decomposition &Other) {
+ Decomposition Tmp = Other;
+ Tmp.mul(-1);
+ add(Tmp.Offset);
+ append_range(Vars, Tmp.Vars);
+ }
+
void mul(int64_t Factor) {
Offset = multiplyWithOverflow(Offset, Factor);
for (auto &Var : Vars)
@@ -569,10 +576,12 @@ static Decomposition decompose(Value *V,
return Result;
}
- if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))) && canUseSExt(CI))
- return {-1 * CI->getSExtValue(), {{1, Op0}}};
- if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1))))
- return {0, {{1, Op0}, {-1, Op1}}};
+ if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) {
+ auto ResA = decompose(Op0, Preconditions, IsSigned, DL);
+ auto ResB = decompose(Op1, Preconditions, IsSigned, DL);
+ ResA.sub(ResB);
+ return ResA;
+ }
return {V, IsKnownNonNegative};
}
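The recursive handling of no-unsigned-wrap subtraction above composes linear decompositions: subtracting B from A negates B's offset and coefficients (via mul(-1)) and concatenates the variable lists. A self-contained sketch of that algebra, with plain ints standing in for llvm::Value* and no overflow checking:

#include <cstdint>
#include <utility>
#include <vector>

// Simplified model of Decomposition: a constant offset plus a list of
// (Coefficient, Variable) terms.
struct Decomposition {
  int64_t Offset = 0;
  std::vector<std::pair<int64_t, int>> Vars;

  void mul(int64_t Factor) {
    Offset *= Factor; // the real code multiplies with overflow checks
    for (auto &V : Vars)
      V.first *= Factor;
  }
  void sub(const Decomposition &Other) {
    Decomposition Tmp = Other;
    Tmp.mul(-1);
    Offset += Tmp.Offset;
    Vars.insert(Vars.end(), Tmp.Vars.begin(), Tmp.Vars.end());
  }
};

int main() {
  // decompose(x + 5) == {5, {(1, x)}}, decompose(y) == {0, {(1, y)}}
  Decomposition A{5, {{1, /*x*/ 0}}};
  Decomposition B{0, {{1, /*y*/ 1}}};
  A.sub(B); // (x + 5) - y  ==>  {5, {(1, x), (-1, y)}}
  return (A.Offset == 5 && A.Vars.size() == 2) ? 0 : 1;
}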
@@ -635,7 +644,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
// First try to look up \p V in Value2Index and NewVariables. Otherwise add a
// new entry to NewVariables.
- DenseMap<Value *, unsigned> NewIndexMap;
+ SmallDenseMap<Value *, unsigned> NewIndexMap;
auto GetOrAddIndex = [&Value2Index, &NewVariables,
&NewIndexMap](Value *V) -> unsigned {
auto V2I = Value2Index.find(V);
@@ -659,7 +668,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
IsSigned, IsEq, IsNe);
// Collect variables that are known to be positive in all uses in the
// constraint.
- DenseMap<Value *, bool> KnownNonNegativeVariables;
+ SmallDenseMap<Value *, bool> KnownNonNegativeVariables;
auto &R = Res.Coefficients;
for (const auto &KV : VariablesA) {
R[GetOrAddIndex(KV.Variable)] += KV.Coefficient;
diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
index 0990c75..ea31356 100644
--- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
+++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -33,37 +33,37 @@ STATISTIC(NumVFDeclAdded,
STATISTIC(NumCompUsedAdded,
"Number of `@llvm.compiler.used` operands that have been added.");
-/// A helper function that adds the vector function declaration that
-/// vectorizes the CallInst CI with a vectorization factor of VF
-/// lanes. The TLI assumes that all parameters and the return type of
-/// CI (other than void) need to be widened to a VectorType of VF
-/// lanes.
+/// A helper function that adds the vector variant declaration for vectorizing
+/// the CallInst \p CI with a vectorization factor of \p VF lanes. For each
+/// mapping, TLI provides a VABI prefix, which contains all the information
+/// required to create the vector function declaration.
static void addVariantDeclaration(CallInst &CI, const ElementCount &VF,
- bool Predicate, const StringRef VFName) {
+ const VecDesc *VD) {
Module *M = CI.getModule();
+ FunctionType *ScalarFTy = CI.getFunctionType();
- // Add function declaration.
- Type *RetTy = ToVectorTy(CI.getType(), VF);
- SmallVector<Type *, 4> Tys;
- for (Value *ArgOperand : CI.args())
- Tys.push_back(ToVectorTy(ArgOperand->getType(), VF));
- assert(!CI.getFunctionType()->isVarArg() &&
- "VarArg functions are not supported.");
- if (Predicate)
- Tys.push_back(ToVectorTy(Type::getInt1Ty(RetTy->getContext()), VF));
- FunctionType *FTy = FunctionType::get(RetTy, Tys, /*isVarArg=*/false);
- Function *VectorF =
- Function::Create(FTy, Function::ExternalLinkage, VFName, M);
- VectorF->copyAttributesFrom(CI.getCalledFunction());
+ assert(!ScalarFTy->isVarArg() && "VarArg functions are not supported.");
+
+ const std::optional<VFInfo> Info = VFABI::tryDemangleForVFABI(
+ VD->getVectorFunctionABIVariantString(), ScalarFTy);
+
+ assert(Info && "Failed to demangle vector variant");
+ assert(Info->Shape.VF == VF && "Mangled name does not match VF");
+
+ const StringRef VFName = VD->getVectorFnName();
+ FunctionType *VectorFTy = VFABI::createFunctionType(*Info, ScalarFTy);
+ Function *VecFunc =
+ Function::Create(VectorFTy, Function::ExternalLinkage, VFName, M);
+ VecFunc->copyAttributesFrom(CI.getCalledFunction());
++NumVFDeclAdded;
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added to the module: `" << VFName
- << "` of type " << *(VectorF->getType()) << "\n");
+ << "` of type " << *VectorFTy << "\n");
// Make the function declaration (without a body) "sticky" in the IR by
// listing it in the @llvm.compiler.used intrinsic.
- assert(!VectorF->size() && "VFABI attribute requires `@llvm.compiler.used` "
+ assert(!VecFunc->size() && "VFABI attribute requires `@llvm.compiler.used` "
"only on declarations.");
- appendToCompilerUsed(*M, {VectorF});
+ appendToCompilerUsed(*M, {VecFunc});
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << VFName
<< "` to `@llvm.compiler.used`.\n");
++NumCompUsedAdded;
@@ -100,7 +100,7 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
}
Function *VariantF = M->getFunction(VD->getVectorFnName());
if (!VariantF)
- addVariantDeclaration(CI, VF, Predicate, VD->getVectorFnName());
+ addVariantDeclaration(CI, VF, VD);
}
};
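The variant string parsed by tryDemangleForVFABI follows the Vector Function ABI name mangling, roughly _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<vectorname>)]. An illustrative breakdown for a hypothetical scalar function foo:

// "_ZGVnN2v_foo(vec_foo)"   (hypothetical variant string)
//   _ZGV      : vector-function ABI prefix
//   n         : ISA token (here Advanced SIMD)
//   N         : unmasked; 'M' would denote a masked variant
//   2         : vectorization factor of 2 lanes
//   v         : one parameter, widened to a vector
//   foo       : the scalar function being vectorized
//   (vec_foo) : optional redirection to the vector implementation's name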
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 55e3756..61d891d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5414,11 +5414,13 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
}
static void createUnreachableSwitchDefault(SwitchInst *Switch,
- DomTreeUpdater *DTU) {
+ DomTreeUpdater *DTU,
+ bool RemoveOrigDefaultBlock = true) {
LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
auto *BB = Switch->getParent();
auto *OrigDefaultBlock = Switch->getDefaultDest();
- OrigDefaultBlock->removePredecessor(BB);
+ if (RemoveOrigDefaultBlock)
+ OrigDefaultBlock->removePredecessor(BB);
BasicBlock *NewDefaultBlock = BasicBlock::Create(
BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
OrigDefaultBlock);
@@ -5427,7 +5429,8 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch,
if (DTU) {
SmallVector<DominatorTree::UpdateType, 2> Updates;
Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
- if (!is_contained(successors(BB), OrigDefaultBlock))
+ if (RemoveOrigDefaultBlock &&
+ !is_contained(successors(BB), OrigDefaultBlock))
Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
DTU->applyUpdates(Updates);
}
@@ -5609,10 +5612,28 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
Known.getBitWidth() - (Known.Zero | Known.One).popcount();
assert(NumUnknownBits <= Known.getBitWidth());
if (HasDefault && DeadCases.empty() &&
- NumUnknownBits < 64 /* avoid overflow */ &&
- SI->getNumCases() == (1ULL << NumUnknownBits)) {
- createUnreachableSwitchDefault(SI, DTU);
- return true;
+ NumUnknownBits < 64 /* avoid overflow */) {
+ uint64_t AllNumCases = 1ULL << NumUnknownBits;
+ if (SI->getNumCases() == AllNumCases) {
+ createUnreachableSwitchDefault(SI, DTU);
+ return true;
+ }
+ // When only one case value is missing, replace the default with that case.
+ // Eliminating the default branch provides more opportunities for
+ // optimization, such as lookup tables.
+ if (SI->getNumCases() == AllNumCases - 1) {
+ assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
+ uint64_t MissingCaseVal = 0;
+ for (const auto &Case : SI->cases())
+ MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
+ auto *MissingCase =
+ cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
+ SwitchInstProfUpdateWrapper SIW(*SI);
+ SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
+ createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
+ SIW.setSuccessorWeight(0, 0);
+ return true;
+ }
}
if (DeadCases.empty())
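The XOR accumulation above relies on a small identity: across the 2^N feasible case values (N = NumUnknownBits, N > 1), every bit position is set an even number of times, so all feasible values XOR to zero, and XOR-ing the 2^N - 1 values that appear as cases leaves exactly the missing one. A quick standalone check, assuming for simplicity that all N bits are unknown:

#include <cstdint>
#include <cstdio>

int main() {
  const unsigned N = 3;
  const uint64_t Missing = 5; // pretend this case is absent from the switch
  uint64_t Acc = 0;
  for (uint64_t V = 0; V < (1ULL << N); ++V)
    if (V != Missing)
      Acc ^= V; // mirrors "MissingCaseVal ^= Case.getCaseValue()->..."
  printf("recovered missing case: %llu\n", (unsigned long long)Acc); // 5
  return Acc == Missing ? 0 : 1;
}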
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 577ce80..150ab30 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -167,9 +167,14 @@ public:
}
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
- DebugLoc DL, const Twine &Name = "") {
- return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL,
- Name);
+ DebugLoc DL, const Twine &Name = "",
+ std::optional<FastMathFlags> FMFs = std::nullopt) {
+ auto *Select =
+ FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
+ *FMFs, DL, Name)
+ : new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
+ DL, Name);
+ return tryInsertInstruction(Select);
}
/// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
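The optional-FMF parameter added to createSelect is a small constructor-dispatch pattern: whether the optional holds a value chooses between the FMF-taking and FMF-less VPInstruction constructors. A reduced sketch of the same shape, with hypothetical stand-in types:

#include <optional>

struct FastMathFlags { unsigned Flags = 0; }; // stand-in
struct Node {                                 // stand-in for VPInstruction
  bool HasFMF;
  Node() : HasFMF(false) {}
  explicit Node(FastMathFlags) : HasFMF(true) {}
};

// Callers pass std::nullopt for integer reductions and real flags for
// floating-point ones, as adjustRecipesForReductions does below.
Node *makeSelect(std::optional<FastMathFlags> FMFs) {
  return FMFs ? new Node(*FMFs) : new Node();
}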
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8e135d8..f5f0461 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9141,7 +9141,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
continue;
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- auto *Result = PhiR->getBackedgeValue()->getDefiningRecipe();
+ auto *NewExitingVPV = PhiR->getBackedgeValue();
// If tail is folded by masking, introduce selects between the phi
// and the live-out instruction of each reduction, at the beginning of the
// dedicated latch block.
@@ -9151,21 +9151,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
VPValue *Red = PhiR->getBackedgeValue();
assert(Red->getDefiningRecipe()->getParent() != LatchVPBB &&
"reduction recipe must be defined before latch");
- FastMathFlags FMFs = RdxDesc.getFastMathFlags();
Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType();
- Result =
+ std::optional<FastMathFlags> FMFs =
PhiTy->isFloatingPointTy()
- ? new VPInstruction(Instruction::Select, {Cond, Red, PhiR}, FMFs)
- : new VPInstruction(Instruction::Select, {Cond, Red, PhiR});
- Result->insertBefore(&*Builder.getInsertPoint());
- Red->replaceUsesWithIf(
- Result->getVPSingleValue(),
- [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); });
+ ? std::make_optional(RdxDesc.getFastMathFlags())
+ : std::nullopt;
+ NewExitingVPV = Builder.createSelect(Cond, Red, PhiR, {}, "", FMFs);
+ Red->replaceUsesWithIf(NewExitingVPV, [](VPUser &U, unsigned) {
+ return isa<VPLiveOut>(&U);
+ });
if (PreferPredicatedReductionSelect ||
TTI.preferPredicatedReductionSelect(
PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy,
TargetTransformInfo::ReductionFlags()))
- PhiR->setOperand(1, Result->getVPSingleValue());
+ PhiR->setOperand(1, NewExitingVPV);
}
// If the vector reduction can be performed in a smaller type, we truncate
// then extend the loop exit value to enable InstCombine to evaluate the
@@ -9174,17 +9173,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
Type *RdxTy = RdxDesc.getRecurrenceType();
- auto *Trunc = new VPWidenCastRecipe(Instruction::Trunc,
- Result->getVPSingleValue(), RdxTy);
+ auto *Trunc =
+ new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy);
auto *Extnd =
RdxDesc.isSigned()
? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy)
: new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy);
- Trunc->insertAfter(Result);
+ Trunc->insertAfter(NewExitingVPV->getDefiningRecipe());
Extnd->insertAfter(Trunc);
- Result->getVPSingleValue()->replaceAllUsesWith(Extnd);
- Trunc->setOperand(0, Result->getVPSingleValue());
+ NewExitingVPV->replaceAllUsesWith(Extnd);
+ Trunc->setOperand(0, NewExitingVPV);
}
}
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 30499152..bd89ec0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11139,6 +11139,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
case Instruction::ExtractElement: {
Value *V = E->getSingleOperand(0);
+ if (const TreeEntry *TE = getTreeEntry(V))
+ V = TE->VectorizedValue;
setInsertPointAfterBundle(E);
V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;