Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/BasicAliasAnalysis.cpp | 3
-rw-r--r--  llvm/lib/Analysis/InstructionSimplify.cpp | 48
-rw-r--r--  llvm/lib/Analysis/LazyValueInfo.cpp | 5
-rw-r--r--  llvm/lib/Analysis/Lint.cpp | 5
-rw-r--r--  llvm/lib/Analysis/Loads.cpp | 4
-rw-r--r--  llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 37
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp | 2
-rw-r--r--  llvm/lib/Analysis/TypeMetadataUtils.cpp | 47
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp | 125
-rw-r--r--  llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 1
-rw-r--r--  llvm/lib/CodeGen/BranchFolding.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/ExpandVectorPredication.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 38
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 26
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Utils.cpp | 44
-rw-r--r--  llvm/lib/CodeGen/LiveDebugVariables.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/LiveDebugVariables.h | 68
-rw-r--r--  llvm/lib/CodeGen/LowerEmuTLS.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/MachineDebugify.cpp | 18
-rw-r--r--  llvm/lib/CodeGen/RegAllocBasic.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/RegAllocGreedy.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/RegisterPressure.cpp | 39
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 19
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/TailDuplicator.cpp | 20
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringBase.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/VirtRegMap.cpp | 2
-rw-r--r--  llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp | 6
-rw-r--r--  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 4
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp | 9
-rw-r--r--  llvm/lib/IR/Constants.cpp | 2
-rw-r--r--  llvm/lib/IR/Core.cpp | 8
-rw-r--r--  llvm/lib/IR/Function.cpp | 22
-rw-r--r--  llvm/lib/IR/Verifier.cpp | 37
-rw-r--r--  llvm/lib/MC/MCPseudoProbe.cpp | 2
-rw-r--r--  llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp | 34
-rw-r--r--  llvm/lib/ObjCopy/ELF/ELFObject.cpp | 5
-rw-r--r--  llvm/lib/ProfileData/InstrProf.cpp | 4
-rw-r--r--  llvm/lib/ProfileData/InstrProfReader.cpp | 53
-rw-r--r--  llvm/lib/ProfileData/MemProf.cpp | 49
-rw-r--r--  llvm/lib/ProfileData/MemProfReader.cpp | 40
-rw-r--r--  llvm/lib/ProfileData/SampleProfReader.cpp | 2
-rw-r--r--  llvm/lib/Support/RISCVISAInfo.cpp | 9
-rw-r--r--  llvm/lib/Target/AArch64/AArch64.h | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 5
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 57
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 84
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 11
-rw-r--r--  llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp | 101
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 7
-rw-r--r--  llvm/lib/Target/AArch64/CMakeLists.txt | 1
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 20
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp | 38
-rw-r--r--  llvm/lib/Target/AArch64/SVEInstrFormats.td | 28
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 6
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 33
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 13
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 7
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.h | 2
-rw-r--r--  llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 8
-rw-r--r--  llvm/lib/Target/ARM/ARMParallelDSP.cpp | 1
-rw-r--r--  llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 19
-rw-r--r--  llvm/lib/Target/ARM/Thumb2InstrInfo.h | 4
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 15
-rw-r--r--  llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp | 7
-rw-r--r--  llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp | 24
-rw-r--r--  llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp | 39
-rw-r--r--  llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp | 11
-rw-r--r--  llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 11
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp | 57
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp | 56
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td | 26
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 109
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFrameLowering.h | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 510
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.h | 61
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 24
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 85
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 346
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 24
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 30
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td | 48
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td | 10
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp | 89
-rw-r--r--  llvm/lib/Target/RISCV/RISCVProcessors.td | 33
-rw-r--r--  llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedRocket.td | 1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 108
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td | 1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 81
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td | 1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td | 1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedule.td | 1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVScheduleV.td | 84
-rw-r--r--  llvm/lib/Target/RISCV/RISCVScheduleXSf.td | 59
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 17
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 4
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRV.h | 2
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 18
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 167
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 14
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 22
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp | 23
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp | 6
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstrInfo.td | 21
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 37
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp | 5
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp | 100
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.cpp | 18
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVRegisterBanks.td | 6
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVRegisterInfo.td | 40
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 4
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 3
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVUtils.h | 7
-rw-r--r--  llvm/lib/Target/Sparc/SparcInstrInfo.td | 30
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 10
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/TargetMachine.cpp | 26
-rw-r--r--  llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp | 139
-rw-r--r--  llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h | 16
-rw-r--r--  llvm/lib/Target/X86/X86FrameLowering.cpp | 11
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 80
-rw-r--r--  llvm/lib/Target/X86/X86InstrCMovSetCC.td | 4
-rw-r--r--  llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 2
-rw-r--r--  llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 2
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 2
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 16
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 28
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 18
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 26
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 55
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 3
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 44
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Utils/Debugify.cpp | 111
-rw-r--r--  llvm/lib/Transforms/Utils/Local.cpp | 8
-rw-r--r--  llvm/lib/Transforms/Utils/LoopUtils.cpp | 24
-rw-r--r--  llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 40
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 20
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h | 4
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 284
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 89
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h | 6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp | 93
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h | 161
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 9
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanAnalysis.h | 4
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 57
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 23
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanValue.h | 43
-rw-r--r--  llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 2
161 files changed, 3559 insertions, 1781 deletions
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index b082dfe..16ee2ca 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1283,8 +1283,7 @@ AliasResult BasicAAResult::aliasGEP(
// VarIndex = Scale*V.
const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
if (Var.Val.TruncBits == 0 &&
- isKnownNonZero(Var.Val.V, /*Depth=*/0,
- SimplifyQuery(DL, DT, &AC, Var.CxtI))) {
+ isKnownNonZero(Var.Val.V, SimplifyQuery(DL, DT, &AC, Var.CxtI))) {
// Check if abs(V*Scale) >= abs(Scale) holds in the presence of
// potentially wrapping math.
auto MultiplyByScaleNoWrap = [](const VariableGEPIndex &Var) {
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 4e6e666..06ba5ca 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -1513,7 +1513,7 @@ static Value *simplifyAShrInst(Value *Op0, Value *Op1, bool IsExact,
// -1 >>a X --> -1
// (-1 << X) a>> X --> -1
- // Do not return Op0 because it may contain undef elements if it's a vector.
+ // We could return the original -1 constant to preserve poison elements.
if (match(Op0, m_AllOnes()) ||
match(Op0, m_Shl(m_AllOnes(), m_Specific(Op1))))
return Constant::getAllOnesValue(Op0->getType());
@@ -1586,10 +1586,10 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
if (match(UnsignedICmp,
m_c_ICmp(UnsignedPred, m_Specific(Y), m_Specific(A)))) {
if (UnsignedPred == ICmpInst::ICMP_UGE && IsAnd &&
- EqPred == ICmpInst::ICMP_NE && isKnownNonZero(B, /*Depth=*/0, Q))
+ EqPred == ICmpInst::ICMP_NE && isKnownNonZero(B, Q))
return UnsignedICmp;
if (UnsignedPred == ICmpInst::ICMP_ULT && !IsAnd &&
- EqPred == ICmpInst::ICMP_EQ && isKnownNonZero(B, /*Depth=*/0, Q))
+ EqPred == ICmpInst::ICMP_EQ && isKnownNonZero(B, Q))
return UnsignedICmp;
}
}
@@ -1607,13 +1607,13 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
// X > Y && Y == 0 --> Y == 0 iff X != 0
// X > Y || Y == 0 --> X > Y iff X != 0
if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ &&
- isKnownNonZero(X, /*Depth=*/0, Q))
+ isKnownNonZero(X, Q))
return IsAnd ? ZeroICmp : UnsignedICmp;
// X <= Y && Y != 0 --> X <= Y iff X != 0
// X <= Y || Y != 0 --> Y != 0 iff X != 0
if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE &&
- isKnownNonZero(X, /*Depth=*/0, Q))
+ isKnownNonZero(X, Q))
return IsAnd ? UnsignedICmp : ZeroICmp;
// The transforms below here are expected to be handled more generally with
@@ -2281,7 +2281,7 @@ static Value *simplifyOrLogic(Value *X, Value *Y) {
// (B ^ ~A) | (A & B) --> B ^ ~A
// (~A ^ B) | (B & A) --> ~A ^ B
// (B ^ ~A) | (B & A) --> B ^ ~A
- if (match(X, m_c_Xor(m_NotForbidUndef(m_Value(A)), m_Value(B))) &&
+ if (match(X, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
match(Y, m_c_And(m_Specific(A), m_Specific(B))))
return X;
@@ -2298,31 +2298,29 @@ static Value *simplifyOrLogic(Value *X, Value *Y) {
// (B & ~A) | ~(A | B) --> ~A
// (B & ~A) | ~(B | A) --> ~A
Value *NotA;
- if (match(X,
- m_c_And(m_CombineAnd(m_Value(NotA), m_NotForbidUndef(m_Value(A))),
- m_Value(B))) &&
+ if (match(X, m_c_And(m_CombineAnd(m_Value(NotA), m_Not(m_Value(A))),
+ m_Value(B))) &&
match(Y, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
return NotA;
// The same is true of Logical And
// TODO: This could share the logic of the version above if there was a
// version of LogicalAnd that allowed more than just i1 types.
- if (match(X, m_c_LogicalAnd(
- m_CombineAnd(m_Value(NotA), m_NotForbidUndef(m_Value(A))),
- m_Value(B))) &&
+ if (match(X, m_c_LogicalAnd(m_CombineAnd(m_Value(NotA), m_Not(m_Value(A))),
+ m_Value(B))) &&
match(Y, m_Not(m_c_LogicalOr(m_Specific(A), m_Specific(B)))))
return NotA;
// ~(A ^ B) | (A & B) --> ~(A ^ B)
// ~(A ^ B) | (B & A) --> ~(A ^ B)
Value *NotAB;
- if (match(X, m_CombineAnd(m_NotForbidUndef(m_Xor(m_Value(A), m_Value(B))),
+ if (match(X, m_CombineAnd(m_Not(m_Xor(m_Value(A), m_Value(B))),
m_Value(NotAB))) &&
match(Y, m_c_And(m_Specific(A), m_Specific(B))))
return NotAB;
// ~(A & B) | (A ^ B) --> ~(A & B)
// ~(A & B) | (B ^ A) --> ~(A & B)
- if (match(X, m_CombineAnd(m_NotForbidUndef(m_And(m_Value(A), m_Value(B))),
+ if (match(X, m_CombineAnd(m_Not(m_And(m_Value(A), m_Value(B))),
m_Value(NotAB))) &&
match(Y, m_c_Xor(m_Specific(A), m_Specific(B))))
return NotAB;
@@ -2552,9 +2550,8 @@ static Value *simplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// The 'not' op must contain a complete -1 operand (no undef elements for
// vector) for the transform to be safe.
Value *NotA;
- if (match(X,
- m_c_Or(m_CombineAnd(m_NotForbidUndef(m_Value(A)), m_Value(NotA)),
- m_Value(B))) &&
+ if (match(X, m_c_Or(m_CombineAnd(m_Not(m_Value(A)), m_Value(NotA)),
+ m_Value(B))) &&
match(Y, m_c_And(m_Specific(A), m_Specific(B))))
return NotA;
@@ -2817,10 +2814,9 @@ static Constant *computePointerICmp(CmpInst::Predicate Pred, Value *LHS,
// the other operand can not be based on the alloc - if it were, then
// the cmp itself would be a capture.
Value *MI = nullptr;
- if (isAllocLikeFn(LHS, TLI) && llvm::isKnownNonZero(RHS, /*Depth=*/0, Q))
+ if (isAllocLikeFn(LHS, TLI) && llvm::isKnownNonZero(RHS, Q))
MI = LHS;
- else if (isAllocLikeFn(RHS, TLI) &&
- llvm::isKnownNonZero(LHS, /*Depth=*/0, Q))
+ else if (isAllocLikeFn(RHS, TLI) && llvm::isKnownNonZero(LHS, Q))
MI = RHS;
if (MI) {
// FIXME: This is incorrect, see PR54002. While we can assume that the
@@ -2976,12 +2972,12 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
return getTrue(ITy);
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULE:
- if (isKnownNonZero(LHS, /*Depth=*/0, Q))
+ if (isKnownNonZero(LHS, Q))
return getFalse(ITy);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
- if (isKnownNonZero(LHS, /*Depth=*/0, Q))
+ if (isKnownNonZero(LHS, Q))
return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT: {
@@ -2996,7 +2992,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
KnownBits LHSKnown = computeKnownBits(LHS, /* Depth */ 0, Q);
if (LHSKnown.isNegative())
return getTrue(ITy);
- if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, /*Depth=*/0, Q))
+ if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, Q))
return getFalse(ITy);
break;
}
@@ -3012,7 +3008,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
KnownBits LHSKnown = computeKnownBits(LHS, /* Depth */ 0, Q);
if (LHSKnown.isNegative())
return getFalse(ITy);
- if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, /*Depth=*/0, Q))
+ if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, Q))
return getTrue(ITy);
break;
}
@@ -3165,7 +3161,7 @@ static Value *simplifyICmpWithBinOpOnLHS(CmpInst::Predicate Pred,
const APInt *C;
if ((match(LBO, m_LShr(m_Specific(RHS), m_APInt(C))) && *C != 0) ||
(match(LBO, m_UDiv(m_Specific(RHS), m_APInt(C))) && *C != 1)) {
- if (isKnownNonZero(RHS, /*Depth=*/0, Q)) {
+ if (isKnownNonZero(RHS, Q)) {
switch (Pred) {
default:
break;
@@ -3398,7 +3394,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
bool NUW = Q.IIQ.hasNoUnsignedWrap(LBO) && Q.IIQ.hasNoUnsignedWrap(RBO);
bool NSW = Q.IIQ.hasNoSignedWrap(LBO) && Q.IIQ.hasNoSignedWrap(RBO);
if (!NUW || (ICmpInst::isSigned(Pred) && !NSW) ||
- !isKnownNonZero(LBO->getOperand(0), /*Depth=*/0, Q))
+ !isKnownNonZero(LBO->getOperand(0), Q))
break;
if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(1),
RBO->getOperand(1), Q, MaxRecurse - 1))
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 3223b05..6cded82 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -645,7 +645,7 @@ LazyValueInfoImpl::solveBlockValueImpl(Value *Val, BasicBlock *BB) {
// instruction is placed, even if it could legally be hoisted much higher.
// That is unfortunate.
PointerType *PT = dyn_cast<PointerType>(BBI->getType());
- if (PT && isKnownNonZero(BBI, /*Depth=*/0, DL))
+ if (PT && isKnownNonZero(BBI, DL))
return ValueLatticeElement::getNot(ConstantPointerNull::get(PT));
if (BBI->getType()->isIntegerTy()) {
@@ -1863,8 +1863,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
Module *M = CxtI->getModule();
const DataLayout &DL = M->getDataLayout();
if (V->getType()->isPointerTy() && C->isNullValue() &&
- isKnownNonZero(V->stripPointerCastsSameRepresentation(), /*Depth=*/0,
- DL)) {
+ isKnownNonZero(V->stripPointerCastsSameRepresentation(), DL)) {
if (Pred == ICmpInst::ICMP_EQ)
return LazyValueInfo::False;
else if (Pred == ICmpInst::ICMP_NE)
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp
index 0694c29..1ab856a 100644
--- a/llvm/lib/Analysis/Lint.cpp
+++ b/llvm/lib/Analysis/Lint.cpp
@@ -350,10 +350,7 @@ void Lint::visitCallBase(CallBase &I) {
}
case Intrinsic::vastart:
- Check(I.getParent()->getParent()->isVarArg(),
- "Undefined behavior: va_start called in a non-varargs function",
- &I);
-
+ // vastart in non-varargs function is rejected by the verifier
visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
std::nullopt, nullptr, MemRef::Read | MemRef::Write);
break;
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index b540340..ac508e1 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -100,7 +100,7 @@ static bool isDereferenceableAndAlignedPointer(
if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size) &&
!CheckForFreed)
if (!CheckForNonNull ||
- isKnownNonZero(V, /*Depth=*/0, SimplifyQuery(DL, DT, AC, CtxI))) {
+ isKnownNonZero(V, SimplifyQuery(DL, DT, AC, CtxI))) {
// As we recursed through GEPs to get here, we've incrementally checked
// that each step advanced by a multiple of the alignment. If our base is
// properly aligned, then the original offset accessed must also be.
@@ -134,7 +134,7 @@ static bool isDereferenceableAndAlignedPointer(
if (getObjectSize(V, ObjSize, DL, TLI, Opts)) {
APInt KnownDerefBytes(Size.getBitWidth(), ObjSize);
if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size) &&
- isKnownNonZero(V, /*Depth=*/0, SimplifyQuery(DL, DT, AC, CtxI)) &&
+ isKnownNonZero(V, SimplifyQuery(DL, DT, AC, CtxI)) &&
!V->canBeFreed()) {
// As we recursed through GEPs to get here, we've incrementally
// checked that each step advanced by a multiple of the alignment. If
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index deda1ee..c3d15af 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
@@ -668,7 +669,8 @@ static void computeFunctionSummary(
/// within the initializer.
static void findFuncPointers(const Constant *I, uint64_t StartingOffset,
const Module &M, ModuleSummaryIndex &Index,
- VTableFuncList &VTableFuncs) {
+ VTableFuncList &VTableFuncs,
+ const GlobalVariable &OrigGV) {
// First check if this is a function pointer.
if (I->getType()->isPointerTy()) {
auto C = I->stripPointerCasts();
@@ -696,7 +698,7 @@ static void findFuncPointers(const Constant *I, uint64_t StartingOffset,
auto Offset = SL->getElementOffset(EI.index());
unsigned Op = SL->getElementContainingOffset(Offset);
findFuncPointers(cast<Constant>(I->getOperand(Op)),
- StartingOffset + Offset, M, Index, VTableFuncs);
+ StartingOffset + Offset, M, Index, VTableFuncs, OrigGV);
}
} else if (auto *C = dyn_cast<ConstantArray>(I)) {
ArrayType *ATy = C->getType();
@@ -704,7 +706,34 @@ static void findFuncPointers(const Constant *I, uint64_t StartingOffset,
uint64_t EltSize = DL.getTypeAllocSize(EltTy);
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
findFuncPointers(cast<Constant>(I->getOperand(i)),
- StartingOffset + i * EltSize, M, Index, VTableFuncs);
+ StartingOffset + i * EltSize, M, Index, VTableFuncs,
+ OrigGV);
+ }
+ } else if (const auto *CE = dyn_cast<ConstantExpr>(I)) {
+ // For relative vtables, the next sub-component should be a trunc.
+ if (CE->getOpcode() != Instruction::Trunc ||
+ !(CE = dyn_cast<ConstantExpr>(CE->getOperand(0))))
+ return;
+
+ // If this constant can be reduced to the offset between a function and a
+ // global, then we know this is a valid virtual function if the RHS is the
+ // original vtable we're scanning through.
+ if (CE->getOpcode() == Instruction::Sub) {
+ GlobalValue *LHS, *RHS;
+ APSInt LHSOffset, RHSOffset;
+ if (IsConstantOffsetFromGlobal(CE->getOperand(0), LHS, LHSOffset, DL) &&
+ IsConstantOffsetFromGlobal(CE->getOperand(1), RHS, RHSOffset, DL) &&
+ RHS == &OrigGV &&
+
+ // For relative vtables, this component should point to the callable
+ // function without any offsets.
+ LHSOffset == 0 &&
+
+ // Also, the RHS should always point to somewhere within the vtable.
+ RHSOffset <=
+ static_cast<uint64_t>(DL.getTypeAllocSize(OrigGV.getInitializer()->getType()))) {
+ findFuncPointers(LHS, StartingOffset, M, Index, VTableFuncs, OrigGV);
+ }
}
}
}
@@ -717,7 +746,7 @@ static void computeVTableFuncs(ModuleSummaryIndex &Index,
return;
findFuncPointers(V.getInitializer(), /*StartingOffset=*/0, M, Index,
- VTableFuncs);
+ VTableFuncs, V);
#ifndef NDEBUG
// Validate that the VTableFuncs list is ordered by offset.
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 1c98b02..95440dd 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6900,7 +6900,7 @@ const ConstantRange &ScalarEvolution::getRangeRef(
uint64_t Rem = MaxVal.urem(Align);
MaxVal -= APInt(BitWidth, Rem);
APInt MinVal = APInt::getZero(BitWidth);
- if (llvm::isKnownNonZero(V, /*Depth=*/0, DL))
+ if (llvm::isKnownNonZero(V, DL))
MinVal = Align;
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange::getNonEmpty(MinVal, MaxVal + 1), RangeType);
diff --git a/llvm/lib/Analysis/TypeMetadataUtils.cpp b/llvm/lib/Analysis/TypeMetadataUtils.cpp
index b8dcc39..67ce154 100644
--- a/llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ b/llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -67,6 +67,14 @@ static void findLoadCallsAtConstantOffset(
findLoadCallsAtConstantOffset(M, DevirtCalls, User, Offset + GEPOffset,
CI, DT);
}
+ } else if (auto *Call = dyn_cast<CallInst>(User)) {
+ if (Call->getIntrinsicID() == llvm::Intrinsic::load_relative) {
+ if (auto *LoadOffset = dyn_cast<ConstantInt>(Call->getOperand(1))) {
+ findCallsAtConstantOffset(DevirtCalls, nullptr, User,
+ Offset + LoadOffset->getSExtValue(), CI,
+ DT);
+ }
+ }
}
}
}
@@ -131,6 +139,12 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad(
Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M,
Constant *TopLevelGlobal) {
+ // TODO: Ideally it would be the caller who knows if it's appropriate to strip
+  // the DSOLocalEquivalent. More generally, it would feel more appropriate to
+ // have two functions that handle absolute and relative pointers separately.
+ if (auto *Equiv = dyn_cast<DSOLocalEquivalent>(I))
+ I = Equiv->getGlobalValue();
+
if (I->getType()->isPointerTy()) {
if (Offset == 0)
return I;
@@ -161,7 +175,7 @@ Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M,
Offset % ElemSize, M, TopLevelGlobal);
}
- // (Swift-specific) relative-pointer support starts here.
+ // Relative-pointer support starts here.
if (auto *CI = dyn_cast<ConstantInt>(I)) {
if (Offset == 0 && CI->isZero()) {
return I;
@@ -221,19 +235,26 @@ llvm::getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset,
return std::pair<Function *, Constant *>(Fn, C);
}
-void llvm::replaceRelativePointerUsersWithZero(Function *F) {
- for (auto *U : F->users()) {
- auto *PtrExpr = dyn_cast<ConstantExpr>(U);
- if (!PtrExpr || PtrExpr->getOpcode() != Instruction::PtrToInt)
- continue;
+static void replaceRelativePointerUserWithZero(User *U) {
+ auto *PtrExpr = dyn_cast<ConstantExpr>(U);
+ if (!PtrExpr || PtrExpr->getOpcode() != Instruction::PtrToInt)
+ return;
- for (auto *PtrToIntUser : PtrExpr->users()) {
- auto *SubExpr = dyn_cast<ConstantExpr>(PtrToIntUser);
- if (!SubExpr || SubExpr->getOpcode() != Instruction::Sub)
- continue;
+ for (auto *PtrToIntUser : PtrExpr->users()) {
+ auto *SubExpr = dyn_cast<ConstantExpr>(PtrToIntUser);
+ if (!SubExpr || SubExpr->getOpcode() != Instruction::Sub)
+ return;
- SubExpr->replaceNonMetadataUsesWith(
- ConstantInt::get(SubExpr->getType(), 0));
- }
+ SubExpr->replaceNonMetadataUsesWith(
+ ConstantInt::get(SubExpr->getType(), 0));
+ }
+}
+
+void llvm::replaceRelativePointerUsersWithZero(Constant *C) {
+ for (auto *U : C->users()) {
+ if (auto *Equiv = dyn_cast<DSOLocalEquivalent>(U))
+ replaceRelativePointerUsersWithZero(Equiv);
+ else
+ replaceRelativePointerUserWithZero(U);
}
}
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 5beea61..ab2f43e 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -272,7 +272,7 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
}
static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
- unsigned Depth, const SimplifyQuery &Q);
+ const SimplifyQuery &Q, unsigned Depth);
bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
unsigned Depth) {
@@ -288,7 +288,7 @@ bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
// this updated.
KnownBits Known = computeKnownBits(V, Depth, SQ);
return Known.isNonNegative() &&
- (Known.isNonZero() || isKnownNonZero(V, Depth, SQ));
+ (Known.isNonZero() || isKnownNonZero(V, SQ, Depth));
}
bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
@@ -868,7 +868,7 @@ static void computeKnownBitsFromShiftOperator(
bool ShAmtNonZero =
Known.isNonZero() ||
(Known.getMaxValue().ult(Known.getBitWidth()) &&
- isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q));
+ isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth + 1));
Known = KF(Known2, Known, ShAmtNonZero);
}
@@ -2124,7 +2124,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
case Instruction::Mul:
return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) &&
- (OrZero || isKnownNonZero(I, Depth, Q));
+ (OrZero || isKnownNonZero(I, Q, Depth));
case Instruction::And:
// A power of two and'd with anything is a power of two or zero.
if (OrZero &&
@@ -2134,7 +2134,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
// X & (-X) is always a power of two or zero.
if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) ||
match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0)))))
- return OrZero || isKnownNonZero(I->getOperand(0), Depth, Q);
+ return OrZero || isKnownNonZero(I->getOperand(0), Q, Depth);
return false;
case Instruction::Add: {
// Adding a power-of-two or zero to the same power-of-two or zero yields
@@ -2249,7 +2249,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
// If the base pointer is non-null, we cannot walk to a null address with an
// inbounds GEP in address space zero.
- if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q))
+ if (isKnownNonZero(GEP->getPointerOperand(), Q, Depth))
return true;
// Walk the GEP operands and see if any operand introduces a non-zero offset.
@@ -2288,7 +2288,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
if (Depth++ >= MaxAnalysisRecursionDepth)
continue;
- if (isKnownNonZero(GTI.getOperand(), Depth, Q))
+ if (isKnownNonZero(GTI.getOperand(), Q, Depth))
return true;
}
@@ -2441,8 +2441,8 @@ static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
const SimplifyQuery &Q, unsigned BitWidth, Value *X,
Value *Y, bool NSW, bool NUW) {
if (NUW)
- return isKnownNonZero(Y, DemandedElts, Depth, Q) ||
- isKnownNonZero(X, DemandedElts, Depth, Q);
+ return isKnownNonZero(Y, DemandedElts, Q, Depth) ||
+ isKnownNonZero(X, DemandedElts, Q, Depth);
KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
@@ -2450,8 +2450,8 @@ static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
// If X and Y are both non-negative (as signed values) then their sum is not
// zero unless both X and Y are zero.
if (XKnown.isNonNegative() && YKnown.isNonNegative())
- if (isKnownNonZero(Y, DemandedElts, Depth, Q) ||
- isKnownNonZero(X, DemandedElts, Depth, Q))
+ if (isKnownNonZero(Y, DemandedElts, Q, Depth) ||
+ isKnownNonZero(X, DemandedElts, Q, Depth))
return true;
// If X and Y are both negative (as signed values) then their sum is not
@@ -2485,7 +2485,7 @@ static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
Value *Y) {
// TODO: Move this case into isKnownNonEqual().
if (auto *C = dyn_cast<Constant>(X))
- if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Depth, Q))
+ if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Q, Depth))
return true;
return ::isKnownNonEqual(X, Y, Depth, Q);
@@ -2497,18 +2497,18 @@ static bool isNonZeroMul(const APInt &DemandedElts, unsigned Depth,
// If X and Y are non-zero then so is X * Y as long as the multiplication
// does not overflow.
if (NSW || NUW)
- return isKnownNonZero(X, DemandedElts, Depth, Q) &&
- isKnownNonZero(Y, DemandedElts, Depth, Q);
+ return isKnownNonZero(X, DemandedElts, Q, Depth) &&
+ isKnownNonZero(Y, DemandedElts, Q, Depth);
// If either X or Y is odd, then if the other is non-zero the result can't
// be zero.
KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
if (XKnown.One[0])
- return isKnownNonZero(Y, DemandedElts, Depth, Q);
+ return isKnownNonZero(Y, DemandedElts, Q, Depth);
KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
if (YKnown.One[0])
- return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Depth, Q);
+ return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Q, Depth);
// If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is
// non-zero, then X * Y is non-zero. We can find sX and sY by just taking
@@ -2564,7 +2564,7 @@ static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
// non-zero then at least one non-zero bit must remain.
if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
.eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
- isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q))
+ isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth))
return true;
return false;
@@ -2613,7 +2613,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
Type *FromTy = I->getOperand(0)->getType();
if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) &&
(BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0)
- return isKnownNonZero(I->getOperand(0), Depth, Q);
+ return isKnownNonZero(I->getOperand(0), Q, Depth);
} break;
case Instruction::IntToPtr:
// Note that we have to take special care to avoid looking through
@@ -2622,7 +2622,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
if (!isa<ScalableVectorType>(I->getType()) &&
Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
- return isKnownNonZero(I->getOperand(0), Depth, Q);
+ return isKnownNonZero(I->getOperand(0), Q, Depth);
break;
case Instruction::PtrToInt:
// Similar to int2ptr above, we can look through ptr2int here if the cast
@@ -2630,25 +2630,25 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
if (!isa<ScalableVectorType>(I->getType()) &&
Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
- return isKnownNonZero(I->getOperand(0), Depth, Q);
+ return isKnownNonZero(I->getOperand(0), Q, Depth);
break;
case Instruction::Sub:
return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
I->getOperand(1));
case Instruction::Or:
// X | Y != 0 if X != 0 or Y != 0.
- return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q) ||
- isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
+ return isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth) ||
+ isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
case Instruction::SExt:
case Instruction::ZExt:
// ext X != 0 if X != 0.
- return isKnownNonZero(I->getOperand(0), Depth, Q);
+ return isKnownNonZero(I->getOperand(0), Q, Depth);
case Instruction::Shl: {
// shl nsw/nuw can't remove any non-zero bits.
const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO))
- return isKnownNonZero(I->getOperand(0), Depth, Q);
+ return isKnownNonZero(I->getOperand(0), Q, Depth);
// shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
// if the lowest bit is shifted off the end.
@@ -2664,7 +2664,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
// shr exact can only shift out zero bits.
const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I);
if (BO->isExact())
- return isKnownNonZero(I->getOperand(0), Depth, Q);
+ return isKnownNonZero(I->getOperand(0), Q, Depth);
// shr X, Y != 0 if X is negative. Note that the value of the shift is not
// defined if the sign bit is shifted off the end.
@@ -2680,7 +2680,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
// X / Y
// div exact can only produce a zero if the dividend is zero.
if (cast<PossiblyExactOperator>(I)->isExact())
- return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
+ return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
std::optional<bool> XUgeY;
KnownBits XKnown =
@@ -2730,7 +2730,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
Value *Op;
Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2);
// Op is trivially non-zero.
- if (isKnownNonZero(Op, DemandedElts, Depth, Q))
+ if (isKnownNonZero(Op, DemandedElts, Q, Depth))
return true;
// The condition of the select dominates the true/false arm. Check if the
@@ -2780,7 +2780,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
}
}
// Finally recurse on the edge and check it directly.
- return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ);
+ return isKnownNonZero(U.get(), DemandedElts, RecQ, NewDepth);
});
}
case Instruction::InsertElement: {
@@ -2802,9 +2802,9 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
// Result is zero if Elt is non-zero and rest of the demanded elts in Vec
// are non-zero.
- return (SkipElt || isKnownNonZero(Elt, Depth, Q)) &&
+ return (SkipElt || isKnownNonZero(Elt, Q, Depth)) &&
(DemandedVecElts.isZero() ||
- isKnownNonZero(Vec, DemandedVecElts, Depth, Q));
+ isKnownNonZero(Vec, DemandedVecElts, Q, Depth));
}
case Instruction::ExtractElement:
if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) {
@@ -2816,7 +2816,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
APInt DemandedVecElts = APInt::getAllOnes(NumElts);
if (CIdx && CIdx->getValue().ult(NumElts))
DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
- return isKnownNonZero(Vec, DemandedVecElts, Depth, Q);
+ return isKnownNonZero(Vec, DemandedVecElts, Q, Depth);
}
}
break;
@@ -2831,12 +2831,12 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
break;
// If demanded elements for both vecs are non-zero, the shuffle is non-zero.
return (DemandedRHS.isZero() ||
- isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Depth, Q)) &&
+ isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Q, Depth)) &&
(DemandedLHS.isZero() ||
- isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Depth, Q));
+ isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Q, Depth));
}
case Instruction::Freeze:
- return isKnownNonZero(I->getOperand(0), Depth, Q) &&
+ return isKnownNonZero(I->getOperand(0), Q, Depth) &&
isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
Depth);
case Instruction::Load: {
@@ -2886,7 +2886,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
if (Call->isReturnNonNull())
return true;
if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
- return isKnownNonZero(RP, Depth, Q);
+ return isKnownNonZero(RP, Q, Depth);
} else {
if (MDNode *Ranges = Q.IIQ.getMetadata(Call, LLVMContext::MD_range))
return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth));
@@ -2896,7 +2896,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
return true;
}
if (const Value *RV = Call->getReturnedArgOperand())
- if (RV->getType() == I->getType() && isKnownNonZero(RV, Depth, Q))
+ if (RV->getType() == I->getType() && isKnownNonZero(RV, Q, Depth))
return true;
}
@@ -2908,7 +2908,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
case Intrinsic::bitreverse:
case Intrinsic::bswap:
case Intrinsic::ctpop:
- return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q);
+ return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
// NB: We don't do usub_sat here as in any case we can prove its
// non-zero, we will fold it to `sub nuw` in InstCombine.
case Intrinsic::ssub_sat:
@@ -2924,11 +2924,11 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
- return isKnownNonZero(II->getArgOperand(0), Depth, Q);
+ return isKnownNonZero(II->getArgOperand(0), Q, Depth);
case Intrinsic::umax:
case Intrinsic::uadd_sat:
- return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) ||
- isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q);
+ return isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth) ||
+ isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
case Intrinsic::smax: {
// If either arg is strictly positive the result is non-zero. Otherwise
// the result is non-zero if both ops are non-zero.
@@ -2936,7 +2936,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
const KnownBits &OpKnown) {
if (!OpNonZero.has_value())
OpNonZero = OpKnown.isNonZero() ||
- isKnownNonZero(Op, DemandedElts, Depth, Q);
+ isKnownNonZero(Op, DemandedElts, Q, Depth);
return *OpNonZero;
};
// Avoid re-computing isKnownNonZero.
@@ -2971,8 +2971,8 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
}
[[fallthrough]];
case Intrinsic::umin:
- return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q) &&
- isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q);
+ return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth) &&
+ isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth);
case Intrinsic::cttz:
return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
.Zero[0];
@@ -2983,12 +2983,12 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
case Intrinsic::fshl:
// If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0.
if (II->getArgOperand(0) == II->getArgOperand(1))
- return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q);
+ return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
break;
case Intrinsic::vscale:
return true;
case Intrinsic::experimental_get_vector_length:
- return isKnownNonZero(I->getOperand(0), Depth, Q);
+ return isKnownNonZero(I->getOperand(0), Q, Depth);
default:
break;
}
@@ -3010,8 +3010,8 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
/// specified, perform context-sensitive analysis and return true if the
/// pointer couldn't possibly be null at the specified instruction.
/// Supports values with integer or pointer type and vectors of integers.
-bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
- const SimplifyQuery &Q) {
+bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
+ const SimplifyQuery &Q, unsigned Depth) {
Type *Ty = V->getType();
#ifndef NDEBUG
@@ -3101,12 +3101,12 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
return false;
}
-bool llvm::isKnownNonZero(const Value *V, unsigned Depth,
- const SimplifyQuery &Q) {
+bool llvm::isKnownNonZero(const Value *V, const SimplifyQuery &Q,
+ unsigned Depth) {
auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
APInt DemandedElts =
FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
- return ::isKnownNonZero(V, DemandedElts, Depth, Q);
+ return ::isKnownNonZero(V, DemandedElts, Q, Depth);
}
/// If the pair of operators are the same invertible function, return the
@@ -3253,7 +3253,7 @@ static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2,
Op = BO->getOperand(0);
else
return false;
- return isKnownNonZero(Op, Depth + 1, Q);
+ return isKnownNonZero(Op, Q, Depth + 1);
}
return false;
}
@@ -3266,7 +3266,7 @@ static bool isNonEqualMul(const Value *V1, const Value *V2, unsigned Depth,
const APInt *C;
return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
(OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
- !C->isZero() && !C->isOne() && isKnownNonZero(V1, Depth + 1, Q);
+ !C->isZero() && !C->isOne() && isKnownNonZero(V1, Q, Depth + 1);
}
return false;
}
@@ -3279,7 +3279,7 @@ static bool isNonEqualShl(const Value *V1, const Value *V2, unsigned Depth,
const APInt *C;
return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
(OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
- !C->isZero() && isKnownNonZero(V1, Depth + 1, Q);
+ !C->isZero() && isKnownNonZero(V1, Q, Depth + 1);
}
return false;
}
@@ -4664,6 +4664,12 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
return;
}
+ if (isa<PoisonValue>(V)) {
+ Known.KnownFPClasses = fcNone;
+ Known.SignBit = false;
+ return;
+ }
+
// Try to handle fixed width vector constants
auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
const Constant *CV = dyn_cast<Constant>(V);
@@ -5026,6 +5032,19 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
break;
}
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
+ case Intrinsic::vector_reduce_fmaximum:
+ case Intrinsic::vector_reduce_fminimum: {
+ // reduce min/max will choose an element from one of the vector elements,
+    // so we can infer any class information that is common to all elements.
+ Known = computeKnownFPClass(II->getArgOperand(0), II->getFastMathFlags(),
+ InterestedClasses, Depth + 1, Q);
+ // Can only propagate sign if output is never NaN.
+ if (!Known.isKnownNeverNaN())
+ Known.SignBit.reset();
+ break;
+ }
case Intrinsic::trunc:
case Intrinsic::floor:
case Intrinsic::ceil:
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index fe4f0d6..0b7fcd8 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -6454,6 +6454,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_DEBUG_RECORD_LABEL: {
// DbgLabelRecords are placed after the Instructions that they are
// attached to.
+ SeenDebugRecord = true;
Instruction *Inst = getLastInstruction();
if (!Inst)
return error("Invalid dbg record: missing instruction");
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index ecf7bc3..55aa1d4 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -2047,12 +2047,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
MBB->splice(Loc, TBB, TBB->begin(), TIB);
FBB->erase(FBB->begin(), FIB);
- if (UpdateLiveIns) {
- bool anyChange = false;
- do {
- anyChange = recomputeLiveIns(*TBB) || recomputeLiveIns(*FBB);
- } while (anyChange);
- }
+ if (UpdateLiveIns)
+ fullyRecomputeLiveIns({TBB, FBB});
++NumHoist;
return true;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 22dbb31..e657872 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2314,7 +2314,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
// Bail if the value is never zero.
Use &Op = CountZeros->getOperandUse(0);
- if (isKnownNonZero(Op, /*Depth=*/0, *DL))
+ if (isKnownNonZero(Op, *DL))
return false;
// The intrinsic will be sunk behind a compare against zero and branch.
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 0fe4cfe..8e623c8 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -340,6 +340,8 @@ Value *CachingVPExpander::expandPredicationToFPCall(
replaceOperation(*NewOp, VPI);
return NewOp;
}
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_fmuladd: {
Value *Op0 = VPI.getOperand(0);
@@ -347,8 +349,12 @@ Value *CachingVPExpander::expandPredicationToFPCall(
Value *Op2 = VPI.getOperand(2);
Function *Fn = Intrinsic::getDeclaration(
VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
- Value *NewOp =
- Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
+ Value *NewOp;
+ if (Intrinsic::isConstrainedFPIntrinsic(UnpredicatedIntrinsicID))
+ NewOp =
+ Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
+ else
+ NewOp = Builder.CreateCall(Fn, {Op0, Op1, Op2}, VPI.getName());
replaceOperation(*NewOp, VPI);
return NewOp;
}
@@ -731,6 +737,8 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
case Intrinsic::vp_minnum:
case Intrinsic::vp_maximum:
case Intrinsic::vp_minimum:
+ case Intrinsic::vp_fma:
+ case Intrinsic::vp_fmuladd:
return expandPredicationToFPCall(Builder, VPI,
VPI.getFunctionalIntrinsicID().value());
case Intrinsic::vp_load:
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 40c5119..3829c33 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6273,8 +6273,21 @@ bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
}
bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
+ unsigned LHSOpndIdx = 1;
+ unsigned RHSOpndIdx = 2;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_UMULO:
+ case TargetOpcode::G_SMULO:
+ LHSOpndIdx = 2;
+ RHSOpndIdx = 3;
+ break;
+ default:
+ break;
+ }
+ Register LHS = MI.getOperand(LHSOpndIdx).getReg();
+ Register RHS = MI.getOperand(RHSOpndIdx).getReg();
if (!getIConstantVRegVal(LHS, MRI)) {
// Skip commuting if LHS is not a constant. But, LHS may be a
// G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
@@ -6300,10 +6313,23 @@ bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
Observer.changingInstr(MI);
- Register LHSReg = MI.getOperand(1).getReg();
- Register RHSReg = MI.getOperand(2).getReg();
- MI.getOperand(1).setReg(RHSReg);
- MI.getOperand(2).setReg(LHSReg);
+ unsigned LHSOpndIdx = 1;
+ unsigned RHSOpndIdx = 2;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_UMULO:
+ case TargetOpcode::G_SMULO:
+ LHSOpndIdx = 2;
+ RHSOpndIdx = 3;
+ break;
+ default:
+ break;
+ }
+ Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
+ Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
+ MI.getOperand(LHSOpndIdx).setReg(RHSReg);
+ MI.getOperand(RHSOpndIdx).setReg(LHSReg);
Observer.changedInstr(MI);
}
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 95c6a35..1563532 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -472,6 +472,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(NEARBYINT_F);
case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
RTLIBCASE(ROUNDEVEN_F);
+ case TargetOpcode::G_INTRINSIC_LRINT:
+ RTLIBCASE(LRINT_F);
}
llvm_unreachable("Unknown libcall function");
}
@@ -1059,6 +1061,25 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
return Status;
break;
}
+ case TargetOpcode::G_INTRINSIC_LRINT: {
+ LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
+ unsigned Size = LLTy.getSizeInBits();
+ Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
+ Type *ITy = IntegerType::get(
+ Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
+ if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
+ LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
+ return UnableToLegalize;
+ }
+ auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
+ LegalizeResult Status =
+ createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
+ {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
+ if (Status != Legalized)
+ return Status;
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_FPOWI: {
LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
unsigned Size = LLTy.getSizeInBits();
@@ -2639,6 +2660,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_INTRINSIC_LRINT:
case TargetOpcode::G_IS_FPCLASS:
Observer.changingInstr(MI);
@@ -4265,6 +4287,10 @@ LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
}
}
+ // Set the insert point after the existing PHIs
+ MachineBasicBlock &MBB = *MI.getParent();
+ MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
+
// Merge small outputs into MI's def.
if (NumLeftovers) {
mergeMixedSubvectors(MI.getReg(0), OutputRegs);
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index c3bc320..ae43e9c 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1665,3 +1665,47 @@ void llvm::salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI) {
}
}
}
+
+bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FCANONICALIZE:
+ case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_FCOPYSIGN:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FEXP2:
+ case TargetOpcode::G_FEXP:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FLOG10:
+ case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FLOG:
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FMAD:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPOW:
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ return true;
+ default:
+ return false;
+ }
+}
diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 7cb90af..3a59ae7 100644
--- a/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -18,7 +18,7 @@
//
//===----------------------------------------------------------------------===//
-#include "LiveDebugVariables.h"
+#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
diff --git a/llvm/lib/CodeGen/LiveDebugVariables.h b/llvm/lib/CodeGen/LiveDebugVariables.h
deleted file mode 100644
index 9998ce9e..0000000
--- a/llvm/lib/CodeGen/LiveDebugVariables.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//===- LiveDebugVariables.h - Tracking debug info variables -----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides the interface to the LiveDebugVariables analysis.
-//
-// The analysis removes DBG_VALUE instructions for virtual registers and tracks
-// live user variables in a data structure that can be updated during register
-// allocation.
-//
-// After register allocation new DBG_VALUE instructions are emitted to reflect
-// the new locations of user variables.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
-#define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
-
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-
-template <typename T> class ArrayRef;
-class LiveIntervals;
-class VirtRegMap;
-
-class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass {
- void *pImpl = nullptr;
-
-public:
- static char ID; // Pass identification, replacement for typeid
-
- LiveDebugVariables();
- ~LiveDebugVariables() override;
-
- /// splitRegister - Move any user variables in OldReg to the live ranges in
- /// NewRegs where they are live. Mark the values as unavailable where no new
- /// register is live.
- void splitRegister(Register OldReg, ArrayRef<Register> NewRegs,
- LiveIntervals &LIS);
-
- /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
- /// that happened during register allocation.
- /// @param VRM Rename virtual registers according to map.
- void emitDebugValues(VirtRegMap *VRM);
-
- /// dump - Print data structures to dbgs().
- void dump() const;
-
-private:
- bool runOnMachineFunction(MachineFunction &) override;
- void releaseMemory() override;
- void getAnalysisUsage(AnalysisUsage &) const override;
-
- MachineFunctionProperties getSetProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::TracksDebugUserValues);
- }
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
diff --git a/llvm/lib/CodeGen/LowerEmuTLS.cpp b/llvm/lib/CodeGen/LowerEmuTLS.cpp
index af0b0a2..ec36b66 100644
--- a/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -139,8 +139,7 @@ bool addEmuTlsVar(Module &M, const GlobalVariable *GV) {
IntegerType *WordType = DL.getIntPtrType(C);
PointerType *InitPtrType = PointerType::getUnqual(C);
Type *ElementTypes[4] = {WordType, WordType, VoidPtrType, InitPtrType};
- ArrayRef<Type*> ElementTypeArray(ElementTypes, 4);
- StructType *EmuTlsVarType = StructType::create(ElementTypeArray);
+ StructType *EmuTlsVarType = StructType::create(ElementTypes);
EmuTlsVar = cast<GlobalVariable>(
M.getOrInsertGlobal(EmuTlsVarName, EmuTlsVarType));
copyLinkageVisibility(M, GV, EmuTlsVar);
@@ -170,9 +169,7 @@ bool addEmuTlsVar(Module &M, const GlobalVariable *GV) {
ConstantInt::get(WordType, DL.getTypeStoreSize(GVType)),
ConstantInt::get(WordType, GVAlignment.value()), NullPtr,
EmuTlsTmplVar ? EmuTlsTmplVar : NullPtr};
- ArrayRef<Constant*> ElementValueArray(ElementValues, 4);
- EmuTlsVar->setInitializer(
- ConstantStruct::get(EmuTlsVarType, ElementValueArray));
+ EmuTlsVar->setInitializer(ConstantStruct::get(EmuTlsVarType, ElementValues));
Align MaxAlignment =
std::max(DL.getABITypeAlign(WordType), DL.getABITypeAlign(VoidPtrType));
EmuTlsVar->setAlignment(MaxAlignment);
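The LowerEmuTLS change above works because llvm::ArrayRef can be constructed implicitly from a C array, so spelling out the element count is redundant. The following stand-alone sketch uses a minimal view type of its own (ViewRef is not an LLVM class) to show the same implicit conversion:

#include <cstddef>
#include <cstdio>

// A deliberately tiny stand-in for the view type; not LLVM's ArrayRef.
template <typename T> struct ViewRef {
  const T *Data = nullptr;
  std::size_t Length = 0;
  template <std::size_t N>
  ViewRef(const T (&Arr)[N]) : Data(Arr), Length(N) {} // implicit from a C array
};

static std::size_t countElems(ViewRef<int> V) { return V.Length; }

int main() {
  int Elems[4] = {1, 2, 3, 4};
  // No explicit ViewRef<int>(Elems, 4) temporary is needed at the call site.
  std::printf("%zu\n", countElems(Elems));
  return 0;
}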
diff --git a/llvm/lib/CodeGen/MachineDebugify.cpp b/llvm/lib/CodeGen/MachineDebugify.cpp
index c264e19..bffdd51 100644
--- a/llvm/lib/CodeGen/MachineDebugify.cpp
+++ b/llvm/lib/CodeGen/MachineDebugify.cpp
@@ -65,6 +65,7 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
// all the others.
Function *DbgValF = M.getFunction("llvm.dbg.value");
DbgValueInst *EarliestDVI = nullptr;
+ DbgVariableRecord *EarliestDVR = nullptr;
DenseMap<unsigned, DILocalVariable *> Line2Var;
DIExpression *Expr = nullptr;
if (DbgValF) {
@@ -80,6 +81,20 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
Expr = DVI->getExpression();
}
}
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
+ if (!DVR.isDbgValue())
+ continue;
+ unsigned Line = DVR.getDebugLoc().getLine();
+ assert(Line != 0 && "debugify should not insert line 0 locations");
+ Line2Var[Line] = DVR.getVariable();
+ if (!EarliestDVR || Line < EarliestDVR->getDebugLoc().getLine())
+ EarliestDVR = &DVR;
+ Expr = DVR.getExpression();
+ }
+ }
+ }
if (Line2Var.empty())
return true;
@@ -109,7 +124,8 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
// Find a suitable local variable for the DBG_VALUE.
unsigned Line = MI.getDebugLoc().getLine();
if (!Line2Var.count(Line))
- Line = EarliestDVI->getDebugLoc().getLine();
+ Line = EarliestDVI ? EarliestDVI->getDebugLoc().getLine()
+ : EarliestDVR->getDebugLoc().getLine();
DILocalVariable *LocalVar = Line2Var[Line];
assert(LocalVar && "No variable for current line?");
VarSet.insert(LocalVar);
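The new MachineDebugify loop mirrors for DbgVariableRecords what was already done for llvm.dbg.value calls: remember one variable per line and keep the record with the smallest line number as a fallback. A minimal sketch of that bookkeeping with plain standard types (Rec stands in for a debug record; nothing here is an LLVM API):

#include <cassert>
#include <map>
#include <string>
#include <vector>

struct Rec { unsigned Line; std::string Var; };

int main() {
  std::vector<Rec> Records = {{7, "x"}, {3, "y"}, {9, "z"}};
  std::map<unsigned, std::string> Line2Var;
  const Rec *Earliest = nullptr;
  for (const Rec &R : Records) {
    Line2Var[R.Line] = R.Var;
    if (!Earliest || R.Line < Earliest->Line)
      Earliest = &R;
  }
  // A DBG_VALUE whose line is unknown falls back to the earliest record.
  unsigned Line = 5;
  if (!Line2Var.count(Line))
    Line = Earliest->Line;
  assert(Line2Var[Line] == "y");
  return 0;
}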
diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index 6661991..5bd3b12 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "AllocationOrder.h"
-#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index a208bf8..3482772 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -14,7 +14,6 @@
#include "RegAllocGreedy.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
-#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "RegAllocEvictionAdvisor.h"
#include "RegAllocPriorityAdvisor.h"
@@ -31,6 +30,7 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/CodeGen/LiveIntervals.h"
diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp
index f86aa3a..3fa2244 100644
--- a/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -64,7 +64,7 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
const MachineRegisterInfo &MRI, Register Reg,
LaneBitmask PrevMask, LaneBitmask NewMask) {
- //assert((NewMask & !PrevMask) == 0 && "Must not add bits");
+ assert((NewMask & ~PrevMask).none() && "Must not add bits");
if (NewMask.any() || PrevMask.none())
return;
@@ -617,17 +617,11 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
++I;
}
}
- for (auto *I = Uses.begin(); I != Uses.end();) {
- LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit,
- Pos.getBaseIndex());
- LaneBitmask LaneMask = I->LaneMask & LiveBefore;
- if (LaneMask.none()) {
- I = Uses.erase(I);
- } else {
- I->LaneMask = LaneMask;
- ++I;
- }
- }
+
+ // For uses just copy the information from LIS.
+ for (auto &[RegUnit, LaneMask] : Uses)
+ LaneMask = getLiveLanesAt(LIS, MRI, true, RegUnit, Pos.getBaseIndex());
+
if (AddFlagsMI != nullptr) {
for (const RegisterMaskPair &P : DeadDefs) {
Register RegUnit = P.RegUnit;
@@ -1060,18 +1054,27 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
// Kill liveness at live defs.
for (const RegisterMaskPair &P : RegOpers.Defs) {
Register Reg = P.RegUnit;
- LaneBitmask LiveLanes = LiveRegs.contains(Reg);
+ LaneBitmask LiveAfter = LiveRegs.contains(Reg);
LaneBitmask UseLanes = getRegLanes(RegOpers.Uses, Reg);
LaneBitmask DefLanes = P.LaneMask;
- LaneBitmask LiveAfter = (LiveLanes & ~DefLanes) | UseLanes;
- decreaseRegPressure(Reg, LiveLanes, LiveAfter);
+ LaneBitmask LiveBefore = (LiveAfter & ~DefLanes) | UseLanes;
+
+ // There may be parts of the register that were dead before the
+ // instruction, but became live afterwards. Similarly, some parts
+ // may have been killed in this instruction.
+ decreaseRegPressure(Reg, LiveAfter, LiveAfter & LiveBefore);
+ increaseRegPressure(Reg, LiveAfter, ~LiveAfter & LiveBefore);
}
// Generate liveness for uses.
for (const RegisterMaskPair &P : RegOpers.Uses) {
Register Reg = P.RegUnit;
- LaneBitmask LiveLanes = LiveRegs.contains(Reg);
- LaneBitmask LiveAfter = LiveLanes | P.LaneMask;
- increaseRegPressure(Reg, LiveLanes, LiveAfter);
+ // If this register was also in a def operand, we've handled it
+ // with defs.
+ if (getRegLanes(RegOpers.Defs, Reg).any())
+ continue;
+ LaneBitmask LiveAfter = LiveRegs.contains(Reg);
+ LaneBitmask LiveBefore = LiveAfter | P.LaneMask;
+ increaseRegPressure(Reg, LiveAfter, LiveBefore);
}
}
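The bumpUpwardPressure hunk above is easiest to read as lane-mask arithmetic: walking upwards over an instruction, lanes written by a def (and not re-read) die, while lanes read by a use become live. A small worked example with plain bitmasks standing in for LaneBitmask (the lane layout is made up):

#include <cstdio>

int main() {
  unsigned LiveAfter = 0xC; // 0b1100: lanes live after the instruction
  unsigned DefLanes  = 0x4; // 0b0100: lanes written by the instruction
  unsigned UseLanes  = 0x3; // 0b0011: lanes read by the instruction

  unsigned LiveBefore = (LiveAfter & ~DefLanes) | UseLanes; // 0xB
  unsigned Killed     = LiveAfter & ~LiveBefore;  // live after, dead before: 0x4
  unsigned Born       = ~LiveAfter & LiveBefore;  // dead after, live before: 0x3

  std::printf("before=%#x killed=%#x born=%#x\n", LiveBefore, Killed, Born);
  return 0;
}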
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d9c6c28..c36b1cc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5577,9 +5577,12 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return RMINMAX;
// If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
- // Only do this if the current op isn't legal and the flipped is.
- if (!TLI.isOperationLegal(Opcode, VT) &&
- (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
+ // Only do this if:
+ // 1. The current op isn't legal and the flipped is.
+ // 2. The saturation pattern is broken by canonicalization in InstCombine.
+ bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
+ bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
+ if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
(N1.isUndef() || DAG.SignBitIsZero(N1))) {
unsigned AltOpcode;
switch (Opcode) {
@@ -5589,7 +5592,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
case ISD::UMAX: AltOpcode = ISD::SMAX; break;
default: llvm_unreachable("Unknown MINMAX opcode");
}
- if (TLI.isOperationLegal(AltOpcode, VT))
+ if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
return DAG.getNode(AltOpcode, DL, VT, N0, N1);
}
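The flip in visitIMINMAX relies on the fact that values whose sign bit is known zero compare the same way under signed and unsigned order, so smin and umin (and smax/umax) agree on them. A tiny numeric check of that property, with arbitrary example values:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  std::int8_t X = 42, Y = 100;                    // sign bits known to be zero
  std::uint8_t UX = static_cast<std::uint8_t>(X);
  std::uint8_t UY = static_cast<std::uint8_t>(Y);
  // With clear sign bits the signed and unsigned orderings agree.
  assert(std::min(X, Y) == static_cast<std::int8_t>(std::min(UX, UY)));
  return 0;
}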
@@ -9530,7 +9533,8 @@ static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
- return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
+ return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
+ LogicOp->getFlags());
}
/// Handle transforms common to the three shifts, when the shift amount is a
@@ -24473,11 +24477,10 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
unsigned InsIdx = V.getConstantOperandVal(2);
unsigned NumSubElts = NVT.getVectorMinNumElements();
if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
- TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx)) {
- SDLoc DL(N);
+ TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
+ InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector())
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
- }
}
// Try to move vector bitcast after extract_subv by scaling extraction index:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 57a3f6a..7a9cfdf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1159,8 +1159,14 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
}
SDValue Unrolled = DAG.UnrollVectorOp(Node);
- for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
- Results.push_back(Unrolled.getValue(I));
+ if (Node->getNumValues() == 1) {
+ Results.push_back(Unrolled);
+ } else {
+ assert(Node->getNumValues() == Unrolled->getNumValues() &&
+ "VectorLegalizer Expand returned wrong number of results!");
+ for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
+ Results.push_back(Unrolled.getValue(I));
+ }
}
SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index fc97266..ca0a957 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1111,9 +1111,11 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
#ifndef NDEBUG
/// VerifySDNode - Check the given SDNode. Aborts if it is invalid.
-static void VerifySDNode(SDNode *N) {
+static void VerifySDNode(SDNode *N, const TargetLowering *TLI) {
switch (N->getOpcode()) {
default:
+ if (N->getOpcode() > ISD::BUILTIN_OP_END)
+ TLI->verifyTargetSDNode(N);
break;
case ISD::BUILD_PAIR: {
EVT VT = N->getValueType(0);
@@ -1157,7 +1159,7 @@ void SelectionDAG::InsertNode(SDNode *N) {
AllNodes.push_back(N);
#ifndef NDEBUG
N->PersistentId = NextPersistentId++;
- VerifySDNode(N);
+ VerifySDNode(N, TLI);
#endif
for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
DUL->NodeInserted(N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4939976..c938b39 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -544,7 +544,8 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
if (!C.isSubsetOf(DemandedBits)) {
EVT VT = Op.getValueType();
SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
- SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
+ SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
+ Op->getFlags());
return TLO.CombineTo(Op, NewOp);
}
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 5ed67bd..f5dd21cb927 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -68,6 +68,18 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
"end with indirect branches."), cl::init(20),
cl::Hidden);
+static cl::opt<unsigned>
+ TailDupPredSize("tail-dup-pred-size",
+ cl::desc("Maximum predecessors (maximum successors at the "
+ "same time) to consider tail duplicating blocks."),
+ cl::init(16), cl::Hidden);
+
+static cl::opt<unsigned>
+ TailDupSuccSize("tail-dup-succ-size",
+ cl::desc("Maximum successors (maximum predecessors at the "
+ "same time) to consider tail duplicating blocks."),
+ cl::init(16), cl::Hidden);
+
static cl::opt<bool>
TailDupVerify("tail-dup-verify",
cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -565,6 +577,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (TailBB.isSuccessor(&TailBB))
return false;
+ // Duplicating a BB which has both multiple predecessors and successors will
+ // result in a complex CFG and also may cause huge amount of PHI nodes. If we
+ // want to remove this limitation, we have to address
+ // https://github.com/llvm/llvm-project/issues/78578.
+ if (TailBB.pred_size() > TailDupPredSize &&
+ TailBB.succ_size() > TailDupSuccSize)
+ return false;
+
// Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
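The new pred/succ cut-off in shouldTailDuplicate is a guard against quadratic CFG growth: one copy of the tail per predecessor, each copy keeping all of the successors. A rough back-of-the-envelope sketch of that cost with illustrative numbers (Limit mirrors the new defaults of 16):

#include <cstdio>

int main() {
  unsigned P = 32, S = 32, Limit = 16;
  // Duplicating introduces one copy per predecessor, so on the order of
  // P * S new incoming edges/PHI entries appear in the successors.
  bool WouldDuplicate = !(P > Limit && S > Limit);
  std::printf("new edges ~ %u, duplicate? %s\n", P * S,
              WouldDuplicate ? "yes" : "no");
  return 0;
}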
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index f64ded4..6e7b67d 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1809,8 +1809,16 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
else if (attr.hasRetAttr(Attribute::ZExt))
Flags.setZExt();
- for (unsigned i = 0; i < NumParts; ++i)
- Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0));
+ for (unsigned i = 0; i < NumParts; ++i) {
+ ISD::ArgFlagsTy OutFlags = Flags;
+ if (NumParts > 1 && i == 0)
+ OutFlags.setSplit();
+ else if (i == NumParts - 1 && i != 0)
+ OutFlags.setSplitEnd();
+
+ Outs.push_back(
+ ISD::OutputArg(OutFlags, PartVT, VT, /*isfixed=*/true, 0, 0));
+ }
}
}
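The GetReturnInfo change marks the first of a multi-part return value with a split flag and the last with a split-end flag, leaving single-part returns untouched. A self-contained sketch of that flag distribution (PartFlag and the loop are local to this sketch, not LLVM types):

#include <cstdio>

enum PartFlag { None, Split, SplitEnd };

int main() {
  const unsigned NumParts = 3;
  for (unsigned I = 0; I < NumParts; ++I) {
    PartFlag F = None;
    if (NumParts > 1 && I == 0)
      F = Split;               // first part opens the split
    else if (I == NumParts - 1 && I != 0)
      F = SplitEnd;            // last part closes it
    std::printf("part %u -> %d\n", I, (int)F);
  }
  return 0;
}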
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 48f4ee2..2c77898 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -16,9 +16,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/VirtRegMap.h"
-#include "LiveDebugVariables.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 22c9e8c..ac19ac7 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -572,12 +572,12 @@ dwarf::findDebugNamesOffsets(uint64_t EndOfHeaderOffset,
Error DWARFDebugNames::NameIndex::extract() {
const DWARFDataExtractor &AS = Section.AccelSection;
- uint64_t hdrSize = Base;
- if (Error E = Hdr.extract(AS, &hdrSize))
+ uint64_t EndOfHeaderOffset = Base;
+ if (Error E = Hdr.extract(AS, &EndOfHeaderOffset))
return E;
const unsigned SectionOffsetSize = dwarf::getDwarfOffsetByteSize(Hdr.Format);
- Offsets = dwarf::findDebugNamesOffsets(hdrSize, Hdr);
+ Offsets = dwarf::findDebugNamesOffsets(EndOfHeaderOffset, Hdr);
uint64_t Offset =
Offsets.EntryOffsetsBase + (Hdr.NameCount * SectionOffsetSize);
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 7fd8474..4d2d352 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4786,11 +4786,9 @@ OpenMPIRBuilder::readTeamBoundsForKernel(const Triple &, Function &Kernel) {
void OpenMPIRBuilder::writeTeamsForKernel(const Triple &T, Function &Kernel,
int32_t LB, int32_t UB) {
- if (T.isNVPTX()) {
+ if (T.isNVPTX())
if (UB > 0)
updateNVPTXMetadata(Kernel, "maxclusterrank", UB, true);
- updateNVPTXMetadata(Kernel, "minctasm", LB, false);
- }
if (T.isAMDGPU())
Kernel.addFnAttr("amdgpu-max-num-workgroups", llvm::utostr(LB) + ",1,1");
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 2c480fb..634b2dd 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5341,10 +5341,11 @@ MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
Triple T(TT);
- // The only data layout upgrades needed for pre-GCN are setting the address
- // space of globals to 1.
- if (T.isAMDGPU() && !T.isAMDGCN() && !DL.contains("-G") &&
- !DL.starts_with("G")) {
+ // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
+ // the address space of globals to 1. This does not apply to SPIRV Logical.
+ if (((T.isAMDGPU() && !T.isAMDGCN()) ||
+ (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
+ !DL.contains("-G") && !DL.starts_with("G")) {
return DL.empty() ? std::string("G1") : (DL + "-G1").str();
}
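The UpgradeDataLayoutString hunk only appends a default globals address space when none is present. A quick plain-std::string model of that logic, kept separate from the LLVM API, with example layout strings:

#include <cassert>
#include <string>

static std::string upgrade(const std::string &DL) {
  // Leave the string alone if a "G" entry is already present.
  if (DL.find("-G") != std::string::npos || DL.rfind("G", 0) == 0)
    return DL;
  return DL.empty() ? std::string("G1") : DL + "-G1";
}

int main() {
  assert(upgrade("") == "G1");
  assert(upgrade("e-p:64:64") == "e-p:64:64-G1");
  assert(upgrade("G1-e") == "G1-e"); // already specified, left alone
  return 0;
}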
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index a5fb497..45b359a 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -316,7 +316,7 @@ bool Constant::isElementWiseEqual(Value *Y) const {
Constant *C0 = ConstantExpr::getBitCast(const_cast<Constant *>(this), IntTy);
Constant *C1 = ConstantExpr::getBitCast(cast<Constant>(Y), IntTy);
Constant *CmpEq = ConstantExpr::getICmp(ICmpInst::ICMP_EQ, C0, C1);
- return isa<UndefValue>(CmpEq) || match(CmpEq, m_One());
+ return isa<PoisonValue>(CmpEq) || match(CmpEq, m_One());
}
static bool
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 8ce9c5c..6aff94f 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -3769,6 +3769,10 @@ static AtomicRMWInst::BinOp mapFromLLVMRMWBinOp(LLVMAtomicRMWBinOp BinOp) {
case LLVMAtomicRMWBinOpFSub: return AtomicRMWInst::FSub;
case LLVMAtomicRMWBinOpFMax: return AtomicRMWInst::FMax;
case LLVMAtomicRMWBinOpFMin: return AtomicRMWInst::FMin;
+ case LLVMAtomicRMWBinOpUIncWrap:
+ return AtomicRMWInst::UIncWrap;
+ case LLVMAtomicRMWBinOpUDecWrap:
+ return AtomicRMWInst::UDecWrap;
}
llvm_unreachable("Invalid LLVMAtomicRMWBinOp value!");
@@ -3791,6 +3795,10 @@ static LLVMAtomicRMWBinOp mapToLLVMRMWBinOp(AtomicRMWInst::BinOp BinOp) {
case AtomicRMWInst::FSub: return LLVMAtomicRMWBinOpFSub;
case AtomicRMWInst::FMax: return LLVMAtomicRMWBinOpFMax;
case AtomicRMWInst::FMin: return LLVMAtomicRMWBinOpFMin;
+ case AtomicRMWInst::UIncWrap:
+ return LLVMAtomicRMWBinOpUIncWrap;
+ case AtomicRMWInst::UDecWrap:
+ return LLVMAtomicRMWBinOpUDecWrap;
default: break;
}
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 96953ac..818a167 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -499,15 +499,7 @@ static MutableArrayRef<Argument> makeArgArray(Argument *Args, size_t Count) {
}
bool Function::isConstrainedFPIntrinsic() const {
- switch (getIntrinsicID()) {
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
- case Intrinsic::INTRINSIC:
-#include "llvm/IR/ConstrainedOps.def"
- return true;
-#undef INSTRUCTION
- default:
- return false;
- }
+ return Intrinsic::isConstrainedFPIntrinsic(getIntrinsicID());
}
void Function::clearArguments() {
@@ -1486,6 +1478,18 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
#include "llvm/IR/IntrinsicImpl.inc"
#undef GET_LLVM_INTRINSIC_FOR_MS_BUILTIN
+bool Intrinsic::isConstrainedFPIntrinsic(ID QID) {
+ switch (QID) {
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+ case Intrinsic::INTRINSIC:
+#include "llvm/IR/ConstrainedOps.def"
+ return true;
+#undef INSTRUCTION
+ default:
+ return false;
+ }
+}
+
using DeferredIntrinsicMatchPair =
std::pair<Type *, ArrayRef<Intrinsic::IITDescriptor>>;
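The Function.cpp refactor simply relocates the usual .def/X-macro switch into Intrinsic::isConstrainedFPIntrinsic so other callers can query by ID. For readers unfamiliar with the idiom, here is a self-contained illustration with made-up opcodes (the real list lives in llvm/IR/ConstrainedOps.def):

#include <cassert>

enum class ID { Add, ConstrainedFAdd, ConstrainedFSub, Ret };

static bool isConstrained(ID QID) {
  switch (QID) {
// The macro expands to a case label per listed operation.
#define CONSTRAINED_OP(NAME) case ID::NAME:
  CONSTRAINED_OP(ConstrainedFAdd)
  CONSTRAINED_OP(ConstrainedFSub)
    return true;
#undef CONSTRAINED_OP
  default:
    return false;
  }
}

int main() {
  assert(isConstrained(ID::ConstrainedFAdd));
  assert(!isConstrained(ID::Add));
  return 0;
}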
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 25cb99f..4cd61e6 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5122,6 +5122,9 @@ void Verifier::visitInstruction(Instruction &I) {
if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa))
TBAAVerifyHelper.visitTBAAMetadata(I, TBAA);
+ if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa_struct))
+ TBAAVerifyHelper.visitTBAAStructMetadata(I, TBAA);
+
if (MDNode *MD = I.getMetadata(LLVMContext::MD_noalias))
visitAliasScopeListMetadata(MD);
if (MDNode *MD = I.getMetadata(LLVMContext::MD_alias_scope))
@@ -5795,6 +5798,11 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
break;
}
+ case Intrinsic::vastart: {
+ Check(Call.getFunction()->isVarArg(),
+ "va_start called in a non-varargs function");
+ break;
+ }
case Intrinsic::vector_reduce_and:
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_xor:
@@ -7453,6 +7461,35 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
return true;
}
+bool TBAAVerifier::visitTBAAStructMetadata(Instruction &I, const MDNode *MD) {
+ CheckTBAA(MD->getNumOperands() % 3 == 0,
+ "tbaa.struct operands must occur in groups of three", &I, MD);
+
+ // Each group of three operands must consist of two integers and a
+ // tbaa node. Moreover, the regions described by the offset and size
+ // operands must be non-overlapping.
+ std::optional<APInt> NextFree;
+ for (unsigned int Idx = 0; Idx < MD->getNumOperands(); Idx += 3) {
+ auto *OffsetCI =
+ mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(Idx));
+ CheckTBAA(OffsetCI, "Offset must be a constant integer", &I, MD);
+
+ auto *SizeCI =
+ mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(Idx + 1));
+ CheckTBAA(SizeCI, "Size must be a constant integer", &I, MD);
+
+ MDNode *TBAA = dyn_cast_or_null<MDNode>(MD->getOperand(Idx + 2));
+ CheckTBAA(TBAA, "TBAA tag missing", &I, MD);
+ visitTBAAMetadata(I, TBAA);
+
+ bool NonOverlapping = !NextFree || NextFree->ule(OffsetCI->getValue());
+ CheckTBAA(NonOverlapping, "Overlapping tbaa.struct regions", &I, MD);
+
+ NextFree = OffsetCI->getValue() + SizeCI->getValue();
+ }
+ return true;
+}
+
char VerifierLegacyPass::ID = 0;
INITIALIZE_PASS(VerifierLegacyPass, "verify", "Module Verifier", false, false)
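The new TBAAVerifier::visitTBAAStructMetadata enforces two properties: operands come in (offset, size, tag) triples, and consecutive regions must not overlap, tracked through a running next-free offset. A stand-alone version of the overlap check with plain integers instead of APInt and MDNode operands:

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

struct Region { uint64_t Offset, Size; };

static bool regionsValid(const std::vector<Region> &Regions) {
  std::optional<uint64_t> NextFree;
  for (const Region &R : Regions) {
    if (NextFree && *NextFree > R.Offset)
      return false;                 // overlaps the previous region
    NextFree = R.Offset + R.Size;
  }
  return true;
}

int main() {
  assert(regionsValid({{0, 4}, {8, 4}}));   // like !{i64 0, i64 4, !t, i64 8, i64 4, !t}
  assert(!regionsValid({{0, 8}, {4, 4}}));  // second field starts inside the first
  return 0;
}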
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index eb3894d..cec5032 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -343,7 +343,7 @@ template <typename T> ErrorOr<T> MCPseudoProbeDecoder::readUnencodedNumber() {
if (Data + sizeof(T) > End) {
return std::error_code();
}
- T Val = endian::readNext<T, llvm::endianness::little, unaligned>(Data);
+ T Val = endian::readNext<T, llvm::endianness::little>(Data);
return ErrorOr<T>(Val);
}
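The readNext calls in this and the following ProfileData changes merely drop the now-defaulted alignment template argument; behaviour is unchanged. Conceptually the helper performs an unaligned little-endian read and advances the cursor, roughly like this sketch (not the LLVM implementation, and it assumes a little-endian host):

#include <cassert>
#include <cstdint>
#include <cstring>

template <typename T> static T readNextLE(const unsigned char *&Ptr) {
  T Val = 0;
  std::memcpy(&Val, Ptr, sizeof(T));   // unaligned-safe byte copy
  Ptr += sizeof(T);
  // Assumes a little-endian host; the real helper byte-swaps when needed.
  return Val;
}

int main() {
  const unsigned char Buf[] = {0x2A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00};
  const unsigned char *P = Buf;
  assert(readNextLE<uint32_t>(P) == 42);
  assert(readNextLE<uint32_t>(P) == 1);
  return 0;
}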
diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
index 205bc1e..f343d14 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
@@ -215,23 +215,41 @@ static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
}
Error Object::compressOrDecompressSections(const CommonConfig &Config) {
- // Build a list of the debug sections we are going to replace.
- // We can't call `AddSection` while iterating over sections,
+ // Build a list of sections we are going to replace.
+ // We can't call `addSection` while iterating over sections,
// because it would mutate the sections array.
SmallVector<std::pair<SectionBase *, std::function<SectionBase *()>>, 0>
ToReplace;
for (SectionBase &Sec : sections()) {
- if ((Sec.Flags & SHF_ALLOC) || !StringRef(Sec.Name).starts_with(".debug"))
+ std::optional<DebugCompressionType> CType;
+ for (auto &[Matcher, T] : Config.compressSections)
+ if (Matcher.matches(Sec.Name))
+ CType = T;
+ // Handle --compress-debug-sections and --decompress-debug-sections, which
+ // apply to non-ALLOC debug sections.
+ if (!(Sec.Flags & SHF_ALLOC) && StringRef(Sec.Name).starts_with(".debug")) {
+ if (Config.CompressionType != DebugCompressionType::None)
+ CType = Config.CompressionType;
+ else if (Config.DecompressDebugSections)
+ CType = DebugCompressionType::None;
+ }
+ if (!CType)
continue;
+
+ if (Sec.ParentSegment)
+ return createStringError(
+ errc::invalid_argument,
+ "section '" + Sec.Name +
+ "' within a segment cannot be (de)compressed");
+
if (auto *CS = dyn_cast<CompressedSection>(&Sec)) {
- if (Config.DecompressDebugSections) {
+ if (*CType == DebugCompressionType::None)
ToReplace.emplace_back(
&Sec, [=] { return &addSection<DecompressedSection>(*CS); });
- }
- } else if (Config.CompressionType != DebugCompressionType::None) {
- ToReplace.emplace_back(&Sec, [&, S = &Sec] {
+ } else if (*CType != DebugCompressionType::None) {
+ ToReplace.emplace_back(&Sec, [=, S = &Sec] {
return &addSection<CompressedSection>(
- CompressedSection(*S, Config.CompressionType, Is64Bits));
+ CompressedSection(*S, *CType, Is64Bits));
});
}
}
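The rewritten loop in Object::compressOrDecompressSections picks a compression type per section in two steps, following the order in the hunk: the last matching --compress-sections pattern wins, and for non-ALLOC .debug sections the legacy --compress-debug-sections / --decompress-debug-sections options, when given, may then overwrite that choice. A simplified model of just that selection (prefix matching stands in for the real glob matcher):

#include <cassert>
#include <optional>
#include <string>
#include <vector>

enum class CompressionType { None, Zlib, Zstd };

struct Rule { std::string Prefix; CompressionType Type; };

static std::optional<CompressionType>
chooseType(const std::string &SecName, const std::vector<Rule> &Rules,
           bool IsNonAllocDebug, CompressionType DebugType, bool Decompress) {
  std::optional<CompressionType> CType;
  for (const Rule &R : Rules)
    if (SecName.compare(0, R.Prefix.size(), R.Prefix) == 0)
      CType = R.Type;                       // later rules override earlier ones
  if (IsNonAllocDebug) {
    if (DebugType != CompressionType::None)
      CType = DebugType;
    else if (Decompress)
      CType = CompressionType::None;
  }
  return CType;
}

int main() {
  std::vector<Rule> Rules = {{".debug_", CompressionType::Zlib},
                             {".debug_str", CompressionType::Zstd}};
  assert(chooseType(".debug_str", Rules, true, CompressionType::None, false) ==
         CompressionType::Zstd);
  assert(!chooseType(".text", {}, false, CompressionType::None, false));
  return 0;
}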
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index 8b6a003..02591e6 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -548,6 +548,7 @@ CompressedSection::CompressedSection(const SectionBase &Sec,
CompressedData);
Flags |= ELF::SHF_COMPRESSED;
+ OriginalFlags |= ELF::SHF_COMPRESSED;
size_t ChdrSize = Is64Bits ? sizeof(object::Elf_Chdr_Impl<object::ELF64LE>)
: sizeof(object::Elf_Chdr_Impl<object::ELF32LE>);
Size = ChdrSize + CompressedData.size();
@@ -2161,6 +2162,10 @@ Error Object::removeSections(
std::begin(Sections), std::end(Sections), [=](const SecPtr &Sec) {
if (ToRemove(*Sec))
return false;
+ // TODO: A compressed relocation section may be recognized as
+ // RelocationSectionBase. We don't want such a section to be removed.
+ if (isa<CompressedSection>(Sec))
+ return true;
if (auto RelSec = dyn_cast<RelocationSectionBase>(Sec.get())) {
if (auto ToRelSec = RelSec->getSection())
return !ToRemove(*ToRelSec);
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index a5abf63..f9ba80b 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1135,9 +1135,9 @@ static T swapToHostOrder(const unsigned char *&D, llvm::endianness Orig) {
using namespace support;
if (Orig == llvm::endianness::little)
- return endian::readNext<T, llvm::endianness::little, unaligned>(D);
+ return endian::readNext<T, llvm::endianness::little>(D);
else
- return endian::readNext<T, llvm::endianness::big, unaligned>(D);
+ return endian::readNext<T, llvm::endianness::big>(D);
}
static std::unique_ptr<ValueProfData> allocValueProfData(uint32_t TotalSize) {
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index a35366a..8574a96 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -115,10 +115,9 @@ readBinaryIdsInternal(const MemoryBuffer &DataBuffer,
uint64_t BILen = 0;
if (Endian == llvm::endianness::little)
- BILen =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(BI);
+ BILen = endian::readNext<uint64_t, llvm::endianness::little>(BI);
else
- BILen = endian::readNext<uint64_t, llvm::endianness::big, unaligned>(BI);
+ BILen = endian::readNext<uint64_t, llvm::endianness::big>(BI);
if (BILen == 0)
return make_error<InstrProfError>(instrprof_error::malformed,
@@ -923,8 +922,7 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
// Read hash.
if (D + sizeof(uint64_t) >= End)
return data_type();
- uint64_t Hash =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D);
+ uint64_t Hash = endian::readNext<uint64_t, llvm::endianness::little>(D);
// Initialize number of counters for GET_VERSION(FormatVersion) == 1.
uint64_t CountsSize = N / sizeof(uint64_t) - 1;
@@ -932,8 +930,7 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) {
if (D + sizeof(uint64_t) > End)
return data_type();
- CountsSize =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D);
+ CountsSize = endian::readNext<uint64_t, llvm::endianness::little>(D);
}
// Read counter values.
if (D + CountsSize * sizeof(uint64_t) > End)
@@ -943,15 +940,14 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
CounterBuffer.reserve(CountsSize);
for (uint64_t J = 0; J < CountsSize; ++J)
CounterBuffer.push_back(
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D));
+ endian::readNext<uint64_t, llvm::endianness::little>(D));
// Read bitmap bytes for GET_VERSION(FormatVersion) > 10.
if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version10) {
uint64_t BitmapBytes = 0;
if (D + sizeof(uint64_t) > End)
return data_type();
- BitmapBytes =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D);
+ BitmapBytes = endian::readNext<uint64_t, llvm::endianness::little>(D);
// Read bitmap byte values.
if (D + BitmapBytes * sizeof(uint8_t) > End)
return data_type();
@@ -959,8 +955,7 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
BitmapByteBuffer.reserve(BitmapBytes);
for (uint64_t J = 0; J < BitmapBytes; ++J)
BitmapByteBuffer.push_back(static_cast<uint8_t>(
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(
- D)));
+ endian::readNext<uint64_t, llvm::endianness::little>(D)));
}
DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer),
@@ -1256,8 +1251,7 @@ Error IndexedInstrProfReader::readHeader() {
// memprof::MemProfVersion0 or the MemProf version number in
// memprof::MemProfVersion1.
const uint64_t FirstWord =
- support::endian::readNext<uint64_t, llvm::endianness::little,
- unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
memprof::IndexedVersion Version = memprof::Version0;
if (FirstWord == memprof::Version1) {
@@ -1282,17 +1276,15 @@ Error IndexedInstrProfReader::readHeader() {
const uint64_t RecordTableOffset =
Version == memprof::Version0
? FirstWord
- : support::endian::readNext<uint64_t, llvm::endianness::little,
- unaligned>(Ptr);
+ : support::endian::readNext<uint64_t, llvm::endianness::little>(
+ Ptr);
// The offset in the stream right before invoking
// FrameTableGenerator.Emit.
const uint64_t FramePayloadOffset =
- support::endian::readNext<uint64_t, llvm::endianness::little,
- unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
// The value returned from FrameTableGenerator.Emit.
const uint64_t FrameTableOffset =
- support::endian::readNext<uint64_t, llvm::endianness::little,
- unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
// Read the schema.
auto SchemaOr = memprof::readMemProfSchema(Ptr);
@@ -1330,8 +1322,7 @@ Error IndexedInstrProfReader::readHeader() {
const unsigned char *Ptr = Start + BinaryIdOffset;
// Read binary ids size.
BinaryIdsSize =
- support::endian::readNext<uint64_t, llvm::endianness::little,
- unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
if (BinaryIdsSize % sizeof(uint64_t))
return error(instrprof_error::bad_header);
// Set the binary ids start.
@@ -1348,8 +1339,7 @@ Error IndexedInstrProfReader::readHeader() {
const unsigned char *Ptr = Start + VTableNamesOffset;
CompressedVTableNamesLen =
- support::endian::readNext<uint64_t, llvm::endianness::little,
- unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
// Writer first writes the length of compressed string, and then the actual
// content.
@@ -1369,29 +1359,24 @@ Error IndexedInstrProfReader::readHeader() {
if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
return error(instrprof_error::truncated);
const uint64_t NumTraces =
- support::endian::readNext<uint64_t, llvm::endianness::little,
- unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
TemporalProfTraceStreamSize =
- support::endian::readNext<uint64_t, llvm::endianness::little,
- unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
for (unsigned i = 0; i < NumTraces; i++) {
// Expect at least two 64 bit fields: Weight and NumFunctions
if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
return error(instrprof_error::truncated);
TemporalProfTraceTy Trace;
Trace.Weight =
- support::endian::readNext<uint64_t, llvm::endianness::little,
- unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
const uint64_t NumFunctions =
- support::endian::readNext<uint64_t, llvm::endianness::little,
- unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
// Expect at least NumFunctions 64 bit fields
if (Ptr + NumFunctions * sizeof(uint64_t) > PtrEnd)
return error(instrprof_error::truncated);
for (unsigned j = 0; j < NumFunctions; j++) {
const uint64_t NameRef =
- support::endian::readNext<uint64_t, llvm::endianness::little,
- unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
Trace.FunctionNameRefs.push_back(NameRef);
}
TemporalProfTraces.push_back(std::move(Trace));
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 96aeedf..8e0402d 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -42,7 +42,8 @@ size_t IndexedAllocationInfo::serializedSize(IndexedVersion Version) const {
}
static size_t serializedSizeV0(const IndexedMemProfRecord &Record) {
- size_t Result = sizeof(GlobalValue::GUID);
+ // The number of alloc sites to serialize.
+ size_t Result = sizeof(uint64_t);
for (const IndexedAllocationInfo &N : Record.AllocSites)
Result += N.serializedSize(Version0);
@@ -57,7 +58,8 @@ static size_t serializedSizeV0(const IndexedMemProfRecord &Record) {
}
static size_t serializedSizeV2(const IndexedMemProfRecord &Record) {
- size_t Result = sizeof(GlobalValue::GUID);
+ // The number of alloc sites to serialize.
+ size_t Result = sizeof(uint64_t);
for (const IndexedAllocationInfo &N : Record.AllocSites)
Result += N.serializedSize(Version2);
@@ -142,14 +144,14 @@ static IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema,
// Read the meminfo nodes.
const uint64_t NumNodes =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
for (uint64_t I = 0; I < NumNodes; I++) {
IndexedAllocationInfo Node;
const uint64_t NumFrames =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
for (uint64_t J = 0; J < NumFrames; J++) {
const FrameId Id =
- endian::readNext<FrameId, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<FrameId, llvm::endianness::little>(Ptr);
Node.CallStack.push_back(Id);
}
Node.CSId = hashCallStack(Node.CallStack);
@@ -160,15 +162,15 @@ static IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema,
// Read the callsite information.
const uint64_t NumCtxs =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
for (uint64_t J = 0; J < NumCtxs; J++) {
const uint64_t NumFrames =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
llvm::SmallVector<FrameId> Frames;
Frames.reserve(NumFrames);
for (uint64_t K = 0; K < NumFrames; K++) {
const FrameId Id =
- endian::readNext<FrameId, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<FrameId, llvm::endianness::little>(Ptr);
Frames.push_back(Id);
}
Record.CallSites.push_back(Frames);
@@ -186,11 +188,10 @@ static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
// Read the meminfo nodes.
const uint64_t NumNodes =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
for (uint64_t I = 0; I < NumNodes; I++) {
IndexedAllocationInfo Node;
- Node.CSId =
- endian::readNext<CallStackId, llvm::endianness::little, unaligned>(Ptr);
+ Node.CSId = endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
Node.Info.deserialize(Schema, Ptr);
Ptr += PortableMemInfoBlock::serializedSize();
Record.AllocSites.push_back(Node);
@@ -198,10 +199,10 @@ static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
// Read the callsite information.
const uint64_t NumCtxs =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
for (uint64_t J = 0; J < NumCtxs; J++) {
CallStackId CSId =
- endian::readNext<CallStackId, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
Record.CallSiteIds.push_back(CSId);
}
@@ -222,6 +223,24 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
llvm_unreachable("unsupported MemProf version");
}
+MemProfRecord IndexedMemProfRecord::toMemProfRecord(
+ std::function<const llvm::SmallVector<Frame>(const CallStackId)> Callback)
+ const {
+ MemProfRecord Record;
+
+ for (const memprof::IndexedAllocationInfo &IndexedAI : AllocSites) {
+ memprof::AllocationInfo AI;
+ AI.Info = IndexedAI.Info;
+ AI.CallStack = Callback(IndexedAI.CSId);
+ Record.AllocSites.push_back(AI);
+ }
+
+ for (memprof::CallStackId CSId : CallSiteIds)
+ Record.CallSites.push_back(Callback(CSId));
+
+ return Record;
+}
+
GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
// Canonicalize the function name to drop suffixes such as ".llvm.". Note
// we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop
@@ -243,7 +262,7 @@ Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
const unsigned char *Ptr = Buffer;
const uint64_t NumSchemaIds =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
if (NumSchemaIds > static_cast<uint64_t>(Meta::Size)) {
return make_error<InstrProfError>(instrprof_error::malformed,
"memprof schema invalid");
@@ -252,7 +271,7 @@ Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
MemProfSchema Result;
for (size_t I = 0; I < NumSchemaIds; I++) {
const uint64_t Tag =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
if (Tag >= static_cast<uint64_t>(Meta::Size)) {
return make_error<InstrProfError>(instrprof_error::malformed,
"memprof schema invalid");
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index 580867a..b4d2c6f 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -86,7 +86,7 @@ llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
using namespace support;
const uint64_t NumItemsToRead =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
llvm::SmallVector<SegmentEntry> Items;
for (uint64_t I = 0; I < NumItemsToRead; I++) {
Items.push_back(*reinterpret_cast<const SegmentEntry *>(
@@ -100,11 +100,11 @@ readMemInfoBlocks(const char *Ptr) {
using namespace support;
const uint64_t NumItemsToRead =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
for (uint64_t I = 0; I < NumItemsToRead; I++) {
const uint64_t Id =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
Items.push_back({Id, MIB});
// Only increment by size of MIB since readNext implicitly increments.
@@ -117,20 +117,20 @@ CallStackMap readStackInfo(const char *Ptr) {
using namespace support;
const uint64_t NumItemsToRead =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
CallStackMap Items;
for (uint64_t I = 0; I < NumItemsToRead; I++) {
const uint64_t StackId =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
const uint64_t NumPCs =
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
SmallVector<uint64_t> CallStack;
CallStack.reserve(NumPCs);
for (uint64_t J = 0; J < NumPCs; J++) {
CallStack.push_back(
- endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr));
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr));
}
Items[StackId] = CallStack;
@@ -183,6 +183,28 @@ std::string getBuildIdString(const SegmentEntry &Entry) {
}
} // namespace
+MemProfReader::MemProfReader(
+ llvm::DenseMap<FrameId, Frame> FrameIdMap,
+ llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
+ : IdToFrame(std::move(FrameIdMap)),
+ FunctionProfileData(std::move(ProfData)) {
+ // Populate CSId in each IndexedAllocationInfo and IndexedMemProfRecord
+ // while storing CallStack in CSIdToCallStack.
+ for (auto &KV : FunctionProfileData) {
+ IndexedMemProfRecord &Record = KV.second;
+ for (auto &AS : Record.AllocSites) {
+ CallStackId CSId = hashCallStack(AS.CallStack);
+ AS.CSId = CSId;
+ CSIdToCallStack.insert({CSId, AS.CallStack});
+ }
+ for (auto &CS : Record.CallSites) {
+ CallStackId CSId = hashCallStack(CS);
+ Record.CallSiteIds.push_back(CSId);
+ CSIdToCallStack.insert({CSId, CS});
+ }
+ }
+}
+
Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
bool KeepName) {
@@ -445,6 +467,7 @@ Error RawMemProfReader::mapRawProfileToRecords() {
}
CallStackId CSId = hashCallStack(Callstack);
+ CSIdToCallStack.insert({CSId, Callstack});
// We attach the memprof record to each function bottom-up including the
// first non-inline frame.
@@ -467,7 +490,10 @@ Error RawMemProfReader::mapRawProfileToRecords() {
auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
IndexedMemProfRecord &Record = Result.first->second;
for (LocationPtr Loc : Locs) {
+ CallStackId CSId = hashCallStack(*Loc);
+ CSIdToCallStack.insert({CSId, *Loc});
Record.CallSites.push_back(*Loc);
+ Record.CallSiteIds.push_back(CSId);
}
}
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 98d0aa7..f91a0e6 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -503,7 +503,7 @@ ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
}
using namespace support;
- T Val = endian::readNext<T, llvm::endianness::little, unaligned>(Data);
+ T Val = endian::readNext<T, llvm::endianness::little>(Data);
return Val;
}
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 7a19d24..fa96740 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -119,6 +119,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
{"za128rs", {1, 0}},
{"za64rs", {1, 0}},
{"zacas", {1, 0}},
+ {"zama16b", {1, 0}},
{"zawrs", {1, 0}},
{"zba", {1, 0}},
@@ -935,7 +936,6 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
// The canonical order specified in ISA manual.
// Ref: Table 22.1 in RISC-V User-Level ISA V2.2
- StringRef StdExts = AllStdExts;
char Baseline = Arch[4];
// First letter should be 'e', 'i' or 'g'.
@@ -951,7 +951,6 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
if (Arch.size() > 5 && isDigit(Arch[5]))
return createStringError(errc::invalid_argument,
"version not supported for 'g'");
- StdExts = StdExts.drop_front(4);
break;
}
@@ -1001,11 +1000,11 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
Exts = Exts.drop_front(ConsumeLength);
Exts.consume_front("_");
- std::vector<std::string> SplittedExts;
- if (auto E = splitExtsByUnderscore(Exts, SplittedExts))
+ std::vector<std::string> SplitExts;
+ if (auto E = splitExtsByUnderscore(Exts, SplitExts))
return std::move(E);
- for (auto &Ext : SplittedExts) {
+ for (auto &Ext : SplitExts) {
StringRef CurrExt = Ext;
while (!CurrExt.empty()) {
if (AllStdExts.contains(CurrExt.front())) {
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index f7d81f4..b70fbe4 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -54,6 +54,7 @@ FunctionPass *createFalkorMarkStridedAccessesPass();
FunctionPass *createAArch64PointerAuthPass();
FunctionPass *createAArch64BranchTargetsPass();
FunctionPass *createAArch64MIPeepholeOptPass();
+FunctionPass *createAArch64PostCoalescerPass();
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
@@ -93,6 +94,7 @@ void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &);
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
void initializeAArch64MIPeepholeOptPass(PassRegistry &);
void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &);
+void initializeAArch64PostCoalescerPass(PassRegistry &);
void initializeAArch64PostLegalizerCombinerPass(PassRegistry &);
void initializeAArch64PostLegalizerLoweringPass(PassRegistry &);
void initializeAArch64PostSelectOptimizePass(PassRegistry &);
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 5cc612e..419c141 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4325,10 +4325,7 @@ AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
MBB.addSuccessor(LoopMBB);
// Update liveins.
- bool anyChange = false;
- do {
- anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*LoopMBB);
- } while (anyChange);
+ fullyRecomputeLiveIns({ExitMBB, LoopMBB});
return ExitMBB->begin();
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 51bec36..8027221 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -258,6 +258,11 @@ public:
return SelectSVEAddSubImm(N, VT, Imm, Shift);
}
+ template <MVT::SimpleValueType VT, bool Negate>
+ bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
+ return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
+ }
+
template <MVT::SimpleValueType VT>
bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
return SelectSVECpyDupImm(N, VT, Imm, Shift);
@@ -484,6 +489,8 @@ private:
bool SelectCMP_SWAP(SDNode *N);
bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
+ bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
+ bool Negate);
bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
@@ -4014,6 +4021,56 @@ bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
return false;
}
+bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
+ SDValue &Imm, SDValue &Shift,
+ bool Negate) {
+ if (!isa<ConstantSDNode>(N))
+ return false;
+
+ SDLoc DL(N);
+ int64_t Val = cast<ConstantSDNode>(N)
+ ->getAPIntValue()
+ .trunc(VT.getFixedSizeInBits())
+ .getSExtValue();
+
+ if (Negate)
+ Val = -Val;
+
+ // Signed saturating instructions treat their immediate operand as unsigned,
+ // whereas the related intrinsics define their operands to be signed. This
+ // means we can only use the immediate form when the operand is non-negative.
+ if (Val < 0)
+ return false;
+
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ // All positive immediates are supported.
+ Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
+ return true;
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ // Support 8bit positive immediates.
+ if (Val <= 255) {
+ Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
+ return true;
+ }
+ // Support 16bit positive immediates that are a multiple of 256.
+ if (Val <= 65280 && Val % 256 == 0) {
+ Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
+ return true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
SDValue &Shift) {
if (!isa<ConstantSDNode>(N))
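SelectSVEAddSubSSatImm accepts only non-negative values and encodes them either directly (0..255) or as a byte value with an LSL #8 shift (multiples of 256 up to 65280). The same rules, extracted into a small stand-alone function for the i16/i32/i64 case so the accepted ranges are easy to check:

#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

// Returns {immediate, shift} when Val is encodable, per the rules above.
static std::optional<std::pair<int64_t, unsigned>> encode(int64_t Val) {
  if (Val < 0)
    return std::nullopt;                  // the immediate form is unsigned
  if (Val <= 255)
    return std::make_pair(Val, 0u);
  if (Val <= 65280 && Val % 256 == 0)
    return std::make_pair(Val >> 8, 8u);  // encoded with LSL #8
  return std::nullopt;
}

int main() {
  assert(encode(200) && encode(200)->second == 0);
  auto R = encode(0x4500);
  assert(R && R->first == 0x45 && R->second == 8);
  assert(!encode(-1) && !encode(257));
  return 0;
}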
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 80181a7..7947d73 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5035,8 +5035,8 @@ static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
DAG.getTargetConstant(Pattern, DL, MVT::i32));
}
-static SDValue optimizeWhile(SDValue Op, SelectionDAG &DAG, bool IsSigned,
- bool IsLess, bool IsEqual) {
+static SDValue optimizeIncrementingWhile(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned, bool IsEqual) {
if (!isa<ConstantSDNode>(Op.getOperand(1)) ||
!isa<ConstantSDNode>(Op.getOperand(2)))
return SDValue();
@@ -5044,12 +5044,9 @@ static SDValue optimizeWhile(SDValue Op, SelectionDAG &DAG, bool IsSigned,
SDLoc dl(Op);
APInt X = Op.getConstantOperandAPInt(1);
APInt Y = Op.getConstantOperandAPInt(2);
- APInt NumActiveElems;
bool Overflow;
- if (IsLess)
- NumActiveElems = IsSigned ? Y.ssub_ov(X, Overflow) : Y.usub_ov(X, Overflow);
- else
- NumActiveElems = IsSigned ? X.ssub_ov(Y, Overflow) : X.usub_ov(Y, Overflow);
+ APInt NumActiveElems =
+ IsSigned ? Y.ssub_ov(X, Overflow) : Y.usub_ov(X, Overflow);
if (Overflow)
return SDValue();
@@ -5396,29 +5393,17 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return SDValue();
}
case Intrinsic::aarch64_sve_whilelo:
- return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/true,
- /*IsEqual=*/false);
+ return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/false,
+ /*IsEqual=*/false);
case Intrinsic::aarch64_sve_whilelt:
- return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/true,
- /*IsEqual=*/false);
+ return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/true,
+ /*IsEqual=*/false);
case Intrinsic::aarch64_sve_whilels:
- return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/true,
- /*IsEqual=*/true);
+ return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/false,
+ /*IsEqual=*/true);
case Intrinsic::aarch64_sve_whilele:
- return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/true,
- /*IsEqual=*/true);
- case Intrinsic::aarch64_sve_whilege:
- return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/false,
- /*IsEqual=*/true);
- case Intrinsic::aarch64_sve_whilegt:
- return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/false,
- /*IsEqual=*/false);
- case Intrinsic::aarch64_sve_whilehs:
- return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/false,
- /*IsEqual=*/true);
- case Intrinsic::aarch64_sve_whilehi:
- return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/false,
- /*IsEqual=*/false);
+ return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/true,
+ /*IsEqual=*/true);
case Intrinsic::aarch64_sve_sunpkhi:
return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));
@@ -15903,7 +15888,7 @@ unsigned AArch64TargetLowering::getNumInterleavedAccesses(
unsigned VecSize = 128;
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
unsigned MinElts = VecTy->getElementCount().getKnownMinValue();
- if (UseScalable)
+ if (UseScalable && isa<FixedVectorType>(VecTy))
VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
return std::max<unsigned>(1, (MinElts * ElSize + 127) / VecSize);
}
@@ -27883,3 +27868,46 @@ bool AArch64TargetLowering::hasInlineStackProbe(
return !Subtarget->isTargetWindows() &&
MF.getInfo<AArch64FunctionInfo>()->hasStackProbing();
}
+
+#ifndef NDEBUG
+void AArch64TargetLowering::verifyTargetSDNode(const SDNode *N) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case AArch64ISD::SUNPKLO:
+ case AArch64ISD::SUNPKHI:
+ case AArch64ISD::UUNPKLO:
+ case AArch64ISD::UUNPKHI: {
+ assert(N->getNumValues() == 1 && "Expected one result!");
+ assert(N->getNumOperands() == 1 && "Expected one operand!");
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
+ assert(OpVT.isVector() && VT.isVector() && OpVT.isInteger() &&
+ VT.isInteger() && "Expected integer vectors!");
+ assert(OpVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Expected vectors of equal size!");
+ // TODO: Enable assert once bogus creations have been fixed.
+ // assert(OpVT.getVectorElementCount() == VT.getVectorElementCount()*2 &&
+ // "Expected result vector with half the lanes of its input!");
+ break;
+ }
+ case AArch64ISD::TRN1:
+ case AArch64ISD::TRN2:
+ case AArch64ISD::UZP1:
+ case AArch64ISD::UZP2:
+ case AArch64ISD::ZIP1:
+ case AArch64ISD::ZIP2: {
+ assert(N->getNumValues() == 1 && "Expected one result!");
+ assert(N->getNumOperands() == 2 && "Expected two operands!");
+ EVT VT = N->getValueType(0);
+ EVT Op0VT = N->getOperand(0).getValueType();
+ EVT Op1VT = N->getOperand(1).getValueType();
+ assert(VT.isVector() && Op0VT.isVector() && Op1VT.isVector() &&
+ "Expected vectors!");
+ // TODO: Enable assert once bogus creations have been fixed.
+ // assert(VT == Op0VT && VT == Op1VT && "Expected matching vectors!");
+ break;
+ }
+ }
+}
+#endif
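Together with the SelectionDAG.cpp hunk earlier, this adds a hook pattern: generic node verification delegates opcodes above BUILTIN_OP_END to a per-target virtual. A minimal model of that dispatch with stand-in types (no SelectionDAG involved; the opcode constant and classes are invented for the sketch):

#include <cassert>

struct Node { unsigned Opcode; unsigned NumOperands; };

constexpr unsigned BUILTIN_OP_END = 100;

struct TargetLoweringBaseModel {
  virtual ~TargetLoweringBaseModel() = default;
  virtual void verifyTargetNode(const Node &) const {} // default: no checks
};

struct AArch64Model : TargetLoweringBaseModel {
  void verifyTargetNode(const Node &N) const override {
    if (N.Opcode == BUILTIN_OP_END + 1)        // stands in for e.g. ZIP1
      assert(N.NumOperands == 2 && "Expected two operands!");
  }
};

static void verifyNode(const Node &N, const TargetLoweringBaseModel &TLI) {
  if (N.Opcode > BUILTIN_OP_END)
    TLI.verifyTargetNode(N);                   // target-specific opcodes
  // ... generic checks for known opcodes would go here ...
}

int main() {
  AArch64Model TLI;
  verifyNode({BUILTIN_OP_END + 1, 2}, TLI);
  return 0;
}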
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 18439dc..db6e8a0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -998,6 +998,10 @@ public:
/// True if stack clash protection is enabled for this function.
bool hasInlineStackProbe(const MachineFunction &MF) const override;
+#ifndef NDEBUG
+ void verifyTargetSDNode(const SDNode *N) const override;
+#endif
+
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 92647cb..9518d57 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -9556,15 +9556,8 @@ AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
MBB.addSuccessor(LoopTestMBB);
// Update liveins.
- if (MF.getRegInfo().reservedRegsFrozen()) {
- bool anyChange = false;
- do {
- anyChange = recomputeLiveIns(*ExitMBB) ||
- recomputeLiveIns(*LoopBodyMBB) ||
- recomputeLiveIns(*LoopTestMBB);
- } while (anyChange);
- ;
- }
+ if (MF.getRegInfo().reservedRegsFrozen())
+ fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
return ExitMBB->begin();
}
diff --git a/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp b/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
new file mode 100644
index 0000000..dd5234c
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
@@ -0,0 +1,101 @@
+//===- AArch64PostCoalescerPass.cpp - AArch64 Post Coalescer pass ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-post-coalescer-pass"
+
+namespace {
+
+struct AArch64PostCoalescer : public MachineFunctionPass {
+ static char ID;
+
+ AArch64PostCoalescer() : MachineFunctionPass(ID) {
+ initializeAArch64PostCoalescerPass(*PassRegistry::getPassRegistry());
+ }
+
+ LiveIntervals *LIS;
+ MachineRegisterInfo *MRI;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "AArch64 Post Coalescer pass";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+char AArch64PostCoalescer::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(AArch64PostCoalescer, "aarch64-post-coalescer-pass",
+ "AArch64 Post Coalescer Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(AArch64PostCoalescer, "aarch64-post-coalescer-pass",
+ "AArch64 Post Coalescer Pass", false, false)
+
+bool AArch64PostCoalescer::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ if (!FuncInfo->hasStreamingModeChanges())
+ return false;
+
+ MRI = &MF.getRegInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : make_early_inc_range(MBB)) {
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AArch64::COALESCER_BARRIER_FPR16:
+ case AArch64::COALESCER_BARRIER_FPR32:
+ case AArch64::COALESCER_BARRIER_FPR64:
+ case AArch64::COALESCER_BARRIER_FPR128: {
+ Register Src = MI.getOperand(1).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ if (Src != Dst)
+ MRI->replaceRegWith(Dst, Src);
+
+ // MI must be erased from the basic block before recalculating the live
+ // interval.
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+
+ LIS->removeInterval(Src);
+ LIS->createAndComputeVirtRegInterval(Src);
+
+ Changed = true;
+ break;
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createAArch64PostCoalescerPass() {
+ return new AArch64PostCoalescer();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 9c74719..6972acd 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -554,9 +554,9 @@ let Predicates = [HasSVEorSME] in {
defm ADD_ZI : sve_int_arith_imm0<0b000, "add", add>;
defm SUB_ZI : sve_int_arith_imm0<0b001, "sub", sub>;
defm SUBR_ZI : sve_int_arith_imm0<0b011, "subr", AArch64subr>;
- defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd", saddsat>;
+ defm SQADD_ZI : sve_int_arith_imm0_ssat<0b100, "sqadd", saddsat, ssubsat>;
defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat>;
- defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub", ssubsat>;
+ defm SQSUB_ZI : sve_int_arith_imm0_ssat<0b110, "sqsub", ssubsat, saddsat>;
defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat>;
defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", AArch64mad_m1, "MLA_ZPmZZ", /*isReverseInstr*/ 1>;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 7d4a57d..7ef78cb 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -235,6 +235,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64O0PreLegalizerCombinerPass(*PR);
initializeAArch64PreLegalizerCombinerPass(*PR);
initializeAArch64PointerAuthPass(*PR);
+ initializeAArch64PostCoalescerPass(*PR);
initializeAArch64PostLegalizerCombinerPass(*PR);
initializeAArch64PostLegalizerLoweringPass(*PR);
initializeAArch64PostSelectOptimizePass(*PR);
@@ -539,6 +540,7 @@ public:
void addPreEmitPass() override;
void addPostBBSections() override;
void addPreEmitPass2() override;
+ bool addRegAssignAndRewriteOptimized() override;
std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
@@ -876,6 +878,11 @@ void AArch64PassConfig::addPreEmitPass2() {
addPass(createUnpackMachineBundles(nullptr));
}
+bool AArch64PassConfig::addRegAssignAndRewriteOptimized() {
+ addPass(createAArch64PostCoalescerPass());
+ return TargetPassConfig::addRegAssignAndRewriteOptimized();
+}
+
MachineFunctionInfo *AArch64TargetMachine::createMachineFunctionInfo(
BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const {
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 95b228f..8e76f6c 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -73,6 +73,7 @@ add_llvm_target(AArch64CodeGen
AArch64MIPeepholeOpt.cpp
AArch64MCInstLower.cpp
AArch64PointerAuth.cpp
+ AArch64PostCoalescerPass.cpp
AArch64PromoteConstant.cpp
AArch64PBQPRegAlloc.cpp
AArch64RegisterInfo.cpp
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 96ded69..661ea15 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -263,23 +263,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.scalarize(0);
getActionDefinitionsBuilder(G_INTRINSIC_LRINT)
- // If we don't have full FP16 support, then scalarize the elements of
- // vectors containing fp16 types.
- .fewerElementsIf(
- [=, &ST](const LegalityQuery &Query) {
- const auto &Ty = Query.Types[0];
- return Ty.isVector() && Ty.getElementType() == s16 &&
- !ST.hasFullFP16();
- },
- [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
- // If we don't have full FP16 support, then widen s16 to s32 if we
- // encounter it.
- .widenScalarIf(
- [=, &ST](const LegalityQuery &Query) {
- return Query.Types[0] == s16 && !ST.hasFullFP16();
- },
- [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
- .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
+ .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
+ .libcallFor({{s64, s128}})
+ .minScalarOrElt(1, MinFPScalar);
getActionDefinitionsBuilder(
{G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 58d000b..d5c4ce1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -424,43 +424,6 @@ void AArch64RegisterBankInfo::applyMappingImpl(
}
}
-/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
-/// having only floating-point operands.
-static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FNEARBYINT:
- case TargetOpcode::G_FNEG:
- case TargetOpcode::G_FCOS:
- case TargetOpcode::G_FSIN:
- case TargetOpcode::G_FLOG10:
- case TargetOpcode::G_FLOG:
- case TargetOpcode::G_FLOG2:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_FEXP:
- case TargetOpcode::G_FRINT:
- case TargetOpcode::G_INTRINSIC_TRUNC:
- case TargetOpcode::G_INTRINSIC_ROUND:
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
- case TargetOpcode::G_FMAXNUM:
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXIMUM:
- case TargetOpcode::G_FMINIMUM:
- return true;
- }
- return false;
-}
-
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
const MachineInstr &MI) const {
@@ -829,6 +792,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_INTRINSIC_LRINT:
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
break;
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index fb0c618..3317cf8 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -249,6 +249,16 @@ def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16>", [
def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32>", []>;
def SVEAddSubImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64>", []>;
+def SVEAddSubSSatNegImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i8, true>", []>;
+def SVEAddSubSSatNegImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16, true>", []>;
+def SVEAddSubSSatNegImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i32, true>", []>;
+def SVEAddSubSSatNegImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubSSatImm<MVT::i64, true>", []>;
+
+def SVEAddSubSSatPosImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i8, false>", []>;
+def SVEAddSubSSatPosImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16, false>", []>;
+def SVEAddSubSSatPosImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i32, false>", []>;
+def SVEAddSubSSatPosImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubSSatImm<MVT::i64, false>", []>;
+
def SVECpyDupImm8Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i8>", []>;
def SVECpyDupImm16Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i16>", []>;
def SVECpyDupImm32Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i32>", []>;
@@ -4775,6 +4785,24 @@ multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
}
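+// Saturating variant of sve_int_arith_imm0: the "Pos" patterns select `op`
+// with an in-range positive immediate, while the "Neg" patterns select the
+// same instruction from the inverse operation with the immediate negated
+// (e.g. saddsat zn, (splat -C) can be selected as sqsub zn, zn, #C).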
+multiclass sve_int_arith_imm0_ssat<bits<3> opc, string asm, SDPatternOperator op,
+ SDPatternOperator inv_op> {
+ def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8, addsub_imm8_opt_lsl_i8>;
+ def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>;
+ def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
+ def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>;
+
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, op, ZPR8, i32, SVEAddSubSSatPosImm8Pat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubSSatPosImm16Pat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubSSatPosImm32Pat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubSSatPosImm64Pat, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, inv_op, ZPR8, i32, SVEAddSubSSatNegImm8Pat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, inv_op, ZPR16, i32, SVEAddSubSSatNegImm16Pat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, inv_op, ZPR32, i32, SVEAddSubSSatNegImm32Pat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, inv_op, ZPR64, i64, SVEAddSubSSatNegImm64Pat, !cast<Instruction>(NAME # _D)>;
+}
+
class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
ZPRRegOp zprty, Operand immtype>
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index f283af6..db69d50 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -59,6 +59,12 @@ unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
+ // Always lower memset, memcpy, and memmove intrinsics to load/store
+ // instructions, rather than generating calls to memset, memcpy or memmove.
+ MaxStoresPerMemset = MaxStoresPerMemsetOptSize = ~0U;
+ MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = ~0U;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = ~0U;
+
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::LOAD, MVT::f32, Promote);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 595f096..c8bf9dd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -1052,21 +1052,18 @@ public:
void removeNoLdsKernelIdFromReachable(CallGraph &CG, Function *KernelRoot) {
KernelRoot->removeFnAttr("amdgpu-no-lds-kernel-id");
- SmallVector<Function *> Tmp({CG[KernelRoot]->getFunction()});
- if (!Tmp.back())
- return;
-
+ SmallVector<Function *> WorkList({CG[KernelRoot]->getFunction()});
SmallPtrSet<Function *, 8> Visited;
bool SeenUnknownCall = false;
- do {
- Function *F = Tmp.pop_back_val();
+ while (!WorkList.empty()) {
+ Function *F = WorkList.pop_back_val();
- for (auto &N : *CG[F]) {
- if (!N.second)
+ for (auto &CallRecord : *CG[F]) {
+ if (!CallRecord.second)
continue;
- Function *Callee = N.second->getFunction();
+ Function *Callee = CallRecord.second->getFunction();
if (!Callee) {
if (!SeenUnknownCall) {
SeenUnknownCall = true;
@@ -1074,21 +1071,21 @@ public:
// If we see any indirect calls, assume nothing about potential
// targets.
// TODO: This could be refined to possible LDS global users.
- for (auto &N : *CG.getExternalCallingNode()) {
- Function *PotentialCallee = N.second->getFunction();
+ for (auto &ExternalCallRecord : *CG.getExternalCallingNode()) {
+ Function *PotentialCallee =
+ ExternalCallRecord.second->getFunction();
+ assert(PotentialCallee);
if (!isKernelLDS(PotentialCallee))
PotentialCallee->removeFnAttr("amdgpu-no-lds-kernel-id");
}
-
- continue;
}
+ } else {
+ Callee->removeFnAttr("amdgpu-no-lds-kernel-id");
+ if (Visited.insert(Callee).second)
+ WorkList.push_back(Callee);
}
-
- Callee->removeFnAttr("amdgpu-no-lds-kernel-id");
- if (Visited.insert(Callee).second)
- Tmp.push_back(Callee);
}
- } while (!Tmp.empty());
+ }
}
DenseMap<Function *, GlobalVariable *> lowerDynamicLDSVariables(
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f7e5521..305a6c8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -655,9 +655,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(
PB.registerPipelineStartEPCallback(
[](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
- FPM.addPass(AMDGPUUseNativeCallsPass());
- if (EnableLibCallSimplify && Level != OptimizationLevel::O0)
- FPM.addPass(AMDGPUSimplifyLibCallsPass());
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
if (EnableHipStdPar)
PM.addPass(HipStdParAcceleratorCodeSelectionPass());
@@ -681,6 +678,16 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(
PM.addPass(AMDGPUAlwaysInlinePass());
});
+ PB.registerPeepholeEPCallback(
+ [](FunctionPassManager &FPM, OptimizationLevel Level) {
+ if (Level == OptimizationLevel::O0)
+ return;
+
+ FPM.addPass(AMDGPUUseNativeCallsPass());
+ if (EnableLibCallSimplify)
+ FPM.addPass(AMDGPUSimplifyLibCallsPass());
+ });
+
PB.registerCGSCCOptimizerLateEPCallback(
[this](CGSCCPassManager &PM, OptimizationLevel Level) {
if (Level == OptimizationLevel::O0)
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index 05e10a9..1dda1b8 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -101,6 +101,7 @@ public:
}
}
+ SII->fixImplicitOperands(*VOPDInst);
for (auto CompIdx : VOPD::COMPONENTS)
VOPDInst.copyImplicitOps(*MI[CompIdx]);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 245731a..acb54fd 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -612,13 +612,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Reserve null register - it shall never be allocated
reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
- // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
- // will result in bugs.
- if (isWave32) {
- Reserved.set(AMDGPU::VCC);
- Reserved.set(AMDGPU::VCC_HI);
- }
-
// Reserve SGPRs.
//
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index b13ddf6..26ef295 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -27,7 +27,6 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/CodeGen.h"
#include <optional>
@@ -42,6 +41,7 @@ class FunctionLoweringInfo;
class GlobalValue;
class InstrItineraryData;
class Instruction;
+class IRBuilderBase;
class MachineBasicBlock;
class MachineInstr;
class SelectionDAG;
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 8629551..ea5dd54 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -1806,13 +1806,7 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);
DFS.ProcessLoop();
const SmallVectorImpl<MachineBasicBlock*> &PostOrder = DFS.getOrder();
- bool anyChange = false;
- do {
- anyChange = false;
- for (auto *MBB : PostOrder) {
- anyChange = recomputeLiveIns(*MBB) || anyChange;
- }
- } while (anyChange);
+ fullyRecomputeLiveIns(PostOrder);
for (auto *MBB : reverse(PostOrder))
recomputeLivenessFlags(*MBB);
diff --git a/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
index 1efda5d..8c1b332 100644
--- a/llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 083f25f..fc2834c 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -286,6 +286,25 @@ MachineInstr *Thumb2InstrInfo::commuteInstructionImpl(MachineInstr &MI,
return ARMBaseInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
+bool Thumb2InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // BTI clearing instructions shall not take part in scheduling regions as
+ // they must stay in their intended place. Although PAC isn't BTI clearing,
+ // it can be transformed into PACBTI after the pre-RA Machine Scheduling
+ // has taken place, so its movement must also be restricted.
+ switch (MI.getOpcode()) {
+ case ARM::t2BTI:
+ case ARM::t2PAC:
+ case ARM::t2PACBTI:
+ case ARM::t2SG:
+ return true;
+ default:
+ break;
+ }
+ return ARMBaseInstrInfo::isSchedulingBoundary(MI, MBB, MF);
+}
+
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const DebugLoc &dl, Register DestReg,
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 4bb412f..8915da8 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -68,6 +68,10 @@ public:
unsigned OpIdx1,
unsigned OpIdx2) const override;
+ bool isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+
private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
};
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index c13b10a..285d5c2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4876,20 +4876,19 @@ bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AM.BaseGV)
return false;
- // Require a 12 or 14 bit signed offset.
- if (!isInt<12>(AM.BaseOffs) || !isShiftedInt<14, 2>(AM.BaseOffs))
+ // Require a 12-bit signed offset, or a 14-bit signed offset left-shifted by
+ // 2 when the `UAL` feature is available.
+ if (!isInt<12>(AM.BaseOffs) &&
+ !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
return false;
switch (AM.Scale) {
case 0:
- // "i" is not allowed.
- if (!AM.HasBaseReg)
- return false;
- // Otherwise we have "r+i".
+ // "r+i" or just "i", depending on HasBaseReg.
break;
case 1:
// "r+r+i" is not allowed.
- if (AM.HasBaseReg && AM.BaseOffs != 0)
+ if (AM.HasBaseReg && AM.BaseOffs)
return false;
// Otherwise we have "r+r" or "r+i".
break;
@@ -4897,7 +4896,7 @@ bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
// "2*r+r" or "2*r+i" is not allowed.
if (AM.HasBaseReg || AM.BaseOffs)
return false;
- // Otherwise we have "r+r".
+ // Allow "2*r" as "r+r".
break;
default:
return false;
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index de492f2..98f5014 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -226,11 +226,8 @@ bool LoongArchAsmBackend::shouldInsertFixupForCodeAlign(
MCFixup::create(0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_align));
const MCSymbolRefExpr *MCSym = getSecToAlignSym()[Sec];
if (MCSym == nullptr) {
- // Create a symbol and make the value of symbol is zero.
- MCSymbol *Sym = Ctx.createNamedTempSymbol("la-relax-align");
- Sym->setFragment(&*Sec->getBeginSymbol()->getFragment());
- Asm.registerSymbol(*Sym);
- MCSym = MCSymbolRefExpr::create(Sym, Ctx);
+ // Use section symbol directly.
+ MCSym = MCSymbolRefExpr::create(Sec->getBeginSymbol(), Ctx);
getSecToAlignSym()[Sec] = MCSym;
}
diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index 6af1fd8..62b58cb 100644
--- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -104,26 +104,6 @@ MipsRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
}
}
-// Instructions where all register operands are floating point.
-static bool isFloatingPointOpcode(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- return true;
- default:
- return false;
- }
-}
-
// Instructions where use operands are floating point registers.
// Def operands are general purpose.
static bool isFloatingPointOpcodeUse(unsigned Opc) {
@@ -133,7 +113,7 @@ static bool isFloatingPointOpcodeUse(unsigned Opc) {
case TargetOpcode::G_FCMP:
return true;
default:
- return isFloatingPointOpcode(Opc);
+ return isPreISelGenericFloatingPointOpcode(Opc);
}
}
@@ -145,7 +125,7 @@ static bool isFloatingPointOpcodeDef(unsigned Opc) {
case TargetOpcode::G_UITOFP:
return true;
default:
- return isFloatingPointOpcode(Opc);
+ return isPreISelGenericFloatingPointOpcode(Opc);
}
}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
index 6aeef145..125a49d 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
@@ -13,6 +13,7 @@
#include "PPCRegisterBankInfo.h"
#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
@@ -239,44 +240,6 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getInstructionMapping(MappingID, Cost, OperandsMapping, NumOperands);
}
-/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
-/// having only floating-point operands.
-/// FIXME: this is copied from target AArch64. Needs some code refactor here to
-/// put this function in GlobalISel/Utils.cpp.
-static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FNEARBYINT:
- case TargetOpcode::G_FNEG:
- case TargetOpcode::G_FCOS:
- case TargetOpcode::G_FSIN:
- case TargetOpcode::G_FLOG10:
- case TargetOpcode::G_FLOG:
- case TargetOpcode::G_FLOG2:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_FEXP:
- case TargetOpcode::G_FRINT:
- case TargetOpcode::G_INTRINSIC_TRUNC:
- case TargetOpcode::G_INTRINSIC_ROUND:
- case TargetOpcode::G_FMAXNUM:
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXIMUM:
- case TargetOpcode::G_FMINIMUM:
- return true;
- }
- return false;
-}
-
/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
static bool isFPIntrinsic(unsigned ID) {
// TODO: Add more intrinsics.
diff --git a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
index b43eee8..b3cfcb2 100644
--- a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
@@ -208,10 +208,7 @@ bool PPCExpandAtomicPseudo::expandAtomicRMW128(
.addMBB(LoopMBB);
CurrentMBB->addSuccessor(LoopMBB);
CurrentMBB->addSuccessor(ExitMBB);
- bool anyChange = false;
- do {
- anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*LoopMBB);
- } while (anyChange);
+ fullyRecomputeLiveIns({ExitMBB, LoopMBB});
NMBBI = MBB.end();
MI.eraseFromParent();
return true;
@@ -288,11 +285,7 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
CurrentMBB->addSuccessor(LoopCmpMBB);
CurrentMBB->addSuccessor(ExitMBB);
- bool anyChange = false;
- do {
- anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*CmpSuccMBB) ||
- recomputeLiveIns(*LoopCmpMBB);
- } while (anyChange);
+ fullyRecomputeLiveIns({ExitMBB, CmpSuccMBB, LoopCmpMBB});
NMBBI = MBB.end();
MI.eraseFromParent();
return true;
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 6dcb59a..04e9f9e 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1435,11 +1435,7 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
}
// Update liveins.
- bool anyChange = false;
- do {
- anyChange = recomputeLiveIns(*ProbeExitMBB) ||
- recomputeLiveIns(*ProbeLoopBodyMBB);
- } while (anyChange);
+ fullyRecomputeLiveIns({ProbeExitMBB, ProbeLoopBodyMBB});
return ProbeExitMBB;
};
// For case HasBP && MaxAlign > 1, we have to realign the SP by performing
@@ -1531,10 +1527,7 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
}
// Update liveins.
- bool anyChange = false;
- do {
- anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*LoopMBB);
- } while (anyChange);
+ fullyRecomputeLiveIns({ExitMBB, LoopMBB});
}
}
++NumPrologProbed;
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index 45e19cd..c18892a 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -34,14 +34,15 @@ private:
// Whether this is assigning args for a return.
bool IsRet;
- // true if assignArg has been called for a mask argument, false otherwise.
- bool AssignedFirstMaskArg = false;
+ RVVArgDispatcher &RVVDispatcher;
public:
RISCVOutgoingValueAssigner(
- RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet)
+ RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet,
+ RVVArgDispatcher &RVVDispatcher)
: CallLowering::OutgoingValueAssigner(nullptr),
- RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet) {}
+ RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet),
+ RVVDispatcher(RVVDispatcher) {}
bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
@@ -51,16 +52,9 @@ public:
const DataLayout &DL = MF.getDataLayout();
const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions() && !AssignedFirstMaskArg &&
- ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) {
- FirstMaskArgument = ValNo;
- AssignedFirstMaskArg = true;
- }
-
if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
LocInfo, Flags, State, Info.IsFixed, IsRet, Info.Ty,
- *Subtarget.getTargetLowering(), FirstMaskArgument))
+ *Subtarget.getTargetLowering(), RVVDispatcher))
return true;
StackSize = State.getStackSize();
@@ -181,14 +175,15 @@ private:
// Whether this is assigning args from a return.
bool IsRet;
- // true if assignArg has been called for a mask argument, false otherwise.
- bool AssignedFirstMaskArg = false;
+ RVVArgDispatcher &RVVDispatcher;
public:
RISCVIncomingValueAssigner(
- RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet)
+ RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet,
+ RVVArgDispatcher &RVVDispatcher)
: CallLowering::IncomingValueAssigner(nullptr),
- RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet) {}
+ RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet),
+ RVVDispatcher(RVVDispatcher) {}
bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
@@ -201,16 +196,9 @@ public:
if (LocVT.isScalableVector())
MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions() && !AssignedFirstMaskArg &&
- ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) {
- FirstMaskArgument = ValNo;
- AssignedFirstMaskArg = true;
- }
-
if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
LocInfo, Flags, State, /*IsFixed=*/true, IsRet, Info.Ty,
- *Subtarget.getTargetLowering(), FirstMaskArgument))
+ *Subtarget.getTargetLowering(), RVVDispatcher))
return true;
StackSize = State.getStackSize();
@@ -420,9 +408,11 @@ bool RISCVCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(OrigRetInfo, SplitRetInfos, DL, CC);
+ RVVArgDispatcher Dispatcher{&MF, getTLI<RISCVTargetLowering>(),
+ ArrayRef(F.getReturnType())};
RISCVOutgoingValueAssigner Assigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
- /*IsRet=*/true);
+ /*IsRet=*/true, Dispatcher);
RISCVOutgoingValueHandler Handler(MIRBuilder, MF.getRegInfo(), Ret);
return determineAndHandleAssignments(Handler, Assigner, SplitRetInfos,
MIRBuilder, CC, F.isVarArg());
@@ -531,6 +521,7 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CallingConv::ID CC = F.getCallingConv();
SmallVector<ArgInfo, 32> SplitArgInfos;
+ SmallVector<Type *, 4> TypeList;
unsigned Index = 0;
for (auto &Arg : F.args()) {
// Construct the ArgInfo object from destination register and argument type.
@@ -542,12 +533,16 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
// correspondingly and appended to SplitArgInfos.
splitToValueTypes(AInfo, SplitArgInfos, DL, CC);
+ TypeList.push_back(Arg.getType());
+
++Index;
}
+ RVVArgDispatcher Dispatcher{&MF, getTLI<RISCVTargetLowering>(),
+ ArrayRef(TypeList)};
RISCVIncomingValueAssigner Assigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
- /*IsRet=*/false);
+ /*IsRet=*/false, Dispatcher);
RISCVFormalArgHandler Handler(MIRBuilder, MF.getRegInfo());
SmallVector<CCValAssign, 16> ArgLocs;
@@ -585,11 +580,13 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 32> SplitArgInfos;
SmallVector<ISD::OutputArg, 8> Outs;
+ SmallVector<Type *, 4> TypeList;
for (auto &AInfo : Info.OrigArgs) {
// Handle any required unmerging of split value types from a given VReg into
// physical registers. ArgInfo objects are constructed correspondingly and
// appended to SplitArgInfos.
splitToValueTypes(AInfo, SplitArgInfos, DL, CC);
+ TypeList.push_back(AInfo.Ty);
}
// TODO: Support tail calls.
@@ -607,9 +604,11 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
Call.addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv));
+ RVVArgDispatcher ArgDispatcher{&MF, getTLI<RISCVTargetLowering>(),
+ ArrayRef(TypeList)};
RISCVOutgoingValueAssigner ArgAssigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
- /*IsRet=*/false);
+ /*IsRet=*/false, ArgDispatcher);
RISCVOutgoingValueHandler ArgHandler(MIRBuilder, MF.getRegInfo(), Call);
if (!determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgInfos,
MIRBuilder, CC, Info.IsVarArg))
@@ -637,9 +636,11 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(Info.OrigRet, SplitRetInfos, DL, CC);
+ RVVArgDispatcher RetDispatcher{&MF, getTLI<RISCVTargetLowering>(),
+ ArrayRef(F.getReturnType())};
RISCVIncomingValueAssigner RetAssigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
- /*IsRet=*/true);
+ /*IsRet=*/true, RetDispatcher);
RISCVCallReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Call);
if (!determineAndHandleAssignments(RetHandler, RetAssigner, SplitRetInfos,
MIRBuilder, CC, Info.IsVarArg))
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index 86e4434..c1fde73 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -110,6 +110,8 @@ RISCVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
LLT Ty) const {
switch (RC.getID()) {
default:
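+ // RVV register classes are identified via their TSFlags instead of
+ // enumerating each register class ID.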
+ if (RISCVRI::isVRegClass(RC.TSFlags))
+ return getRegBank(RISCV::VRBRegBankID);
llvm_unreachable("Register class not supported");
case RISCV::GPRRegClassID:
case RISCV::GPRF16RegClassID:
@@ -131,20 +133,6 @@ RISCVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
case RISCV::FPR64CRegClassID:
case RISCV::FPR32CRegClassID:
return getRegBank(RISCV::FPRBRegBankID);
- case RISCV::VMRegClassID:
- case RISCV::VRRegClassID:
- case RISCV::VRNoV0RegClassID:
- case RISCV::VRM2RegClassID:
- case RISCV::VRM2NoV0RegClassID:
- case RISCV::VRM4RegClassID:
- case RISCV::VRM4NoV0RegClassID:
- case RISCV::VMV0RegClassID:
- case RISCV::VRM2_with_sub_vrm1_0_in_VMV0RegClassID:
- case RISCV::VRM4_with_sub_vrm1_0_in_VMV0RegClassID:
- case RISCV::VRM8RegClassID:
- case RISCV::VRM8NoV0RegClassID:
- case RISCV::VRM8_with_sub_vrm1_0_in_VMV0RegClassID:
- return getRegBank(RISCV::VRBRegBankID);
}
}
@@ -154,46 +142,6 @@ static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) {
return &RISCV::ValueMappings[Idx];
}
-/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
-/// having only floating-point operands.
-/// FIXME: this is copied from target AArch64. Needs some code refactor here to
-/// put this function in GlobalISel/Utils.cpp.
-static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FNEARBYINT:
- case TargetOpcode::G_FNEG:
- case TargetOpcode::G_FCOPYSIGN:
- case TargetOpcode::G_FCOS:
- case TargetOpcode::G_FSIN:
- case TargetOpcode::G_FLOG10:
- case TargetOpcode::G_FLOG:
- case TargetOpcode::G_FLOG2:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_FEXP:
- case TargetOpcode::G_FRINT:
- case TargetOpcode::G_INTRINSIC_TRUNC:
- case TargetOpcode::G_INTRINSIC_ROUND:
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
- case TargetOpcode::G_FMAXNUM:
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXIMUM:
- case TargetOpcode::G_FMINIMUM:
- return true;
- }
- return false;
-}
-
// TODO: Make this more like AArch64?
bool RISCVRegisterBankInfo::hasFPConstraints(
const MachineInstr &MI, const MachineRegisterInfo &MRI,
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 173995f..d93709a 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -326,8 +326,8 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB,
.setMemRefs(MMOLo);
if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) {
- // FIXME: Zdinx RV32 can not work on unaligned memory.
- assert(!STI->hasFastUnalignedAccess());
+ // FIXME: Zdinx RV32 cannot work on unaligned scalar memory.
+ assert(!STI->enableUnalignedScalarMem());
assert(MBBI->getOperand(2).getOffset() % 8 == 0);
MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4);
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 794455a..f830ead 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -208,6 +208,13 @@ def HasStdExtAOrZalrsc
"'A' (Atomic Instructions) or "
"'Zalrsc' (Load-Reserved/Store-Conditional)">;
+def FeatureStdExtZama16b
+ : SubtargetFeature<"zama16b", "HasStdExtZama16b", "true",
+ "'Zama16b' (Atomic 16-byte misaligned loads, stores and AMOs)">;
+def HasStdExtZama16b : Predicate<"Subtarget->hasStdExtZama16b()">,
+ AssemblerPredicate<(all_of FeatureStdExtZama16b),
+ "'Zama16b' (Atomic 16-byte misaligned loads, stores and AMOs)">;
+
def FeatureStdExtZawrs : SubtargetFeature<"zawrs", "HasStdExtZawrs", "true",
"'Zawrs' (Wait on Reservation Set)">;
def HasStdExtZawrs : Predicate<"Subtarget->hasStdExtZawrs()">,
@@ -1183,10 +1190,15 @@ def FeatureTrailingSeqCstFence : SubtargetFeature<"seq-cst-trailing-fence",
"true",
"Enable trailing fence for seq-cst store.">;
-def FeatureFastUnalignedAccess
- : SubtargetFeature<"fast-unaligned-access", "HasFastUnalignedAccess",
- "true", "Has reasonably performant unaligned "
- "loads and stores (both scalar and vector)">;
+def FeatureUnalignedScalarMem
+ : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem",
+ "true", "Has reasonably performant unaligned scalar "
+ "loads and stores">;
+
+def FeatureUnalignedVectorMem
+ : SubtargetFeature<"unaligned-vector-mem", "EnableUnalignedVectorMem",
+ "true", "Has reasonably performant unaligned vector "
+ "loads and stores">;
def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;
@@ -1226,9 +1238,9 @@ def TuneNoSinkSplatOperands
"false", "Disable sink splat operands to enable .vx, .vf,"
".wx, and .wf instructions">;
-def TuneNoStripWSuffix
- : SubtargetFeature<"no-strip-w-suffix", "EnableStripWSuffix", "false",
- "Disable strip W suffix">;
+def TunePreferWInst
+ : SubtargetFeature<"prefer-w-inst", "PreferWInst", "true",
+ "Prefer instructions with W suffix">;
def TuneConditionalCompressedMoveFusion
: SubtargetFeature<"conditional-cmv-fusion", "HasConditionalCompressedMoveFusion",
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 71672ed..cb41577 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -435,6 +435,33 @@ void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF,
Flag, getStackAlign());
}
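+// Append DWARF expression operations that add FixedOffset +
+// ScalableOffset * VLENB to the value on top of the expression stack, and
+// mirror the computation in the human-readable Comment stream.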
+static void appendScalableVectorExpression(const TargetRegisterInfo &TRI,
+ SmallVectorImpl<char> &Expr,
+ int FixedOffset, int ScalableOffset,
+ llvm::raw_string_ostream &Comment) {
+ unsigned DwarfVLenB = TRI.getDwarfRegNum(RISCV::VLENB, true);
+ uint8_t Buffer[16];
+ if (FixedOffset) {
+ Expr.push_back(dwarf::DW_OP_consts);
+ Expr.append(Buffer, Buffer + encodeSLEB128(FixedOffset, Buffer));
+ Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+ Comment << (FixedOffset < 0 ? " - " : " + ") << std::abs(FixedOffset);
+ }
+
+ Expr.push_back((uint8_t)dwarf::DW_OP_consts);
+ Expr.append(Buffer, Buffer + encodeSLEB128(ScalableOffset, Buffer));
+
+ Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
+ Expr.append(Buffer, Buffer + encodeULEB128(DwarfVLenB, Buffer));
+ Expr.push_back(0);
+
+ Expr.push_back((uint8_t)dwarf::DW_OP_mul);
+ Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+
+ Comment << (ScalableOffset < 0 ? " - " : " + ") << std::abs(ScalableOffset)
+ << " * vlenb";
+}
+
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
Register Reg,
uint64_t FixedOffset,
@@ -452,30 +479,38 @@ static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
else
Comment << printReg(Reg, &TRI);
- uint8_t buffer[16];
- if (FixedOffset) {
- Expr.push_back(dwarf::DW_OP_consts);
- Expr.append(buffer, buffer + encodeSLEB128(FixedOffset, buffer));
- Expr.push_back((uint8_t)dwarf::DW_OP_plus);
- Comment << " + " << FixedOffset;
- }
+ appendScalableVectorExpression(TRI, Expr, FixedOffset, ScalableOffset,
+ Comment);
- Expr.push_back((uint8_t)dwarf::DW_OP_consts);
- Expr.append(buffer, buffer + encodeSLEB128(ScalableOffset, buffer));
+ SmallString<64> DefCfaExpr;
+ uint8_t Buffer[16];
+ DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
+ DefCfaExpr.append(Buffer, Buffer + encodeULEB128(Expr.size(), Buffer));
+ DefCfaExpr.append(Expr.str());
- unsigned DwarfVlenb = TRI.getDwarfRegNum(RISCV::VLENB, true);
- Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
- Expr.append(buffer, buffer + encodeULEB128(DwarfVlenb, buffer));
- Expr.push_back(0);
+ return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
+ Comment.str());
+}
- Expr.push_back((uint8_t)dwarf::DW_OP_mul);
- Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI,
+ Register Reg, uint64_t FixedOffset,
+ uint64_t ScalableOffset) {
+ assert(ScalableOffset != 0 && "Did not need to adjust CFA for RVV");
+ SmallString<64> Expr;
+ std::string CommentBuffer;
+ llvm::raw_string_ostream Comment(CommentBuffer);
+ Comment << printReg(Reg, &TRI) << " @ cfa";
- Comment << " + " << ScalableOffset << " * vlenb";
+ // Build up the expression (FixedOffset + ScalableOffset * VLENB).
+ appendScalableVectorExpression(TRI, Expr, FixedOffset, ScalableOffset,
+ Comment);
SmallString<64> DefCfaExpr;
- DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
- DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
+ uint8_t Buffer[16];
+ unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
+ DefCfaExpr.push_back(dwarf::DW_CFA_expression);
+ DefCfaExpr.append(Buffer, Buffer + encodeULEB128(DwarfReg, Buffer));
+ DefCfaExpr.append(Buffer, Buffer + encodeULEB128(Expr.size(), Buffer));
DefCfaExpr.append(Expr.str());
return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
@@ -671,6 +706,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
.addCFIIndex(CFIIndex)
.setMIFlag(MachineInstr::FrameSetup);
}
+
+ std::advance(MBBI, getRVVCalleeSavedInfo(MF, CSI).size());
+ emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF));
}
if (hasFP(MF)) {
@@ -1492,6 +1530,41 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
return true;
}
+void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const {
+ MachineFunction *MF = MBB.getParent();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, MFI.getCalleeSavedInfo());
+ if (RVVCSI.empty())
+ return;
+
+ uint64_t FixedSize = getStackSizeWithRVVPadding(*MF);
+ if (!HasFP) {
+ uint64_t ScalarLocalVarSize =
+ MFI.getStackSize() - RVFI->getCalleeSavedStackSize() -
+ RVFI->getRVPushStackSize() - RVFI->getVarArgsSaveSize() +
+ RVFI->getRVVPadding();
+ FixedSize -= ScalarLocalVarSize;
+ }
+
+ for (auto &CS : RVVCSI) {
+ // Emit a CFI record describing where this scalable vector register is
+ // spilled relative to the CFA.
+ int FI = CS.getFrameIdx();
+ if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+ unsigned CFIIndex = MF->addFrameInst(
+ createDefCFAOffset(*STI.getRegisterInfo(), CS.getReg(), -FixedSize,
+ MFI.getObjectOffset(FI) / 8));
+ BuildMI(MBB, MI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+}
+
bool RISCVFrameLowering::restoreCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 210f8c1..28ab4af 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -88,6 +88,9 @@ private:
void adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
int64_t Amount, MachineInstr::MIFlag Flag) const;
+ void emitCalleeSavedRVVPrologCFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ bool HasFP) const;
std::pair<int64_t, Align>
assignRVVStackObjectOffsets(MachineFunction &MF) const;
};
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5a57200..b0deb1d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -1484,6 +1485,11 @@ bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
return VF > MaxVF || !isPowerOf2_32(VF);
}
+bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
+ return !Subtarget.hasVInstructions() ||
+ VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
+}
+
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
@@ -1918,7 +1924,7 @@ bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
// replace. If we don't support unaligned scalar mem, prefer the constant
// pool.
// TODO: Can the caller pass down the alignment?
- if (!Subtarget.hasFastUnalignedAccess())
+ if (!Subtarget.enableUnalignedScalarMem())
return true;
// Prefer to keep the load if it would require many instructions.
@@ -8718,6 +8724,29 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
}
+static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDValue Op0 = N->getOperand(1);
+ MVT OpVT = Op0.getSimpleValueType();
+ MVT ContainerVT = OpVT;
+ if (OpVT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
+ Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
+ }
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDLoc DL(N);
+ auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
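+ // vfirst.m returns the index of the first active element, or -1 if no mask
+ // element is set.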
+ SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
+ if (isOneConstant(N->getOperand(2)))
+ return Res;
+
+ // Convert -1 to VL.
+ SDValue Setcc =
+ DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
+ VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
+ return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
+}
+
static inline void promoteVCIXScalar(const SDValue &Op,
SmallVectorImpl<SDValue> &Operands,
SelectionDAG &DAG) {
@@ -8913,6 +8942,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::experimental_get_vector_length:
return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
+ case Intrinsic::experimental_cttz_elts:
+ return lowerCttzElts(Op.getNode(), DAG, Subtarget);
case Intrinsic::riscv_vmv_x_s: {
SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
@@ -10403,14 +10434,10 @@ RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
MachineMemOperand *MMO = Load->getMemOperand();
- MachineFunction &MF = DAG.getMachineFunction();
- MMO = MF.getMachineMemOperand(
- MMO, MMO->getPointerInfo(),
- MMO->getMemoryType().isValid()
- ? LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits())
- : MMO->getMemoryType());
SDValue NewLoad =
- DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(), MMO);
+ DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
+ MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
+ MMO->getAAInfo(), MMO->getRanges());
SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
}
@@ -10470,14 +10497,9 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
MachineMemOperand *MMO = Store->getMemOperand();
- MachineFunction &MF = DAG.getMachineFunction();
- MMO = MF.getMachineMemOperand(
- MMO, MMO->getPointerInfo(),
- MMO->getMemoryType().isValid()
- ? LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits())
- : MMO->getMemoryType());
return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
- MMO);
+ MMO->getPointerInfo(), MMO->getBaseAlign(),
+ MMO->getFlags(), MMO->getAAInfo());
}
SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
@@ -12336,6 +12358,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
+ case Intrinsic::experimental_cttz_elts: {
+ SDValue Res = lowerCttzElts(N, DAG, Subtarget);
+ Results.push_back(
+ DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
+ return;
+ }
case Intrinsic::riscv_orc_b:
case Intrinsic::riscv_brev8:
case Intrinsic::riscv_sha256sig0:
@@ -13363,11 +13391,100 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}
-static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) {
+// Try to expand a scalar multiply to a faster sequence.
+static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const RISCVSubtarget &Subtarget) {
+
EVT VT = N->getValueType(0);
- if (!VT.isVector())
+
+ // LI + MUL is usually smaller than the alternative sequence.
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ if (VT != Subtarget.getXLenVT())
+ return SDValue();
+
+ if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXTHeadBa())
+ return SDValue();
+
+ ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!CNode)
+ return SDValue();
+ uint64_t MulAmt = CNode->getZExtValue();
+
+ // 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C)
+ // Matched in tablegen, avoid perturbing patterns.
+ for (uint64_t Divisor : {3, 5, 9})
+ if (MulAmt % Divisor == 0 && isPowerOf2_64(MulAmt / Divisor))
+ return SDValue();
+
+ // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
+ // shXadd. First check if this is a sum of two powers of 2 because that's
+ // easy. Then count the trailing zeros up to the first set bit.
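+ // For example, MulAmt = 34 = 32 + 2 gives ShiftAmt = 5 and ScaleShift = 1,
+ // i.e. 34*X becomes (X << 5) + (X << 1).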
+ if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
+ unsigned ScaleShift = llvm::countr_zero(MulAmt);
+ if (ScaleShift >= 1 && ScaleShift < 4) {
+ unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
+ SDLoc DL(N);
+ SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(ShiftAmt, DL, VT));
+ SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(ScaleShift, DL, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
+ }
+ }
+
+ // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
+ // Matched in tablegen, avoid perturbing patterns.
+ switch (MulAmt) {
+ case 11:
+ case 13:
+ case 19:
+ case 21:
+ case 25:
+ case 27:
+ case 29:
+ case 37:
+ case 41:
+ case 45:
+ case 73:
+ case 91:
+ return SDValue();
+ default:
+ break;
+ }
+
+ // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
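+ // For example, MulAmt = 67 = 64 + 2 + 1 gives ShiftAmt = 6 and
+ // ScaleShift = 1, i.e. 67*X becomes (X << 6) + ((X << 1) + X).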
+ if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
+ unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
+ if (ScaleShift >= 1 && ScaleShift < 4) {
+ unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
+ SDLoc DL(N);
+ SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(ShiftAmt, DL, VT));
+ SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(ScaleShift, DL, VT));
+ return DAG.getNode(
+ ISD::ADD, DL, VT, Shift1,
+ DAG.getNode(ISD::ADD, DL, VT, Shift2, N->getOperand(0)));
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const RISCVSubtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector())
+ return expandMul(N, DAG, DCI, Subtarget);
+
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -15720,7 +15837,7 @@ static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
if (WiderElementSize > ST.getELen()/8)
return false;
- if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize)
+ if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
return false;
for (unsigned i = 0; i < Index->getNumOperands(); i++) {
@@ -15913,7 +16030,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::MUL:
if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
return V;
- return performMULCombine(N, DAG);
+ return performMULCombine(N, DAG, DCI, Subtarget);
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
@@ -17642,8 +17759,7 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
MachineBasicBlock *BB,
- unsigned CVTXOpc,
- unsigned CVTFOpc) {
+ unsigned CVTXOpc) {
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
@@ -17674,6 +17790,85 @@ static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
/*IsImp*/ true));
// Emit a VFCVT_F_X
+ RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
+ unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+ // There is no E8 variant for VFCVT_F_X.
+ assert(Log2SEW >= 4);
+ // Since MI (VFROUND) isn't SEW specific, we cannot use a macro to make
+ // handling of different (LMUL, SEW) pairs easier because we need to pull the
+ // SEW immediate from MI, and that information is not available during macro
+ // expansion.
+ unsigned CVTFOpc;
+ if (Log2SEW == 4) {
+ switch (LMul) {
+ case RISCVII::LMUL_1:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M1_E16_MASK;
+ break;
+ case RISCVII::LMUL_2:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M2_E16_MASK;
+ break;
+ case RISCVII::LMUL_4:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M4_E16_MASK;
+ break;
+ case RISCVII::LMUL_8:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M8_E16_MASK;
+ break;
+ case RISCVII::LMUL_F2:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_MF2_E16_MASK;
+ break;
+ case RISCVII::LMUL_F4:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_MF4_E16_MASK;
+ break;
+ case RISCVII::LMUL_F8:
+ case RISCVII::LMUL_RESERVED:
+ llvm_unreachable("Unexpected LMUL and SEW combination value for MI.");
+ }
+ } else if (Log2SEW == 5) {
+ switch (LMul) {
+ case RISCVII::LMUL_1:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M1_E32_MASK;
+ break;
+ case RISCVII::LMUL_2:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M2_E32_MASK;
+ break;
+ case RISCVII::LMUL_4:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M4_E32_MASK;
+ break;
+ case RISCVII::LMUL_8:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M8_E32_MASK;
+ break;
+ case RISCVII::LMUL_F2:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_MF2_E32_MASK;
+ break;
+ case RISCVII::LMUL_F4:
+ case RISCVII::LMUL_F8:
+ case RISCVII::LMUL_RESERVED:
+ llvm_unreachable("Unexpected LMUL and SEW combination value for MI.");
+ }
+ } else if (Log2SEW == 6) {
+ switch (LMul) {
+ case RISCVII::LMUL_1:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M1_E64_MASK;
+ break;
+ case RISCVII::LMUL_2:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M2_E64_MASK;
+ break;
+ case RISCVII::LMUL_4:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M4_E64_MASK;
+ break;
+ case RISCVII::LMUL_8:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M8_E64_MASK;
+ break;
+ case RISCVII::LMUL_F2:
+ case RISCVII::LMUL_F4:
+ case RISCVII::LMUL_F8:
+ case RISCVII::LMUL_RESERVED:
+ llvm_unreachable("Unexpected LMUL and SEW combination value for MI.");
+ }
+ } else {
+ llvm_unreachable("Unexpected LMUL and SEW combination value for MI.");
+ }
+
BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
@@ -17883,23 +18078,17 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
Subtarget);
case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
- return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
- RISCV::PseudoVFCVT_F_X_V_M1_MASK);
+ return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
- return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK,
- RISCV::PseudoVFCVT_F_X_V_M2_MASK);
+ return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
- return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK,
- RISCV::PseudoVFCVT_F_X_V_M4_MASK);
+ return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
- return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK,
- RISCV::PseudoVFCVT_F_X_V_M8_MASK);
+ return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
- return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK,
- RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
+ return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
- return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
- RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
+ return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
case RISCV::PseudoFROUND_H:
case RISCV::PseudoFROUND_H_INX:
case RISCV::PseudoFROUND_S:
@@ -18078,33 +18267,12 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
return false;
}
-static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
- std::optional<unsigned> FirstMaskArgument,
- CCState &State, const RISCVTargetLowering &TLI) {
- const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
- if (RC == &RISCV::VRRegClass) {
- // Assign the first mask argument to V0.
- // This is an interim calling convention and it may be changed in the
- // future.
- if (FirstMaskArgument && ValNo == *FirstMaskArgument)
- return State.AllocateReg(RISCV::V0);
- return State.AllocateReg(ArgVRs);
- }
- if (RC == &RISCV::VRM2RegClass)
- return State.AllocateReg(ArgVRM2s);
- if (RC == &RISCV::VRM4RegClass)
- return State.AllocateReg(ArgVRM4s);
- if (RC == &RISCV::VRM8RegClass)
- return State.AllocateReg(ArgVRM8s);
- llvm_unreachable("Unhandled register class for ValueType");
-}
-
// Implements the RISC-V calling convention. Returns true upon failure.
bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument) {
+ RVVArgDispatcher &RVVDispatcher) {
unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
assert(XLen == 32 || XLen == 64);
MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
@@ -18273,7 +18441,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
else if (ValVT == MVT::f64 && !UseGPRForF64)
Reg = State.AllocateReg(ArgFPR64s);
else if (ValVT.isVector()) {
- Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
+ Reg = RVVDispatcher.getNextPhysReg();
if (!Reg) {
// For return values, the vector must be passed fully via registers or
// via the stack.
@@ -18359,9 +18527,15 @@ void RISCVTargetLowering::analyzeInputArgs(
unsigned NumArgs = Ins.size();
FunctionType *FType = MF.getFunction().getFunctionType();
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions())
- FirstMaskArgument = preAssignMask(Ins);
+ RVVArgDispatcher Dispatcher;
+ if (IsRet) {
+ Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
+ } else {
+ SmallVector<Type *, 4> TypeList;
+ for (const Argument &Arg : MF.getFunction().args())
+ TypeList.push_back(Arg.getType());
+ Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
+ }
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Ins[i].VT;
@@ -18376,7 +18550,7 @@ void RISCVTargetLowering::analyzeInputArgs(
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
- FirstMaskArgument)) {
+ Dispatcher)) {
LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
<< ArgVT << '\n');
llvm_unreachable(nullptr);
@@ -18390,9 +18564,13 @@ void RISCVTargetLowering::analyzeOutputArgs(
CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
unsigned NumArgs = Outs.size();
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions())
- FirstMaskArgument = preAssignMask(Outs);
+ SmallVector<Type *, 4> TypeList;
+ if (IsRet)
+ TypeList.push_back(MF.getFunction().getReturnType());
+ else if (CLI)
+ for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
+ TypeList.push_back(Arg.Ty);
+ RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
for (unsigned i = 0; i != NumArgs; i++) {
MVT ArgVT = Outs[i].VT;
@@ -18402,7 +18580,7 @@ void RISCVTargetLowering::analyzeOutputArgs(
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
- FirstMaskArgument)) {
+ Dispatcher)) {
LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
<< ArgVT << "\n");
llvm_unreachable(nullptr);
@@ -18583,7 +18761,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
ISD::ArgFlagsTy ArgFlags, CCState &State,
bool IsFixed, bool IsRet, Type *OrigTy,
const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument) {
+ RVVArgDispatcher &RVVDispatcher) {
if (LocVT == MVT::i32 || LocVT == MVT::i64) {
if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
@@ -18661,13 +18839,14 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
}
if (LocVT.isVector()) {
- if (unsigned Reg =
- allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
+ MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
+ if (AllocatedVReg) {
// Fixed-length vectors are located in the corresponding scalable-vector
// container types.
if (ValVT.isFixedLengthVector())
LocVT = TLI.getContainerForFixedLengthVector(LocVT);
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(
+ CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
} else {
// Try and pass the address via a "fast" GPR.
if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
@@ -19295,17 +19474,15 @@ bool RISCVTargetLowering::CanLowerReturn(
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions())
- FirstMaskArgument = preAssignMask(Outs);
+ RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
- ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
- *this, FirstMaskArgument))
+ ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
+ nullptr, *this, Dispatcher))
return false;
}
return true;
@@ -20486,8 +20663,8 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
unsigned *Fast) const {
if (!VT.isVector()) {
if (Fast)
- *Fast = Subtarget.hasFastUnalignedAccess();
- return Subtarget.hasFastUnalignedAccess();
+ *Fast = Subtarget.enableUnalignedScalarMem();
+ return Subtarget.enableUnalignedScalarMem();
}
// All vector implementations must support element alignment
@@ -20503,8 +20680,8 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
// misaligned accesses. TODO: Work through the codegen implications of
// allowing such accesses to be formed, and considered fast.
if (Fast)
- *Fast = Subtarget.hasFastUnalignedAccess();
- return Subtarget.hasFastUnalignedAccess();
+ *Fast = Subtarget.enableUnalignedVectorMem();
+ return Subtarget.enableUnalignedVectorMem();
}
@@ -20539,7 +20716,7 @@ EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
// Do we have sufficient alignment for our preferred VT? If not, revert
// to largest size allowed by our alignment criteria.
- if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) {
+ if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
Align RequiredAlign(PreferredVT.getStoreSize());
if (Op.isFixedDstAlign())
RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
@@ -20731,7 +20908,7 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
if (!isLegalElementTypeForRVV(ScalarType))
return false;
- if (!Subtarget.hasFastUnalignedAccess() &&
+ if (!Subtarget.enableUnalignedVectorMem() &&
Alignment < ScalarType.getStoreSize())
return false;
@@ -21102,6 +21279,181 @@ unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
return Subtarget.getMinimumJumpTableEntries();
}
+// Handle a single argument, such as the return value.
+template <typename Arg>
+void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
+  // This lambda determines whether the argument list consists solely of
+  // homogeneous scalable vector types.
+ auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
+    auto It = ArgList.begin();
+
+    // Bail out if the argument list is empty or the first part needs to be
+    // split.
+    if (It == ArgList.end() || It->Flags.isSplit())
+      return false;
+
+    // First, extract the register type of the first part.
+    MVT FirstArgRegType = It->VT;
+
+ ++It;
+
+    // Bail out if this argument has only a single part, or it is not a
+    // scalable vector type.
+ if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
+ return false;
+
+ // Second, check if the following elements in this argument type are all the
+ // same.
+ for (; It != ArgList.end(); ++It)
+ if (It->Flags.isSplit() || It->VT != FirstArgRegType)
+ return false;
+
+ return true;
+ };
+
+ if (isHomogeneousScalableVectorType(ArgList)) {
+ // Handle as tuple type
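+    // (e.g. three identical nxv4i32 parts become a single {NF=3, nxv4i32}
+    // entry).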
+ RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
+ } else {
+ // Handle as normal vector type
+ bool FirstVMaskAssigned = false;
+ for (const auto &OutArg : ArgList) {
+ MVT RegisterVT = OutArg.VT;
+
+ // Skip non-RVV register type
+ if (!RegisterVT.isVector())
+ continue;
+
+ if (RegisterVT.isFixedLengthVector())
+ RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
+
+ if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
+ RVVArgInfos.push_back({1, RegisterVT, true});
+ FirstVMaskAssigned = true;
+ continue;
+ }
+
+ RVVArgInfos.push_back({1, RegisterVT, false});
+ }
+ }
+}
+
+// Handle multiple args.
+template <>
+void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
+ const DataLayout &DL = MF->getDataLayout();
+ const Function &F = MF->getFunction();
+ LLVMContext &Context = F.getContext();
+
+ bool FirstVMaskAssigned = false;
+ for (Type *Ty : TypeList) {
+ StructType *STy = dyn_cast<StructType>(Ty);
+ if (STy && STy->containsHomogeneousScalableVectorTypes()) {
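+      // A struct whose members are all the same scalable vector type (e.g. a
+      // vector tuple type) is recorded as a single entry whose NF covers the
+      // registers of all of its members.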
+ Type *ElemTy = STy->getTypeAtIndex(0U);
+ EVT VT = TLI->getValueType(DL, ElemTy);
+ MVT RegisterVT =
+ TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
+ unsigned NumRegs =
+ TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
+
+ RVVArgInfos.push_back(
+ {NumRegs * STy->getNumElements(), RegisterVT, false});
+ } else {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
+
+ for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
+ ++Value) {
+ EVT VT = ValueVTs[Value];
+ MVT RegisterVT =
+ TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
+ unsigned NumRegs =
+ TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
+
+ // Skip non-RVV register type
+ if (!RegisterVT.isVector())
+ continue;
+
+ if (RegisterVT.isFixedLengthVector())
+ RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
+
+ if (!FirstVMaskAssigned &&
+ RegisterVT.getVectorElementType() == MVT::i1) {
+ RVVArgInfos.push_back({1, RegisterVT, true});
+ FirstVMaskAssigned = true;
+ --NumRegs;
+ }
+
+ RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
+ }
+ }
+ }
+}
+
+void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
+ unsigned StartReg) {
+  assert((StartReg % LMul) == 0 &&
+         "Start register number should be a multiple of lmul");
+ const MCPhysReg *VRArrays;
+ switch (LMul) {
+ default:
+ report_fatal_error("Invalid lmul");
+ case 1:
+ VRArrays = ArgVRs;
+ break;
+ case 2:
+ VRArrays = ArgVRM2s;
+ break;
+ case 4:
+ VRArrays = ArgVRM4s;
+ break;
+ case 8:
+ VRArrays = ArgVRM8s;
+ break;
+ }
+
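+  // The Arg* register arrays start at v8, so convert the absolute register
+  // number into an index into the LMUL-grouped array; a StartReg of 0 means
+  // no register group was available and a null MCPhysReg is recorded instead.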
+ for (unsigned i = 0; i < NF; ++i)
+ if (StartReg)
+ AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
+ else
+ AllocatedPhysRegs.push_back(MCPhysReg());
+}
+
+/// This function determines whether each RVV argument is passed by register.
+/// If the argument can be assigned to a VR, give it that specific register;
+/// otherwise, assign it 0, which is an invalid MCPhysReg.
+void RVVArgDispatcher::compute() {
+ uint32_t AssignedMap = 0;
+ auto allocate = [&](const RVVArgInfo &ArgInfo) {
+ // Allocate first vector mask argument to V0.
+ if (ArgInfo.FirstVMask) {
+ AllocatedPhysRegs.push_back(RISCV::V0);
+ return;
+ }
+
+ unsigned RegsNeeded = divideCeil(
+ ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
+ unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
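+    // AssignedMap is an occupancy bit mask over the 16 argument registers
+    // v8-v23; slide a window of TotalRegsNeeded bits along it in steps of
+    // RegsNeeded so the first register number stays a multiple of LMUL.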
+ for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
+ StartReg += RegsNeeded) {
+ uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
+ if ((AssignedMap & Map) == 0) {
+ allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
+ AssignedMap |= Map;
+ return;
+ }
+ }
+
+ allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
+ };
+
+ for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
+ allocate(RVVArgInfos[i]);
+}
+
+MCPhysReg RVVArgDispatcher::getNextPhysReg() {
+ assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
+ return AllocatedPhysRegs[CurIdx++];
+}
+
namespace llvm::RISCVVIntrinsicsTable {
#define GET_RISCVVIntrinsicsTable_IMPL
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index ace5b3f..b10da3d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -24,6 +24,7 @@ namespace llvm {
class InstructionCost;
class RISCVSubtarget;
struct RISCVRegisterInfo;
+class RVVArgDispatcher;
namespace RISCVISD {
// clang-format off
@@ -875,7 +876,7 @@ public:
ISD::ArgFlagsTy ArgFlags, CCState &State,
bool IsFixed, bool IsRet, Type *OrigTy,
const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument);
+ RVVArgDispatcher &RVVDispatcher);
private:
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
@@ -986,6 +987,8 @@ private:
bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
bool IsScalable) const override;
+ bool shouldExpandCttzElements(EVT VT) const override;
+
/// RVV code generation for fixed length vectors does not lower all
/// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
/// merge. However, merging them creates a BUILD_VECTOR that is just as
@@ -1015,19 +1018,71 @@ private:
unsigned getMinimumJumpTableEntries() const override;
};
+/// As per the spec, the rules for passing vector arguments are as follows:
+///
+/// 1. For the first vector mask argument, use v0 to pass it.
+/// 2. For vector data arguments or the remaining vector mask arguments,
+/// starting from the v8 register, if a vector register group between v8-v23
+/// that has not been allocated can be found and the first register number is a
+/// multiple of LMUL, then allocate this vector register group to the argument
+/// and mark these registers as allocated. Otherwise, pass the argument by
+/// reference and replace it in the argument list with its address.
+/// 3. For tuple vector data arguments, starting from the v8 register, if
+/// NFIELDS consecutive vector register groups between v8-v23 that have not
+/// been allocated can be found and the first register number is a multiple of
+/// LMUL, then allocate these vector register groups to the argument and mark
+/// these registers as allocated. Otherwise, pass the argument by reference and
+/// replace it in the argument list with its address.
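+///
+/// For illustration (informal, not normative): given one LMUL=2 data
+/// argument, one vector mask argument and one LMUL=4 data argument, the
+/// expected assignment is v8-v9, v0 and v12-v15 respectively; v10 is skipped
+/// because its register number is not a multiple of 4.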
+class RVVArgDispatcher {
+public:
+ static constexpr unsigned NumArgVRs = 16;
+
+ struct RVVArgInfo {
+ unsigned NF;
+ MVT VT;
+ bool FirstVMask = false;
+ };
+
+ template <typename Arg>
+ RVVArgDispatcher(const MachineFunction *MF, const RISCVTargetLowering *TLI,
+ ArrayRef<Arg> ArgList)
+ : MF(MF), TLI(TLI) {
+ constructArgInfos(ArgList);
+ compute();
+ }
+
+ RVVArgDispatcher() = default;
+
+ MCPhysReg getNextPhysReg();
+
+private:
+ SmallVector<RVVArgInfo, 4> RVVArgInfos;
+ SmallVector<MCPhysReg, 4> AllocatedPhysRegs;
+
+ const MachineFunction *MF = nullptr;
+ const RISCVTargetLowering *TLI = nullptr;
+
+ unsigned CurIdx = 0;
+
+ template <typename Arg> void constructArgInfos(ArrayRef<Arg> Ret);
+ void compute();
+ void allocatePhysReg(unsigned NF = 1, unsigned LMul = 1,
+ unsigned StartReg = 0);
+};
+
namespace RISCV {
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument);
+ RVVArgDispatcher &RVVDispatcher);
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument);
+ RVVArgDispatcher &RVVDispatcher);
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index a14f9a2..aab91ad 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -468,6 +468,7 @@ public:
bool isUnknown() const { return State == Unknown; }
void setAVLReg(Register Reg) {
+ assert(Reg.isVirtual() || Reg == RISCV::X0 || Reg == RISCV::NoRegister);
AVLReg = Reg;
State = AVLIsReg;
}
@@ -1514,17 +1515,12 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
// If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
// For now just check that PrevMI uses the same virtual register.
- if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
- if (AVL.getReg().isPhysical())
- return false;
- if (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg())
- return false;
- }
+ if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
+ (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg()))
+ return false;
}
- if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
- return false;
-
+ assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
auto PriorVType = PrevMI.getOperand(2).getImm();
auto VType = MI.getOperand(2).getImm();
return areCompatibleVTYPEs(PriorVType, VType, Used);
@@ -1545,9 +1541,9 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
continue;
}
- Register VRegDef = MI.getOperand(0).getReg();
- if (VRegDef != RISCV::X0 &&
- !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
+ Register RegDef = MI.getOperand(0).getReg();
+ assert(RegDef == RISCV::X0 || RegDef.isVirtual());
+ if (RegDef != RISCV::X0 && !MRI->use_nodbg_empty(RegDef))
Used.demandVL();
if (NextMI) {
@@ -1555,7 +1551,9 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
ToDelete.push_back(&MI);
// Leave NextMI unchanged
continue;
- } else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
+ }
+
+ if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
if (!isVLPreservingConfig(*NextMI)) {
MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
MI.getOperand(0).setIsDead(false);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 508f607..8331fc0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -361,15 +361,12 @@ void RISCVInstrInfo::copyPhysRegVector(
return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
};
- auto FindRegWithEncoding = [&TRI](const TargetRegisterClass &RegClass,
- uint16_t Encoding) {
- ArrayRef<MCPhysReg> Regs = RegClass.getRegisters();
- const auto *FoundReg = llvm::find_if(Regs, [&](MCPhysReg Reg) {
- return TRI->getEncodingValue(Reg) == Encoding;
- });
- // We should be always able to find one valid register.
- assert(FoundReg != Regs.end());
- return *FoundReg;
+ auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,
+ uint16_t Encoding) {
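+    // This relies on V0-V31 being numbered consecutively from V0, so adding
+    // the encoding yields the single VR; for grouped register classes, return
+    // the covering super-register whose first M1 sub-register is that VR.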
+ MCRegister Reg = RISCV::V0 + Encoding;
+ if (&RegClass == &RISCV::VRRegClass)
+ return Reg;
+ return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
};
while (I != NumRegs) {
// For non-segment copying, we only do this once as the registers are always
@@ -1986,7 +1983,7 @@ genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,
MRI.getUniqueVRegDef(AddMI->getOperand(AddOpIdx).getReg());
unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
- assert(InnerShiftAmt > OuterShiftAmt && "Unexpected shift amount");
+ assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount");
unsigned InnerOpc;
switch (InnerShiftAmt - OuterShiftAmt) {
@@ -2719,6 +2716,50 @@ std::string RISCVInstrInfo::createMIROperandComment(
}
// clang-format off
+#define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \
+ RISCV::Pseudo##OP##_##LMUL
+
+#define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \
+ RISCV::Pseudo##OP##_##LMUL##_MASK
+
+#define CASE_RVV_OPCODE_LMUL(OP, LMUL) \
+ CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL)
+
+#define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \
+ CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4)
+
+#define CASE_RVV_OPCODE_UNMASK(OP) \
+ CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8)
+
+#define CASE_RVV_OPCODE_MASK_WIDEN(OP) \
+ CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, M4)
+
+#define CASE_RVV_OPCODE_MASK(OP) \
+ CASE_RVV_OPCODE_MASK_WIDEN(OP): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, M8)
+
+#define CASE_RVV_OPCODE_WIDEN(OP) \
+ CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
+ case CASE_RVV_OPCODE_MASK_WIDEN(OP)
+
+#define CASE_RVV_OPCODE(OP) \
+ CASE_RVV_OPCODE_UNMASK(OP): \
+ case CASE_RVV_OPCODE_MASK(OP)
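+
+// As an informal illustration, CASE_RVV_OPCODE(VADD_VV) expands to case
+// labels for both the unmasked and masked PseudoVADD_VV pseudos at every
+// LMUL (MF8 through M8).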
+// clang-format on
+
+// clang-format off
#define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
RISCV::PseudoV##OP##_##TYPE##_##LMUL
@@ -2798,6 +2839,28 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case RISCV::PseudoCCMOVGPR:
// Operands 4 and 5 are commutable.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
+ case CASE_RVV_OPCODE(VADD_VV):
+ case CASE_RVV_OPCODE(VAND_VV):
+ case CASE_RVV_OPCODE(VOR_VV):
+ case CASE_RVV_OPCODE(VXOR_VV):
+ case CASE_RVV_OPCODE_MASK(VMSEQ_VV):
+ case CASE_RVV_OPCODE_MASK(VMSNE_VV):
+ case CASE_RVV_OPCODE(VMIN_VV):
+ case CASE_RVV_OPCODE(VMINU_VV):
+ case CASE_RVV_OPCODE(VMAX_VV):
+ case CASE_RVV_OPCODE(VMAXU_VV):
+ case CASE_RVV_OPCODE(VMUL_VV):
+ case CASE_RVV_OPCODE(VMULH_VV):
+ case CASE_RVV_OPCODE(VMULHU_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWADD_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWADDU_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWMUL_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWMULU_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWMACC_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV):
+ case CASE_RVV_OPCODE_UNMASK(VADC_VVM):
+ // Operands 2 and 3 are commutable.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
case CASE_VFMA_SPLATS(FMADD):
case CASE_VFMA_SPLATS(FMSUB):
case CASE_VFMA_SPLATS(FMACC):
@@ -2950,7 +3013,7 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)
#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 0dc466f..cd5caa4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -143,22 +143,24 @@ class PseudoToVInst<string PseudoInst> {
// This class describes information associated to the LMUL.
class LMULInfo<int lmul, int oct, VReg regclass, VReg wregclass,
- VReg f2regclass, VReg f4regclass, VReg f8regclass, string mx> {
+ VReg f2regclass, VReg f4regclass, VReg f8regclass, string mx,
+ VReg moutregclass = VMM1> {
bits<3> value = lmul; // This is encoded as the vlmul field of vtype.
VReg vrclass = regclass;
VReg wvrclass = wregclass;
VReg f8vrclass = f8regclass;
VReg f4vrclass = f4regclass;
VReg f2vrclass = f2regclass;
+ VReg moutclass = moutregclass;
string MX = mx;
int octuple = oct;
}
// Associate LMUL with tablegen records of register classes.
def V_M1 : LMULInfo<0b000, 8, VR, VRM2, VR, VR, VR, "M1">;
-def V_M2 : LMULInfo<0b001, 16, VRM2, VRM4, VR, VR, VR, "M2">;
-def V_M4 : LMULInfo<0b010, 32, VRM4, VRM8, VRM2, VR, VR, "M4">;
-def V_M8 : LMULInfo<0b011, 64, VRM8,/*NoVReg*/VR, VRM4, VRM2, VR, "M8">;
+def V_M2 : LMULInfo<0b001, 16, VRM2, VRM4, VR, VR, VR, "M2", VMM2>;
+def V_M4 : LMULInfo<0b010, 32, VRM4, VRM8, VRM2, VR, VR, "M4", VMM4>;
+def V_M8 : LMULInfo<0b011, 64, VRM8,/*NoVReg*/VR, VRM4, VRM2, VR, "M8", VMM8>;
def V_MF8 : LMULInfo<0b101, 1, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR,/*NoVReg*/VR, "MF8">;
def V_MF4 : LMULInfo<0b110, 2, VR, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR, "MF4">;
@@ -2127,8 +2129,9 @@ multiclass VPseudoBinary<VReg RetClass,
LMULInfo MInfo,
string Constraint = "",
int sew = 0,
- int TargetConstraintType = 1> {
- let VLMul = MInfo.value, SEW=sew in {
+ int TargetConstraintType = 1,
+ bit Commutable = 0> {
+ let VLMul = MInfo.value, SEW=sew, isCommutable = Commutable in {
defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
def suffix : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class,
Constraint, TargetConstraintType>;
@@ -2167,8 +2170,9 @@ multiclass VPseudoBinaryM<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = "",
- int TargetConstraintType = 1> {
- let VLMul = MInfo.value in {
+ int TargetConstraintType = 1,
+ bit Commutable = 0> {
+ let VLMul = MInfo.value, isCommutable = Commutable in {
def "_" # MInfo.MX : VPseudoBinaryMOutNoMask<RetClass, Op1Class, Op2Class,
Constraint, TargetConstraintType>;
let ForceTailAgnostic = true in
@@ -2226,8 +2230,8 @@ multiclass VPseudoTiedBinaryRoundingMode<VReg RetClass,
}
-multiclass VPseudoBinaryV_VV<LMULInfo m, string Constraint = "", int sew = 0> {
- defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint, sew>;
+multiclass VPseudoBinaryV_VV<LMULInfo m, string Constraint = "", int sew = 0, bit Commutable = 0> {
+ defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint, sew, Commutable=Commutable>;
}
multiclass VPseudoBinaryV_VV_RM<LMULInfo m, string Constraint = ""> {
@@ -2331,9 +2335,10 @@ multiclass VPseudoVALU_MM<bit Commutable = 0> {
// * The destination EEW is greater than the source EEW, the source EMUL is
// at least 1, and the overlap is in the highest-numbered part of the
// destination register group is legal. Otherwise, it is illegal.
-multiclass VPseudoBinaryW_VV<LMULInfo m> {
+multiclass VPseudoBinaryW_VV<LMULInfo m, bit Commutable = 0> {
defm _VV : VPseudoBinary<m.wvrclass, m.vrclass, m.vrclass, m,
- "@earlyclobber $rd", TargetConstraintType=3>;
+ "@earlyclobber $rd", TargetConstraintType=3,
+ Commutable=Commutable>;
}
multiclass VPseudoBinaryW_VV_RM<LMULInfo m, int sew = 0> {
@@ -2453,7 +2458,9 @@ multiclass VPseudoBinaryV_VM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
m.vrclass, m.vrclass, m, CarryIn, Constraint, TargetConstraintType>;
}
-multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1> {
+multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1,
+ bit Commutable = 0> {
+ let isCommutable = Commutable in
def "_VVM" # "_" # m.MX:
VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
m.vrclass, m.vrclass, m, 1, "",
@@ -2667,26 +2674,24 @@ multiclass PseudoVEXT_VF8 {
// lowest-numbered part of the source register group".
// With LMUL<=1 the source and dest occupy a single register so any overlap
// is in the lowest-numbered part.
-multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1> {
- defm _VV : VPseudoBinaryM<VR, m.vrclass, m.vrclass, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
+multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1,
+ bit Commutable = 0> {
+ defm _VV : VPseudoBinaryM<m.moutclass, m.vrclass, m.vrclass, m, "",
+ TargetConstraintType, Commutable=Commutable>;
}
multiclass VPseudoBinaryM_VX<LMULInfo m, int TargetConstraintType = 1> {
defm "_VX" :
- VPseudoBinaryM<VR, m.vrclass, GPR, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
+ VPseudoBinaryM<m.moutclass, m.vrclass, GPR, m, "", TargetConstraintType>;
}
multiclass VPseudoBinaryM_VF<LMULInfo m, FPR_Info f, int TargetConstraintType = 1> {
defm "_V" # f.FX :
- VPseudoBinaryM<VR, m.vrclass, f.fprclass, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
+ VPseudoBinaryM<m.moutclass, m.vrclass, f.fprclass, m, "", TargetConstraintType>;
}
multiclass VPseudoBinaryM_VI<LMULInfo m, int TargetConstraintType = 1> {
- defm _VI : VPseudoBinaryM<VR, m.vrclass, simm5, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
+ defm _VI : VPseudoBinaryM<m.moutclass, m.vrclass, simm5, m, "", TargetConstraintType>;
}
multiclass VPseudoVGTR_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
@@ -2751,10 +2756,11 @@ multiclass VPseudoVSSHT_VV_VX_VI_RM<Operand ImmType = simm5, string Constraint =
}
}
-multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
+multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = "",
+ bit Commutable = 0> {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryV_VV<m, Constraint>,
+ defm "" : VPseudoBinaryV_VV<m, Constraint, Commutable=Commutable>,
SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX<m, Constraint>,
@@ -2804,17 +2810,17 @@ multiclass VPseudoVAALU_VV_VX_RM {
multiclass VPseudoVMINMAX_VV_VX {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryV_VV<m>,
+ defm "" : VPseudoBinaryV_VV<m, Commutable=1>,
SchedBinary<"WriteVIMinMaxV", "ReadVIMinMaxV", "ReadVIMinMaxV", mx>;
defm "" : VPseudoBinaryV_VX<m>,
SchedBinary<"WriteVIMinMaxX", "ReadVIMinMaxV", "ReadVIMinMaxX", mx>;
}
}
-multiclass VPseudoVMUL_VV_VX {
+multiclass VPseudoVMUL_VV_VX<bit Commutable = 0> {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryV_VV<m>,
+ defm "" : VPseudoBinaryV_VV<m, Commutable=Commutable>,
SchedBinary<"WriteVIMulV", "ReadVIMulV", "ReadVIMulV", mx>;
defm "" : VPseudoBinaryV_VX<m>,
SchedBinary<"WriteVIMulX", "ReadVIMulV", "ReadVIMulX", mx>;
@@ -2894,32 +2900,34 @@ multiclass VPseudoVALU_VV_VX {
multiclass VPseudoVSGNJ_VV_VF {
foreach m = MxListF in {
- defm "" : VPseudoBinaryFV_VV<m>,
+ foreach e = SchedSEWSet<m.MX, isF=1>.val in
+ defm "" : VPseudoBinaryFV_VV<m, sew=e>,
SchedBinary<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV", m.MX,
- forceMergeOpRead=true>;
+ e, forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defm "" : VPseudoBinaryV_VF<m, f>,
+ defm "" : VPseudoBinaryV_VF<m, f, sew=f.SEW>,
SchedBinary<"WriteVFSgnjF", "ReadVFSgnjV", "ReadVFSgnjF", m.MX,
- forceMergeOpRead=true>;
+ f.SEW, forceMergeOpRead=true>;
}
}
}
multiclass VPseudoVMAX_VV_VF {
foreach m = MxListF in {
- defm "" : VPseudoBinaryFV_VV<m>,
- SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1>.val in
+ defm "" : VPseudoBinaryFV_VV<m, sew=e>,
+ SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV",
+ m.MX, e, forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defm "" : VPseudoBinaryV_VF<m, f>,
- SchedBinary<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF", m.MX,
- forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_VF<m, f, sew=f.SEW>,
+ SchedBinary<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF",
+ m.MX, f.SEW, forceMergeOpRead=true>;
}
}
}
@@ -2962,10 +2970,10 @@ multiclass VPseudoVALU_VX_VI<Operand ImmType = simm5> {
}
}
-multiclass VPseudoVWALU_VV_VX {
+multiclass VPseudoVWALU_VV_VX<bit Commutable = 0> {
foreach m = MxListW in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryW_VV<m>,
+ defm "" : VPseudoBinaryW_VV<m, Commutable=Commutable>,
SchedBinary<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoBinaryW_VX<m>,
@@ -2974,10 +2982,10 @@ multiclass VPseudoVWALU_VV_VX {
}
}
-multiclass VPseudoVWMUL_VV_VX {
+multiclass VPseudoVWMUL_VV_VX<bit Commutable = 0> {
foreach m = MxListW in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryW_VV<m>,
+ defm "" : VPseudoBinaryW_VV<m, Commutable=Commutable>,
SchedBinary<"WriteVIWMulV", "ReadVIWMulV", "ReadVIWMulV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoBinaryW_VX<m>,
@@ -3072,7 +3080,7 @@ multiclass VPseudoVMRG_VM_XM_IM {
multiclass VPseudoVCALU_VM_XM_IM {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoTiedBinaryV_VM<m>,
+ defm "" : VPseudoTiedBinaryV_VM<m, Commutable=1>,
SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoTiedBinaryV_XM<m>,
@@ -3285,10 +3293,10 @@ multiclass VPseudoTernaryV_VF_AAXA_RM<LMULInfo m, FPR_Info f,
sew, Commutable=1>;
}
-multiclass VPseudoTernaryW_VV<LMULInfo m> {
+multiclass VPseudoTernaryW_VV<LMULInfo m, bit Commutable = 0> {
defvar constraint = "@earlyclobber $rd";
defm _VV : VPseudoTernaryWithPolicy<m.wvrclass, m.vrclass, m.vrclass, m,
- constraint, /*Commutable*/ 0, TargetConstraintType=3>;
+ constraint, Commutable=Commutable, TargetConstraintType=3>;
}
multiclass VPseudoTernaryW_VV_RM<LMULInfo m, int sew = 0> {
@@ -3378,10 +3386,10 @@ multiclass VPseudoVSLD_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
}
}
-multiclass VPseudoVWMAC_VV_VX {
+multiclass VPseudoVWMAC_VV_VX<bit Commutable = 0> {
foreach m = MxListW in {
defvar mx = m.MX;
- defm "" : VPseudoTernaryW_VV<m>,
+ defm "" : VPseudoTernaryW_VV<m, Commutable=Commutable>,
SchedTernary<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV",
"ReadVIWMulAddV", mx>;
defm "" : VPseudoTernaryW_VX<m>,
@@ -3434,10 +3442,10 @@ multiclass VPseudoVWMAC_VV_VF_BF_RM {
}
}
-multiclass VPseudoVCMPM_VV_VX_VI {
+multiclass VPseudoVCMPM_VV_VX_VI<bit Commutable = 0> {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>,
+ defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2, Commutable=Commutable>,
SchedBinary<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV", mx>;
defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>,
SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>;
@@ -3580,12 +3588,14 @@ multiclass VPseudoConversion<VReg RetClass,
VReg Op1Class,
LMULInfo MInfo,
string Constraint = "",
+ int sew = 0,
int TargetConstraintType = 1> {
+ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
let VLMul = MInfo.value in {
- def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint, TargetConstraintType>;
- def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask<RetClass, Op1Class,
- Constraint, TargetConstraintType>,
- RISCVMaskedPseudo<MaskIdx=2>;
+ def suffix : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint, TargetConstraintType>;
+ def suffix # "_MASK" : VPseudoUnaryMask<RetClass, Op1Class,
+ Constraint, TargetConstraintType>,
+ RISCVMaskedPseudo<MaskIdx=2>;
}
}
@@ -3593,12 +3603,15 @@ multiclass VPseudoConversionRoundingMode<VReg RetClass,
VReg Op1Class,
LMULInfo MInfo,
string Constraint = "",
+ int sew = 0,
int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
- def "_" # MInfo.MX : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint, TargetConstraintType>;
- def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskRoundingMode<RetClass, Op1Class,
- Constraint, TargetConstraintType>,
- RISCVMaskedPseudo<MaskIdx=2>;
+ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
+ def suffix : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint, TargetConstraintType>;
+ def suffix # "_MASK" : VPseudoUnaryMaskRoundingMode<RetClass, Op1Class,
+ Constraint,
+ TargetConstraintType>,
+ RISCVMaskedPseudo<MaskIdx=2>;
}
}
@@ -3607,13 +3620,15 @@ multiclass VPseudoConversionRM<VReg RetClass,
VReg Op1Class,
LMULInfo MInfo,
string Constraint = "",
+ int sew = 0,
int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
- def "_" # MInfo.MX : VPseudoUnaryNoMask_FRM<RetClass, Op1Class,
- Constraint, TargetConstraintType>;
- def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask_FRM<RetClass, Op1Class,
- Constraint, TargetConstraintType>,
- RISCVMaskedPseudo<MaskIdx=2>;
+ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
+ def suffix : VPseudoUnaryNoMask_FRM<RetClass, Op1Class,
+ Constraint, TargetConstraintType>;
+ def suffix # "_MASK" : VPseudoUnaryMask_FRM<RetClass, Op1Class,
+ Constraint, TargetConstraintType>,
+ RISCVMaskedPseudo<MaskIdx=2>;
}
}
@@ -3660,17 +3675,19 @@ multiclass VPseudoVFROUND_NOEXCEPT_V {
multiclass VPseudoVCVTF_V_RM {
foreach m = MxListF in {
- defm _V : VPseudoConversionRoundingMode<m.vrclass, m.vrclass, m>,
- SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1>.val in
+ defm _V : VPseudoConversionRoundingMode<m.vrclass, m.vrclass, m, sew=e>,
+ SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCVTF_RM_V {
foreach m = MxListF in {
- defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m>,
- SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1>.val in
+ defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m, sew=e>,
+ SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
@@ -3704,18 +3721,22 @@ multiclass VPseudoVWCVTI_RM_V {
multiclass VPseudoVWCVTF_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW in {
- defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>,
- SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=0, isWidening=1>.val in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=e,
+ TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVWCVTD_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>,
- SchedUnary<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=e,
+ TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
@@ -3749,36 +3770,45 @@ multiclass VPseudoVNCVTI_RM_W {
multiclass VPseudoVNCVTF_W_RM {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
- SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m,
+ constraint, sew=e,
+ TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTF_RM_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint>,
- SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in
+ defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint, sew=e>,
+ SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTD_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
- SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in
+ defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, sew=e,
+ TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTD_W_RM {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
- SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m,
+ constraint, sew=e,
+ TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
@@ -4889,14 +4919,17 @@ multiclass VPatConversionTA<string intrinsic,
ValueType result_type,
ValueType op1_type,
ValueType mask_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
- VReg op1_reg_class> {
+ VReg op1_reg_class,
+ bit isSEWAware = 0> {
def : VPatUnaryNoMask<intrinsic, inst, kind, result_type, op1_type,
- sew, vlmul, result_reg_class, op1_reg_class>;
+ log2sew, vlmul, result_reg_class, op1_reg_class,
+ isSEWAware>;
def : VPatUnaryMask<intrinsic, inst, kind, result_type, op1_type,
- mask_type, sew, vlmul, result_reg_class, op1_reg_class>;
+ mask_type, log2sew, vlmul, result_reg_class, op1_reg_class,
+ isSEWAware>;
}
multiclass VPatConversionTARoundingMode<string intrinsic,
@@ -4905,14 +4938,17 @@ multiclass VPatConversionTARoundingMode<string intrinsic,
ValueType result_type,
ValueType op1_type,
ValueType mask_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
- VReg op1_reg_class> {
+ VReg op1_reg_class,
+ bit isSEWAware = 0> {
def : VPatUnaryNoMaskRoundingMode<intrinsic, inst, kind, result_type, op1_type,
- sew, vlmul, result_reg_class, op1_reg_class>;
+ log2sew, vlmul, result_reg_class,
+ op1_reg_class, isSEWAware>;
def : VPatUnaryMaskRoundingMode<intrinsic, inst, kind, result_type, op1_type,
- mask_type, sew, vlmul, result_reg_class, op1_reg_class>;
+ mask_type, log2sew, vlmul, result_reg_class,
+ op1_reg_class, isSEWAware>;
}
multiclass VPatBinaryV_VV<string intrinsic, string instruction,
@@ -5905,15 +5941,16 @@ multiclass VPatConversionVI_VF_RM<string intrinsic,
}
}
-multiclass VPatConversionVF_VI_RM<string intrinsic,
- string instruction> {
+multiclass VPatConversionVF_VI_RM<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
GetVTypePredicates<ivti>.Predicates) in
defm : VPatConversionTARoundingMode<intrinsic, instruction, "V",
fvti.Vector, ivti.Vector, fvti.Mask, ivti.Log2SEW,
- ivti.LMul, fvti.RegClass, ivti.RegClass>;
+ ivti.LMul, fvti.RegClass, ivti.RegClass,
+ isSEWAware>;
}
}
@@ -5941,7 +5978,8 @@ multiclass VPatConversionWI_VF_RM<string intrinsic, string instruction> {
}
}
-multiclass VPatConversionWF_VI<string intrinsic, string instruction> {
+multiclass VPatConversionWF_VI<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
@@ -5949,11 +5987,12 @@ multiclass VPatConversionWF_VI<string intrinsic, string instruction> {
GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTA<intrinsic, instruction, "V",
fwti.Vector, vti.Vector, fwti.Mask, vti.Log2SEW,
- vti.LMul, fwti.RegClass, vti.RegClass>;
+ vti.LMul, fwti.RegClass, vti.RegClass, isSEWAware>;
}
}
-multiclass VPatConversionWF_VF<string intrinsic, string instruction> {
+multiclass VPatConversionWF_VF<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
@@ -5963,11 +6002,12 @@ multiclass VPatConversionWF_VF<string intrinsic, string instruction> {
GetVTypePredicates<fwti>.Predicates)) in
defm : VPatConversionTA<intrinsic, instruction, "V",
fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
- fvti.LMul, fwti.RegClass, fvti.RegClass>;
+ fvti.LMul, fwti.RegClass, fvti.RegClass, isSEWAware>;
}
}
-multiclass VPatConversionWF_VF_BF <string intrinsic, string instruction> {
+multiclass VPatConversionWF_VF_BF <string intrinsic, string instruction,
+ bit isSEWAware = 0> {
foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in
{
defvar fvti = fvtiToFWti.Vti;
@@ -5976,7 +6016,7 @@ multiclass VPatConversionWF_VF_BF <string intrinsic, string instruction> {
GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTA<intrinsic, instruction, "V",
fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
- fvti.LMul, fwti.RegClass, fvti.RegClass>;
+ fvti.LMul, fwti.RegClass, fvti.RegClass, isSEWAware>;
}
}
@@ -6004,7 +6044,8 @@ multiclass VPatConversionVI_WF_RM <string intrinsic, string instruction> {
}
}
-multiclass VPatConversionVF_WI_RM <string intrinsic, string instruction> {
+multiclass VPatConversionVF_WI_RM <string intrinsic, string instruction,
+ bit isSEWAware = 0> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
@@ -6012,11 +6053,13 @@ multiclass VPatConversionVF_WI_RM <string intrinsic, string instruction> {
GetVTypePredicates<iwti>.Predicates) in
defm : VPatConversionTARoundingMode<intrinsic, instruction, "W",
fvti.Vector, iwti.Vector, fvti.Mask, fvti.Log2SEW,
- fvti.LMul, fvti.RegClass, iwti.RegClass>;
+ fvti.LMul, fvti.RegClass, iwti.RegClass,
+ isSEWAware>;
}
}
-multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
+multiclass VPatConversionVF_WF<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
@@ -6024,12 +6067,13 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTA<intrinsic, instruction, "W",
fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
- fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ fvti.LMul, fvti.RegClass, fwti.RegClass, isSEWAware>;
}
}
-multiclass VPatConversionVF_WF_RM <string intrinsic, string instruction,
- list<VTypeInfoToWide> wlist = AllWidenableFloatVectors> {
+multiclass VPatConversionVF_WF_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> wlist = AllWidenableFloatVectors,
+ bit isSEWAware = 0> {
foreach fvtiToFWti = wlist in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
@@ -6037,11 +6081,13 @@ multiclass VPatConversionVF_WF_RM <string intrinsic, string instruction,
GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTARoundingMode<intrinsic, instruction, "W",
fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
- fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ fvti.LMul, fvti.RegClass, fwti.RegClass,
+ isSEWAware>;
}
}
-multiclass VPatConversionVF_WF_BF_RM <string intrinsic, string instruction> {
+multiclass VPatConversionVF_WF_BF_RM <string intrinsic, string instruction,
+ bit isSEWAware = 0> {
foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
@@ -6049,7 +6095,8 @@ multiclass VPatConversionVF_WF_BF_RM <string intrinsic, string instruction> {
GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTARoundingMode<intrinsic, instruction, "W",
fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
- fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ fvti.LMul, fvti.RegClass, fwti.RegClass,
+ isSEWAware>;
}
}
@@ -6207,7 +6254,7 @@ defm PseudoVLSEG : VPseudoUSSegLoadFF;
//===----------------------------------------------------------------------===//
// 11.1. Vector Single-Width Integer Add and Subtract
//===----------------------------------------------------------------------===//
-defm PseudoVADD : VPseudoVALU_VV_VX_VI;
+defm PseudoVADD : VPseudoVALU_VV_VX_VI<Commutable=1>;
defm PseudoVSUB : VPseudoVALU_VV_VX;
defm PseudoVRSUB : VPseudoVALU_VX_VI;
@@ -6272,9 +6319,9 @@ foreach vti = AllIntegerVectors in {
//===----------------------------------------------------------------------===//
// 11.2. Vector Widening Integer Add/Subtract
//===----------------------------------------------------------------------===//
-defm PseudoVWADDU : VPseudoVWALU_VV_VX;
+defm PseudoVWADDU : VPseudoVWALU_VV_VX<Commutable=1>;
defm PseudoVWSUBU : VPseudoVWALU_VV_VX;
-defm PseudoVWADD : VPseudoVWALU_VV_VX;
+defm PseudoVWADD : VPseudoVWALU_VV_VX<Commutable=1>;
defm PseudoVWSUB : VPseudoVWALU_VV_VX;
defm PseudoVWADDU : VPseudoVWALU_WV_WX;
defm PseudoVWSUBU : VPseudoVWALU_WV_WX;
@@ -6305,9 +6352,9 @@ defm PseudoVMSBC : VPseudoVCALUM_V_X<"@earlyclobber $rd">;
//===----------------------------------------------------------------------===//
// 11.5. Vector Bitwise Logical Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVAND : VPseudoVALU_VV_VX_VI;
-defm PseudoVOR : VPseudoVALU_VV_VX_VI;
-defm PseudoVXOR : VPseudoVALU_VV_VX_VI;
+defm PseudoVAND : VPseudoVALU_VV_VX_VI<Commutable=1>;
+defm PseudoVOR : VPseudoVALU_VV_VX_VI<Commutable=1>;
+defm PseudoVXOR : VPseudoVALU_VV_VX_VI<Commutable=1>;
//===----------------------------------------------------------------------===//
// 11.6. Vector Single-Width Bit Shift Instructions
@@ -6325,8 +6372,8 @@ defm PseudoVNSRA : VPseudoVNSHT_WV_WX_WI;
//===----------------------------------------------------------------------===//
// 11.8. Vector Integer Comparison Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMSEQ : VPseudoVCMPM_VV_VX_VI;
-defm PseudoVMSNE : VPseudoVCMPM_VV_VX_VI;
+defm PseudoVMSEQ : VPseudoVCMPM_VV_VX_VI<Commutable=1>;
+defm PseudoVMSNE : VPseudoVCMPM_VV_VX_VI<Commutable=1>;
defm PseudoVMSLTU : VPseudoVCMPM_VV_VX;
defm PseudoVMSLT : VPseudoVCMPM_VV_VX;
defm PseudoVMSLEU : VPseudoVCMPM_VV_VX_VI;
@@ -6345,9 +6392,9 @@ defm PseudoVMAX : VPseudoVMINMAX_VV_VX;
//===----------------------------------------------------------------------===//
// 11.10. Vector Single-Width Integer Multiply Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMUL : VPseudoVMUL_VV_VX;
-defm PseudoVMULH : VPseudoVMUL_VV_VX;
-defm PseudoVMULHU : VPseudoVMUL_VV_VX;
+defm PseudoVMUL : VPseudoVMUL_VV_VX<Commutable=1>;
+defm PseudoVMULH : VPseudoVMUL_VV_VX<Commutable=1>;
+defm PseudoVMULHU : VPseudoVMUL_VV_VX<Commutable=1>;
defm PseudoVMULHSU : VPseudoVMUL_VV_VX;
//===----------------------------------------------------------------------===//
@@ -6361,8 +6408,8 @@ defm PseudoVREM : VPseudoVDIV_VV_VX;
//===----------------------------------------------------------------------===//
// 11.12. Vector Widening Integer Multiply Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVWMUL : VPseudoVWMUL_VV_VX;
-defm PseudoVWMULU : VPseudoVWMUL_VV_VX;
+defm PseudoVWMUL : VPseudoVWMUL_VV_VX<Commutable=1>;
+defm PseudoVWMULU : VPseudoVWMUL_VV_VX<Commutable=1>;
defm PseudoVWMULSU : VPseudoVWMUL_VV_VX;
//===----------------------------------------------------------------------===//
@@ -6376,8 +6423,8 @@ defm PseudoVNMSUB : VPseudoVMAC_VV_VX_AAXA;
//===----------------------------------------------------------------------===//
// 11.14. Vector Widening Integer Multiply-Add Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVWMACCU : VPseudoVWMAC_VV_VX;
-defm PseudoVWMACC : VPseudoVWMAC_VV_VX;
+defm PseudoVWMACCU : VPseudoVWMAC_VV_VX<Commutable=1>;
+defm PseudoVWMACC : VPseudoVWMAC_VV_VX<Commutable=1>;
defm PseudoVWMACCSU : VPseudoVWMAC_VV_VX;
defm PseudoVWMACCUS : VPseudoVWMAC_VX;
@@ -7197,15 +7244,20 @@ defm : VPatUnaryV_V_RM<"int_riscv_vfrec7", "PseudoVFREC7", AllFloatVectors, isSE
//===----------------------------------------------------------------------===//
// 13.11. Vector Floating-Point Min/Max Instructions
//===----------------------------------------------------------------------===//
-defm : VPatBinaryV_VV_VX<"int_riscv_vfmin", "PseudoVFMIN", AllFloatVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vfmax", "PseudoVFMAX", AllFloatVectors>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfmin", "PseudoVFMIN", AllFloatVectors,
+ isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfmax", "PseudoVFMAX", AllFloatVectors,
+ isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.12. Vector Floating-Point Sign-Injection Instructions
//===----------------------------------------------------------------------===//
-defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnj", "PseudoVFSGNJ", AllFloatVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjn", "PseudoVFSGNJN", AllFloatVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjx", "PseudoVFSGNJX", AllFloatVectors>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnj", "PseudoVFSGNJ", AllFloatVectors,
+ isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjn", "PseudoVFSGNJN", AllFloatVectors,
+ isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjx", "PseudoVFSGNJX", AllFloatVectors,
+ isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.13. Vector Floating-Point Compare Instructions
@@ -7269,8 +7321,10 @@ defm : VPatConversionVI_VF_RM<"int_riscv_vfcvt_x_f_v", "PseudoVFCVT_X_F">;
defm : VPatConversionVI_VF_RM<"int_riscv_vfcvt_xu_f_v", "PseudoVFCVT_XU_F">;
defm : VPatConversionVI_VF<"int_riscv_vfcvt_rtz_xu_f_v", "PseudoVFCVT_RTZ_XU_F">;
defm : VPatConversionVI_VF<"int_riscv_vfcvt_rtz_x_f_v", "PseudoVFCVT_RTZ_X_F">;
-defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_x_v", "PseudoVFCVT_F_X">;
-defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_xu_v", "PseudoVFCVT_F_XU">;
+defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_x_v", "PseudoVFCVT_F_X",
+ isSEWAware=1>;
+defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_xu_v", "PseudoVFCVT_F_XU",
+ isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
@@ -7279,11 +7333,14 @@ defm : VPatConversionWI_VF_RM<"int_riscv_vfwcvt_xu_f_v", "PseudoVFWCVT_XU_F">;
defm : VPatConversionWI_VF_RM<"int_riscv_vfwcvt_x_f_v", "PseudoVFWCVT_X_F">;
defm : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_xu_f_v", "PseudoVFWCVT_RTZ_XU_F">;
defm : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_x_f_v", "PseudoVFWCVT_RTZ_X_F">;
-defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU">;
-defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X">;
-defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F">;
+defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU",
+ isSEWAware=1>;
+defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X",
+ isSEWAware=1>;
+defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F",
+ isSEWAware=1>;
defm : VPatConversionWF_VF_BF<"int_riscv_vfwcvtbf16_f_f_v",
- "PseudoVFWCVTBF16_F_F">;
+ "PseudoVFWCVTBF16_F_F", isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
@@ -7292,21 +7349,24 @@ defm : VPatConversionVI_WF_RM<"int_riscv_vfncvt_xu_f_w", "PseudoVFNCVT_XU_F">;
defm : VPatConversionVI_WF_RM<"int_riscv_vfncvt_x_f_w", "PseudoVFNCVT_X_F">;
defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F">;
defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F">;
-defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU">;
-defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X">;
+defm : VPatConversionVF_WI_RM<"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU",
+ isSEWAware=1>;
+defm : VPatConversionVF_WI_RM<"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X",
+ isSEWAware=1>;
defvar WidenableFloatVectorsExceptF16 = !filter(fvtiToFWti, AllWidenableFloatVectors,
!ne(fvtiToFWti.Vti.Scalar, f16));
defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F",
- WidenableFloatVectorsExceptF16>;
+ WidenableFloatVectorsExceptF16, isSEWAware=1>;
// Define vfncvt.f.f.w for f16 when Zvfhmin is enabled.
defvar F16WidenableFloatVectors = !filter(fvtiToFWti, AllWidenableFloatVectors,
!eq(fvtiToFWti.Vti.Scalar, f16));
let Predicates = [HasVInstructionsF16Minimal] in
defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F",
- F16WidenableFloatVectors>;
+ F16WidenableFloatVectors, isSEWAware=1>;
defm : VPatConversionVF_WF_BF_RM<"int_riscv_vfncvtbf16_f_f_w",
- "PseudoVFNCVTBF16_F_F">;
-defm : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F">;
+ "PseudoVFNCVTBF16_F_F", isSEWAware=1>;
+defm : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F",
+ isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 14. Vector Reduction Operations
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index b6cd6dc..3397d55 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -410,7 +410,7 @@ multiclass VPatConvertI2FPSDNode_V_RM<SDPatternOperator vop,
let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
GetVTypePredicates<ivti>.Predicates) in
def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1))),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW)
(fvti.Vector (IMPLICIT_DEF)),
ivti.RegClass:$rs1,
// Value to indicate no rounding mode change in
@@ -441,7 +441,7 @@ multiclass VPatWConvertI2FPSDNode_V<SDPatternOperator vop,
let Predicates = !listconcat(GetVTypePredicates<ivti>.Predicates,
GetVTypePredicates<fwti>.Predicates) in
def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1))),
- (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_E"#ivti.SEW)
(fwti.Vector (IMPLICIT_DEF)),
ivti.RegClass:$rs1,
ivti.AVL, ivti.Log2SEW, TA_MA)>;
@@ -470,7 +470,7 @@ multiclass VPatNConvertI2FPSDNode_W_RM<SDPatternOperator vop,
let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
GetVTypePredicates<iwti>.Predicates) in
def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1))),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW)
(fvti.Vector (IMPLICIT_DEF)),
iwti.RegClass:$rs1,
// Value to indicate no rounding mode change in
@@ -1339,42 +1339,42 @@ foreach vti = AllFloatVectors in {
// 13.12. Vector Floating-Point Sign-Injection Instructions
def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
- (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX)
+ (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX#"_E"#vti.SEW)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
// Handle fneg with VFSGNJN using the same input for both operands.
def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
- (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
+ (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector vti.RegClass:$rs2))),
- (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX)
+ (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX#"_E"#vti.SEW)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))),
- (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (fneg vti.RegClass:$rs2)))),
- (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
+ (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))),
- (!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
// 13.11. Vector Floating-Point MIN/MAX Instructions
-defm : VPatBinaryFPSDNode_VV_VF<fminnum, "PseudoVFMIN">;
-defm : VPatBinaryFPSDNode_VV_VF<fmaxnum, "PseudoVFMAX">;
+defm : VPatBinaryFPSDNode_VV_VF<fminnum, "PseudoVFMIN", isSEWAware=1>;
+defm : VPatBinaryFPSDNode_VV_VF<fmaxnum, "PseudoVFMAX", isSEWAware=1>;
// 13.13. Vector Floating-Point Compare Instructions
defm : VPatFPSetCCSDNode_VV_VF_FV<SETEQ, "PseudoVMFEQ", "PseudoVMFEQ">;
@@ -1445,7 +1445,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
!listconcat(GetVTypePredicates<fvti>.Predicates,
GetVTypePredicates<fwti>.Predicates)) in
def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
- (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX)
+ (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW)
(fvti.Vector (IMPLICIT_DEF)),
fwti.RegClass:$rs1,
// Value to indicate no rounding mode change in
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 6fde30a..42fee1a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1229,7 +1229,7 @@ multiclass VPatConvertI2FPVL_V_RM<SDPatternOperator vop, string instruction_name
def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
(ivti.Mask VMV0:$vm),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
(ivti.Mask VMV0:$vm),
// Value to indicate no rounding mode change in
@@ -1247,7 +1247,7 @@ multiclass VPatConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> {
def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
(ivti.Mask VMV0:$vm), (XLenVT timm:$frm),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
(ivti.Mask VMV0:$vm), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>;
}
@@ -1315,7 +1315,7 @@ multiclass VPatWConvertI2FPVL_V<SDPatternOperator vop,
def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
(ivti.Mask VMV0:$vm),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_MASK")
+ (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_E"#ivti.SEW#"_MASK")
(fwti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
(ivti.Mask VMV0:$vm),
GPR:$vl, ivti.Log2SEW, TA_MA)>;
@@ -1389,7 +1389,7 @@ multiclass VPatNConvertI2FPVL_W_RM<SDPatternOperator vop,
def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1),
(iwti.Mask VMV0:$vm),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1,
(iwti.Mask VMV0:$vm),
// Value to indicate no rounding mode change in
@@ -1408,7 +1408,7 @@ multiclass VPatNConvertI2FP_RM_VL_W<SDNode vop, string instruction_name> {
def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1),
(iwti.Mask VMV0:$vm), (XLenVT timm:$frm),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1,
(iwti.Mask VMV0:$vm), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>;
}
@@ -2468,8 +2468,8 @@ defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwmsub_vl, "PseudoVFWMSAC">;
defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwnmsub_vl, "PseudoVFWNMSAC">;
// 13.11. Vector Floating-Point MIN/MAX Instructions
-defm : VPatBinaryFPVL_VV_VF<riscv_vfmin_vl, "PseudoVFMIN">;
-defm : VPatBinaryFPVL_VV_VF<riscv_vfmax_vl, "PseudoVFMAX">;
+defm : VPatBinaryFPVL_VV_VF<riscv_vfmin_vl, "PseudoVFMIN", isSEWAware=1>;
+defm : VPatBinaryFPVL_VV_VF<riscv_vfmax_vl, "PseudoVFMAX", isSEWAware=1>;
// 13.13. Vector Floating-Point Compare Instructions
defm : VPatFPSetCCVL_VV_VF_FV<any_riscv_fsetcc_vl, SETEQ,
@@ -2505,14 +2505,14 @@ foreach vti = AllFloatVectors in {
// 13.12. Vector Floating-Point Sign-Injection Instructions
def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX #"_MASK")
+ (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK")
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
TA_MA)>;
// Handle fneg with VFSGNJN using the same input for both operands.
def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX #"_MASK")
+ (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW #"_MASK")
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
TA_MA)>;
@@ -2522,7 +2522,7 @@ foreach vti = AllFloatVectors in {
vti.RegClass:$merge,
(vti.Mask VMV0:$vm),
VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX#"_MASK")
+ (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs1,
vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
TAIL_AGNOSTIC)>;
@@ -2534,7 +2534,7 @@ foreach vti = AllFloatVectors in {
srcvalue,
(vti.Mask true_mask),
VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
+ (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
@@ -2543,7 +2543,7 @@ foreach vti = AllFloatVectors in {
vti.RegClass:$merge,
(vti.Mask VMV0:$vm),
VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_MASK")
+ (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs1,
vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
TAIL_AGNOSTIC)>;
@@ -2672,7 +2672,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
(fvti.Vector fvti.RegClass:$rs1),
(fvti.Mask VMV0:$vm),
VLOpFrag)),
- (!cast<Instruction>("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
(fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
(fvti.Mask VMV0:$vm),
GPR:$vl, fvti.Log2SEW, TA_MA)>;
@@ -2703,7 +2703,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
def : Pat<(fvti.Vector (any_riscv_fpround_vl
(fwti.Vector fwti.RegClass:$rs1),
(fwti.Mask VMV0:$vm), VLOpFrag)),
- (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
(fwti.Mask VMV0:$vm),
// Value to indicate no rounding mode change in
@@ -2716,7 +2716,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
def : Pat<(fvti.Vector (any_riscv_fncvt_rod_vl
(fwti.Vector fwti.RegClass:$rs1),
(fwti.Mask VMV0:$vm), VLOpFrag)),
- (!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
(fwti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MA)>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 9a6818c..71aa1f1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -307,10 +307,16 @@ multiclass VPseudoVC_X<LMULInfo m, DAGOperand RS1Class,
Operand OpClass = payload2> {
let VLMul = m.value in {
let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
- def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_X<OpClass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>;
+ def "PseudoVC_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_X<OpClass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_" # NAME # "_" # m.MX)]>;
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
}
- def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_" # m.MX
+ : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
}
}
@@ -318,10 +324,16 @@ multiclass VPseudoVC_XV<LMULInfo m, DAGOperand RS1Class,
Operand OpClass = payload2> {
let VLMul = m.value in {
let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
- def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XV<OpClass, m.vrclass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_XV<OpClass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_" # NAME # "_" # m.MX)]>;
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
}
- def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_" # m.MX
+ : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
}
}
@@ -329,10 +341,16 @@ multiclass VPseudoVC_XVV<LMULInfo m, DAGOperand RS1Class,
Operand OpClass = payload2> {
let VLMul = m.value in {
let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
- def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_" # NAME # "_" # m.MX)]>;
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
}
- def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_" # m.MX
+ : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
}
}
@@ -340,11 +358,17 @@ multiclass VPseudoVC_XVW<LMULInfo m, DAGOperand RS1Class,
Operand OpClass = payload2> {
let VLMul = m.value in {
let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in
- def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_" # NAME # "_" # m.MX)]>;
let Constraints = "@earlyclobber $rd, $rd = $rs3" in {
let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in
- def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
+ def "PseudoVC_V_" # NAME # "_" # m.MX
+ : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
}
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td
index dd13a07..32e7f96 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td
@@ -20,13 +20,7 @@ class CMOPInst<bits<3> imm3, string opcodestr>
let Inst{12-11} = 0;
}
-// CMOP1, CMOP5 is used by Zicfiss.
-let Predicates = [HasStdExtZcmop, NoHasStdExtZicfiss] in {
- def CMOP1 : CMOPInst<0, "cmop.1">, Sched<[]>;
- def CMOP5 : CMOPInst<2, "cmop.5">, Sched<[]>;
-}
-
-foreach n = [3, 7, 9, 11, 13, 15] in {
+foreach n = [1, 3, 5, 7, 9, 11, 13, 15] in {
let Predicates = [HasStdExtZcmop] in
- def CMOP # n : CMOPInst<!srl(n, 1), "cmop." # n>, Sched<[]>;
+ def C_MOP # n : CMOPInst<!srl(n, 1), "c.mop." # n>, Sched<[]>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index 2d72e98..16f7279 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -152,7 +152,7 @@ let Predicates = [HasStdExtZvknhaOrZvknhb], RVVConstraint = Sha2Constraint in {
def VSHA2MS_VV : PALUVVNoVmTernary<0b101101, OPMVV, "vsha2ms.vv">;
} // Predicates = [HasStdExtZvknhaOrZvknhb]
-let Predicates = [HasStdExtZvkned]in {
+let Predicates = [HasStdExtZvkned] in {
defm VAESDF : VAES_MV_V_S<0b101000, 0b101001, 0b00001, OPMVV, "vaesdf">;
defm VAESDM : VAES_MV_V_S<0b101000, 0b101001, 0b00000, OPMVV, "vaesdm">;
defm VAESEF : VAES_MV_V_S<0b101000, 0b101001, 0b00011, OPMVV, "vaesef">;
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 39d420c..ead91c5 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -12,15 +12,24 @@
// extended bits aren't consumed or because the input was already sign extended
// by an earlier instruction.
//
-// Then it removes the -w suffix from opw instructions whenever all users are
-// dependent only on the lower word of the result of the instruction.
-// The cases handled are:
-// * addw because c.add has a larger register encoding than c.addw.
-// * addiw because it helps reduce test differences between RV32 and RV64
-// w/o being a pessimization.
-// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
-// * slliw because c.slliw doesn't exist and c.slli does
+// Then:
+// 1. Unless explicitly disabled or the target prefers instructions with W suffix,
+// it removes the -w suffix from opw instructions whenever all users are
+// dependent only on the lower word of the result of the instruction.
+// The cases handled are:
+// * addw because c.add has a larger register encoding than c.addw.
+// * addiw because it helps reduce test differences between RV32 and RV64
+// w/o being a pessimization.
+// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
+// * slliw because c.slliw doesn't exist and c.slli does
//
+// 2. Or if explicitly enabled or the target prefers instructions with W suffix,
+// it adds the W suffix to the instruction whenever all users are dependent
+// only on the lower word of the result of the instruction.
+// The cases handled are:
+// * add/addi/sub/mul.
+// * slli with imm < 32.
+// * ld/lwu.
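+//
+//    As an illustrative sketch (hypothetical instructions, not taken from a
+//    specific test), when every user only reads the low 32 bits of a value:
+//      slli a0, a0, 3   ->   slliw a0, a0, 3
+//      ld   a1, 0(a2)   ->   lw    a1, 0(a2)
+//    The W forms sign-extend from bit 31, which is harmless here because no
+//    user observes the upper bits.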
//===---------------------------------------------------------------------===//
#include "RISCV.h"
@@ -60,6 +69,8 @@ public:
const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
bool stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
+ bool appendWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
+ const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -672,9 +683,6 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
const RISCVInstrInfo &TII,
const RISCVSubtarget &ST,
MachineRegisterInfo &MRI) {
- if (DisableStripWSuffix || !ST.enableStripWSuffix())
- return false;
-
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
@@ -698,6 +706,58 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
return MadeChange;
}
+bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF,
+ const RISCVInstrInfo &TII,
+ const RISCVSubtarget &ST,
+ MachineRegisterInfo &MRI) {
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ unsigned WOpc;
+ // TODO: Add more?
+ switch (MI.getOpcode()) {
+ default:
+ continue;
+ case RISCV::ADD:
+ WOpc = RISCV::ADDW;
+ break;
+ case RISCV::ADDI:
+ WOpc = RISCV::ADDIW;
+ break;
+ case RISCV::SUB:
+ WOpc = RISCV::SUBW;
+ break;
+ case RISCV::MUL:
+ WOpc = RISCV::MULW;
+ break;
+ case RISCV::SLLI:
+        // SLLIW reads only the lowest 5 bits of the shift amount, while SLLI
+        // reads the lowest 6 bits.
+ if (MI.getOperand(2).getImm() >= 32)
+ continue;
+ WOpc = RISCV::SLLIW;
+ break;
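+      // A 64-bit load or a zero-extending word load can be narrowed to LW when
+      // only the low 32 bits of the loaded value are consumed.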
+ case RISCV::LD:
+ case RISCV::LWU:
+ WOpc = RISCV::LW;
+ break;
+ }
+
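+      // Only rewrite when every user of the result depends solely on its lower
+      // 32 bits.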
+ if (hasAllWUsers(MI, ST, MRI)) {
+ LLVM_DEBUG(dbgs() << "Replacing " << MI);
+ MI.setDesc(TII.get(WOpc));
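+        // The wrap/exact flags were established for the 64-bit form and may no
+        // longer hold for the W form, so conservatively drop them.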
+ MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ MI.clearFlag(MachineInstr::MIFlag::IsExact);
+ LLVM_DEBUG(dbgs() << " with " << MI);
+ ++NumTransformedToWInstrs;
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -711,7 +771,12 @@ bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
- MadeChange |= stripWSuffixes(MF, TII, ST, MRI);
+
+ if (!(DisableStripWSuffix || ST.preferWInst()))
+ MadeChange |= stripWSuffixes(MF, TII, ST, MRI);
+
+ if (ST.preferWInst())
+ MadeChange |= appendWSuffixes(MF, TII, ST, MRI);
return MadeChange;
}
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index fd6d607..f9a557e 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -56,11 +56,13 @@ class RISCVTuneProcessorModel<string n,
def GENERIC_RV32 : RISCVProcessorModel<"generic-rv32",
NoSchedModel,
- [Feature32Bit]>,
+ [Feature32Bit,
+ FeatureStdExtI]>,
GenericTuneInfo;
def GENERIC_RV64 : RISCVProcessorModel<"generic-rv64",
NoSchedModel,
- [Feature64Bit]>,
+ [Feature64Bit,
+ FeatureStdExtI]>,
GenericTuneInfo;
// Support generic for compatibility with other targets. The triple will be used
// to change to the appropriate rv32/rv64 version.
@@ -69,11 +71,13 @@ def : ProcessorModel<"generic", NoSchedModel, []>, GenericTuneInfo;
def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32",
RocketModel,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtZicsr]>;
def ROCKET_RV64 : RISCVProcessorModel<"rocket-rv64",
RocketModel,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtZicsr]>;
def ROCKET : RISCVTuneProcessorModel<"rocket",
@@ -86,6 +90,7 @@ def SIFIVE_7 : RISCVTuneProcessorModel<"sifive-7-series",
def SIFIVE_E20 : RISCVProcessorModel<"sifive-e20",
RocketModel,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
FeatureStdExtM,
@@ -94,6 +99,7 @@ def SIFIVE_E20 : RISCVProcessorModel<"sifive-e20",
def SIFIVE_E21 : RISCVProcessorModel<"sifive-e21",
RocketModel,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
FeatureStdExtM,
@@ -103,6 +109,7 @@ def SIFIVE_E21 : RISCVProcessorModel<"sifive-e21",
def SIFIVE_E24 : RISCVProcessorModel<"sifive-e24",
RocketModel,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -112,6 +119,7 @@ def SIFIVE_E24 : RISCVProcessorModel<"sifive-e24",
def SIFIVE_E31 : RISCVProcessorModel<"sifive-e31",
RocketModel,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtZicsr,
FeatureStdExtM,
@@ -121,6 +129,7 @@ def SIFIVE_E31 : RISCVProcessorModel<"sifive-e31",
def SIFIVE_E34 : RISCVProcessorModel<"sifive-e34",
RocketModel,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -130,6 +139,7 @@ def SIFIVE_E34 : RISCVProcessorModel<"sifive-e34",
def SIFIVE_E76 : RISCVProcessorModel<"sifive-e76",
SiFive7Model,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -140,6 +150,7 @@ def SIFIVE_E76 : RISCVProcessorModel<"sifive-e76",
def SIFIVE_S21 : RISCVProcessorModel<"sifive-s21",
RocketModel,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
FeatureStdExtM,
@@ -149,6 +160,7 @@ def SIFIVE_S21 : RISCVProcessorModel<"sifive-s21",
def SIFIVE_S51 : RISCVProcessorModel<"sifive-s51",
RocketModel,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
FeatureStdExtM,
@@ -158,6 +170,7 @@ def SIFIVE_S51 : RISCVProcessorModel<"sifive-s51",
def SIFIVE_S54 : RISCVProcessorModel<"sifive-s54",
RocketModel,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -168,6 +181,7 @@ def SIFIVE_S54 : RISCVProcessorModel<"sifive-s54",
def SIFIVE_S76 : RISCVProcessorModel<"sifive-s76",
SiFive7Model,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -180,6 +194,7 @@ def SIFIVE_S76 : RISCVProcessorModel<"sifive-s76",
def SIFIVE_U54 : RISCVProcessorModel<"sifive-u54",
RocketModel,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -190,6 +205,7 @@ def SIFIVE_U54 : RISCVProcessorModel<"sifive-u54",
def SIFIVE_U74 : RISCVProcessorModel<"sifive-u74",
SiFive7Model,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -200,6 +216,7 @@ def SIFIVE_U74 : RISCVProcessorModel<"sifive-u74",
def SIFIVE_X280 : RISCVProcessorModel<"sifive-x280", SiFive7Model,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -217,6 +234,7 @@ def SIFIVE_X280 : RISCVProcessorModel<"sifive-x280", SiFive7Model,
def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", SiFiveP400Model,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -239,7 +257,8 @@ def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", SiFiveP400Model,
FeatureStdExtZbb,
FeatureStdExtZbs,
FeatureStdExtZfhmin,
- FeatureFastUnalignedAccess],
+ FeatureUnalignedScalarMem,
+ FeatureUnalignedVectorMem],
[TuneNoDefaultUnroll,
TuneConditionalCompressedMoveFusion,
TuneLUIADDIFusion,
@@ -247,6 +266,7 @@ def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", SiFiveP400Model,
def SIFIVE_P670 : RISCVProcessorModel<"sifive-p670", SiFiveP600Model,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -276,7 +296,8 @@ def SIFIVE_P670 : RISCVProcessorModel<"sifive-p670", SiFiveP600Model,
FeatureStdExtZvkng,
FeatureStdExtZvksc,
FeatureStdExtZvksg,
- FeatureFastUnalignedAccess],
+ FeatureUnalignedScalarMem,
+ FeatureUnalignedVectorMem],
[TuneNoDefaultUnroll,
TuneConditionalCompressedMoveFusion,
TuneLUIADDIFusion,
@@ -286,6 +307,7 @@ def SIFIVE_P670 : RISCVProcessorModel<"sifive-p670", SiFiveP600Model,
def SYNTACORE_SCR1_BASE : RISCVProcessorModel<"syntacore-scr1-base",
SyntacoreSCR1Model,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
FeatureStdExtC],
@@ -294,6 +316,7 @@ def SYNTACORE_SCR1_BASE : RISCVProcessorModel<"syntacore-scr1-base",
def SYNTACORE_SCR1_MAX : RISCVProcessorModel<"syntacore-scr1-max",
SyntacoreSCR1Model,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
FeatureStdExtM,
@@ -303,6 +326,7 @@ def SYNTACORE_SCR1_MAX : RISCVProcessorModel<"syntacore-scr1-max",
def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
NoSchedModel,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtZicsr,
FeatureStdExtZicntr,
@@ -332,6 +356,7 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
XiangShanNanHuModel,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
FeatureStdExtM,
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 316daf2..1a0533c 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -533,6 +533,12 @@ def VR : VReg<!listconcat(VM1VTs, VMaskVTs),
(add (sequence "V%u", 8, 31),
(sequence "V%u", 7, 0)), 1>;
+// V0 is likely to be used as a mask, so we move it to the front of the
+// allocation order.
+def VMM1 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31)), 1>;
+def VMM2 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31, 2)), 1>;
+def VMM4 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31, 4)), 1>;
+def VMM8 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31, 8)), 1>;
+
def VRNoV0 : VReg<!listconcat(VM1VTs, VMaskVTs), (sub VR, V0), 1>;
def VRM2 : VReg<VM2VTs, (add (sequence "V%uM2", 8, 31, 2),
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index e74c7aa..65494e7 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -261,4 +261,5 @@ defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZfh;
defm : UnsupportedSchedSFB;
+defm : UnsupportedSchedXsfvcp;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 2a13cb4..a532066 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -744,6 +744,13 @@ foreach mx = SchedMxListF in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ }
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
}
@@ -751,14 +758,9 @@ foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMinMaxF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFClassV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
@@ -784,10 +786,11 @@ foreach mx = SchedMxListF in {
// Widening
foreach mx = SchedMxListW in {
- defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
- defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
foreach mx = SchedMxListFW in {
@@ -801,16 +804,13 @@ foreach mx = SchedMxListFW in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
-}
-foreach mx = SchedMxListFW in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- }
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
// Narrowing
foreach mx = SchedMxListW in {
@@ -821,11 +821,13 @@ foreach mx = SchedMxListW in {
}
}
foreach mx = SchedMxListFW in {
- defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
- defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
+ defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ }
}
}
@@ -960,6 +962,54 @@ let Latency = 3 in
def : InstRW<[WriteIALU], (instrs COPY)>;
+// VCIX
+//
+// In principle we don't know the latency of any VCIX instructions. But instead
+// of taking the default of 1, which can lead to issues [1], we assume that they
+// have a fairly high latency.
+//
+// [1] https://github.com/llvm/llvm-project/issues/83391
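+//
+// As a rough, hypothetical illustration: if SiFive7GetCyclesDefault were 4 for
+// a given LMUL, the VCIX writes below would be assigned a latency of
+// 4 * 10 = 40 cycles rather than the default of 1.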
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = !mul(Cycles, 10),
+ AcquireAtCycles = [0, 1],
+ ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVC_V_I", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_X", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_IV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_XV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_IVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_IVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_VVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_VVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_XVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_XVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ foreach f = ["FPR16", "FPR32", "FPR64"] in {
+ defm "" : LMULWriteResMX<"WriteVC_V_" # f # "V", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_" # f # "VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_" # f # "VW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ }
+ defm "" : LMULWriteResMX<"WriteVC_I", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_X", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_IV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_XV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_IVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_IVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_VVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_VVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_XVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_XVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ foreach f = ["FPR16", "FPR32", "FPR64"] in {
+ defm "" : LMULWriteResMX<"WriteVC_" # f # "V", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_" # f # "VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_" # f # "VW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Bypass and advance
@@ -1169,24 +1219,24 @@ defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>;
-defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>;
-defm "" : LMULReadAdvance<"ReadVFSgnjF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
-defm "" : LMULReadAdvance<"ReadVFCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>;
defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
-defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
// 15. Vector Reduction Operations
def : ReadAdvance<ReadVIRedV, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
index 8ec2e4f..fccdd7e 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
@@ -366,4 +366,5 @@ defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedSFB;
defm : UnsupportedSchedZfa;
defm : UnsupportedSchedV;
+defm : UnsupportedSchedXsfvcp;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index 80090a0..6e4fb19 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -495,45 +495,37 @@ foreach mx = SchedMxListF in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
-
- }
- }
-}
-
-foreach mx = SchedMxListF in {
- foreach sew = SchedSEWSet<mx, isF=1>.val in {
- defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
- defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
- let Latency = 6, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
+ let Latency = 2, ReleaseAtCycles = [LMulLat] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ let Latency = 3, ReleaseAtCycles = [LMulLat] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
}
foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, isF=1>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
- defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
- let Latency = 2, ReleaseAtCycles = [LMulLat] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList, isF=1>.c;
+ let Latency = 1, ReleaseAtCycles = [LMulLat] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ }
}
}
foreach mx = SchedMxList in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 3, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- }
+ let Latency = 3, ReleaseAtCycles = [LMulLat] in
+ defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
let Latency = 2, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFiveP600VectorArith], mx, IsWorstCase>;
}
let Latency = 1, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMinMaxV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMinMaxF", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFClassV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMovV", [SiFiveP600VectorArith], mx, IsWorstCase>;
@@ -542,19 +534,18 @@ foreach mx = SchedMxList in {
// Widening
foreach mx = SchedMxListW in {
- defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
- defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = 3, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
+ defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
+ let Latency = 3, ReleaseAtCycles = [LMulLat] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
}
foreach mx = SchedMxListFW in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListFW>.c;
- let Latency = 6, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- }
+ let Latency = 6, ReleaseAtCycles = [LMulLat] in
+ defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
}
foreach mx = SchedMxListFW in {
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
@@ -567,6 +558,7 @@ foreach mx = SchedMxListFW in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
}
}
@@ -579,11 +571,13 @@ foreach mx = SchedMxListW in {
}
}
foreach mx = SchedMxListFW in {
- defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
- defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListFW>.c;
- let Latency = 3, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
+ defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
+ let Latency = 3, ReleaseAtCycles = [LMulLat] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ }
}
}
@@ -968,22 +962,22 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULSEWReadAdvance<"ReadVFRecpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
-defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>;
-defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>;
-defm "" : LMULReadAdvance<"ReadVFSgnjF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>;
defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
-defm "" : LMULReadAdvance<"ReadVFCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>;
defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
-defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
// 15. Vector Reduction Operations
def : ReadAdvance<ReadVIRedV, 0>;
@@ -1046,4 +1040,5 @@ defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedSFB;
defm : UnsupportedSchedZfa;
+defm : UnsupportedSchedXsfvcp;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
index 9625d17..0885e32 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
@@ -212,4 +212,5 @@ defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZfh;
+defm : UnsupportedSchedXsfvcp;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
index 4fc7b03..e0f1fab 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -311,4 +311,5 @@ defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZfh;
defm : UnsupportedSchedSFB;
defm : UnsupportedSchedZabha;
+defm : UnsupportedSchedXsfvcp;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index 1d19624..0086557 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -296,3 +296,4 @@ def : ReadAdvance<ReadAtomicHD, 0>;
// Include the scheduler resources for other instruction extensions.
include "RISCVScheduleZb.td"
include "RISCVScheduleV.td"
+include "RISCVScheduleXSf.td"
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 6070482..5993884 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -253,6 +253,18 @@ multiclass LMULReadAdvanceFW<string name, int val, list<SchedWrite> writes = []>
: LMULReadAdvanceImpl<name, val, writes>;
class LMULSchedWriteListFW<list<string> names> : LMULSchedWriteListImpl<names, SchedMxListFW>;
+multiclass LMULSEWSchedWritesW<string name>
+ : LMULSEWSchedWritesImpl<name, SchedMxListW, isF = 0, isWidening = 1>;
+multiclass LMULSEWSchedReadsW<string name>
+ : LMULSEWSchedReadsImpl<name, SchedMxListW, isF = 0, isWidening = 1>;
+multiclass LMULSEWWriteResW<string name, list<ProcResourceKind> resources>
+ : LMULSEWWriteResImpl<name, resources, SchedMxListW, isF = 0,
+ isWidening = 1>;
+multiclass
+ LMULSEWReadAdvanceW<string name, int val, list<SchedWrite> writes = []>
+ : LMULSEWReadAdvanceImpl<name, val, writes, SchedMxListW, isF = 0,
+ isWidening = 1>;
+
multiclass LMULSEWSchedWritesFW<string name>
: LMULSEWSchedWritesImpl<name, SchedMxListFW, isF = 1, isWidening = 1>;
multiclass LMULSEWSchedReadsFW<string name>
@@ -434,11 +446,11 @@ defm "" : LMULSEWSchedWritesF<"WriteVFSqrtV">;
// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
defm "" : LMULSEWSchedWritesF<"WriteVFRecpV">;
// 13.11. Vector Floating-Point MIN/MAX Instructions
-defm "" : LMULSchedWrites<"WriteVFMinMaxV">;
-defm "" : LMULSchedWrites<"WriteVFMinMaxF">;
+defm "" : LMULSEWSchedWritesF<"WriteVFMinMaxV">;
+defm "" : LMULSEWSchedWritesF<"WriteVFMinMaxF">;
// 13.12. Vector Floating-Point Sign-Injection Instructions
-defm "" : LMULSchedWrites<"WriteVFSgnjV">;
-defm "" : LMULSchedWrites<"WriteVFSgnjF">;
+defm "" : LMULSEWSchedWritesF<"WriteVFSgnjV">;
+defm "" : LMULSEWSchedWritesF<"WriteVFSgnjF">;
// 13.13. Vector Floating-Point Compare Instructions
defm "" : LMULSchedWrites<"WriteVFCmpV">;
defm "" : LMULSchedWrites<"WriteVFCmpF">;
@@ -449,16 +461,16 @@ defm "" : LMULSchedWrites<"WriteVFMergeV">;
// 13.16. Vector Floating-Point Move Instruction
defm "" : LMULSchedWrites<"WriteVFMovV">;
// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
-defm "" : LMULSchedWrites<"WriteVFCvtIToFV">;
+defm "" : LMULSEWSchedWritesF<"WriteVFCvtIToFV">;
defm "" : LMULSchedWrites<"WriteVFCvtFToIV">;
// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
-defm "" : LMULSchedWritesW<"WriteVFWCvtIToFV">;
+defm "" : LMULSEWSchedWritesW<"WriteVFWCvtIToFV">;
defm "" : LMULSchedWritesFW<"WriteVFWCvtFToIV">;
-defm "" : LMULSchedWritesFW<"WriteVFWCvtFToFV">;
+defm "" : LMULSEWSchedWritesFW<"WriteVFWCvtFToFV">;
// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
-defm "" : LMULSchedWritesFW<"WriteVFNCvtIToFV">;
+defm "" : LMULSEWSchedWritesFW<"WriteVFNCvtIToFV">;
defm "" : LMULSchedWritesW<"WriteVFNCvtFToIV">;
-defm "" : LMULSchedWritesFW<"WriteVFNCvtFToFV">;
+defm "" : LMULSEWSchedWritesFW<"WriteVFNCvtFToFV">;
// 14. Vector Reduction Operations
// The latency of reduction is determined by the size of the read resource.
@@ -659,11 +671,11 @@ defm "" : LMULSEWSchedReadsF<"ReadVFSqrtV">;
// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
defm "" : LMULSEWSchedReadsF<"ReadVFRecpV">;
// 13.11. Vector Floating-Point MIN/MAX Instructions
-defm "" : LMULSchedReads<"ReadVFMinMaxV">;
-defm "" : LMULSchedReads<"ReadVFMinMaxF">;
+defm "" : LMULSEWSchedReadsF<"ReadVFMinMaxV">;
+defm "" : LMULSEWSchedReadsF<"ReadVFMinMaxF">;
// 13.12. Vector Floating-Point Sign-Injection Instructions
-defm "" : LMULSchedReads<"ReadVFSgnjV">;
-defm "" : LMULSchedReads<"ReadVFSgnjF">;
+defm "" : LMULSEWSchedReadsF<"ReadVFSgnjV">;
+defm "" : LMULSEWSchedReadsF<"ReadVFSgnjF">;
// 13.13. Vector Floating-Point Compare Instructions
defm "" : LMULSchedReads<"ReadVFCmpV">;
defm "" : LMULSchedReads<"ReadVFCmpF">;
@@ -675,16 +687,16 @@ defm "" : LMULSchedReads<"ReadVFMergeF">;
// 13.16. Vector Floating-Point Move Instruction
defm "" : LMULSchedReads<"ReadVFMovF">;
// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
-defm "" : LMULSchedReads<"ReadVFCvtIToFV">;
+defm "" : LMULSEWSchedReadsF<"ReadVFCvtIToFV">;
defm "" : LMULSchedReads<"ReadVFCvtFToIV">;
// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
-defm "" : LMULSchedReadsW<"ReadVFWCvtIToFV">;
+defm "" : LMULSEWSchedReadsW<"ReadVFWCvtIToFV">;
defm "" : LMULSchedReadsFW<"ReadVFWCvtFToIV">;
-defm "" : LMULSchedReadsFW<"ReadVFWCvtFToFV">;
+defm "" : LMULSEWSchedReadsFW<"ReadVFWCvtFToFV">;
// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
-defm "" : LMULSchedReadsFW<"ReadVFNCvtIToFV">;
+defm "" : LMULSEWSchedReadsFW<"ReadVFNCvtIToFV">;
defm "" : LMULSchedReadsW<"ReadVFNCvtFToIV">;
-defm "" : LMULSchedReadsFW<"ReadVFNCvtFToFV">;
+defm "" : LMULSEWSchedReadsFW<"ReadVFNCvtFToFV">;
// 14. Vector Reduction Operations
// 14.1. Vector Single-Width Integer Reduction Instructions
@@ -896,23 +908,23 @@ defm "" : LMULSEWWriteResFW<"WriteVFWMulAddV", []>;
defm "" : LMULSEWWriteResFW<"WriteVFWMulAddF", []>;
defm "" : LMULSEWWriteResF<"WriteVFSqrtV", []>;
defm "" : LMULSEWWriteResF<"WriteVFRecpV", []>;
-defm "" : LMULWriteRes<"WriteVFMinMaxV", []>;
-defm "" : LMULWriteRes<"WriteVFMinMaxF", []>;
-defm "" : LMULWriteRes<"WriteVFSgnjV", []>;
-defm "" : LMULWriteRes<"WriteVFSgnjF", []>;
+defm "" : LMULSEWWriteResF<"WriteVFMinMaxV", []>;
+defm "" : LMULSEWWriteResF<"WriteVFMinMaxF", []>;
+defm "" : LMULSEWWriteResF<"WriteVFSgnjV", []>;
+defm "" : LMULSEWWriteResF<"WriteVFSgnjF", []>;
defm "" : LMULWriteRes<"WriteVFCmpV", []>;
defm "" : LMULWriteRes<"WriteVFCmpF", []>;
defm "" : LMULWriteRes<"WriteVFClassV", []>;
defm "" : LMULWriteRes<"WriteVFMergeV", []>;
defm "" : LMULWriteRes<"WriteVFMovV", []>;
-defm "" : LMULWriteRes<"WriteVFCvtIToFV", []>;
+defm "" : LMULSEWWriteResF<"WriteVFCvtIToFV", []>;
defm "" : LMULWriteRes<"WriteVFCvtFToIV", []>;
-defm "" : LMULWriteResW<"WriteVFWCvtIToFV", []>;
+defm "" : LMULSEWWriteResW<"WriteVFWCvtIToFV", []>;
defm "" : LMULWriteResFW<"WriteVFWCvtFToIV", []>;
-defm "" : LMULWriteResFW<"WriteVFWCvtFToFV", []>;
-defm "" : LMULWriteResFW<"WriteVFNCvtIToFV", []>;
+defm "" : LMULSEWWriteResFW<"WriteVFWCvtFToFV", []>;
+defm "" : LMULSEWWriteResFW<"WriteVFNCvtIToFV", []>;
defm "" : LMULWriteResW<"WriteVFNCvtFToIV", []>;
-defm "" : LMULWriteResFW<"WriteVFNCvtFToFV", []>;
+defm "" : LMULSEWWriteResFW<"WriteVFNCvtFToFV", []>;
// 14. Vector Reduction Operations
defm "" : LMULSEWWriteRes<"WriteVIRedV_From", []>;
@@ -1052,24 +1064,24 @@ defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>;
-defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>;
-defm "" : LMULReadAdvance<"ReadVFSgnjF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
-defm "" : LMULReadAdvance<"ReadVFCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>;
defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
-defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"SEWReadVFNCvtIToFV", 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
// 14. Vector Reduction Operations
def : ReadAdvance<ReadVIRedV, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleXSf.td b/llvm/lib/Target/RISCV/RISCVScheduleXSf.td
new file mode 100644
index 0000000..58d5084
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVScheduleXSf.td
@@ -0,0 +1,59 @@
+//===-- RISCVScheduleXSf.td - Scheduling Definitions XSf ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the scheduling information for SiFive extensions.
+//
+//===----------------------------------------------------------------------===//
+
+multiclass LMULSchedWritesVCIX<string id>{
+defm "" : LMULSchedWrites<"WriteVC_" # id>;
+defm "" : LMULSchedWrites<"WriteVC_V_" # id>;
+}
+
+defm "" : LMULSchedWritesVCIX<"I">;
+defm "" : LMULSchedWritesVCIX<"X">;
+defm "" : LMULSchedWritesVCIX<"IV">;
+defm "" : LMULSchedWritesVCIX<"VV">;
+defm "" : LMULSchedWritesVCIX<"XV">;
+defm "" : LMULSchedWritesVCIX<"IVV">;
+defm "" : LMULSchedWritesVCIX<"IVW">;
+defm "" : LMULSchedWritesVCIX<"VVV">;
+defm "" : LMULSchedWritesVCIX<"VVW">;
+defm "" : LMULSchedWritesVCIX<"XVV">;
+defm "" : LMULSchedWritesVCIX<"XVW">;
+foreach f = ["FPR16", "FPR32", "FPR64"] in {
+ defm "" : LMULSchedWritesVCIX<f # "V">;
+ defm "" : LMULSchedWritesVCIX<f # "VV">;
+ defm "" : LMULSchedWritesVCIX<f # "VW">;
+}
+
+multiclass LMULWriteResVCIX<string id, list<ProcResourceKind> resources>{
+defm : LMULWriteRes<"WriteVC_" # id, resources>;
+defm : LMULWriteRes<"WriteVC_V_" # id, resources>;
+}
+
+multiclass UnsupportedSchedXsfvcp {
+let Unsupported = true in {
+defm : LMULWriteResVCIX<"I", []>;
+defm : LMULWriteResVCIX<"X", []>;
+defm : LMULWriteResVCIX<"IV", []>;
+defm : LMULWriteResVCIX<"VV", []>;
+defm : LMULWriteResVCIX<"XV", []>;
+defm : LMULWriteResVCIX<"IVV", []>;
+defm : LMULWriteResVCIX<"IVW", []>;
+defm : LMULWriteResVCIX<"VVV", []>;
+defm : LMULWriteResVCIX<"VVW", []>;
+defm : LMULWriteResVCIX<"XVV", []>;
+defm : LMULWriteResVCIX<"XVW", []>;
+foreach f = ["FPR16", "FPR32", "FPR64"] in {
+ defm : LMULWriteResVCIX<f # "V", []>;
+ defm : LMULWriteResVCIX<f # "VV", []>;
+ defm : LMULWriteResVCIX<f # "VW", []>;
+}
+}
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index bc9756c..56f5bd8 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1335,8 +1335,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
I);
+ std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
if (CondTy->isVectorTy()) {
if (ValTy->getScalarSizeInBits() == 1) {
// vmandn.mm v8, v8, v9
@@ -1375,14 +1375,15 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
LT.second, CostKind);
}
- if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
- ValTy->isVectorTy()) {
- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
-
- // Support natively.
- if (CmpInst::isIntPredicate(VecPred))
- return LT.first * 1;
+ if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
+ CmpInst::isIntPredicate(VecPred)) {
+    // Use VMSLT_VV to represent VMSEQ, VMSNE, VMSLTU, VMSLEU, VMSLT, and
+    // VMSLE, provided they incur the same cost across all implementations.
+ return LT.first *
+ getRISCVInstructionCost(RISCV::VMSLT_VV, LT.second, CostKind);
+ }
+ if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy()) {
// If we do not support the input floating point vector type, use the base
// one which will calculate as:
// ScalarizeCost + Num * Cost for fixed vector,
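A minimal sketch (not part of the patch) of how a client might query the cost that this hook now computes for a vector integer compare; the function name and the choice of a scalable <vscale x 4 x i32> type are illustrative assumptions.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Ask the cost model what a <vscale x 4 x i32> signed-less-than compare costs.
// With the change above, RISCVTTIImpl prices this as LT.first times the cost
// of VMSLT_VV for the legalized type.
InstructionCost vectorICmpCost(const TargetTransformInfo &TTI,
                               LLVMContext &Ctx) {
  auto *ValTy = ScalableVectorType::get(Type::getInt32Ty(Ctx), 4);
  Type *CondTy = CmpInst::makeCmpResultType(ValTy); // <vscale x 4 x i1>
  return TTI.getCmpSelInstrCost(Instruction::ICmp, ValTy, CondTy,
                                CmpInst::ICMP_SLT,
                                TargetTransformInfo::TCK_RecipThroughput);
}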
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index e0c0e65..2f9281a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -228,7 +228,7 @@ public:
return false;
EVT ElemType = DataTypeVT.getScalarType();
- if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize())
+ if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
return false;
return TLI->isLegalElementTypeForRVV(ElemType);
@@ -253,7 +253,7 @@ public:
return false;
EVT ElemType = DataTypeVT.getScalarType();
- if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize())
+ if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
return false;
return TLI->isLegalElementTypeForRVV(ElemType);
diff --git a/llvm/lib/Target/SPIRV/SPIRV.h b/llvm/lib/Target/SPIRV/SPIRV.h
index 6979107..fb8580c 100644
--- a/llvm/lib/Target/SPIRV/SPIRV.h
+++ b/llvm/lib/Target/SPIRV/SPIRV.h
@@ -24,7 +24,7 @@ FunctionPass *createSPIRVStripConvergenceIntrinsicsPass();
FunctionPass *createSPIRVRegularizerPass();
FunctionPass *createSPIRVPreLegalizerPass();
FunctionPass *createSPIRVPostLegalizerPass();
-FunctionPass *createSPIRVEmitIntrinsicsPass(SPIRVTargetMachine *TM);
+ModulePass *createSPIRVEmitIntrinsicsPass(SPIRVTargetMachine *TM);
InstructionSelector *
createSPIRVInstructionSelector(const SPIRVTargetMachine &TM,
const SPIRVSubtarget &Subtarget,
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index 9e4ba21..c107b99 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -383,7 +383,16 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
if (F.isDeclaration())
GR->add(&F, &MIRBuilder.getMF(), FuncVReg);
FunctionType *FTy = getOriginalFunctionType(F);
- SPIRVType *RetTy = GR->getOrCreateSPIRVType(FTy->getReturnType(), MIRBuilder);
+ Type *FRetTy = FTy->getReturnType();
+ if (isUntypedPointerTy(FRetTy)) {
+ if (Type *FRetElemTy = GR->findDeducedElementType(&F)) {
+ TypedPointerType *DerivedTy =
+ TypedPointerType::get(FRetElemTy, getPointerAddressSpace(FRetTy));
+ GR->addReturnType(&F, DerivedTy);
+ FRetTy = DerivedTy;
+ }
+ }
+ SPIRVType *RetTy = GR->getOrCreateSPIRVType(FRetTy, MIRBuilder);
FTy = fixFunctionTypeIfPtrArgs(GR, F, FTy, RetTy, ArgTypeVRegs);
SPIRVType *FuncTy = GR->getOrCreateOpTypeFunctionWithArgs(
FTy, RetTy, ArgTypeVRegs, MIRBuilder);
@@ -505,8 +514,13 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// TODO: support constexpr casts and indirect calls.
if (CF == nullptr)
return false;
- if (FunctionType *FTy = getOriginalFunctionType(*CF))
+ if (FunctionType *FTy = getOriginalFunctionType(*CF)) {
OrigRetTy = FTy->getReturnType();
+ if (isUntypedPointerTy(OrigRetTy)) {
+ if (auto *DerivedRetTy = GR->findReturnType(CF))
+ OrigRetTy = DerivedRetTy;
+ }
+ }
}
MachineRegisterInfo *MRI = MIRBuilder.getMRI();
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index e8ce5a3..472bc86 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -51,7 +51,7 @@ void initializeSPIRVEmitIntrinsicsPass(PassRegistry &);
namespace {
class SPIRVEmitIntrinsics
- : public FunctionPass,
+ : public ModulePass,
public InstVisitor<SPIRVEmitIntrinsics, Instruction *> {
SPIRVTargetMachine *TM = nullptr;
SPIRVGlobalRegistry *GR = nullptr;
@@ -61,6 +61,9 @@ class SPIRVEmitIntrinsics
DenseMap<Instruction *, Type *> AggrConstTypes;
DenseSet<Instruction *> AggrStores;
+  // A registry of created Intrinsic::spv_assign_ptr_type instructions.
+ DenseMap<Value *, CallInst *> AssignPtrTypeInstr;
+
// deduce element type of untyped pointers
Type *deduceElementType(Value *I);
Type *deduceElementTypeHelper(Value *I);
@@ -75,6 +78,9 @@ class SPIRVEmitIntrinsics
Type *deduceNestedTypeHelper(User *U, Type *Ty,
std::unordered_set<Value *> &Visited);
+  // Deduce element types of the Instruction's operands where possible.
+ void deduceOperandElementType(Instruction *I);
+
void preprocessCompositeConstants(IRBuilder<> &B);
void preprocessUndefs(IRBuilder<> &B);
@@ -111,10 +117,10 @@ class SPIRVEmitIntrinsics
public:
static char ID;
- SPIRVEmitIntrinsics() : FunctionPass(ID) {
+ SPIRVEmitIntrinsics() : ModulePass(ID) {
initializeSPIRVEmitIntrinsicsPass(*PassRegistry::getPassRegistry());
}
- SPIRVEmitIntrinsics(SPIRVTargetMachine *_TM) : FunctionPass(ID), TM(_TM) {
+ SPIRVEmitIntrinsics(SPIRVTargetMachine *_TM) : ModulePass(ID), TM(_TM) {
initializeSPIRVEmitIntrinsicsPass(*PassRegistry::getPassRegistry());
}
Instruction *visitInstruction(Instruction &I) { return &I; }
@@ -130,7 +136,15 @@ public:
Instruction *visitAllocaInst(AllocaInst &I);
Instruction *visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
Instruction *visitUnreachableInst(UnreachableInst &I);
- bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override { return "SPIRV emit intrinsics"; }
+
+ bool runOnModule(Module &M) override;
+ bool runOnFunction(Function &F);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ ModulePass::getAnalysisUsage(AU);
+ }
};
} // namespace
@@ -269,6 +283,12 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
if (Ty)
break;
}
+ } else if (auto *Ref = dyn_cast<SelectInst>(I)) {
+ for (Value *Op : {Ref->getTrueValue(), Ref->getFalseValue()}) {
+ Ty = deduceElementTypeByUsersDeep(Op, Visited);
+ if (Ty)
+ break;
+ }
}
// remember the found relationship
@@ -368,6 +388,112 @@ Type *SPIRVEmitIntrinsics::deduceElementType(Value *I) {
return IntegerType::getInt8Ty(I->getContext());
}
+// If the Instruction has Pointer operands with unresolved types, this function
+// tries to deduce them. If the Instruction has Pointer operands with known
+// types that differ from the expected ones, this function tries to insert a
+// bitcast to resolve the mismatch.
+void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I) {
+ SmallVector<std::pair<Value *, unsigned>> Ops;
+ Type *KnownElemTy = nullptr;
+ // look for known basic patterns of type inference
+ if (auto *Ref = dyn_cast<PHINode>(I)) {
+ if (!isPointerTy(I->getType()) ||
+ !(KnownElemTy = GR->findDeducedElementType(I)))
+ return;
+ for (unsigned i = 0; i < Ref->getNumIncomingValues(); i++) {
+ Value *Op = Ref->getIncomingValue(i);
+ if (isPointerTy(Op->getType()))
+ Ops.push_back(std::make_pair(Op, i));
+ }
+ } else if (auto *Ref = dyn_cast<SelectInst>(I)) {
+ if (!isPointerTy(I->getType()) ||
+ !(KnownElemTy = GR->findDeducedElementType(I)))
+ return;
+ for (unsigned i = 0; i < Ref->getNumOperands(); i++) {
+ Value *Op = Ref->getOperand(i);
+ if (isPointerTy(Op->getType()))
+ Ops.push_back(std::make_pair(Op, i));
+ }
+ } else if (auto *Ref = dyn_cast<ReturnInst>(I)) {
+ Type *RetTy = F->getReturnType();
+ if (!isPointerTy(RetTy))
+ return;
+ Value *Op = Ref->getReturnValue();
+ if (!Op)
+ return;
+ if (!(KnownElemTy = GR->findDeducedElementType(F))) {
+ if (Type *OpElemTy = GR->findDeducedElementType(Op)) {
+ GR->addDeducedElementType(F, OpElemTy);
+ TypedPointerType *DerivedTy =
+ TypedPointerType::get(OpElemTy, getPointerAddressSpace(RetTy));
+ GR->addReturnType(F, DerivedTy);
+ }
+ return;
+ }
+ Ops.push_back(std::make_pair(Op, 0));
+ } else if (auto *Ref = dyn_cast<ICmpInst>(I)) {
+ if (!isPointerTy(Ref->getOperand(0)->getType()))
+ return;
+ Value *Op0 = Ref->getOperand(0);
+ Value *Op1 = Ref->getOperand(1);
+ Type *ElemTy0 = GR->findDeducedElementType(Op0);
+ Type *ElemTy1 = GR->findDeducedElementType(Op1);
+ if (ElemTy0) {
+ KnownElemTy = ElemTy0;
+ Ops.push_back(std::make_pair(Op1, 1));
+ } else if (ElemTy1) {
+ KnownElemTy = ElemTy1;
+ Ops.push_back(std::make_pair(Op0, 0));
+ }
+ }
+
+  // There is not enough info to deduce types, or everything is already valid.
+ if (!KnownElemTy || Ops.size() == 0)
+ return;
+
+ LLVMContext &Ctx = F->getContext();
+ IRBuilder<> B(Ctx);
+ for (auto &OpIt : Ops) {
+ Value *Op = OpIt.first;
+ if (Op->use_empty())
+ continue;
+ Type *Ty = GR->findDeducedElementType(Op);
+ if (Ty == KnownElemTy)
+ continue;
+ if (Instruction *User = dyn_cast<Instruction>(Op->use_begin()->get()))
+ setInsertPointSkippingPhis(B, User->getNextNode());
+ else
+ B.SetInsertPoint(I);
+ Value *OpTyVal = Constant::getNullValue(KnownElemTy);
+ Type *OpTy = Op->getType();
+ if (!Ty) {
+ GR->addDeducedElementType(Op, KnownElemTy);
+      // Check if there is an existing Intrinsic::spv_assign_ptr_type call.
+ auto It = AssignPtrTypeInstr.find(Op);
+ if (It == AssignPtrTypeInstr.end()) {
+ CallInst *CI =
+ buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {OpTy}, OpTyVal, Op,
+ {B.getInt32(getPointerAddressSpace(OpTy))}, B);
+ AssignPtrTypeInstr[Op] = CI;
+ } else {
+ It->second->setArgOperand(
+ 1,
+ MetadataAsValue::get(
+ Ctx, MDNode::get(Ctx, ValueAsMetadata::getConstant(OpTyVal))));
+ }
+ } else {
+ SmallVector<Type *, 2> Types = {OpTy, OpTy};
+ MetadataAsValue *VMD = MetadataAsValue::get(
+ Ctx, MDNode::get(Ctx, ValueAsMetadata::getConstant(OpTyVal)));
+ SmallVector<Value *, 2> Args = {Op, VMD,
+ B.getInt32(getPointerAddressSpace(OpTy))};
+ CallInst *PtrCastI =
+ B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
+ I->setOperand(OpIt.second, PtrCastI);
+ }
+ }
+}
+
void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old,
Instruction *New,
IRBuilder<> &B) {
@@ -630,6 +756,7 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
ExpectedElementTypeConst, Pointer, {B.getInt32(AddressSpace)}, B);
GR->addDeducedElementType(CI, ExpectedElementType);
GR->addDeducedElementType(Pointer, ExpectedElementType);
+ AssignPtrTypeInstr[Pointer] = CI;
return;
}
@@ -914,6 +1041,7 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I,
CallInst *CI = buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {I->getType()},
EltTyConst, I, {B.getInt32(AddressSpace)}, B);
GR->addDeducedElementType(CI, ElemTy);
+ AssignPtrTypeInstr[I] = CI;
}
void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I,
@@ -1070,6 +1198,7 @@ void SPIRVEmitIntrinsics::processParamTypes(Function *F, IRBuilder<> &B) {
{B.getInt32(getPointerAddressSpace(Arg->getType()))}, B);
GR->addDeducedElementType(AssignPtrTyCI, ElemTy);
GR->addDeducedElementType(Arg, ElemTy);
+ AssignPtrTypeInstr[Arg] = AssignPtrTyCI;
}
}
}
@@ -1114,6 +1243,10 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
insertAssignTypeIntrs(I, B);
insertPtrCastOrAssignTypeInstr(I, B);
}
+
+ for (auto &I : instructions(Func))
+ deduceOperandElementType(&I);
+
for (auto *I : Worklist) {
TrackConstants = true;
if (!I->getType()->isVoidTy() || isa<StoreInst>(I))
@@ -1126,13 +1259,29 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
processInstrAfterVisit(I, B);
}
- // check if function parameter types are set
- if (!F->isIntrinsic())
- processParamTypes(F, B);
-
return true;
}
-FunctionPass *llvm::createSPIRVEmitIntrinsicsPass(SPIRVTargetMachine *TM) {
+bool SPIRVEmitIntrinsics::runOnModule(Module &M) {
+ bool Changed = false;
+
+ for (auto &F : M) {
+ Changed |= runOnFunction(F);
+ }
+
+ for (auto &F : M) {
+ // check if function parameter types are set
+ if (!F.isDeclaration() && !F.isIntrinsic()) {
+ const SPIRVSubtarget &ST = TM->getSubtarget<SPIRVSubtarget>(F);
+ GR = ST.getSPIRVGlobalRegistry();
+ IRBuilder<> B(F.getContext());
+ processParamTypes(&F, B);
+ }
+ }
+
+ return Changed;
+}
+
+ModulePass *llvm::createSPIRVEmitIntrinsicsPass(SPIRVTargetMachine *TM) {
return new SPIRVEmitIntrinsics(TM);
}
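The new deduceOperandElementType step records a deduced element type by passing it to an Intrinsic::spv_assign_ptr_type call as a null constant wrapped in metadata. A standalone sketch of that wrapping pattern, under an assumed helper name (the backend's actual helper is buildIntrWithMD):

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsSPIRV.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// Wrap the deduced element type as a null constant inside metadata and attach
// it to an Intrinsic::spv_assign_ptr_type call, mirroring what the pass emits.
CallInst *emitAssignPtrType(IRBuilder<> &B, Value *Ptr, Type *ElemTy,
                            unsigned AddrSpace) {
  LLVMContext &Ctx = B.getContext();
  Value *TyVal = Constant::getNullValue(ElemTy);
  auto *MD = MetadataAsValue::get(
      Ctx, MDNode::get(Ctx, ValueAsMetadata::getConstant(TyVal)));
  return B.CreateIntrinsic(Intrinsic::spv_assign_ptr_type, {Ptr->getType()},
                           {Ptr, MD, B.getInt32(AddrSpace)});
}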
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 70197e9..cebe230 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -23,7 +23,6 @@
#include "llvm/ADT/APInt.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/TypedPointerType.h"
#include "llvm/Support/Casting.h"
#include <cassert>
@@ -61,7 +60,6 @@ SPIRVType *SPIRVGlobalRegistry::assignVectTypeToVReg(
SPIRVType *SPIRVGlobalRegistry::assignTypeToVReg(
const Type *Type, Register VReg, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier::AccessQualifier AccessQual, bool EmitIR) {
-
SPIRVType *SpirvType =
getOrCreateSPIRVType(Type, MIRBuilder, AccessQual, EmitIR);
assignSPIRVTypeToVReg(SpirvType, VReg, MIRBuilder.getMF());
@@ -655,7 +653,8 @@ Register SPIRVGlobalRegistry::buildGlobalVariable(
auto MRI = MIRBuilder.getMRI();
assert(MRI->getType(ResVReg).isPointer() && "Pointer type is expected");
if (Reg != ResVReg) {
- LLT RegLLTy = LLT::pointer(MRI->getType(ResVReg).getAddressSpace(), 32);
+ LLT RegLLTy =
+ LLT::pointer(MRI->getType(ResVReg).getAddressSpace(), getPointerSize());
MRI->setType(Reg, RegLLTy);
assignSPIRVTypeToVReg(BaseType, Reg, MIRBuilder.getMF());
} else {
@@ -726,7 +725,8 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeStruct(const StructType *Ty,
bool EmitIR) {
SmallVector<Register, 4> FieldTypes;
for (const auto &Elem : Ty->elements()) {
- SPIRVType *ElemTy = findSPIRVType(Elem, MIRBuilder);
+ SPIRVType *ElemTy =
+ findSPIRVType(toTypedPointer(Elem, Ty->getContext()), MIRBuilder);
assert(ElemTy && ElemTy->getOpcode() != SPIRV::OpTypeVoid &&
"Invalid struct element type");
FieldTypes.push_back(getSPIRVTypeID(ElemTy));
@@ -919,8 +919,10 @@ SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(
return SpirvType;
}
-SPIRVType *SPIRVGlobalRegistry::getSPIRVTypeForVReg(Register VReg) const {
- auto t = VRegToTypeMap.find(CurMF);
+SPIRVType *
+SPIRVGlobalRegistry::getSPIRVTypeForVReg(Register VReg,
+ const MachineFunction *MF) const {
+ auto t = VRegToTypeMap.find(MF ? MF : CurMF);
if (t != VRegToTypeMap.end()) {
auto tt = t->second.find(VReg);
if (tt != t->second.end())
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index 2e3e694..55979ba 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -21,6 +21,7 @@
#include "SPIRVInstrInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/TypedPointerType.h"
namespace llvm {
using SPIRVType = const MachineInstr;
@@ -58,6 +59,9 @@ class SPIRVGlobalRegistry {
SmallPtrSet<const Type *, 4> TypesInProcessing;
DenseMap<const Type *, SPIRVType *> ForwardPointerTypes;
+  // Maps a function that returns a pointer to the derived TypedPointerType.
+ DenseMap<const Function *, TypedPointerType *> FunResPointerTypes;
+
// Number of bits pointers and size_t integers require.
const unsigned PointerSize;
@@ -134,6 +138,16 @@ public:
void setBound(unsigned V) { Bound = V; }
unsigned getBound() { return Bound; }
+ // Add a record to the map of function return pointer types.
+ void addReturnType(const Function *ArgF, TypedPointerType *DerivedTy) {
+ FunResPointerTypes[ArgF] = DerivedTy;
+ }
+ // Find a record in the map of function return pointer types.
+ const TypedPointerType *findReturnType(const Function *ArgF) {
+ auto It = FunResPointerTypes.find(ArgF);
+ return It == FunResPointerTypes.end() ? nullptr : It->second;
+ }
+
// Deduced element types of untyped pointers and composites:
// - Add a record to the map of deduced element types.
void addDeducedElementType(Value *Val, Type *Ty) { DeducedElTys[Val] = Ty; }
@@ -276,8 +290,12 @@ public:
SPIRV::AccessQualifier::ReadWrite);
// Return the SPIR-V type instruction corresponding to the given VReg, or
- // nullptr if no such type instruction exists.
- SPIRVType *getSPIRVTypeForVReg(Register VReg) const;
+  // nullptr if no such type instruction exists. The second argument MF
+  // allows searching for the association in the context of a machine function
+  // other than the current one, without switching between different "current"
+  // machine functions.
+ SPIRVType *getSPIRVTypeForVReg(Register VReg,
+ const MachineFunction *MF = nullptr) const;
// Whether the given VReg has a SPIR-V type mapped to it yet.
bool hasSPIRVTypeForVReg(Register VReg) const {
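The new optional MF parameter lets callers look up a register's SPIR-V type in another machine function's map. An illustrative call site (function and parameter names are assumptions, written as if inside the SPIR-V backend):

#include "SPIRVGlobalRegistry.h"

using namespace llvm;

// Query the type of a register that belongs to another machine function
// (e.g. a callee's definition) without disturbing the registry's notion of
// the "current" function; passing nullptr falls back to CurMF.
SPIRVType *lookupTypeInOtherMF(SPIRVGlobalRegistry &GR, Register DefReg,
                               const MachineFunction *DefMF) {
  return GR.getSPIRVTypeForVReg(DefReg, DefMF);
}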
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
index 8db54c7..b8296c3 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
@@ -88,19 +88,24 @@ static void validatePtrTypes(const SPIRVSubtarget &STI,
MachineRegisterInfo *MRI, SPIRVGlobalRegistry &GR,
MachineInstr &I, unsigned OpIdx,
SPIRVType *ResType, const Type *ResTy = nullptr) {
+ // Get operand type
+ MachineFunction *MF = I.getParent()->getParent();
Register OpReg = I.getOperand(OpIdx).getReg();
SPIRVType *TypeInst = MRI->getVRegDef(OpReg);
- SPIRVType *OpType = GR.getSPIRVTypeForVReg(
+ Register OpTypeReg =
TypeInst && TypeInst->getOpcode() == SPIRV::OpFunctionParameter
? TypeInst->getOperand(1).getReg()
- : OpReg);
+ : OpReg;
+ SPIRVType *OpType = GR.getSPIRVTypeForVReg(OpTypeReg, MF);
if (!ResType || !OpType || OpType->getOpcode() != SPIRV::OpTypePointer)
return;
- SPIRVType *ElemType = GR.getSPIRVTypeForVReg(OpType->getOperand(2).getReg());
+ // Get operand's pointee type
+ Register ElemTypeReg = OpType->getOperand(2).getReg();
+ SPIRVType *ElemType = GR.getSPIRVTypeForVReg(ElemTypeReg, MF);
if (!ElemType)
return;
- bool IsSameMF =
- ElemType->getParent()->getParent() == ResType->getParent()->getParent();
+  // Check if we need a bitcast to make the instruction valid
+ bool IsSameMF = MF == ResType->getParent()->getParent();
bool IsEqualTypes = IsSameMF ? ElemType == ResType
: GR.getTypeForSPIRVType(ElemType) == ResTy;
if (IsEqualTypes)
@@ -156,7 +161,8 @@ void validateFunCallMachineDef(const SPIRVSubtarget &STI,
SPIRVType *DefPtrType = DefMRI->getVRegDef(FunDef->getOperand(1).getReg());
SPIRVType *DefElemType =
DefPtrType && DefPtrType->getOpcode() == SPIRV::OpTypePointer
- ? GR.getSPIRVTypeForVReg(DefPtrType->getOperand(2).getReg())
+ ? GR.getSPIRVTypeForVReg(DefPtrType->getOperand(2).getReg(),
+ DefPtrType->getParent()->getParent())
: nullptr;
if (DefElemType) {
const Type *DefElemTy = GR.getTypeForSPIRVType(DefElemType);
@@ -177,7 +183,7 @@ void validateFunCallMachineDef(const SPIRVSubtarget &STI,
// with a processed definition. Return Function pointer if it's a forward
// call (ahead of definition), and nullptr otherwise.
const Function *validateFunCall(const SPIRVSubtarget &STI,
- MachineRegisterInfo *MRI,
+ MachineRegisterInfo *CallMRI,
SPIRVGlobalRegistry &GR,
MachineInstr &FunCall) {
const GlobalValue *GV = FunCall.getOperand(2).getGlobal();
@@ -186,7 +192,8 @@ const Function *validateFunCall(const SPIRVSubtarget &STI,
const_cast<MachineInstr *>(GR.getFunctionDefinition(F));
if (!FunDef)
return F;
- validateFunCallMachineDef(STI, MRI, MRI, GR, FunCall, FunDef);
+ MachineRegisterInfo *DefMRI = &FunDef->getParent()->getParent()->getRegInfo();
+ validateFunCallMachineDef(STI, DefMRI, CallMRI, GR, FunCall, FunDef);
return nullptr;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
index e3f7641..af98f2f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
@@ -247,8 +247,10 @@ void SPIRVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool SPIRVInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if (MI.getOpcode() == SPIRV::GET_ID || MI.getOpcode() == SPIRV::GET_fID ||
- MI.getOpcode() == SPIRV::GET_pID || MI.getOpcode() == SPIRV::GET_vfID ||
- MI.getOpcode() == SPIRV::GET_vID) {
+ MI.getOpcode() == SPIRV::GET_pID32 ||
+ MI.getOpcode() == SPIRV::GET_pID64 || MI.getOpcode() == SPIRV::GET_vfID ||
+ MI.getOpcode() == SPIRV::GET_vID || MI.getOpcode() == SPIRV::GET_vpID32 ||
+ MI.getOpcode() == SPIRV::GET_vpID64) {
auto &MRI = MI.getMF()->getRegInfo();
MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
MI.eraseFromParent();
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index 99c57da..151d0ec 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -19,9 +19,12 @@ let isCodeGenOnly=1 in {
def DECL_TYPE: Pseudo<(outs ANYID:$dst_id), (ins ANYID:$src_id, TYPE:$src_ty)>;
def GET_ID: Pseudo<(outs ID:$dst_id), (ins ANYID:$src)>;
def GET_fID: Pseudo<(outs fID:$dst_id), (ins ANYID:$src)>;
- def GET_pID: Pseudo<(outs pID:$dst_id), (ins ANYID:$src)>;
+ def GET_pID32: Pseudo<(outs pID32:$dst_id), (ins ANYID:$src)>;
+ def GET_pID64: Pseudo<(outs pID64:$dst_id), (ins ANYID:$src)>;
def GET_vID: Pseudo<(outs vID:$dst_id), (ins ANYID:$src)>;
def GET_vfID: Pseudo<(outs vfID:$dst_id), (ins ANYID:$src)>;
+ def GET_vpID32: Pseudo<(outs vpID32:$dst_id), (ins ANYID:$src)>;
+ def GET_vpID64: Pseudo<(outs vpID64:$dst_id), (ins ANYID:$src)>;
}
def SPVTypeBin : SDTypeProfile<1, 2, []>;
@@ -55,7 +58,7 @@ multiclass BinOpTypedGen<string name, bits<16> opCode, SDNode node, bit genF = 0
}
}
-multiclass TernOpTypedGen<string name, bits<16> opCode, SDNode node, bit genI = 1, bit genF = 0, bit genV = 0> {
+multiclass TernOpTypedGen<string name, bits<16> opCode, SDNode node, bit genP = 1, bit genI = 1, bit genF = 0, bit genV = 0> {
if genF then {
def SFSCond: TernOpTyped<name, opCode, ID, fID, node>;
def SFVCond: TernOpTyped<name, opCode, vID, fID, node>;
@@ -64,6 +67,12 @@ multiclass TernOpTypedGen<string name, bits<16> opCode, SDNode node, bit genI =
def SISCond: TernOpTyped<name, opCode, ID, ID, node>;
def SIVCond: TernOpTyped<name, opCode, vID, ID, node>;
}
+ if genP then {
+ def SPSCond32: TernOpTyped<name, opCode, ID, pID32, node>;
+ def SPVCond32: TernOpTyped<name, opCode, vID, pID32, node>;
+ def SPSCond64: TernOpTyped<name, opCode, ID, pID64, node>;
+ def SPVCond64: TernOpTyped<name, opCode, vID, pID64, node>;
+ }
if genV then {
if genF then {
def VFSCond: TernOpTyped<name, opCode, ID, vfID, node>;
@@ -73,6 +82,12 @@ multiclass TernOpTypedGen<string name, bits<16> opCode, SDNode node, bit genI =
def VISCond: TernOpTyped<name, opCode, ID, vID, node>;
def VIVCond: TernOpTyped<name, opCode, vID, vID, node>;
}
+ if genP then {
+ def VPSCond32: TernOpTyped<name, opCode, ID, vpID32, node>;
+ def VPVCond32: TernOpTyped<name, opCode, vID, vpID32, node>;
+ def VPSCond64: TernOpTyped<name, opCode, ID, vpID64, node>;
+ def VPVCond64: TernOpTyped<name, opCode, vID, vpID64, node>;
+ }
}
}
@@ -552,7 +567,7 @@ def OpLogicalOr: BinOp<"OpLogicalOr", 166>;
def OpLogicalAnd: BinOp<"OpLogicalAnd", 167>;
def OpLogicalNot: UnOp<"OpLogicalNot", 168>;
-defm OpSelect: TernOpTypedGen<"OpSelect", 169, select, 1, 1, 1>;
+defm OpSelect: TernOpTypedGen<"OpSelect", 169, select, 1, 1, 1, 1>;
def OpIEqual: BinOp<"OpIEqual", 170>;
def OpINotEqual: BinOp<"OpINotEqual", 171>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index c1c0fc4..72e5a7b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -145,9 +145,15 @@ private:
bool selectAddrSpaceCast(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;
+ bool selectAnyOrAll(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I, unsigned OpType) const;
+
bool selectAll(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;
+ bool selectAny(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
bool selectBitreverse(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;
@@ -284,14 +290,18 @@ bool SPIRVInstructionSelector::select(MachineInstr &I) {
// If it's not a GMIR instruction, we've selected it already.
if (!isPreISelGenericOpcode(Opcode)) {
if (Opcode == SPIRV::ASSIGN_TYPE) { // These pseudos aren't needed any more.
- auto *Def = MRI->getVRegDef(I.getOperand(1).getReg());
+ Register DstReg = I.getOperand(0).getReg();
+ Register SrcReg = I.getOperand(1).getReg();
+ auto *Def = MRI->getVRegDef(SrcReg);
if (isTypeFoldingSupported(Def->getOpcode())) {
+ if (MRI->getType(DstReg).isPointer())
+ MRI->setType(DstReg, LLT::scalar(32));
bool Res = selectImpl(I, *CoverageInfo);
assert(Res || Def->getOpcode() == TargetOpcode::G_CONSTANT);
if (Res)
return Res;
}
- MRI->replaceRegWith(I.getOperand(1).getReg(), I.getOperand(0).getReg());
+ MRI->replaceRegWith(SrcReg, DstReg);
I.removeFromParent();
return true;
} else if (I.getNumDefs() == 1) {
@@ -1160,9 +1170,10 @@ static unsigned getBoolCmpOpcode(unsigned PredNum) {
}
}
-bool SPIRVInstructionSelector::selectAll(Register ResVReg,
- const SPIRVType *ResType,
- MachineInstr &I) const {
+bool SPIRVInstructionSelector::selectAnyOrAll(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I,
+ unsigned OpAnyOrAll) const {
assert(I.getNumOperands() == 3);
assert(I.getOperand(2).isReg());
MachineBasicBlock &BB = *I.getParent();
@@ -1212,13 +1223,25 @@ bool SPIRVInstructionSelector::selectAll(Register ResVReg,
if (!IsVectorTy)
return true;
- return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpAll))
+ return BuildMI(BB, I, I.getDebugLoc(), TII.get(OpAnyOrAll))
.addDef(ResVReg)
.addUse(GR.getSPIRVTypeID(SpvBoolScalarTy))
.addUse(NotEqualReg)
.constrainAllUses(TII, TRI, RBI);
}
+bool SPIRVInstructionSelector::selectAll(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ return selectAnyOrAll(ResVReg, ResType, I, SPIRV::OpAll);
+}
+
+bool SPIRVInstructionSelector::selectAny(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ return selectAnyOrAll(ResVReg, ResType, I, SPIRV::OpAny);
+}
+
bool SPIRVInstructionSelector::selectBitreverse(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
@@ -1877,6 +1900,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
return selectSpvThreadId(ResVReg, ResType, I);
case Intrinsic::spv_all:
return selectAll(ResVReg, ResType, I);
+ case Intrinsic::spv_any:
+ return selectAny(ResVReg, ResType, I);
case Intrinsic::spv_lifetime_start:
case Intrinsic::spv_lifetime_end: {
unsigned Op = IID == Intrinsic::spv_lifetime_start ? SPIRV::OpLifetimeStart
diff --git a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
index b9d66de..d652b5d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
@@ -55,8 +55,9 @@ extern void processInstr(MachineInstr &MI, MachineIRBuilder &MIB,
static bool isMetaInstrGET(unsigned Opcode) {
return Opcode == SPIRV::GET_ID || Opcode == SPIRV::GET_fID ||
- Opcode == SPIRV::GET_pID || Opcode == SPIRV::GET_vID ||
- Opcode == SPIRV::GET_vfID;
+ Opcode == SPIRV::GET_pID32 || Opcode == SPIRV::GET_pID64 ||
+ Opcode == SPIRV::GET_vID || Opcode == SPIRV::GET_vfID ||
+ Opcode == SPIRV::GET_vpID32 || Opcode == SPIRV::GET_vpID64;
}
static bool mayBeInserted(unsigned Opcode) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
index 7e155a3..d16f6d5 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -64,9 +64,16 @@ static void addConstantsToTrack(MachineFunction &MF, SPIRVGlobalRegistry *GR) {
auto *BuildVec = MRI.getVRegDef(MI.getOperand(2).getReg());
assert(BuildVec &&
BuildVec->getOpcode() == TargetOpcode::G_BUILD_VECTOR);
- for (unsigned i = 0; i < ConstVec->getNumElements(); ++i)
- GR->add(ConstVec->getElementAsConstant(i), &MF,
- BuildVec->getOperand(1 + i).getReg());
+ for (unsigned i = 0; i < ConstVec->getNumElements(); ++i) {
+ // Ensure that OpConstantComposite reuses a constant when it's
+ // already created and available in the same machine function.
+ Constant *ElemConst = ConstVec->getElementAsConstant(i);
+ Register ElemReg = GR->find(ElemConst, &MF);
+ if (!ElemReg.isValid())
+ GR->add(ElemConst, &MF, BuildVec->getOperand(1 + i).getReg());
+ else
+ BuildVec->getOperand(1 + i).setReg(ElemReg);
+ }
}
GR->add(Const, &MF, MI.getOperand(2).getReg());
} else {
@@ -164,6 +171,12 @@ static void insertBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR,
// %1 = G_GLOBAL_VALUE
// %2 = COPY %1
// %3 = G_ADDRSPACE_CAST %2
+//
+// or
+//
+// %1 = G_ZEXT %2
+// G_MEMCPY ... %2 ...
+//
// New registers have no SPIRVType and no register class info.
//
// Set SPIRVType for GV, propagate it from GV to other instructions,
@@ -193,6 +206,24 @@ static SPIRVType *propagateSPIRVType(MachineInstr *MI, SPIRVGlobalRegistry *GR,
SpirvTy = GR->getOrCreateSPIRVType(Ty, MIB);
break;
}
+ case TargetOpcode::G_ZEXT: {
+ if (MI->getOperand(1).isReg()) {
+ if (MachineInstr *DefInstr =
+ MRI.getVRegDef(MI->getOperand(1).getReg())) {
+ if (SPIRVType *Def = propagateSPIRVType(DefInstr, GR, MRI, MIB)) {
+ unsigned CurrentBW = GR->getScalarOrVectorBitWidth(Def);
+ unsigned ExpectedBW =
+ std::max(MRI.getType(Reg).getScalarSizeInBits(), CurrentBW);
+ unsigned NumElements = GR->getScalarOrVectorComponentCount(Def);
+ SpirvTy = GR->getOrCreateSPIRVIntegerType(ExpectedBW, MIB);
+ if (NumElements > 1)
+ SpirvTy =
+ GR->getOrCreateSPIRVVectorType(SpirvTy, NumElements, MIB);
+ }
+ }
+ }
+ break;
+ }
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_ADDRSPACE_CAST:
case TargetOpcode::G_PTR_ADD:
@@ -216,11 +247,12 @@ static SPIRVType *propagateSPIRVType(MachineInstr *MI, SPIRVGlobalRegistry *GR,
}
static std::pair<Register, unsigned>
-createNewIdReg(Register ValReg, unsigned Opcode, MachineRegisterInfo &MRI,
+createNewIdReg(SPIRVType *SpvType, Register SrcReg, MachineRegisterInfo &MRI,
const SPIRVGlobalRegistry &GR) {
- LLT NewT = LLT::scalar(32);
- SPIRVType *SpvType = GR.getSPIRVTypeForVReg(ValReg);
+ if (!SpvType)
+ SpvType = GR.getSPIRVTypeForVReg(SrcReg);
assert(SpvType && "VReg is expected to have SPIRV type");
+ LLT NewT = LLT::scalar(32);
bool IsFloat = SpvType->getOpcode() == SPIRV::OpTypeFloat;
bool IsVectorFloat =
SpvType->getOpcode() == SPIRV::OpTypeVector &&
@@ -229,14 +261,38 @@ createNewIdReg(Register ValReg, unsigned Opcode, MachineRegisterInfo &MRI,
IsFloat |= IsVectorFloat;
auto GetIdOp = IsFloat ? SPIRV::GET_fID : SPIRV::GET_ID;
auto DstClass = IsFloat ? &SPIRV::fIDRegClass : &SPIRV::IDRegClass;
- if (MRI.getType(ValReg).isPointer()) {
- NewT = LLT::pointer(0, 32);
- GetIdOp = SPIRV::GET_pID;
- DstClass = &SPIRV::pIDRegClass;
- } else if (MRI.getType(ValReg).isVector()) {
+ if (MRI.getType(SrcReg).isPointer()) {
+ unsigned PtrSz = GR.getPointerSize();
+ NewT = LLT::pointer(0, PtrSz);
+ bool IsVec = MRI.getType(SrcReg).isVector();
+ if (IsVec)
+ NewT = LLT::fixed_vector(2, NewT);
+ if (PtrSz == 64) {
+ if (IsVec) {
+ GetIdOp = SPIRV::GET_vpID64;
+ DstClass = &SPIRV::vpID64RegClass;
+ } else {
+ GetIdOp = SPIRV::GET_pID64;
+ DstClass = &SPIRV::pID64RegClass;
+ }
+ } else {
+ if (IsVec) {
+ GetIdOp = SPIRV::GET_vpID32;
+ DstClass = &SPIRV::vpID32RegClass;
+ } else {
+ GetIdOp = SPIRV::GET_pID32;
+ DstClass = &SPIRV::pID32RegClass;
+ }
+ }
+ } else if (MRI.getType(SrcReg).isVector()) {
NewT = LLT::fixed_vector(2, NewT);
- GetIdOp = IsFloat ? SPIRV::GET_vfID : SPIRV::GET_vID;
- DstClass = IsFloat ? &SPIRV::vfIDRegClass : &SPIRV::vIDRegClass;
+ if (IsFloat) {
+ GetIdOp = SPIRV::GET_vfID;
+ DstClass = &SPIRV::vfIDRegClass;
+ } else {
+ GetIdOp = SPIRV::GET_vID;
+ DstClass = &SPIRV::vIDRegClass;
+ }
}
Register IdReg = MRI.createGenericVirtualRegister(NewT);
MRI.setRegClass(IdReg, DstClass);
@@ -257,6 +313,7 @@ Register insertAssignInstr(Register Reg, Type *Ty, SPIRVType *SpirvTy,
MIB.setInsertPt(*Def->getParent(),
(Def->getNextNode() ? Def->getNextNode()->getIterator()
: Def->getParent()->end()));
+ SpirvTy = SpirvTy ? SpirvTy : GR->getOrCreateSPIRVType(Ty, MIB);
Register NewReg = MRI.createGenericVirtualRegister(MRI.getType(Reg));
if (auto *RC = MRI.getRegClassOrNull(Reg)) {
MRI.setRegClass(NewReg, RC);
@@ -264,7 +321,6 @@ Register insertAssignInstr(Register Reg, Type *Ty, SPIRVType *SpirvTy,
MRI.setRegClass(NewReg, &SPIRV::IDRegClass);
MRI.setRegClass(Reg, &SPIRV::IDRegClass);
}
- SpirvTy = SpirvTy ? SpirvTy : GR->getOrCreateSPIRVType(Ty, MIB);
GR->assignSPIRVTypeToVReg(SpirvTy, Reg, MIB.getMF());
// This is to make it convenient for Legalizer to get the SPIRVType
// when processing the actual MI (i.e. not pseudo one).
@@ -283,11 +339,11 @@ Register insertAssignInstr(Register Reg, Type *Ty, SPIRVType *SpirvTy,
void processInstr(MachineInstr &MI, MachineIRBuilder &MIB,
MachineRegisterInfo &MRI, SPIRVGlobalRegistry *GR) {
- unsigned Opc = MI.getOpcode();
assert(MI.getNumDefs() > 0 && MRI.hasOneUse(MI.getOperand(0).getReg()));
MachineInstr &AssignTypeInst =
*(MRI.use_instr_begin(MI.getOperand(0).getReg()));
- auto NewReg = createNewIdReg(MI.getOperand(0).getReg(), Opc, MRI, *GR).first;
+ auto NewReg =
+ createNewIdReg(nullptr, MI.getOperand(0).getReg(), MRI, *GR).first;
AssignTypeInst.getOperand(1).setReg(NewReg);
MI.getOperand(0).setReg(NewReg);
MIB.setInsertPt(*MI.getParent(),
@@ -296,7 +352,7 @@ void processInstr(MachineInstr &MI, MachineIRBuilder &MIB,
for (auto &Op : MI.operands()) {
if (!Op.isReg() || Op.isDef())
continue;
- auto IdOpInfo = createNewIdReg(Op.getReg(), Opc, MRI, *GR);
+ auto IdOpInfo = createNewIdReg(nullptr, Op.getReg(), MRI, *GR);
MIB.buildInstr(IdOpInfo.second).addDef(IdOpInfo.first).addUse(Op.getReg());
Op.setReg(IdOpInfo.first);
}
@@ -383,6 +439,7 @@ static void generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR,
}
insertAssignInstr(Reg, Ty, nullptr, GR, MIB, MRI);
} else if (MI.getOpcode() == TargetOpcode::G_TRUNC ||
+ MI.getOpcode() == TargetOpcode::G_ZEXT ||
MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE ||
MI.getOpcode() == TargetOpcode::COPY ||
MI.getOpcode() == TargetOpcode::G_ADDRSPACE_CAST) {
@@ -412,6 +469,7 @@ static void processInstrsWithTypeFolding(MachineFunction &MF,
processInstr(MI, MIB, MRI, GR);
}
}
+
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
// We need to rewrite dst types for ASSIGN_TYPE instrs to be able
@@ -424,16 +482,18 @@ static void processInstrsWithTypeFolding(MachineFunction &MF,
if (!isTypeFoldingSupported(Opcode))
continue;
Register DstReg = MI.getOperand(0).getReg();
- if (MRI.getType(DstReg).isVector())
+ bool IsDstPtr = MRI.getType(DstReg).isPointer();
+ if (IsDstPtr || MRI.getType(DstReg).isVector())
MRI.setRegClass(DstReg, &SPIRV::IDRegClass);
// Don't need to reset type of register holding constant and used in
- // G_ADDRSPACE_CAST, since it braaks legalizer.
+      // G_ADDRSPACE_CAST, since it breaks the legalizer.
if (Opcode == TargetOpcode::G_CONSTANT && MRI.hasOneUse(DstReg)) {
MachineInstr &UseMI = *MRI.use_instr_begin(DstReg);
if (UseMI.getOpcode() == TargetOpcode::G_ADDRSPACE_CAST)
continue;
}
- MRI.setType(DstReg, LLT::scalar(32));
+ MRI.setType(DstReg, IsDstPtr ? LLT::pointer(0, GR->getPointerSize())
+ : LLT::scalar(32));
}
}
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.cpp
index 9bf9d7f..ecd99f1 100644
--- a/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.cpp
@@ -27,21 +27,7 @@ using namespace llvm;
const RegisterBank &
SPIRVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
LLT Ty) const {
- switch (RC.getID()) {
- case SPIRV::TYPERegClassID:
+ if (RC.getID() == SPIRV::TYPERegClassID)
return SPIRV::TYPERegBank;
- case SPIRV::pIDRegClassID:
- case SPIRV::IDRegClassID:
- return SPIRV::IDRegBank;
- case SPIRV::fIDRegClassID:
- return SPIRV::fIDRegBank;
- case SPIRV::vIDRegClassID:
- return SPIRV::vIDRegBank;
- case SPIRV::vfIDRegClassID:
- return SPIRV::vfIDRegBank;
- case SPIRV::ANYIDRegClassID:
- case SPIRV::ANYRegClassID:
- return SPIRV::IDRegBank;
- }
- llvm_unreachable("Unknown register class");
+ return SPIRV::IDRegBank;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterBanks.td b/llvm/lib/Target/SPIRV/SPIRVRegisterBanks.td
index 90c7f3a..dea2ef4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVRegisterBanks.td
+++ b/llvm/lib/Target/SPIRV/SPIRVRegisterBanks.td
@@ -8,8 +8,6 @@
// Although RegisterBankSelection is disabled we need to distinct the banks
// as InstructionSelector RegClass checking code relies on them
-def IDRegBank : RegisterBank<"IDBank", [ID]>;
-def fIDRegBank : RegisterBank<"fIDBank", [fID]>;
-def vIDRegBank : RegisterBank<"vIDBank", [vID]>;
-def vfIDRegBank : RegisterBank<"vfIDBank", [vfID]>;
+
def TYPERegBank : RegisterBank<"TYPEBank", [TYPE]>;
+def IDRegBank : RegisterBank<"IDBank", [ID, fID, pID32, pID64, vID, vfID, vpID32, vpID64]>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.td b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.td
index d0b64b6..9231d22 100644
--- a/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.td
@@ -11,26 +11,46 @@
//===----------------------------------------------------------------------===//
let Namespace = "SPIRV" in {
- def p0 : PtrValueType <i32, 0>;
- // All registers are for 32-bit identifiers, so have a single dummy register
+ // Pointer types for patterns with the GlobalISelEmitter
+ def p32 : PtrValueType <i32, 0>;
+ def p64 : PtrValueType <i64, 0>;
- // Class for registers that are the result of OpTypeXXX instructions
+ class VTPtrVec<int nelem, PtrValueType ptr>
+ : VTVec<nelem, ValueType<ptr.Size, ptr.Value>, ptr.Value> {
+ int isPointer = true;
+ }
+
+ def v2p32 : VTPtrVec<2, p32>;
+ def v2p64 : VTPtrVec<2, p64>;
+
+ // Class for type registers
def TYPE0 : Register<"TYPE0">;
def TYPE : RegisterClass<"SPIRV", [i32], 32, (add TYPE0)>;
- // Class for every other non-type ID
+ // Class for non-type registers
def ID0 : Register<"ID0">;
+ def fID0 : Register<"fID0">;
+ def pID320 : Register<"pID320">;
+ def pID640 : Register<"pID640">;
+ def vID0 : Register<"vID0">;
+ def vfID0 : Register<"vfID0">;
+ def vpID320 : Register<"vpID320">;
+ def vpID640 : Register<"vpID640">;
+
def ID : RegisterClass<"SPIRV", [i32], 32, (add ID0)>;
- def fID0 : Register<"FID0">;
def fID : RegisterClass<"SPIRV", [f32], 32, (add fID0)>;
- def pID0 : Register<"pID0">;
- def pID : RegisterClass<"SPIRV", [p0], 32, (add pID0)>;
- def vID0 : Register<"pID0">;
+ def pID32 : RegisterClass<"SPIRV", [p32], 32, (add pID320)>;
+ def pID64 : RegisterClass<"SPIRV", [p64], 32, (add pID640)>;
def vID : RegisterClass<"SPIRV", [v2i32], 32, (add vID0)>;
- def vfID0 : Register<"pID0">;
def vfID : RegisterClass<"SPIRV", [v2f32], 32, (add vfID0)>;
+ def vpID32 : RegisterClass<"SPIRV", [v2p32], 32, (add vpID320)>;
+ def vpID64 : RegisterClass<"SPIRV", [v2p64], 32, (add vpID640)>;
- def ANYID : RegisterClass<"SPIRV", [i32, f32, p0, v2i32, v2f32], 32, (add ID, fID, pID, vID, vfID)>;
+ def ANYID : RegisterClass<
+ "SPIRV",
+ [i32, f32, p32, p64, v2i32, v2f32, v2p32, v2p64],
+ 32,
+ (add ID0, fID0, pID320, pID640, vID0, vfID0, vpID320, vpID640)>;
// A few instructions like OpName can take ids from both type and non-type
// instructions, so we need a super-class to allow for both to count as valid
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index fbf64f2..ae8baa3 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -55,9 +55,9 @@ static std::string computeDataLayout(const Triple &TT) {
// mean anything.
if (Arch == Triple::spirv32)
return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024";
+ "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024";
+ "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
}
static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index 299a4341..2e44c20 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -251,7 +251,8 @@ bool isSpvIntrinsic(const MachineInstr &MI, Intrinsic::ID IntrinsicID) {
}
Type *getMDOperandAsType(const MDNode *N, unsigned I) {
- return cast<ValueAsMetadata>(N->getOperand(I))->getType();
+ Type *ElementTy = cast<ValueAsMetadata>(N->getOperand(I))->getType();
+ return toTypedPointer(ElementTy, N->getContext());
}
// The set of names is borrowed from the SPIR-V translator.
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index c2c3475..cd1a2af 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -149,5 +149,12 @@ inline Type *reconstructFunctionType(Function *F) {
return FunctionType::get(F->getReturnType(), ArgTys, F->isVarArg());
}
+inline Type *toTypedPointer(Type *Ty, LLVMContext &Ctx) {
+ return isUntypedPointerTy(Ty)
+ ? TypedPointerType::get(IntegerType::getInt8Ty(Ctx),
+ getPointerAddressSpace(Ty))
+ : Ty;
+}
+
} // namespace llvm
#endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 5e79242..4d68f93 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -693,38 +693,38 @@ let DecoderNamespace = "SparcV8", Predicates = [HasNoV9] in {
}
let rd = 0 in {
- let Defs = [CPSR] in {
- def STCSRrr : F3_1<3, 0b110101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+ let mayStore = 1, Uses = [CPSR] in {
+ def STCSRrr : F3_1<3, 0b110101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
"st %csr, [$addr]", [], IIC_st>;
- def STCSRri : F3_2<3, 0b110101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+ def STCSRri : F3_2<3, 0b110101, (outs), (ins (MEMri $rs1, $simm13):$addr),
"st %csr, [$addr]", [], IIC_st>;
}
- let Defs = [CPQ] in {
- def STDCQrr : F3_1<3, 0b110110, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+ let mayStore = 1, Uses = [CPQ] in {
+ def STDCQrr : F3_1<3, 0b110110, (outs), (ins (MEMrr $rs1, $rs2):$addr),
"std %cq, [$addr]", [], IIC_std>;
- def STDCQri : F3_2<3, 0b110110, (outs (MEMri $rs1, $simm13):$addr), (ins),
+ def STDCQri : F3_2<3, 0b110110, (outs), (ins (MEMri $rs1, $simm13):$addr),
"std %cq, [$addr]", [], IIC_std>;
}
}
let rd = 0 in {
- let Defs = [FSR] in {
- def STFSRrr : F3_1<3, 0b100101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+ let mayStore = 1, Uses = [FSR] in {
+ def STFSRrr : F3_1<3, 0b100101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
"st %fsr, [$addr]", [], IIC_st>;
- def STFSRri : F3_2<3, 0b100101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+ def STFSRri : F3_2<3, 0b100101, (outs), (ins (MEMri $rs1, $simm13):$addr),
"st %fsr, [$addr]", [], IIC_st>;
}
- let Defs = [FQ] in {
- def STDFQrr : F3_1<3, 0b100110, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+ let mayStore = 1, Defs = [FQ] in {
+ def STDFQrr : F3_1<3, 0b100110, (outs), (ins (MEMrr $rs1, $rs2):$addr),
"std %fq, [$addr]", [], IIC_std>;
- def STDFQri : F3_2<3, 0b100110, (outs (MEMri $rs1, $simm13):$addr), (ins),
+ def STDFQri : F3_2<3, 0b100110, (outs), (ins (MEMri $rs1, $simm13):$addr),
"std %fq, [$addr]", [], IIC_std>;
}
}
-let rd = 1, Defs = [FSR] in {
- def STXFSRrr : F3_1<3, 0b100101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+let rd = 1, mayStore = 1, Uses = [FSR] in {
+ def STXFSRrr : F3_1<3, 0b100101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
"stx %fsr, [$addr]", []>, Requires<[HasV9]>;
- def STXFSRri : F3_2<3, 0b100101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+ def STXFSRri : F3_2<3, 0b100101, (outs), (ins (MEMri $rs1, $simm13):$addr),
"stx %fsr, [$addr]", []>, Requires<[HasV9]>;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 4897b37..50ecd6e 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -824,10 +824,7 @@ void SystemZELFFrameLowering::inlineStackProbe(
StackAllocMI->eraseFromParent();
if (DoneMBB != nullptr) {
// Compute the live-in lists for the new blocks.
- bool anyChange = false;
- do {
- anyChange = recomputeLiveIns(*DoneMBB) || recomputeLiveIns(*LoopMBB);
- } while (anyChange);
+ fullyRecomputeLiveIns({DoneMBB, LoopMBB});
}
}
@@ -1425,10 +1422,7 @@ void SystemZXPLINKFrameLowering::inlineStackProbe(
StackAllocMI->eraseFromParent();
// Compute the live-in lists for the new blocks.
- bool anyChange = false;
- do {
- anyChange = recomputeLiveIns(*StackExtMBB) || recomputeLiveIns(*NextMBB);
- } while (anyChange);
+ fullyRecomputeLiveIns({StackExtMBB, NextMBB});
}
bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const {
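The hunks above replace hand-rolled convergence loops with fullyRecomputeLiveIns. A sketch of the loop the helper is assumed to encapsulate (see llvm/include/llvm/CodeGen/LivePhysRegs.h), shown only to make the intent of the one-line replacement explicit:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"

using namespace llvm;

// Recompute live-in lists for a set of blocks until none of them changes;
// recomputeLiveIns returns true when it modified a block's live-in list.
static void recomputeUntilFixedPoint(ArrayRef<MachineBasicBlock *> MBBs) {
  bool AnyChange;
  do {
    AnyChange = false;
    for (MachineBasicBlock *MBB : MBBs)
      AnyChange |= recomputeLiveIns(*MBB);
  } while (AnyChange);
}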
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 3b3057f..5c2579f 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -7818,7 +7818,7 @@ static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
break;
}
case Intrinsic::s390_vperm:
- SrcDemE = APInt(NumElts, 1);
+ SrcDemE = APInt(NumElts, -1);
break;
default:
llvm_unreachable("Unhandled intrinsic.");
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index a7fe329..8ddc742 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -43,6 +43,12 @@ bool TargetMachine::isLargeGlobalValue(const GlobalValue *GVal) const {
if (getTargetTriple().getArch() != Triple::x86_64)
return false;
+ // Remaining logic below is ELF-specific. For other object file formats where
+ // the large code model is mostly used for JIT compilation, just look at the
+ // code model.
+ if (!getTargetTriple().isOSBinFormatELF())
+ return getCodeModel() == CodeModel::Large;
+
auto *GO = GVal->getAliaseeObject();
// Be conservative if we can't find an underlying GlobalObject.
@@ -51,9 +57,20 @@ bool TargetMachine::isLargeGlobalValue(const GlobalValue *GVal) const {
auto *GV = dyn_cast<GlobalVariable>(GO);
+ auto IsPrefix = [](StringRef Name, StringRef Prefix) {
+ return Name.consume_front(Prefix) && (Name.empty() || Name[0] == '.');
+ };
+
// Functions/GlobalIFuncs are only large under the large code model.
- if (!GV)
+ if (!GV) {
+ // Handle explicit sections as we do for GlobalVariables with an explicit
+ // section, see comments below.
+ if (GO->hasSection()) {
+ StringRef Name = GO->getSection();
+ return IsPrefix(Name, ".ltext");
+ }
return getCodeModel() == CodeModel::Large;
+ }
if (GV->isThreadLocal())
return false;
@@ -73,11 +90,8 @@ bool TargetMachine::isLargeGlobalValue(const GlobalValue *GVal) const {
// data sections. The code model attribute overrides this above.
if (GV->hasSection()) {
StringRef Name = GV->getSection();
- auto IsPrefix = [&](StringRef Prefix) {
- StringRef S = Name;
- return S.consume_front(Prefix) && (S.empty() || S[0] == '.');
- };
- return IsPrefix(".lbss") || IsPrefix(".ldata") || IsPrefix(".lrodata");
+ return IsPrefix(Name, ".lbss") || IsPrefix(Name, ".ldata") ||
+ IsPrefix(Name, ".lrodata");
}
// Respect large data threshold for medium and large code models.
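The explicit-section handling above treats a section as large only when its name is exactly the prefix or the prefix followed by '.'. A self-contained sketch of that rule with a few illustrative checks (names and helper are stand-ins, not LLVM API):

#include <cassert>
#include <string_view>

// A section name matches a "large" prefix only when the prefix is followed by
// nothing or by a '.', so ".ldata" and ".ldata.rel.ro" match, ".ldatafoo" does not.
static bool hasLargeSectionPrefix(std::string_view Name,
                                  std::string_view Prefix) {
  if (Name.substr(0, Prefix.size()) != Prefix)
    return false;
  Name.remove_prefix(Prefix.size());
  return Name.empty() || Name.front() == '.';
}

int main() {
  assert(hasLargeSectionPrefix(".ldata", ".ldata"));
  assert(hasLargeSectionPrefix(".ldata.rel.ro", ".ldata"));
  assert(!hasLargeSectionPrefix(".ldatafoo", ".ldata"));
  assert(!hasLargeSectionPrefix(".data", ".ldata"));
  return 0;
}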
diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
index e7c9e60..9e85424 100644
--- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
@@ -13,10 +13,13 @@
#include "X86RegisterBankInfo.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/IntrinsicsX86.h"
#define GET_TARGET_REGBANK_IMPL
#include "X86GenRegisterBank.inc"
@@ -68,6 +71,98 @@ X86RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
llvm_unreachable("Unsupported register kind yet.");
}
+// \returns true if a given intrinsic only uses and defines FPRs.
+static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI) {
+ // TODO: Add more intrinsics.
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
+ default:
+ return false;
+ // SSE1
+ case Intrinsic::x86_sse_rcp_ss:
+ case Intrinsic::x86_sse_rcp_ps:
+ case Intrinsic::x86_sse_rsqrt_ss:
+ case Intrinsic::x86_sse_rsqrt_ps:
+ case Intrinsic::x86_sse_min_ss:
+ case Intrinsic::x86_sse_min_ps:
+ case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse_max_ps:
+ return true;
+ }
+ return false;
+}
+
+bool X86RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
+ unsigned Op = MI.getOpcode();
+ if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
+ return true;
+
+ // Do we have an explicit floating point instruction?
+ if (isPreISelGenericFloatingPointOpcode(Op))
+ return true;
+
+ // No. Check if we have a copy-like instruction. If we do, then we could
+ // still be fed by floating point instructions.
+ if (Op != TargetOpcode::COPY && !MI.isPHI() &&
+ !isPreISelGenericOptimizationHint(Op))
+ return false;
+
+ // Check if we already know the register bank.
+ auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
+ if (RB == &getRegBank(X86::PSRRegBankID))
+ return true;
+ if (RB == &getRegBank(X86::GPRRegBankID))
+ return false;
+
+ // We don't know anything.
+ //
+  // If we have a phi, we may be able to infer that it will be assigned an FP
+  // type based on its inputs.
+ if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
+ return false;
+
+ return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
+ return Op.isReg() &&
+ onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
+ });
+}
+
+bool X86RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_LROUND:
+ case TargetOpcode::G_LLROUND:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ return true;
+ default:
+ break;
+ }
+ return hasFPConstraints(MI, MRI, TRI, Depth);
+}
+
+bool X86RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP:
+ return true;
+ default:
+ break;
+ }
+ return hasFPConstraints(MI, MRI, TRI, Depth);
+}
+
X86GenRegisterBankInfo::PartialMappingIdx
X86GenRegisterBankInfo::getPartialMappingIdx(const MachineInstr &MI,
const LLT &Ty, bool isFP) {
@@ -180,11 +275,13 @@ X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI,
const RegisterBankInfo::InstructionMapping &
X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned Opc = MI.getOpcode();
- // Try the default logic for non-generic instructions that are either copies
- // or already have some operands assigned to banks.
+ // Try the default logic for non-generic instructions that are either
+ // copies or already have some operands assigned to banks.
if (!isPreISelGenericOpcode(Opc) || Opc == TargetOpcode::G_PHI) {
const InstructionMapping &Mapping = getInstrMappingImpl(MI);
if (Mapping.isValid())
@@ -221,13 +318,14 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC:
case TargetOpcode::G_FCONSTANT:
- // Instruction having only floating-point operands (all scalars in VECRReg)
+ // Instruction having only floating-point operands (all scalars in
+ // VECRReg)
getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx);
break;
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_FPTOSI: {
- // Some of the floating-point instructions have mixed GPR and FP operands:
- // fine-tune the computed mapping.
+ // Some of the floating-point instructions have mixed GPR and FP
+ // operands: fine-tune the computed mapping.
auto &Op0 = MI.getOperand(0);
auto &Op1 = MI.getOperand(1);
const LLT Ty0 = MRI.getType(Op0.getReg());
@@ -271,9 +369,36 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ isFPTrunc || isFPAnyExt,
OpRegBankIdx);
- } break;
+ break;
+ }
+ case TargetOpcode::G_LOAD: {
+ // Check if that load feeds fp instructions.
+ // In that case, we want the default mapping to be on FPR
+ // instead of blindly mapping every scalar to GPR.
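+ // For example, a load whose only user is a G_FADD ends up with the FPR
+ // mapping rather than the GPR one.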
+ bool IsFP = any_of(MRI.use_nodbg_instructions(cast<GLoad>(MI).getDstReg()),
+ [&](const MachineInstr &UseMI) {
+ // If we have at least one direct use in a FP
+ // instruction, assume this was a floating point load
+ // in the IR. If it was not, we would have had a
+ // bitcast before reaching that instruction.
+ return onlyUsesFP(UseMI, MRI, TRI);
+ });
+ getInstrPartialMappingIdxs(MI, MRI, IsFP, OpRegBankIdx);
+ break;
+ }
+ case TargetOpcode::G_STORE: {
+ // Check if that store is fed by fp instructions.
+ Register VReg = cast<GStore>(MI).getValueReg();
+ if (!VReg)
+ break;
+ MachineInstr *DefMI = MRI.getVRegDef(VReg);
+ bool IsFP = onlyDefinesFP(*DefMI, MRI, TRI);
+ getInstrPartialMappingIdxs(MI, MRI, IsFP, OpRegBankIdx);
+ break;
+ }
default:
- // Track the bank of each register, use NotFP mapping (all scalars in GPRs)
+ // Track the bank of each register, use NotFP mapping (all scalars in
+ // GPRs)
getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ false, OpRegBankIdx);
break;
}
diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
index 989c595..8f38e71 100644
--- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
@@ -62,6 +62,22 @@ private:
const SmallVectorImpl<PartialMappingIdx> &OpRegBankIdx,
SmallVectorImpl<const ValueMapping *> &OpdsMapping);
+ // Maximum recursion depth for hasFPConstraints.
+ const unsigned MaxFPRSearchDepth = 2;
+
+ /// \returns true if \p MI only uses and defines FPRs.
+ bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth = 0) const;
+
+ /// \returns true if \p MI only uses FPRs.
+ bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+
+ /// \returns true if \p MI only defines FPRs.
+ bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+
public:
X86RegisterBankInfo(const TargetRegisterInfo &TRI);
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index d914e1b..4521401 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -885,10 +885,7 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
}
// Update Live In information
- bool anyChange = false;
- do {
- anyChange = recomputeLiveIns(*tailMBB) || recomputeLiveIns(*testMBB);
- } while (anyChange);
+ fullyRecomputeLiveIns({tailMBB, testMBB});
}
void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
@@ -1380,11 +1377,7 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
footMBB->addSuccessor(&MBB);
}
- bool anyChange = false;
- do {
- anyChange = recomputeLiveIns(*footMBB) || recomputeLiveIns(*bodyMBB) ||
- recomputeLiveIns(*headMBB) || recomputeLiveIns(MBB);
- } while (anyChange);
+ fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
}
} else {
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index edfa05a..27107f5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1276,6 +1276,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
}
+ if (Subtarget.hasGFNI()) {
+ setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
+ setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
+ setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
+ }
+
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
setOperationAction(ISD::ABS, MVT::v16i8, Legal);
setOperationAction(ISD::ABS, MVT::v8i16, Legal);
@@ -1286,11 +1293,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTLZ, VT, Custom);
}
- if (Subtarget.hasGFNI()) {
- setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
- setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
- }
-
// These might be better off as horizontal vector ops.
setOperationAction(ISD::ADD, MVT::i16, Custom);
setOperationAction(ISD::ADD, MVT::i32, Custom);
@@ -3530,14 +3532,6 @@ static bool isAnyZero(ArrayRef<int> Mask) {
return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
}
-/// Return true if the value of any element in Mask is the zero or undef
-/// sentinel values.
-static bool isAnyZeroOrUndef(ArrayRef<int> Mask) {
- return llvm::any_of(Mask, [](int M) {
- return M == SM_SentinelZero || M == SM_SentinelUndef;
- });
-}
-
/// Return true if Val is undef or if its value falls within the
/// specified range (L, H].
static bool isUndefOrInRange(int Val, int Low, int Hi) {
@@ -3594,6 +3588,17 @@ static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
return llvm::all_of(Mask.slice(Pos, Size), isUndefOrZero);
}
+/// Return true if every element of a single input is referenced by the shuffle
+/// mask, i.e. it just permutes them all.
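+/// For example, the mask <3,2,1,0> is a complete permute of a 4-element
+/// input, while <0,0,1,2> is not, since input element 3 is never referenced.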
+static bool isCompletePermute(ArrayRef<int> Mask) {
+ unsigned NumElts = Mask.size();
+ APInt DemandedElts = APInt::getZero(NumElts);
+ for (int M : Mask)
+ if (isInRange(M, 0, NumElts))
+ DemandedElts.setBit(M);
+ return DemandedElts.isAllOnes();
+}
+
/// Helper function to test whether a shuffle mask could be
/// simplified by widening the elements being shuffled.
///
@@ -10584,15 +10589,6 @@ static bool matchShuffleAsBlend(MVT VT, SDValue V1, SDValue V2,
return true;
}
-static uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size,
- int Scale) {
- uint64_t ScaledMask = 0;
- for (int i = 0; i != Size; ++i)
- if (BlendMask & (1ull << i))
- ScaledMask |= ((1ull << Scale) - 1) << (i * Scale);
- return ScaledMask;
-}
-
/// Try to emit a blend instruction for a shuffle.
///
/// This doesn't do any checks for the availability of instructions for blending
@@ -31317,16 +31313,18 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
if (VT.is256BitVector() && !Subtarget.hasInt256())
return splitVectorIntUnary(Op, DAG, DL);
- // Lower i32/i64 as vXi8 BITREVERSE + BSWAP
+ // Lower i8/i16/i32/i64 as vXi8 BITREVERSE + BSWAP
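+ // (For i8 no BSWAP is needed, since a single byte has nothing to swap.)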
if (!VT.isVector()) {
- assert((VT == MVT::i32 || VT == MVT::i64) && "Only tested for i32/i64");
+ assert(
+ (VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i16 || VT == MVT::i8) &&
+ "Only tested for i8/i16/i32/i64");
MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
Res = DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8,
DAG.getBitcast(MVT::v16i8, Res));
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
DAG.getBitcast(VecVT, Res), DAG.getIntPtrConstant(0, DL));
- return DAG.getNode(ISD::BSWAP, DL, VT, Res);
+ return (VT == MVT::i8) ? Res : DAG.getNode(ISD::BSWAP, DL, VT, Res);
}
assert(VT.isVector() && VT.getSizeInBits() >= 128);
@@ -40532,14 +40530,15 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
MVT SrcVT = N0.getOperand(0).getSimpleValueType();
if ((VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
SrcVT.getScalarSizeInBits() >= 32) {
- unsigned BlendMask = N.getConstantOperandVal(2);
unsigned Size = VT.getVectorNumElements();
- unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
- BlendMask = scaleVectorShuffleBlendMask(BlendMask, Size, Scale);
+ unsigned NewSize = SrcVT.getVectorNumElements();
+ APInt BlendMask = N.getConstantOperandAPInt(2).zextOrTrunc(Size);
+ APInt NewBlendMask = APIntOps::ScaleBitMask(BlendMask, NewSize);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0),
N1.getOperand(0),
- DAG.getTargetConstant(BlendMask, DL, MVT::i8)));
+ DAG.getTargetConstant(NewBlendMask.getZExtValue(),
+ DL, MVT::i8)));
}
}
return SDValue();
@@ -46465,24 +46464,15 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
if (NumElts <= CmpBits &&
getTargetShuffleInputs(peekThroughBitcasts(Vec), ShuffleInputs,
ShuffleMask, DAG) &&
- ShuffleInputs.size() == 1 && !isAnyZeroOrUndef(ShuffleMask) &&
+ ShuffleInputs.size() == 1 && isCompletePermute(ShuffleMask) &&
ShuffleInputs[0].getValueSizeInBits() == VecVT.getSizeInBits() &&
scaleShuffleElements(ShuffleMask, NumElts, ScaledMaskUnused)) {
- unsigned NumShuffleElts = ShuffleMask.size();
- APInt DemandedElts = APInt::getZero(NumShuffleElts);
- for (int M : ShuffleMask) {
- assert(0 <= M && M < (int)NumShuffleElts && "Bad unary shuffle index");
- DemandedElts.setBit(M);
- }
- if (DemandedElts.isAllOnes()) {
- SDLoc DL(EFLAGS);
- SDValue Result = DAG.getBitcast(VecVT, ShuffleInputs[0]);
- Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
- Result =
- DAG.getZExtOrTrunc(Result, DL, EFLAGS.getOperand(0).getValueType());
- return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
- EFLAGS.getOperand(1));
- }
+ SDLoc DL(EFLAGS);
+ SDValue Result = DAG.getBitcast(VecVT, ShuffleInputs[0]);
+ Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
+ Result =
+ DAG.getZExtOrTrunc(Result, DL, EFLAGS.getOperand(0).getValueType());
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result, EFLAGS.getOperand(1));
}
// MOVMSKPS(V) !=/== 0 -> TESTPS(V,V)
diff --git a/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
index 27a0c88..e27aa41 100644
--- a/llvm/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -58,8 +58,8 @@ let SchedRW = [WriteCMOV.Folded, WriteCMOV.ReadAfterFold] in {
}
let SchedRW = [WriteCMOV, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault],
Predicates = [HasCMOV, HasCF, In64BitMode], mayStore = 1 in
- def mr : ITy<0x40, MRMDestMemCC, t, (outs t.MemOperand:$dst),
- (ins t.RegClass:$src1, ccode:$cond),
+ def mr : ITy<0x40, MRMDestMemCC, t, (outs),
+ (ins t.MemOperand:$dst, t.RegClass:$src1, ccode:$cond),
"cfcmov${cond}", unaryop_ndd_args, []>, UseEFLAGS, NF;
}
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index f27d8d6..41b66aa 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -2453,7 +2453,7 @@ bool AANonNull::isImpliedByIR(Attributor &A, const IRPosition &IRP,
if (llvm::any_of(Worklist, [&](AA::ValueAndContext VAC) {
return !isKnownNonZero(
- VAC.getValue(), /*Depth=*/0,
+ VAC.getValue(),
SimplifyQuery(A.getDataLayout(), DT, AC, VAC.getCtxI()));
}))
return false;
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 14612b2..7ebf265 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1175,7 +1175,7 @@ static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
Value *RetVal = FlowsToReturn[i];
// If this value is locally known to be non-null, we're good
- if (isKnownNonZero(RetVal, /*Depth=*/0, DL))
+ if (isKnownNonZero(RetVal, DL))
continue;
// Otherwise, we need to look upwards since we can't make any local
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 07c50d8..c59b867 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -988,7 +988,7 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
if (C->isOne()) {
if (match(Op0, m_ZExt(m_Add(m_Value(X), m_AllOnes())))) {
const SimplifyQuery Q = SQ.getWithInstruction(&Add);
- if (llvm::isKnownNonZero(X, /*Depth=*/0, Q))
+ if (llvm::isKnownNonZero(X, Q))
return new ZExtInst(X, Ty);
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 2c0c4ee..0f4fbf5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1039,9 +1039,9 @@ static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp,
match(ZeroCmpOp, m_c_Add(m_Specific(A), m_Value(B))) &&
(ZeroICmp->hasOneUse() || UnsignedICmp->hasOneUse())) {
auto GetKnownNonZeroAndOther = [&](Value *&NonZero, Value *&Other) {
- if (!isKnownNonZero(NonZero, /*Depth=*/0, Q))
+ if (!isKnownNonZero(NonZero, Q))
std::swap(NonZero, Other);
- return isKnownNonZero(NonZero, /*Depth=*/0, Q);
+ return isKnownNonZero(NonZero, Q);
};
// Given ZeroCmpOp = (A + B)
@@ -2538,6 +2538,8 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
}
}
+ // and(shl(zext(X), Y), SignMask) -> and(sext(X), SignMask)
+ // where Y is a valid shift amount.
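+ // For example, with X : i8 zero-extended to i32 and Y == 24, bit 31 of the
+ // shifted value is the sign bit of X, which is exactly what sext(X) places
+ // there.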
if (match(&I, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X)), m_Value(Y))),
m_SignMask())) &&
match(Y, m_SpecificInt_ICMP(
@@ -2546,15 +2548,7 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
Ty->getScalarSizeInBits() -
X->getType()->getScalarSizeInBits())))) {
auto *SExt = Builder.CreateSExt(X, Ty, X->getName() + ".signext");
- auto *SanitizedSignMask = cast<Constant>(Op1);
- // We must be careful with the undef elements of the sign bit mask, however:
- // the mask elt can be undef iff the shift amount for that lane was undef,
- // otherwise we need to sanitize undef masks to zero.
- SanitizedSignMask = Constant::replaceUndefsWith(
- SanitizedSignMask, ConstantInt::getNullValue(Ty->getScalarType()));
- SanitizedSignMask =
- Constant::mergeUndefsWith(SanitizedSignMask, cast<Constant>(Y));
- return BinaryOperator::CreateAnd(SExt, SanitizedSignMask);
+ return BinaryOperator::CreateAnd(SExt, Op1);
}
if (Instruction *Z = narrowMaskedBinOp(I))
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 20f51c8..60e4be8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -601,8 +601,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
// then change the 'ZeroIsPoison' parameter to 'true'
// because we know the zero behavior can't affect the result.
if (!Known.One.isZero() ||
- isKnownNonZero(Op0, /*Depth=*/0,
- IC.getSimplifyQuery().getWithInstruction(&II))) {
+ isKnownNonZero(Op0, IC.getSimplifyQuery().getWithInstruction(&II))) {
if (!match(II.getArgOperand(1), m_One()))
return IC.replaceOperand(II, 1, IC.Builder.getTrue());
}
@@ -1774,6 +1773,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *I = moveAddAfterMinMax(II, Builder))
return I;
+ // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
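+ // For example, umin(X & -8, Y & -8) --> umin(X, Y) & -8: masking with -8
+ // rounds down to a multiple of 8, which is monotonic and so commutes with
+ // the min/max.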
+ const APInt *RHSC;
+ if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
+ match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
+ return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
+ ConstantInt::get(II->getType(), *RHSC));
+
// smax(X, -X) --> abs(X)
// smin(X, -X) --> -abs(X)
// umax(X, -X) --> -abs(X)
@@ -1815,7 +1821,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return NewMinMax;
// Try to fold minmax with constant RHS based on range information
- const APInt *RHSC;
if (match(I1, m_APIntAllowUndef(RHSC))) {
ICmpInst::Predicate Pred =
ICmpInst::getNonStrictPredicate(MinMaxIntrinsic::getPredicate(IID));
@@ -2061,8 +2066,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// See if we can deduce non-null.
if (!CI.hasRetAttr(Attribute::NonNull) &&
(Known.isNonZero() ||
- isKnownNonZero(II, /*Depth=*/0,
- getSimplifyQuery().getWithInstruction(II)))) {
+ isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
CI.addRetAttr(Attribute::NonNull);
Changed = true;
}
@@ -3408,6 +3412,15 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return I;
break;
}
+ case Intrinsic::threadlocal_address: {
+ Align MinAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
+ MaybeAlign Align = II->getRetAlign();
+ if (MinAlign > Align.valueOrOne()) {
+ II->addRetAttr(Attribute::getWithAlignment(II->getContext(), MinAlign));
+ return II;
+ }
+ break;
+ }
default: {
// Handle target specific intrinsics
std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
@@ -3649,8 +3662,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
for (Value *V : Call.args()) {
if (V->getType()->isPointerTy() &&
!Call.paramHasAttr(ArgNo, Attribute::NonNull) &&
- isKnownNonZero(V, /*Depth=*/0,
- getSimplifyQuery().getWithInstruction(&Call)))
+ isKnownNonZero(V, getSimplifyQuery().getWithInstruction(&Call)))
ArgNos.push_back(ArgNo);
ArgNo++;
}
@@ -3830,7 +3842,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
// isKnownNonNull -> nonnull attribute
if (!GCR.hasRetAttr(Attribute::NonNull) &&
- isKnownNonZero(DerivedPtr, /*Depth=*/0,
+ isKnownNonZero(DerivedPtr,
getSimplifyQuery().getWithInstruction(&Call))) {
GCR.addRetAttr(Attribute::NonNull);
// We discovered new fact, re-check users.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 437e9b9..d242d3f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1977,11 +1977,25 @@ Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
}
Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
- return commonCastTransforms(CI);
+ if (Instruction *R = commonCastTransforms(CI))
+ return R;
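+ // If the operand is known non-negative, record that with the nneg flag;
+ // an nneg uitofp can be treated like sitofp by later folds.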
+ if (!CI.hasNonNeg() && isKnownNonNegative(CI.getOperand(0), SQ)) {
+ CI.setNonNeg();
+ return &CI;
+ }
+ return nullptr;
}
Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
- return commonCastTransforms(CI);
+ if (Instruction *R = commonCastTransforms(CI))
+ return R;
+ if (isKnownNonNegative(CI.getOperand(0), SQ)) {
+ auto UI =
+ CastInst::Create(Instruction::UIToFP, CI.getOperand(0), CI.getType());
+ UI->setNonNeg(true);
+ return UI;
+ }
+ return nullptr;
}
Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 90550cd..de909077 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1273,12 +1273,12 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
// if X non-zero and NoOverflow(X * Y)
// (icmp eq/ne Y)
- if (!XKnown.One.isZero() || isKnownNonZero(X, /*Depth=*/0, Q))
+ if (!XKnown.One.isZero() || isKnownNonZero(X, Q))
return new ICmpInst(Pred, Y, Cmp.getOperand(1));
// if Y non-zero and NoOverflow(X * Y)
// (icmp eq/ne X)
- if (!YKnown.One.isZero() || isKnownNonZero(Y, /*Depth=*/0, Q))
+ if (!YKnown.One.isZero() || isKnownNonZero(Y, Q))
return new ICmpInst(Pred, X, Cmp.getOperand(1));
}
// Note, we are skipping cases:
@@ -3087,7 +3087,7 @@ Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
// (X + -1) <u C --> X <=u C (if X is never null)
if (Pred == CmpInst::ICMP_ULT && C2->isAllOnes()) {
const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
- if (llvm::isKnownNonZero(X, /*Depth=*/0, Q))
+ if (llvm::isKnownNonZero(X, Q))
return new ICmpInst(ICmpInst::ICMP_ULE, X, ConstantInt::get(Ty, C));
}
@@ -4275,7 +4275,7 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst::Predicate Pred, Value *Op0,
// Look for: x & ~Mask pred ~Mask
if (isMaskOrZero(X, /*Not=*/true, Q)) {
- return !ICmpInst::isSigned(Pred) || isKnownNonZero(X, /*Depth=*/0, Q);
+ return !ICmpInst::isSigned(Pred) || isKnownNonZero(X, Q);
}
return false;
}
@@ -4779,7 +4779,7 @@ static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q,
// icmp (X ^ Y_NonZero) s>= X --> icmp (X ^ Y_NonZero) s> X
// icmp (X ^ Y_NonZero) s<= X --> icmp (X ^ Y_NonZero) s< X
CmpInst::Predicate PredOut = CmpInst::getStrictPredicate(Pred);
- if (PredOut != Pred && isKnownNonZero(A, /*Depth=*/0, Q))
+ if (PredOut != Pred && isKnownNonZero(A, Q))
return new ICmpInst(PredOut, Op0, Op1);
return nullptr;
@@ -5062,11 +5062,11 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
return new ICmpInst(Pred, C, D);
// (A - B) u>=/u< A --> B u>/u<= A iff B != 0
if (A == Op1 && (Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_ULT) &&
- isKnownNonZero(B, /*Depth=*/0, Q))
+ isKnownNonZero(B, Q))
return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), B, A);
// C u<=/u> (C - D) --> C u</u>= D iff B != 0
if (C == Op0 && (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) &&
- isKnownNonZero(D, /*Depth=*/0, Q))
+ isKnownNonZero(D, Q))
return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), C, D);
// icmp (A-B), (C-B) -> icmp A, C for equalities or if there is no overflow.
@@ -5108,13 +5108,13 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
// X * Z eq/ne Y * Z -> X eq/ne Y
if (ZKnown.countMaxTrailingZeros() == 0)
return new ICmpInst(Pred, X, Y);
- NonZero = !ZKnown.One.isZero() || isKnownNonZero(Z, /*Depth=*/0, Q);
+ NonZero = !ZKnown.One.isZero() || isKnownNonZero(Z, Q);
// if Z != 0 and nsw(X * Z) and nsw(Y * Z)
// X * Z eq/ne Y * Z -> X eq/ne Y
if (NonZero && BO0 && BO1 && Op0HasNSW && Op1HasNSW)
return new ICmpInst(Pred, X, Y);
} else
- NonZero = isKnownNonZero(Z, /*Depth=*/0, Q);
+ NonZero = isKnownNonZero(Z, Q);
// If Z != 0 and nuw(X * Z) and nuw(Y * Z)
// X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y
@@ -8097,6 +8097,14 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
return new FCmpInst(I.getSwappedPredicate(), X, NegC, "", &I);
}
+ // fcmp (fadd X, 0.0), Y --> fcmp X, Y
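+ // Adding a zero can only turn -0.0 into +0.0 (or leave the value unchanged),
+ // and fcmp treats the two zeroes as equal, so the addend can be dropped.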
+ if (match(Op0, m_FAdd(m_Value(X), m_AnyZeroFP())))
+ return new FCmpInst(Pred, X, Op1, "", &I);
+
+ // fcmp X, (fadd Y, 0.0) --> fcmp X, Y
+ if (match(Op1, m_FAdd(m_Value(Y), m_AnyZeroFP())))
+ return new FCmpInst(Pred, Op0, Y, "", &I);
+
if (match(Op0, m_FPExt(m_Value(X)))) {
// fcmp (fpext X), (fpext Y) -> fcmp X, Y
if (match(Op1, m_FPExt(m_Value(Y))) && X->getType() == Y->getType())
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 4dc1319..7b86fcd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -319,19 +319,12 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
}
// abs(X) * abs(X) -> X * X
- // nabs(X) * nabs(X) -> X * X
- if (Op0 == Op1) {
- Value *X, *Y;
- SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
- if (SPF == SPF_ABS || SPF == SPF_NABS)
- return BinaryOperator::CreateMul(X, X);
-
- if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
- return BinaryOperator::CreateMul(X, X);
- }
+ Value *X;
+ if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
+ return BinaryOperator::CreateMul(X, X);
{
- Value *X, *Y;
+ Value *Y;
// abs(X) * abs(Y) -> abs(X * Y)
if (I.hasNoSignedWrap() &&
match(Op0,
@@ -344,7 +337,7 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
}
// -X * C --> X * -C
- Value *X, *Y;
+ Value *Y;
Constant *Op1C;
if (match(Op0, m_Neg(m_Value(X))) && match(Op1, m_Constant(Op1C)))
return BinaryOperator::CreateMul(X, ConstantExpr::getNeg(Op1C));
@@ -631,31 +624,38 @@ Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
Value *Op1 = I.getOperand(1);
Value *X, *Y;
Constant *C;
+ BinaryOperator *Op0BinOp;
// Reassociate constant RHS with another constant to form constant
// expression.
- if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP()) {
+ if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP() &&
+ match(Op0, m_AllowReassoc(m_BinOp(Op0BinOp)))) {
+ // Everything in this scope folds I with Op0, intersecting their FMF.
+ FastMathFlags FMF = I.getFastMathFlags() & Op0BinOp->getFastMathFlags();
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ Builder.setFastMathFlags(FMF);
Constant *C1;
if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) {
// (C1 / X) * C --> (C * C1) / X
Constant *CC1 =
ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL);
if (CC1 && CC1->isNormalFP())
- return BinaryOperator::CreateFDivFMF(CC1, X, &I);
+ return BinaryOperator::CreateFDivFMF(CC1, X, FMF);
}
if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
+ // FIXME: This seems like it should also be checking for arcp
// (X / C1) * C --> X * (C / C1)
Constant *CDivC1 =
ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C1, DL);
if (CDivC1 && CDivC1->isNormalFP())
- return BinaryOperator::CreateFMulFMF(X, CDivC1, &I);
+ return BinaryOperator::CreateFMulFMF(X, CDivC1, FMF);
// If the constant was a denormal, try reassociating differently.
// (X / C1) * C --> X / (C1 / C)
Constant *C1DivC =
ConstantFoldBinaryOpOperands(Instruction::FDiv, C1, C, DL);
if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP())
- return BinaryOperator::CreateFDivFMF(X, C1DivC, &I);
+ return BinaryOperator::CreateFDivFMF(X, C1DivC, FMF);
}
// We do not need to match 'fadd C, X' and 'fsub X, C' because they are
@@ -665,26 +665,33 @@ Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
// (X + C1) * C --> (X * C) + (C * C1)
if (Constant *CC1 =
ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) {
- Value *XC = Builder.CreateFMulFMF(X, C, &I);
- return BinaryOperator::CreateFAddFMF(XC, CC1, &I);
+ Value *XC = Builder.CreateFMul(X, C);
+ return BinaryOperator::CreateFAddFMF(XC, CC1, FMF);
}
}
if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) {
// (C1 - X) * C --> (C * C1) - (X * C)
if (Constant *CC1 =
ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) {
- Value *XC = Builder.CreateFMulFMF(X, C, &I);
- return BinaryOperator::CreateFSubFMF(CC1, XC, &I);
+ Value *XC = Builder.CreateFMul(X, C);
+ return BinaryOperator::CreateFSubFMF(CC1, XC, FMF);
}
}
}
Value *Z;
if (match(&I,
- m_c_FMul(m_OneUse(m_FDiv(m_Value(X), m_Value(Y))), m_Value(Z)))) {
- // Sink division: (X / Y) * Z --> (X * Z) / Y
- Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I);
- return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I);
+ m_c_FMul(m_AllowReassoc(m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))),
+ m_Value(Z)))) {
+ BinaryOperator *DivOp = cast<BinaryOperator>(((Z == Op0) ? Op1 : Op0));
+ FastMathFlags FMF = I.getFastMathFlags() & DivOp->getFastMathFlags();
+ if (FMF.allowReassoc()) {
+ // Sink division: (X / Y) * Z --> (X * Z) / Y
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ Builder.setFastMathFlags(FMF);
+ auto *NewFMul = Builder.CreateFMul(X, Z);
+ return BinaryOperator::CreateFDivFMF(NewFMul, Y, FMF);
+ }
}
// sqrt(X) * sqrt(Y) -> sqrt(X * Y)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 9838e2a..52803e9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1537,8 +1537,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) {
Instruction *CtxI = PN.getIncomingBlock(I)->getTerminator();
Value *VA = PN.getIncomingValue(I);
- if (isKnownNonZero(VA, 0,
- getSimplifyQuery().getWithInstruction(CtxI))) {
+ if (isKnownNonZero(VA, getSimplifyQuery().getWithInstruction(CtxI))) {
if (!NonZeroConst)
NonZeroConst = getAnyNonZeroConstInt(PN);
if (NonZeroConst != VA) {
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4cfafa7..5a144cc 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1431,7 +1431,7 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
if (OpsKnown[OpNo].hasKnownBits() &&
OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
return true;
- return isKnownNonZero(IntOps[OpNo], /*Depth=*/0, SQ);
+ return isKnownNonZero(IntOps[OpNo], SQ);
};
auto IsNonNeg = [&](unsigned OpNo) -> bool {
@@ -3572,6 +3572,38 @@ Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
return nullptr;
}
+// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
+// we can prove that both (switch C) and (switch X) go to the default when cond
+// is false/true.
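+// For example, for (switch (select (icmp ult X, 4), 10, X)) with cases 4, 5
+// and 6: value 10 is not a case and goes to the default, and every case value
+// lies outside [0, 4), so whenever the condition is true the switch on X also
+// reaches the default. The switch can therefore use X directly.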
+static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI,
+ SelectInst *Select,
+ bool IsTrueArm) {
+ unsigned CstOpIdx = IsTrueArm ? 1 : 2;
+ auto *C = dyn_cast<ConstantInt>(Select->getOperand(CstOpIdx));
+ if (!C)
+ return nullptr;
+
+ BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
+ if (CstBB != SI.getDefaultDest())
+ return nullptr;
+ Value *X = Select->getOperand(3 - CstOpIdx);
+ ICmpInst::Predicate Pred;
+ const APInt *RHSC;
+ if (!match(Select->getCondition(),
+ m_ICmp(Pred, m_Specific(X), m_APInt(RHSC))))
+ return nullptr;
+ if (IsTrueArm)
+ Pred = ICmpInst::getInversePredicate(Pred);
+
+ // See whether we can replace the select with X
+ ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
+ for (auto Case : SI.cases())
+ if (!CR.contains(Case.getCaseValue()->getValue()))
+ return nullptr;
+
+ return X;
+}
+
Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
Value *Cond = SI.getCondition();
Value *Op0;
@@ -3645,6 +3677,16 @@ Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
}
}
+ // Fold switch(select cond, X, Y) into switch(X/Y) if possible
+ if (auto *Select = dyn_cast<SelectInst>(Cond)) {
+ if (Value *V =
+ simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
+ return replaceOperand(SI, 0, V);
+ if (Value *V =
+ simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
+ return replaceOperand(SI, 0, V);
+ }
+
KnownBits Known = computeKnownBits(Cond, 0, &SI);
unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index a72b0ee..ee3531b 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1281,7 +1281,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// ignored.
return;
}
- if (llvm::isKnownNonZero(ConvertedShadow, /*Depth=*/0, DL)) {
+ if (llvm::isKnownNonZero(ConvertedShadow, DL)) {
// Copy origin as the value is definitely uninitialized.
paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
OriginAlignment);
@@ -1427,7 +1427,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Skip, value is initialized or const shadow is ignored.
continue;
}
- if (llvm::isKnownNonZero(ConvertedShadow, /*Depth=*/0, DL)) {
+ if (llvm::isKnownNonZero(ConvertedShadow, DL)) {
// Report as the value is definitely uninitialized.
insertWarningFn(IRB, ShadowData.Origin);
if (!MS.Recover)
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 200bad2..fcc82ea 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -87,10 +87,6 @@ bool llvm::applyDebugifyMetadata(
return false;
}
- bool NewDebugMode = M.IsNewDbgInfoFormat;
- if (NewDebugMode)
- M.convertFromNewDbgValues();
-
DIBuilder DIB(M);
LLVMContext &Ctx = M.getContext();
auto *Int32Ty = Type::getInt32Ty(Ctx);
@@ -214,9 +210,6 @@ bool llvm::applyDebugifyMetadata(
if (!M.getModuleFlag(DIVersionKey))
M.addModuleFlag(Module::Warning, DIVersionKey, DEBUG_METADATA_VERSION);
- if (NewDebugMode)
- M.convertToNewDbgValues();
-
return true;
}
@@ -311,10 +304,6 @@ bool llvm::collectDebugInfoMetadata(Module &M,
return false;
}
- bool NewDebugMode = M.IsNewDbgInfoFormat;
- if (NewDebugMode)
- M.convertFromNewDbgValues();
-
uint64_t FunctionsCnt = DebugInfoBeforePass.DIFunctions.size();
// Visit each instruction.
for (Function &F : Functions) {
@@ -349,20 +338,23 @@ bool llvm::collectDebugInfoMetadata(Module &M,
// Collect dbg.values and dbg.declare.
if (DebugifyLevel > Level::Locations) {
- if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
+ auto HandleDbgVariable = [&](auto *DbgVar) {
if (!SP)
- continue;
+ return;
// Skip inlined variables.
- if (I.getDebugLoc().getInlinedAt())
- continue;
+ if (DbgVar->getDebugLoc().getInlinedAt())
+ return;
// Skip undef values.
- if (DVI->isKillLocation())
- continue;
+ if (DbgVar->isKillLocation())
+ return;
- auto *Var = DVI->getVariable();
+ auto *Var = DbgVar->getVariable();
DebugInfoBeforePass.DIVariables[Var]++;
- continue;
- }
+ };
+ for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
+ HandleDbgVariable(&DVR);
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
+ HandleDbgVariable(DVI);
}
// Skip debug instructions other than dbg.value and dbg.declare.
@@ -379,9 +371,6 @@ bool llvm::collectDebugInfoMetadata(Module &M,
}
}
- if (NewDebugMode)
- M.convertToNewDbgValues();
-
return true;
}
@@ -561,10 +550,6 @@ bool llvm::checkDebugInfoMetadata(Module &M,
return false;
}
- bool NewDebugMode = M.IsNewDbgInfoFormat;
- if (NewDebugMode)
- M.convertFromNewDbgValues();
-
// Map the debug info holding DIs after a pass.
DebugInfoPerPass DebugInfoAfterPass;
@@ -599,20 +584,23 @@ bool llvm::checkDebugInfoMetadata(Module &M,
// Collect dbg.values and dbg.declares.
if (DebugifyLevel > Level::Locations) {
- if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
+ auto HandleDbgVariable = [&](auto *DbgVar) {
if (!SP)
- continue;
+ return;
// Skip inlined variables.
- if (I.getDebugLoc().getInlinedAt())
- continue;
+ if (DbgVar->getDebugLoc().getInlinedAt())
+ return;
// Skip undef values.
- if (DVI->isKillLocation())
- continue;
+ if (DbgVar->isKillLocation())
+ return;
- auto *Var = DVI->getVariable();
+ auto *Var = DbgVar->getVariable();
DebugInfoAfterPass.DIVariables[Var]++;
- continue;
- }
+ };
+ for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
+ HandleDbgVariable(&DVR);
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
+ HandleDbgVariable(DVI);
}
// Skip debug instructions other than dbg.value and dbg.declare.
@@ -675,16 +663,14 @@ bool llvm::checkDebugInfoMetadata(Module &M,
// the debugging information from the previous pass.
DebugInfoBeforePass = DebugInfoAfterPass;
- if (NewDebugMode)
- M.convertToNewDbgValues();
-
LLVM_DEBUG(dbgs() << "\n\n");
return Result;
}
namespace {
-/// Return true if a mis-sized diagnostic is issued for \p DVI.
-bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) {
+/// Return true if a mis-sized diagnostic is issued for \p DbgVal.
+template <typename DbgValTy>
+bool diagnoseMisSizedDbgValue(Module &M, DbgValTy *DbgVal) {
// The size of a dbg.value's value operand should match the size of the
// variable it corresponds to.
//
@@ -693,22 +679,22 @@ bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) {
// For now, don't try to interpret anything more complicated than an empty
// DIExpression. Eventually we should try to handle OP_deref and fragments.
- if (DVI->getExpression()->getNumElements())
+ if (DbgVal->getExpression()->getNumElements())
return false;
- Value *V = DVI->getVariableLocationOp(0);
+ Value *V = DbgVal->getVariableLocationOp(0);
if (!V)
return false;
Type *Ty = V->getType();
uint64_t ValueOperandSize = getAllocSizeInBits(M, Ty);
- std::optional<uint64_t> DbgVarSize = DVI->getFragmentSizeInBits();
+ std::optional<uint64_t> DbgVarSize = DbgVal->getFragmentSizeInBits();
if (!ValueOperandSize || !DbgVarSize)
return false;
bool HasBadSize = false;
if (Ty->isIntegerTy()) {
- auto Signedness = DVI->getVariable()->getSignedness();
+ auto Signedness = DbgVal->getVariable()->getSignedness();
if (Signedness && *Signedness == DIBasicType::Signedness::Signed)
HasBadSize = ValueOperandSize < *DbgVarSize;
} else {
@@ -718,7 +704,7 @@ bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) {
if (HasBadSize) {
dbg() << "ERROR: dbg.value operand has size " << ValueOperandSize
<< ", but its variable has size " << *DbgVarSize << ": ";
- DVI->print(dbg());
+ DbgVal->print(dbg());
dbg() << "\n";
}
return HasBadSize;
@@ -735,10 +721,6 @@ bool checkDebugifyMetadata(Module &M,
return false;
}
- bool NewDebugMode = M.IsNewDbgInfoFormat;
- if (NewDebugMode)
- M.convertFromNewDbgValues();
-
auto getDebugifyOperand = [&](unsigned Idx) -> unsigned {
return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0))
->getZExtValue();
@@ -780,18 +762,23 @@ bool checkDebugifyMetadata(Module &M,
}
// Find missing variables and mis-sized debug values.
- for (Instruction &I : instructions(F)) {
- auto *DVI = dyn_cast<DbgValueInst>(&I);
- if (!DVI)
- continue;
-
+ auto CheckForMisSized = [&](auto *DbgVal) {
unsigned Var = ~0U;
- (void)to_integer(DVI->getVariable()->getName(), Var, 10);
+ (void)to_integer(DbgVal->getVariable()->getName(), Var, 10);
assert(Var <= OriginalNumVars && "Unexpected name for DILocalVariable");
- bool HasBadSize = diagnoseMisSizedDbgValue(M, DVI);
+ bool HasBadSize = diagnoseMisSizedDbgValue(M, DbgVal);
if (!HasBadSize)
MissingVars.reset(Var - 1);
HasErrors |= HasBadSize;
+ };
+ for (Instruction &I : instructions(F)) {
+ for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
+ if (DVR.isDbgValue() || DVR.isDbgAssign())
+ CheckForMisSized(&DVR);
+ auto *DVI = dyn_cast<DbgValueInst>(&I);
+ if (!DVI)
+ continue;
+ CheckForMisSized(DVI);
}
}
@@ -820,9 +807,6 @@ bool checkDebugifyMetadata(Module &M,
if (Strip)
Ret = stripDebugifyMetadata(M);
- if (NewDebugMode)
- M.convertToNewDbgValues();
-
return Ret;
}
@@ -1052,10 +1036,6 @@ FunctionPass *createCheckDebugifyFunctionPass(
PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
ModuleAnalysisManager &) {
- bool NewDebugMode = M.IsNewDbgInfoFormat;
- if (NewDebugMode)
- M.convertFromNewDbgValues();
-
if (Mode == DebugifyMode::SyntheticDebugInfo)
checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
"CheckModuleDebugify", Strip, StatsMap);
@@ -1065,9 +1045,6 @@ PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
"CheckModuleDebugify (original debuginfo)", NameOfWrappedPass,
OrigDIVerifyBugsReportFilePath);
- if (NewDebugMode)
- M.convertToNewDbgValues();
-
return PreservedAnalyses::all();
}
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 380bac9..a42ef0c 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3627,10 +3627,12 @@ DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C,
return createIntegerExpression(C);
auto *FP = dyn_cast<ConstantFP>(&C);
- if (FP && (Ty.isFloatTy() || Ty.isDoubleTy())) {
+ if (FP && Ty.isFloatingPointTy() && Ty.getScalarSizeInBits() <= 64) {
const APFloat &APF = FP->getValueAPF();
- return DIB.createConstantValueExpression(
- APF.bitcastToAPInt().getZExtValue());
+ APInt const &API = APF.bitcastToAPInt();
+ if (auto Temp = API.getZExtValue())
+ return DIB.createConstantValueExpression(static_cast<uint64_t>(Temp));
+ return DIB.createConstantValueExpression(*API.getRawData());
}
if (!Ty.isPointerTy())
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 9d816c5..73c5d63 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1034,6 +1034,15 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
}
}
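+/// Compares \p Left against \p StartVal (splatted when \p Left is a vector)
+/// and keeps the lanes of \p Left that differ from it, taking \p Right
+/// elsewhere.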
+Value *llvm::createAnyOfOp(IRBuilderBase &Builder, Value *StartVal,
+ RecurKind RK, Value *Left, Value *Right) {
+ if (auto VTy = dyn_cast<VectorType>(Left->getType()))
+ StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
+ Value *Cmp =
+ Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
+ return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
+}
+
Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
Value *Right) {
Type *Ty = Left->getType();
@@ -1142,13 +1151,16 @@ Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src,
NewVal = SI->getTrueValue();
}
+ // Create a splat vector with the start value and compare this to the vector
+ // we want to reduce.
+ ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
+ Value *Right = Builder.CreateVectorSplat(EC, InitVal);
+ Value *Cmp =
+ Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
+
// If any predicate is true it means that we want to select the new value.
- Value *AnyOf =
- Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src;
- // The compares in the loop may yield poison, which propagates through the
- // bitwise ORs. Freeze it here before the condition is used.
- AnyOf = Builder.CreateFreeze(AnyOf);
- return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select");
+ Cmp = Builder.CreateOrReduce(Cmp);
+ return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
}
Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src,
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index f376b5f..40d0f6b 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -459,7 +459,7 @@ static void convertMetadataToAssumes(LoadInst *LI, Value *Val,
// we can only do this if the value is known non-poison.
if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
LI->getMetadata(LLVMContext::MD_noundef) &&
- !isKnownNonZero(Val, /*Depth=*/0, SimplifyQuery(DL, DT, AC, LI)))
+ !isKnownNonZero(Val, SimplifyQuery(DL, DT, AC, LI)))
addAssumeNonNull(AC, LI);
}
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 440fe07..31be7d6 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1153,6 +1153,7 @@ protected:
Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter,
PHINode *OrigPhi, PHINode *WidePhi);
+ void truncateIVUse(NarrowIVDefUse DU);
bool widenLoopCompare(NarrowIVDefUse DU);
bool widenWithVariantUse(NarrowIVDefUse DU);
@@ -1569,15 +1570,18 @@ WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
-static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT,
- LoopInfo *LI) {
+void WidenIV::truncateIVUse(NarrowIVDefUse DU) {
auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
if (!InsertPt)
return;
LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
<< *DU.NarrowUse << "\n");
+ ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
IRBuilder<> Builder(InsertPt);
- Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
+ Value *Trunc =
+ Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType(), "",
+ DU.NeverNegative || ExtKind == ExtendKind::Zero,
+ DU.NeverNegative || ExtKind == ExtendKind::Sign);
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}
@@ -1826,6 +1830,13 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU,
assert(ExtendKindMap.count(DU.NarrowDef) &&
"Should already know the kind of extension used to widen NarrowDef");
+ // This narrow use can be widened by a sext if it's non-negative or its narrow
+ // def was widened by a sext. Same for zext.
+ bool CanWidenBySExt =
+ DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
+ bool CanWidenByZExt =
+ DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero;
+
// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
if (LI->getLoopFor(UsePhi->getParent()) != L) {
@@ -1833,7 +1844,7 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU,
// After SimplifyCFG most loop exit targets have a single predecessor.
// Otherwise fall back to a truncate within the loop.
if (UsePhi->getNumOperands() != 1)
- truncateIVUse(DU, DT, LI);
+ truncateIVUse(DU);
else {
// Widening the PHI requires us to insert a trunc. The logical place
// for this trunc is in the same BB as the PHI. This is not possible if
@@ -1847,7 +1858,8 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU,
WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
BasicBlock *WidePhiBB = WidePhi->getParent();
IRBuilder<> Builder(WidePhiBB, WidePhiBB->getFirstInsertionPt());
- Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
+ Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType(), "",
+ CanWidenByZExt, CanWidenBySExt);
UsePhi->replaceAllUsesWith(Trunc);
DeadInsts.emplace_back(UsePhi);
LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
@@ -1857,18 +1869,9 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU,
}
}
- // This narrow use can be widened by a sext if it's non-negative or its narrow
- // def was widened by a sext. Same for zext.
- auto canWidenBySExt = [&]() {
- return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
- };
- auto canWidenByZExt = [&]() {
- return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero;
- };
-
// Our raison d'etre! Eliminate sign and zero extension.
- if ((match(DU.NarrowUse, m_SExtLike(m_Value())) && canWidenBySExt()) ||
- (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
+ if ((match(DU.NarrowUse, m_SExtLike(m_Value())) && CanWidenBySExt) ||
+ (isa<ZExtInst>(DU.NarrowUse) && CanWidenByZExt)) {
Value *NewDef = DU.WideDef;
if (DU.NarrowUse->getType() != WideType) {
unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
@@ -1876,7 +1879,8 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU,
if (CastWidth < IVWidth) {
// The cast isn't as wide as the IV, so insert a Trunc.
IRBuilder<> Builder(DU.NarrowUse);
- NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
+ NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType(), "",
+ CanWidenByZExt, CanWidenBySExt);
}
else {
// A wider extend was hidden behind a narrower one. This may induce
@@ -1975,7 +1979,7 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU,
// This user does not evaluate to a recurrence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
- truncateIVUse(DU, DT, LI);
+ truncateIVUse(DU);
return nullptr;
}
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 7e9e916..2e68a9c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -305,7 +305,7 @@ static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef<unsigned> A
if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) {
annotateNonNullNoUndefBasedOnAccess(CI, ArgNos);
annotateDereferenceableBytes(CI, ArgNos, LenC->getZExtValue());
- } else if (isKnownNonZero(Size, /*Depth=*/0, DL)) {
+ } else if (isKnownNonZero(Size, DL)) {
annotateNonNullNoUndefBasedOnAccess(CI, ArgNos);
const APInt *X, *Y;
uint64_t DerefMin = 1;
@@ -394,7 +394,7 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) {
Value *Size = CI->getArgOperand(2);
uint64_t Len;
annotateNonNullNoUndefBasedOnAccess(CI, 0);
- if (isKnownNonZero(Size, /*Depth=*/0, DL))
+ if (isKnownNonZero(Size, DL))
annotateNonNullNoUndefBasedOnAccess(CI, 1);
// We don't do anything if length is not constant.
@@ -613,7 +613,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
if (Str1P == Str2P) // strncmp(x,x,n) -> 0
return ConstantInt::get(CI->getType(), 0);
- if (isKnownNonZero(Size, /*Depth=*/0, DL))
+ if (isKnownNonZero(Size, DL))
annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
// Get the length argument if it is constant.
uint64_t Length;
@@ -749,7 +749,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
Value *LibCallSimplifier::optimizeStrLCpy(CallInst *CI, IRBuilderBase &B) {
Value *Size = CI->getArgOperand(2);
- if (isKnownNonZero(Size, /*Depth=*/0, DL))
+ if (isKnownNonZero(Size, DL))
// Like snprintf, the function stores into the destination only when
// the size argument is nonzero.
annotateNonNullNoUndefBasedOnAccess(CI, 0);
@@ -833,7 +833,7 @@ Value *LibCallSimplifier::optimizeStringNCpy(CallInst *CI, bool RetEnd,
Value *Src = CI->getArgOperand(1);
Value *Size = CI->getArgOperand(2);
- if (isKnownNonZero(Size, /*Depth=*/0, DL)) {
+ if (isKnownNonZero(Size, DL)) {
// Both st{p,r}ncpy(D, S, N) access the source and destination arrays
// only when N is nonzero.
annotateNonNullNoUndefBasedOnAccess(CI, 0);
@@ -926,7 +926,7 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
Type *CharTy = B.getIntNTy(CharSize);
if (isOnlyUsedInZeroEqualityComparison(CI) &&
- (!Bound || isKnownNonZero(Bound, /*Depth=*/0, DL))) {
+ (!Bound || isKnownNonZero(Bound, DL))) {
// Fold strlen:
// strlen(x) != 0 --> *x != 0
// strlen(x) == 0 --> *x == 0
@@ -1047,7 +1047,7 @@ Value *LibCallSimplifier::optimizeStrNLen(CallInst *CI, IRBuilderBase &B) {
if (Value *V = optimizeStringLength(CI, B, 8, Bound))
return V;
- if (isKnownNonZero(Bound, /*Depth=*/0, DL))
+ if (isKnownNonZero(Bound, DL))
annotateNonNullNoUndefBasedOnAccess(CI, 0);
return nullptr;
}
@@ -1291,7 +1291,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
Value *SrcStr = CI->getArgOperand(0);
Value *Size = CI->getArgOperand(2);
- if (isKnownNonZero(Size, /*Depth=*/0, DL)) {
+ if (isKnownNonZero(Size, DL)) {
annotateNonNullNoUndefBasedOnAccess(CI, 0);
if (isOnlyUsedInEqualityComparison(CI, SrcStr))
return memChrToCharCompare(CI, Size, B, DL);
@@ -2976,7 +2976,7 @@ Value *LibCallSimplifier::optimizeStrToInt(CallInst *CI, IRBuilderBase &B,
// It would be readonly too, except that it still may write to errno.
CI->addParamAttr(0, Attribute::NoCapture);
EndPtr = nullptr;
- } else if (!isKnownNonZero(EndPtr, /*Depth=*/0, DL))
+ } else if (!isKnownNonZero(EndPtr, DL))
return nullptr;
StringRef Str;
@@ -3402,7 +3402,7 @@ Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilderBase &B) {
return V;
}
- if (isKnownNonZero(CI->getOperand(1), /*Depth=*/0, DL))
+ if (isKnownNonZero(CI->getOperand(1), DL))
annotateNonNullNoUndefBasedOnAccess(CI, 0);
return nullptr;
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index ece2a34..ebca2d8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -68,7 +68,9 @@ class VPBuilder {
public:
VPBuilder() = default;
VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
- VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
+ VPBuilder(VPRecipeBase *InsertPt) {
+ setInsertPoint(InsertPt->getParent(), InsertPt->getIterator());
+ }
/// Clear the insertion point: created instructions will not be inserted into
/// a block.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5535cc55..a8272f4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -545,11 +545,6 @@ public:
// Return true if any runtime check is added.
bool areSafetyChecksAdded() { return AddedSafetyChecks; }
- /// A type for vectorized values in the new loop. Each value from the
- /// original loop, when vectorized, is represented by UF vector values in the
- /// new unrolled loop, where UF is the unroll factor.
- using VectorParts = SmallVector<Value *, 2>;
-
/// A helper function to scalarize a single Instruction in the innermost loop.
/// Generates a sequence of scalar instances for each lane between \p MinLane
/// and \p MaxLane, times each part between \p MinPart and \p MaxPart,
@@ -616,9 +611,6 @@ protected:
void fixFixedOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR,
VPTransformState &State);
- /// Create code for the loop exit value of the reduction.
- void fixReduction(VPReductionPHIRecipe *Phi, VPTransformState &State);
-
/// Iteratively sink the scalarized operands of a predicated instruction into
/// the block that was created for it.
void sinkScalarOperands(Instruction *PredInst);
@@ -3051,8 +3043,9 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
}
// Create phi nodes to merge from the backedge-taken check block.
- PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
- LoopScalarPreHeader->getFirstNonPHI());
+ PHINode *BCResumeVal =
+ PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
+ LoopScalarPreHeader->getTerminator()->getIterator());
// Copy original phi DL over to the new one.
BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
@@ -7450,6 +7443,7 @@ static void createAndCollectMergePhiForReduction(
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+ TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
Value *FinalValue =
State.get(RedResult, VPIteration(State.UF - 1, VPLane::getFirstLane()));
auto *ResumePhi =
@@ -7474,7 +7468,7 @@ static void createAndCollectMergePhiForReduction(
BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
Incoming);
else
- BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
+ BCBlockPhi->addIncoming(ReductionStartValue, Incoming);
}
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
@@ -7767,10 +7761,11 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
// Now, compare the remaining count and if there aren't enough iterations to
// execute the vectorized epilogue skip to the scalar part.
- LoopVectorPreHeader->setName("vec.epilog.ph");
- BasicBlock *VecEpilogueIterationCountCheck =
- SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->begin(), DT, LI,
- nullptr, "vec.epilog.iter.check", true);
+ BasicBlock *VecEpilogueIterationCountCheck = LoopVectorPreHeader;
+ VecEpilogueIterationCountCheck->setName("vec.epilog.iter.check");
+ LoopVectorPreHeader =
+ SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
+ LI, nullptr, "vec.epilog.ph");
emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
VecEpilogueIterationCountCheck);
@@ -8086,7 +8081,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
BlockMaskCache[BB] = BlockMask;
}
-VPWidenMemoryInstructionRecipe *
+VPWidenMemoryRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VFRange &Range) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
@@ -8131,12 +8126,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
Ptr = VectorPtr;
}
if (LoadInst *Load = dyn_cast<LoadInst>(I))
- return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
- Reverse, I->getDebugLoc());
+ return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+ I->getDebugLoc());
StoreInst *Store = cast<StoreInst>(I);
- return new VPWidenMemoryInstructionRecipe(
- *Store, Ptr, Operands[0], Mask, Consecutive, Reverse, I->getDebugLoc());
+ return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
+ Reverse, I->getDebugLoc());
}
/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
@@ -8775,13 +8770,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// for this VPlan, replace the Recipes widening its memory instructions with a
// single VPInterleaveRecipe at its insertion point.
for (const auto *IG : InterleaveGroups) {
- auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
- RecipeBuilder.getRecipe(IG->getInsertPos()));
+ auto *Recipe =
+ cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getInsertPos()));
SmallVector<VPValue *, 4> StoredValues;
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
- auto *StoreR =
- cast<VPWidenMemoryInstructionRecipe>(RecipeBuilder.getRecipe(SI));
+ auto *StoreR = cast<VPWidenStoreRecipe>(RecipeBuilder.getRecipe(SI));
StoredValues.push_back(StoreR->getStoredValue());
}
@@ -8893,10 +8887,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
// A ComputeReductionResult recipe is added to the middle block, also for
// in-loop reductions which compute their result in-loop, because generating
// the subsequent bc.merge.rdx phi is driven by ComputeReductionResult recipes.
-//
-// Adjust AnyOf reductions; replace the reduction phi for the selected value
-// with a boolean reduction phi node to check if the condition is true in any
-// iteration. The final value is selected by the final ComputeReductionResult.
void LoopVectorizationPlanner::adjustRecipesForReductions(
VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
ElementCount MinVF) {
@@ -9071,41 +9061,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
continue;
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- // Adjust AnyOf reductions; replace the reduction phi for the selected value
- // with a boolean reduction phi node to check if the condition is true in
- // any iteration. The final value is selected by the final
- // ComputeReductionResult.
- if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
- RdxDesc.getRecurrenceKind())) {
- auto *Select = cast<VPRecipeBase>(*find_if(PhiR->users(), [](VPUser *U) {
- return isa<VPWidenSelectRecipe>(U) ||
- (isa<VPReplicateRecipe>(U) &&
- cast<VPReplicateRecipe>(U)->getUnderlyingInstr()->getOpcode() ==
- Instruction::Select);
- }));
- VPValue *Cmp = Select->getOperand(0);
- // If the compare is checking the reduction PHI node, adjust it to check
- // the start value.
- if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) {
- for (unsigned I = 0; I != CmpR->getNumOperands(); ++I)
- if (CmpR->getOperand(I) == PhiR)
- CmpR->setOperand(I, PhiR->getStartValue());
- }
- VPBuilder::InsertPointGuard Guard(Builder);
- Builder.setInsertPoint(Select);
-
- // If the true value of the select is the reduction phi, the new value is
- // selected if the negated condition is true in any iteration.
- if (Select->getOperand(1) == PhiR)
- Cmp = Builder.createNot(Cmp);
- VPValue *Or = Builder.createOr(PhiR, Cmp);
- Select->getVPSingleValue()->replaceAllUsesWith(Or);
-
- // Convert the reduction phi to operate on bools.
- PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse(
- OrigLoop->getHeader()->getContext())));
- }
-
// If tail is folded by masking, introduce selects between the phi
// and the live-out instruction of each reduction, at the beginning of the
// dedicated latch block.
@@ -9138,9 +9093,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// then extend the loop exit value to enable InstCombine to evaluate the
// entire expression in the smaller type.
Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType();
- if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() &&
- !RecurrenceDescriptor::isAnyOfRecurrenceKind(
- RdxDesc.getRecurrenceKind())) {
+ if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
Type *RdxTy = RdxDesc.getRecurrenceType();
auto *Trunc =
@@ -9409,92 +9362,27 @@ static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
return Call;
}
-void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
- VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
-
- // Attempt to issue a wide load.
- LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
- StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);
-
- assert((LI || SI) && "Invalid Load/Store instruction");
- assert((!SI || StoredValue) && "No stored value provided for widened store");
- assert((!LI || !StoredValue) && "Stored value provided for widened load");
+void VPWidenLoadRecipe::execute(VPTransformState &State) {
+ auto *LI = cast<LoadInst>(&Ingredient);
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
-
auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
const Align Alignment = getLoadStoreAlignment(&Ingredient);
- bool CreateGatherScatter = !isConsecutive();
+ bool CreateGather = !isConsecutive();
auto &Builder = State.Builder;
- InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
- bool isMaskRequired = getMask();
- if (isMaskRequired) {
- // Mask reversal is only needed for non-all-one (null) masks, as reverse of
- // a null all-one mask is a null mask.
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *Mask = State.get(getMask(), Part);
+ State.setDebugLocFrom(getDebugLoc());
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *NewLI;
+ Value *Mask = nullptr;
+ if (auto *VPMask = getMask()) {
+ // Mask reversal is only needed for non-all-one (non-null) masks; the
+ // reverse of a null (all-one) mask is still a null mask.
+ Mask = State.get(VPMask, Part);
if (isReverse())
Mask = Builder.CreateVectorReverse(Mask, "reverse");
- BlockInMaskParts[Part] = Mask;
}
- }
- // Handle Stores:
- if (SI) {
- State.setDebugLocFrom(getDebugLoc());
-
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Instruction *NewSI = nullptr;
- Value *StoredVal = State.get(StoredValue, Part);
- // TODO: split this into several classes for better design.
- if (State.EVL) {
- assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
- "explicit vector length.");
- assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
- VPInstruction::ExplicitVectorLength &&
- "EVL must be VPInstruction::ExplicitVectorLength.");
- Value *EVL = State.get(State.EVL, VPIteration(0, 0));
- // If EVL is not nullptr, then EVL must be a valid value set during plan
- // creation, possibly default value = whole vector register length. EVL
- // is created only if TTI prefers predicated vectorization, thus if EVL
- // is not nullptr it also implies preference for predicated
- // vectorization.
- // FIXME: Support reverse store after vp_reverse is added.
- Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
- NewSI = lowerStoreUsingVectorIntrinsics(
- Builder, State.get(getAddr(), Part, !CreateGatherScatter),
- StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
- } else if (CreateGatherScatter) {
- Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
- Value *VectorGep = State.get(getAddr(), Part);
- NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
- MaskPart);
- } else {
- if (isReverse()) {
- // If we store to reverse consecutive memory locations, then we need
- // to reverse the order of elements in the stored value.
- StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
- // We don't want to update the value in the map as it might be used in
- // another expression. So don't call resetVectorValue(StoredVal).
- }
- auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
- if (isMaskRequired)
- NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
- BlockInMaskParts[Part]);
- else
- NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
- }
- State.addMetadata(NewSI, SI);
- }
- return;
- }
-
- // Handle loads.
- assert(LI && "Must have a load instruction");
- State.setDebugLocFrom(getDebugLoc());
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *NewLI;
// TODO: split this into several classes for better design.
if (State.EVL) {
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
@@ -9509,22 +9397,20 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// is not nullptr it also implies preference for predicated
// vectorization.
// FIXME: Support reverse loading after vp_reverse is added.
- Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
NewLI = lowerLoadUsingVectorIntrinsics(
- Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
- CreateGatherScatter, MaskPart, EVL, Alignment);
- } else if (CreateGatherScatter) {
- Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ Builder, DataTy, State.get(getAddr(), Part, !CreateGather),
+ CreateGather, Mask, EVL, Alignment);
+ } else if (CreateGather) {
Value *VectorGep = State.get(getAddr(), Part);
- NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
+ NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, Mask,
nullptr, "wide.masked.gather");
State.addMetadata(NewLI, LI);
} else {
auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
- if (isMaskRequired)
- NewLI = Builder.CreateMaskedLoad(
- DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
- PoisonValue::get(DataTy), "wide.masked.load");
+ if (Mask)
+ NewLI = Builder.CreateMaskedLoad(DataTy, VecPtr, Alignment, Mask,
+ PoisonValue::get(DataTy),
+ "wide.masked.load");
else
NewLI =
Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
@@ -9535,7 +9421,69 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
}
- State.set(getVPSingleValue(), NewLI, Part);
+ State.set(this, NewLI, Part);
+ }
+}
+
+void VPWidenStoreRecipe::execute(VPTransformState &State) {
+ auto *SI = cast<StoreInst>(&Ingredient);
+
+ VPValue *StoredVPValue = getStoredValue();
+ bool CreateScatter = !isConsecutive();
+ const Align Alignment = getLoadStoreAlignment(&Ingredient);
+
+ auto &Builder = State.Builder;
+ State.setDebugLocFrom(getDebugLoc());
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Instruction *NewSI = nullptr;
+ Value *Mask = nullptr;
+ if (auto *VPMask = getMask()) {
+ // Mask reversal is only needed for non-all-one (non-null) masks; the
+ // reverse of a null (all-one) mask is still a null mask.
+ Mask = State.get(VPMask, Part);
+ if (isReverse())
+ Mask = Builder.CreateVectorReverse(Mask, "reverse");
+ }
+
+ Value *StoredVal = State.get(StoredVPValue, Part);
+ if (isReverse()) {
+ assert(!State.EVL && "reversing not yet implemented with EVL");
+ // If we store to reverse consecutive memory locations, then we need
+ // to reverse the order of elements in the stored value.
+ StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+ // We don't want to update the value in the map as it might be used in
+ // another expression. So don't call resetVectorValue(StoredVal).
+ }
+ // TODO: split this into several classes for better design.
+ if (State.EVL) {
+ assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+ "explicit vector length.");
+ assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
+ VPInstruction::ExplicitVectorLength &&
+ "EVL must be VPInstruction::ExplicitVectorLength.");
+ Value *EVL = State.get(State.EVL, VPIteration(0, 0));
+ // If EVL is not nullptr, then EVL must be a valid value set during plan
+ // creation, possibly default value = whole vector register length. EVL
+ // is created only if TTI prefers predicated vectorization, thus if EVL
+ // is not nullptr it also implies preference for predicated
+ // vectorization.
+ // FIXME: Support reverse store after vp_reverse is added.
+ NewSI = lowerStoreUsingVectorIntrinsics(
+ Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal,
+ CreateScatter, Mask, EVL, Alignment);
+ } else if (CreateScatter) {
+ Value *VectorGep = State.get(getAddr(), Part);
+ NewSI =
+ Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, Mask);
+ } else {
+ auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
+ if (Mask)
+ NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, Mask);
+ else
+ NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+ }
+ State.addMetadata(NewSI, SI);
}
}
@@ -9722,7 +9670,7 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
}
// The scalar cost should only be 0 when vectorizing with a user specified
// VF/IC. In those cases, runtime checks should always be generated.
- double ScalarC = *VF.ScalarCost.getValue();
+ uint64_t ScalarC = *VF.ScalarCost.getValue();
if (ScalarC == 0)
return true;
@@ -9749,7 +9697,7 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
// RtC + VecC * (TC / VF) + EpiC < ScalarC * TC
//
// Now we can compute the minimum required trip count TC as
- // (RtC + EpiC) / (ScalarC - (VecC / VF)) < TC
+ // VF * (RtC + EpiC) / (ScalarC * VF - VecC) < TC
//
// For now we assume the epilogue cost EpiC = 0 for simplicity. Note that
// the computations are performed on doubles, not integers and the result
@@ -9761,9 +9709,9 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
AssumedMinimumVscale = *VScale;
IntVF *= AssumedMinimumVscale;
}
- double VecCOverVF = double(*VF.Cost.getValue()) / IntVF;
- double RtC = *CheckCost.getValue();
- double MinTC1 = RtC / (ScalarC - VecCOverVF);
+ uint64_t RtC = *CheckCost.getValue();
+ uint64_t Div = ScalarC * IntVF - *VF.Cost.getValue();
+ uint64_t MinTC1 = Div == 0 ? 0 : divideCeil(RtC * IntVF, Div);
// Second, compute a minimum iteration count so that the cost of the
// runtime checks is only a fraction of the total scalar loop cost. This
@@ -9772,12 +9720,12 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
// * TC. To bound the runtime check to be a fraction 1/X of the scalar
// cost, compute
// RtC < ScalarC * TC * (1 / X) ==> RtC * X / ScalarC < TC
- double MinTC2 = RtC * 10 / ScalarC;
+ uint64_t MinTC2 = divideCeil(RtC * 10, ScalarC);
// Now pick the larger minimum. If it is not a multiple of VF and a scalar
// epilogue is allowed, choose the next closest multiple of VF. This should
// partly compensate for ignoring the epilogue cost.
- uint64_t MinTC = std::ceil(std::max(MinTC1, MinTC2));
+ uint64_t MinTC = std::max(MinTC1, MinTC2);
if (SEL == CM_ScalarEpilogueAllowed)
MinTC = alignTo(MinTC, IntVF);
VF.MinProfitableTripCount = ElementCount::getFixed(MinTC);
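For reference, here is a standalone sketch of the integer arithmetic this hunk switches to. The cost numbers are invented, divideCeil is a local stand-in for llvm::divideCeil, and the epilogue cost EpiC is taken as 0 as in the comments above.

#include <algorithm>
#include <cstdint>
#include <iostream>

static uint64_t divideCeil(uint64_t Num, uint64_t Den) {
  return (Num + Den - 1) / Den;
}

int main() {
  uint64_t ScalarC = 7; // cost of one scalar iteration
  uint64_t VecC = 20;   // cost of one vector iteration (covering VF lanes)
  uint64_t RtC = 35;    // one-off cost of the runtime checks
  uint64_t VF = 8;      // vectorization factor (already scaled by vscale)

  // RtC + VecC * (TC / VF) < ScalarC * TC
  //   ==>  VF * RtC / (ScalarC * VF - VecC) < TC
  uint64_t Div = ScalarC * VF - VecC;
  uint64_t MinTC1 = Div == 0 ? 0 : divideCeil(RtC * VF, Div);
  // Keep the checks below 1/10 of the scalar loop cost: RtC * 10 / ScalarC < TC.
  uint64_t MinTC2 = divideCeil(RtC * 10, ScalarC);
  std::cout << std::max(MinTC1, MinTC2) << "\n"; // prints 50 for these numbers
}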
@@ -10181,19 +10129,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
- const RecurrenceDescriptor &RdxDesc =
- ReductionPhi->getRecurrenceDescriptor();
- RecurKind RK = RdxDesc.getRecurrenceKind();
- ResumeV = ReductionResumeValues.find(&RdxDesc)->second;
- if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
- // VPReductionPHIRecipes for AnyOf reductions expect a boolean as
- // start value; compare the final value from the main vector loop
- // to the start value.
- IRBuilder<> Builder(
- cast<Instruction>(ResumeV)->getParent()->getFirstNonPHI());
- ResumeV = Builder.CreateICmpNE(ResumeV,
- RdxDesc.getRecurrenceStartValue());
- }
+ ResumeV = ReductionResumeValues
+ .find(&ReductionPhi->getRecurrenceDescriptor())
+ ->second;
} else {
// Create induction resume values for both widened pointer and
// integer/fp inductions and update the start value of the induction
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b031b40..7694627 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -118,6 +118,11 @@ static cl::opt<int>
cl::desc("Only vectorize if you gain more than this "
"number "));
+static cl::opt<bool> SLPSkipEarlyProfitabilityCheck(
+ "slp-skip-early-profitability-check", cl::init(false), cl::Hidden,
+ cl::desc("When true, SLP vectorizer bypasses profitability checks based on "
+ "heuristics and makes vectorization decision via cost modeling."));
+
static cl::opt<bool>
ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden,
cl::desc("Attempt to vectorize horizontal reductions"));
@@ -1129,6 +1134,7 @@ public:
MustGather.clear();
EntryToLastInstruction.clear();
ExternalUses.clear();
+ ExternalUsesAsGEPs.clear();
for (auto &Iter : BlocksSchedules) {
BlockScheduling *BS = Iter.second.get();
BS->clear();
@@ -3149,6 +3155,10 @@ private:
/// after vectorization.
UserList ExternalUses;
+ /// A list of GEPs which can be replaced by scalar GEPs instead of
+ /// extractelement instructions.
+ SmallPtrSet<Value *, 4> ExternalUsesAsGEPs;
+
/// Values used only by @llvm.assume calls.
SmallPtrSet<const Value *, 32> EphValues;
@@ -5509,6 +5519,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
void BoUpSLP::buildExternalUses(
const ExtraValueToDebugLocsMap &ExternallyUsedValues) {
+ DenseMap<Value *, unsigned> ScalarToExtUses;
// Collect the values that we need to extract from the tree.
for (auto &TEPtr : VectorizableTree) {
TreeEntry *Entry = TEPtr.get();
@@ -5522,14 +5533,20 @@ void BoUpSLP::buildExternalUses(
Value *Scalar = Entry->Scalars[Lane];
if (!isa<Instruction>(Scalar))
continue;
- int FoundLane = Entry->findLaneForValue(Scalar);
+ // If all uses were already replaced, there is no need to do it again.
+ auto It = ScalarToExtUses.find(Scalar);
+ if (It != ScalarToExtUses.end() && !ExternalUses[It->second].User)
+ continue;
// Check if the scalar is externally used as an extra arg.
const auto *ExtI = ExternallyUsedValues.find(Scalar);
if (ExtI != ExternallyUsedValues.end()) {
+ int FoundLane = Entry->findLaneForValue(Scalar);
LLVM_DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane "
- << Lane << " from " << *Scalar << ".\n");
+ << FoundLane << " from " << *Scalar << ".\n");
+ ScalarToExtUses.try_emplace(Scalar, ExternalUses.size());
ExternalUses.emplace_back(Scalar, nullptr, FoundLane);
+ continue;
}
for (User *U : Scalar->users()) {
LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
@@ -5556,12 +5573,20 @@ void BoUpSLP::buildExternalUses(
continue;
}
U = nullptr;
+ if (It != ScalarToExtUses.end()) {
+ ExternalUses[It->second].User = nullptr;
+ break;
+ }
}
+ int FoundLane = Entry->findLaneForValue(Scalar);
LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *UserInst
- << " from lane " << Lane << " from " << *Scalar
+ << " from lane " << FoundLane << " from " << *Scalar
<< ".\n");
+ It = ScalarToExtUses.try_emplace(Scalar, ExternalUses.size()).first;
ExternalUses.emplace_back(Scalar, U, FoundLane);
+ if (!U)
+ break;
}
}
}
@@ -6250,7 +6275,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return TreeEntry::NeedToGather;
}
- if (!areAltOperandsProfitable(S, VL)) {
+ if (!SLPSkipEarlyProfitabilityCheck && !areAltOperandsProfitable(S, VL)) {
LLVM_DEBUG(
dbgs()
<< "SLP: ShuffleVector not vectorized, operands are buildvector and "
@@ -9906,6 +9931,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
SmallVector<APInt> DemandedElts;
SmallDenseSet<Value *, 4> UsedInserts;
DenseSet<std::pair<const TreeEntry *, Type *>> VectorCasts;
+ std::optional<DenseMap<Value *, unsigned>> ValueToExtUses;
for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
@@ -10014,12 +10040,40 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
}
}
}
+ // Leave the GEPs as-is; they are free in most cases, and it is better to
+ // keep them as GEPs.
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(EU.Scalar)) {
+ if (!ValueToExtUses) {
+ ValueToExtUses.emplace();
+ for_each(enumerate(ExternalUses), [&](const auto &P) {
+ ValueToExtUses->try_emplace(P.value().Scalar, P.index());
+ });
+ }
+ // The original GEP can be used if each of its operands is either not
+ // vectorized or already marked as externally used.
+ bool CanBeUsedAsGEP = all_of(GEP->operands(), [&](Value *V) {
+ if (!getTreeEntry(V))
+ return true;
+ auto It = ValueToExtUses->find(V);
+ if (It != ValueToExtUses->end()) {
+ // Replace all uses to avoid compiler crash.
+ ExternalUses[It->second].User = nullptr;
+ return true;
+ }
+ return false;
+ });
+ if (CanBeUsedAsGEP) {
+ ExtractCost += TTI->getInstructionCost(GEP, CostKind);
+ ExternalUsesAsGEPs.insert(EU.Scalar);
+ continue;
+ }
+ }
// If we plan to rewrite the tree in a smaller type, we will need to sign
// extend the extracted value back to the original type. Here, we account
// for the extract and the added cost of the sign extend if needed.
auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
auto It = MinBWs.find(getTreeEntry(EU.Scalar));
if (It != MinBWs.end()) {
auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
@@ -13142,6 +13196,8 @@ Value *BoUpSLP::vectorizeTree(
if (Scalar->getType() != Vec->getType()) {
Value *Ex = nullptr;
Value *ExV = nullptr;
+ auto *GEP = dyn_cast<GetElementPtrInst>(Scalar);
+ bool ReplaceGEP = GEP && ExternalUsesAsGEPs.contains(GEP);
auto It = ScalarToEEs.find(Scalar);
if (It != ScalarToEEs.end()) {
// No need to emit many extracts, just move the only one in the
@@ -13167,6 +13223,15 @@ Value *BoUpSLP::vectorizeTree(
if (const TreeEntry *ETE = getTreeEntry(V))
V = ETE->VectorizedValue;
Ex = Builder.CreateExtractElement(V, ES->getIndexOperand());
+ } else if (ReplaceGEP) {
+ // Leave the GEPs as-is; they are free in most cases, and it is better
+ // to keep them as GEPs.
+ auto *CloneGEP = GEP->clone();
+ CloneGEP->insertBefore(*Builder.GetInsertBlock(),
+ Builder.GetInsertPoint());
+ if (GEP->hasName())
+ CloneGEP->takeName(GEP);
+ Ex = CloneGEP;
} else {
Ex = Builder.CreateExtractElement(Vec, Lane);
}
@@ -13205,6 +13270,8 @@ Value *BoUpSLP::vectorizeTree(
assert((ExternallyUsedValues.count(Scalar) ||
any_of(Scalar->users(),
[&](llvm::User *U) {
+ if (ExternalUsesAsGEPs.contains(U))
+ return true;
TreeEntry *UseEntry = getTreeEntry(U);
return UseEntry &&
(UseEntry->State == TreeEntry::Vectorize ||
@@ -14620,10 +14687,16 @@ bool BoUpSLP::collectValuesToDemote(
assert((ID == Intrinsic::smin || ID == Intrinsic::smax) &&
"Expected min/max intrinsics only.");
unsigned SignBits = OrigBitWidth - BitWidth;
+ APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth - 1);
return SignBits <= ComputeNumSignBits(I->getOperand(0), *DL, 0, AC,
nullptr, DT) &&
+ (!isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL)) ||
+ MaskedValueIsZero(I->getOperand(0), Mask,
+ SimplifyQuery(*DL))) &&
SignBits <= ComputeNumSignBits(I->getOperand(1), *DL, 0, AC,
- nullptr, DT);
+ nullptr, DT) &&
+ (!isKnownNonNegative(I->getOperand(1), SimplifyQuery(*DL)) ||
+ MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)));
});
};
if (ID != Intrinsic::abs) {
@@ -15136,8 +15209,8 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
Type *ValueTy = StoreTy;
if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
ValueTy = Trunc->getSrcTy();
- unsigned MinVF = TTI->getStoreMinimumVF(
- R.getMinVF(DL->getTypeSizeInBits(StoreTy)), StoreTy, ValueTy);
+ unsigned MinVF = PowerOf2Ceil(TTI->getStoreMinimumVF(
+ R.getMinVF(DL->getTypeStoreSizeInBits(StoreTy)), StoreTy, ValueTy));
if (MaxVF < MinVF) {
LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
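A small standalone illustration of the rounding added in the hunk above; powerOf2Ceil below is a local stand-in for llvm::PowerOf2Ceil, the sample widths are made up, and the assumption (not stated in the patch) is that the surrounding store-vectorization logic only works with power-of-two VFs.

#include <cstdint>
#include <iostream>

// Round a value up to the next power of two (behaves like llvm::PowerOf2Ceil
// for inputs > 0).
static uint64_t powerOf2Ceil(uint64_t X) {
  uint64_t P = 1;
  while (P < X)
    P <<= 1;
  return P;
}

int main() {
  for (uint64_t MinVF : {2u, 3u, 5u, 8u})
    std::cout << MinVF << " -> " << powerOf2Ceil(MinVF) << "\n";
  // 2 -> 2, 3 -> 4, 5 -> 8, 8 -> 8
}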
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 605b47f..b4c7ab0 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -69,9 +69,9 @@ class VPRecipeBuilder {
/// Check if the load or store instruction \p I should widened for \p
/// Range.Start and potentially masked. Such instructions are handled by a
/// recipe that takes an additional VPInstruction for the mask.
- VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
- ArrayRef<VPValue *> Operands,
- VFRange &Range);
+ VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I,
+ ArrayRef<VPValue *> Operands,
+ VFRange &Range);
/// Check if an induction recipe should be constructed for \p Phi. If so build
/// and return it. If not, return null.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 3e1069d..999236a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1300,18 +1300,7 @@ void VPValue::replaceUsesWithIf(
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
- if (const Value *UV = getUnderlyingValue()) {
- OS << "ir<";
- UV->printAsOperand(OS, false);
- OS << ">";
- return;
- }
-
- unsigned Slot = Tracker.getSlot(this);
- if (Slot == unsigned(-1))
- OS << "<badref>";
- else
- OS << "vp<%" << Tracker.getSlot(this) << ">";
+ OS << Tracker.getOrCreateName(this);
}
void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
@@ -1373,32 +1362,88 @@ VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
}
-void VPSlotTracker::assignSlot(const VPValue *V) {
- if (V->getUnderlyingValue())
+void VPSlotTracker::assignName(const VPValue *V) {
+ assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
+ auto *UV = V->getUnderlyingValue();
+ if (!UV) {
+ VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
+ NextSlot++;
return;
- assert(!Slots.contains(V) && "VPValue already has a slot!");
- Slots[V] = NextSlot++;
+ }
+
+ // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
+ // appending ".Number" to the name if there are multiple uses.
+ std::string Name;
+ raw_string_ostream S(Name);
+ UV->printAsOperand(S, false);
+ assert(!Name.empty() && "Name cannot be empty.");
+ std::string BaseName = (Twine("ir<") + Name + Twine(">")).str();
+
+ // First assign the base name for V.
+ const auto &[A, _] = VPValue2Name.insert({V, BaseName});
+ // Integer or FP constants with different types will result in the same
+ // string due to stripping types.
+ if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV))
+ return;
+
+ // If the base name is already used by C > 0 other VPValues, increase the
+ // version counter C and use it for V.
+ const auto &[C, UseInserted] = BaseName2Version.insert({BaseName, 0});
+ if (!UseInserted) {
+ C->second++;
+ A->second = (BaseName + Twine(".") + Twine(C->second)).str();
+ }
}
-void VPSlotTracker::assignSlots(const VPlan &Plan) {
+void VPSlotTracker::assignNames(const VPlan &Plan) {
if (Plan.VFxUF.getNumUsers() > 0)
- assignSlot(&Plan.VFxUF);
- assignSlot(&Plan.VectorTripCount);
+ assignName(&Plan.VFxUF);
+ assignName(&Plan.VectorTripCount);
if (Plan.BackedgeTakenCount)
- assignSlot(Plan.BackedgeTakenCount);
- assignSlots(Plan.getPreheader());
+ assignName(Plan.BackedgeTakenCount);
+ for (VPValue *LI : Plan.VPLiveInsToFree)
+ assignName(LI);
+ assignNames(Plan.getPreheader());
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
for (const VPBasicBlock *VPBB :
VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
- assignSlots(VPBB);
+ assignNames(VPBB);
}
-void VPSlotTracker::assignSlots(const VPBasicBlock *VPBB) {
+void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
for (const VPRecipeBase &Recipe : *VPBB)
for (VPValue *Def : Recipe.definedValues())
- assignSlot(Def);
+ assignName(Def);
+}
+
+std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
+ std::string Name = VPValue2Name.lookup(V);
+ if (!Name.empty())
+ return Name;
+
+ // If no name was assigned, then either no VPlan was provided when creating
+ // the slot tracker, or the value is not reachable from the provided VPlan.
+ // This can happen, e.g., when printing a recipe that has not been inserted
+ // into a VPlan from a debugger.
+ // TODO: Update VPSlotTracker constructor to assign names to recipes &
+ // VPValues not associated with a VPlan, instead of constructing names ad-hoc
+ // here.
+ const VPRecipeBase *DefR = V->getDefiningRecipe();
+ (void)DefR;
+ assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
+ "VPValue defined by a recipe in a VPlan?");
+
+ // Use the underlying value's name, if there is one.
+ if (auto *UV = V->getUnderlyingValue()) {
+ std::string Name;
+ raw_string_ostream S(Name);
+ UV->printAsOperand(S, false);
+ return (Twine("ir<") + Name + ">").str();
+ }
+
+ return "<badref>";
}
bool vputils::onlyFirstLaneUsed(const VPValue *Def) {
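A standalone model of the versioned-naming scheme introduced above: the first VPValue whose underlying IR value prints as a given name keeps the plain ir<...> form, and later ones get a .1, .2, ... suffix. The names below are made up, and the special case for live-in constants is omitted.

#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  std::map<std::string, unsigned> BaseName2Version;
  std::vector<std::string> Bases = {"ir<%x>", "ir<%y>", "ir<%x>", "ir<%x>"};
  for (const std::string &Base : Bases) {
    auto [It, Inserted] = BaseName2Version.insert({Base, 0});
    // First use keeps the base name; later uses bump the per-name counter.
    std::string Name =
        Inserted ? Base : Base + "." + std::to_string(++It->second);
    std::cout << Name << "\n"; // ir<%x>, ir<%y>, ir<%x>.1, ir<%x>.2
  }
}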
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index d86a81d..148227f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -875,7 +875,8 @@ public:
return true;
case VPRecipeBase::VPInterleaveSC:
case VPRecipeBase::VPBranchOnMaskSC:
- case VPRecipeBase::VPWidenMemoryInstructionSC:
+ case VPRecipeBase::VPWidenLoadSC:
+ case VPRecipeBase::VPWidenStoreSC:
// TODO: Widened stores don't define a value, but widened loads do. Split
// the recipes to be able to make widened loads VPSingleDefRecipes.
return false;
@@ -2280,68 +2281,62 @@ public:
}
};
-/// A Recipe for widening load/store operations.
-/// The recipe uses the following VPValues:
-/// - For load: Address, optional mask
-/// - For store: Address, stored value, optional mask
-/// TODO: We currently execute only per-part unless a specific instance is
-/// provided.
-class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
+/// A common base class for widening memory operations. An optional mask can be
+/// provided as the last operand.
+class VPWidenMemoryRecipe : public VPRecipeBase {
+protected:
Instruction &Ingredient;
- // Whether the loaded-from / stored-to addresses are consecutive.
+ /// Whether the accessed addresses are consecutive.
bool Consecutive;
- // Whether the consecutive loaded/stored addresses are in reverse order.
+ /// Whether the consecutive accessed addresses are in reverse order.
bool Reverse;
+ /// Whether the memory access is masked.
+ bool IsMasked = false;
+
void setMask(VPValue *Mask) {
+ assert(!IsMasked && "cannot re-set mask");
if (!Mask)
return;
addOperand(Mask);
+ IsMasked = true;
}
- bool isMasked() const {
- return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
+ VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
+ std::initializer_list<VPValue *> Operands,
+ bool Consecutive, bool Reverse, DebugLoc DL)
+ : VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive),
+ Reverse(Reverse) {
+ assert((Consecutive || !Reverse) && "Reverse implies consecutive");
}
public:
- VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
- bool Consecutive, bool Reverse, DebugLoc DL)
- : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}, DL),
- Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {
- assert((Consecutive || !Reverse) && "Reverse implies consecutive");
- new VPValue(this, &Load);
- setMask(Mask);
- }
+ VPWidenMemoryRecipe *clone() override = 0;
- VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
- VPValue *StoredValue, VPValue *Mask,
- bool Consecutive, bool Reverse, DebugLoc DL)
- : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue},
- DL),
- Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
- assert((Consecutive || !Reverse) && "Reverse implies consecutive");
- setMask(Mask);
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPDef::VPWidenLoadSC ||
+ R->getVPDefID() == VPDef::VPWidenStoreSC;
}
- VPWidenMemoryInstructionRecipe *clone() override {
- if (isStore())
- return new VPWidenMemoryInstructionRecipe(
- cast<StoreInst>(Ingredient), getAddr(), getStoredValue(), getMask(),
- Consecutive, Reverse, getDebugLoc());
-
- return new VPWidenMemoryInstructionRecipe(cast<LoadInst>(Ingredient),
- getAddr(), getMask(), Consecutive,
- Reverse, getDebugLoc());
+ static inline bool classof(const VPUser *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && classof(R);
}
- VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)
+ /// Return whether the loaded-from / stored-to addresses are consecutive.
+ bool isConsecutive() const { return Consecutive; }
+
+ /// Return whether the consecutive loaded/stored addresses are in reverse
+ /// order.
+ bool isReverse() const { return Reverse; }
/// Return the address accessed by this recipe.
- VPValue *getAddr() const {
- return getOperand(0); // Address is the 1st, mandatory operand.
- }
+ VPValue *getAddr() const { return getOperand(0); }
+
+ /// Returns true if the recipe is masked.
+ bool isMasked() const { return IsMasked; }
/// Return the mask used by this recipe. Note that a full mask is represented
/// by a nullptr.
@@ -2350,23 +2345,34 @@ public:
return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
}
- /// Returns true if this recipe is a store.
- bool isStore() const { return isa<StoreInst>(Ingredient); }
+ /// Generate the wide load/store.
+ void execute(VPTransformState &State) override {
+ llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
+ }
- /// Return the address accessed by this recipe.
- VPValue *getStoredValue() const {
- assert(isStore() && "Stored value only available for store instructions");
- return getOperand(1); // Stored value is the 2nd, mandatory operand.
+ Instruction &getIngredient() const { return Ingredient; }
+};
+
+/// A recipe for widening load operations, using the address to load from and an
+/// optional mask.
+struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
+ VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
+ bool Consecutive, bool Reverse, DebugLoc DL)
+ : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
+ Reverse, DL),
+ VPValue(this, &Load) {
+ setMask(Mask);
}
- // Return whether the loaded-from / stored-to addresses are consecutive.
- bool isConsecutive() const { return Consecutive; }
+ VPWidenLoadRecipe *clone() override {
+ return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
+ getMask(), Consecutive, Reverse,
+ getDebugLoc());
+ }
- // Return whether the consecutive loaded/stored addresses are in reverse
- // order.
- bool isReverse() const { return Reverse; }
+ VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
- /// Generate the wide load/store.
+ /// Generate a wide load or gather.
void execute(VPTransformState &State) override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2380,16 +2386,51 @@ public:
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
- // Widened, consecutive memory operations only demand the first lane of
- // their address, unless the same operand is also stored. That latter can
- // happen with opaque pointers.
- return Op == getAddr() && isConsecutive() &&
- (!isStore() || Op != getStoredValue());
+ // Widened, consecutive load operations only demand the first lane of
+ // their address.
+ return Op == getAddr() && isConsecutive();
}
-
- Instruction &getIngredient() const { return Ingredient; }
};
+/// A recipe for widening store operations, using the stored value, the address
+/// to store to and an optional mask.
+struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
+ VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
+ VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
+ : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
+ Consecutive, Reverse, DL) {
+ setMask(Mask);
+ }
+
+ VPWidenStoreRecipe *clone() override {
+ return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
+ getStoredValue(), getMask(), Consecutive,
+ Reverse, getDebugLoc());
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
+
+ /// Return the value stored by this recipe.
+ VPValue *getStoredValue() const { return getOperand(1); }
+
+ /// Generate a wide store or scatter.
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ // Widened, consecutive stores only demand the first lane of their address,
+ // unless the same operand is also stored.
+ return Op == getAddr() && isConsecutive() && Op != getStoredValue();
+ }
+};
/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPSingleDefRecipe {
const SCEV *Expr;
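A simplified, non-LLVM model of the class split above, using hypothetical names: a common memory base owns the address, the optional mask and the consecutive/reverse flags, the load variant produces a value, and the store variant adds the stored operand. The real recipes' VPValue inheritance, cloning and classof machinery are omitted.

#include <cassert>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct WidenMemBase {
  std::vector<std::string> Operands; // operand 0 = address, optional mask last
  bool Consecutive, Reverse, Masked = false;

  WidenMemBase(std::string Addr, bool Consecutive, bool Reverse)
      : Operands{std::move(Addr)}, Consecutive(Consecutive), Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
  }
  virtual ~WidenMemBase() = default;

  void setMask(const std::string &Mask) {
    if (Mask.empty())
      return; // a full (all-one) mask is simply not stored
    Operands.push_back(Mask);
    Masked = true;
  }
  virtual void execute() const = 0; // each variant emits its own wide access
};

struct WidenLoad final : WidenMemBase {
  WidenLoad(std::string Addr, std::string Mask, bool C, bool R)
      : WidenMemBase(std::move(Addr), C, R) {
    setMask(Mask);
  }
  void execute() const override {
    std::cout << (Masked ? "masked " : "") << "wide load from " << Operands[0]
              << "\n";
  }
};

struct WidenStore final : WidenMemBase {
  WidenStore(std::string Addr, std::string Val, std::string Mask, bool C, bool R)
      : WidenMemBase(std::move(Addr), C, R) {
    Operands.push_back(std::move(Val)); // stored value is operand 1
    setMask(Mask);
  }
  void execute() const override {
    std::cout << (Masked ? "masked " : "") << "wide store to " << Operands[0]
              << "\n";
  }
};

int main() {
  std::vector<std::unique_ptr<WidenMemBase>> Recipes;
  Recipes.push_back(std::make_unique<WidenLoad>("%addr", "%mask", true, false));
  Recipes.push_back(std::make_unique<WidenStore>("%addr", "%val", "", true, false));
  for (const auto &R : Recipes)
    R->execute(); // virtual dispatch replaces the old isStore() branching
}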
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index c8ae2ee..130fb04 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -108,9 +108,9 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
return CI.getType();
}
-Type *VPTypeAnalysis::inferScalarTypeForRecipe(
- const VPWidenMemoryInstructionRecipe *R) {
- assert(!R->isStore() && "Store recipes should not define any values");
+Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
+ assert(isa<VPWidenLoadRecipe>(R) &&
+ "Store recipes should not define any values");
return cast<LoadInst>(&R->getIngredient())->getType();
}
@@ -231,8 +231,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
return inferScalarType(R->getOperand(0));
})
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
- VPWidenCallRecipe, VPWidenMemoryInstructionRecipe,
- VPWidenSelectRecipe>(
+ VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
// TODO: Use info from interleave group.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
index 4e69de7..7d310b1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
@@ -20,7 +20,7 @@ class VPInstruction;
class VPWidenRecipe;
class VPWidenCallRecipe;
class VPWidenIntOrFpInductionRecipe;
-class VPWidenMemoryInstructionRecipe;
+class VPWidenMemoryRecipe;
struct VPWidenSelectRecipe;
class VPReplicateRecipe;
class Type;
@@ -46,7 +46,7 @@ class VPTypeAnalysis {
Type *inferScalarTypeForRecipe(const VPWidenCallRecipe *R);
Type *inferScalarTypeForRecipe(const VPWidenRecipe *R);
Type *inferScalarTypeForRecipe(const VPWidenIntOrFpInductionRecipe *R);
- Type *inferScalarTypeForRecipe(const VPWidenMemoryInstructionRecipe *R);
+ Type *inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R);
Type *inferScalarTypeForRecipe(const VPWidenSelectRecipe *R);
Type *inferScalarTypeForRecipe(const VPReplicateRecipe *R);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6253199..7893264 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -47,9 +47,8 @@ bool VPRecipeBase::mayWriteToMemory() const {
switch (getVPDefID()) {
case VPInterleaveSC:
return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
- case VPWidenMemoryInstructionSC: {
- return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
- }
+ case VPWidenStoreSC:
+ return true;
case VPReplicateSC:
case VPWidenCallSC:
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
@@ -64,6 +63,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenCastSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
+ case VPWidenLoadSC:
case VPWidenPHISC:
case VPWidenSC:
case VPWidenSelectSC: {
@@ -81,16 +81,16 @@ bool VPRecipeBase::mayWriteToMemory() const {
bool VPRecipeBase::mayReadFromMemory() const {
switch (getVPDefID()) {
- case VPWidenMemoryInstructionSC: {
- return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
- }
+ case VPWidenLoadSC:
+ return true;
case VPReplicateSC:
case VPWidenCallSC:
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
->mayReadFromMemory();
case VPBranchOnMaskSC:
- case VPScalarIVStepsSC:
case VPPredInstPHISC:
+ case VPScalarIVStepsSC:
+ case VPWidenStoreSC:
return false;
case VPBlendSC:
case VPReductionSC:
@@ -155,12 +155,13 @@ bool VPRecipeBase::mayHaveSideEffects() const {
}
case VPInterleaveSC:
return mayWriteToMemory();
- case VPWidenMemoryInstructionSC:
- assert(cast<VPWidenMemoryInstructionRecipe>(this)
- ->getIngredient()
- .mayHaveSideEffects() == mayWriteToMemory() &&
- "mayHaveSideffects result for ingredient differs from this "
- "implementation");
+ case VPWidenLoadSC:
+ case VPWidenStoreSC:
+ assert(
+ cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
+ mayWriteToMemory() &&
+ "mayHaveSideffects result for ingredient differs from this "
+ "implementation");
return mayWriteToMemory();
case VPReplicateSC: {
auto *R = cast<VPReplicateRecipe>(this);
@@ -501,8 +502,6 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = RdxParts[0];
unsigned Op = RecurrenceDescriptor::getOpcode(RK);
- if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
- Op = Instruction::Or;
if (PhiR->isOrdered()) {
ReducedPartRdx = RdxParts[State.UF - 1];
@@ -515,16 +514,19 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
ReducedPartRdx = Builder.CreateBinOp(
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
- else
+ else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
+ TrackingVH<Value> ReductionStartValue =
+ RdxDesc.getRecurrenceStartValue();
+ ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,
+ ReducedPartRdx, RdxPart);
+ } else
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
}
}
// Create the reduction after the loop. Note that inloop reductions create
// the target reduction in the loop using a Reduction recipe.
- if ((State.VF.isVector() ||
- RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
- !PhiR->isInLoop()) {
+ if (State.VF.isVector() && !PhiR->isInLoop()) {
ReducedPartRdx =
createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
// If the reduction can be performed in a smaller type, we need to extend
@@ -1768,16 +1770,17 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
printOperands(O, SlotTracker);
}
-void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
+void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN ";
+ printAsOperand(O, SlotTracker);
+ O << " = load ";
+ printOperands(O, SlotTracker);
+}
- if (!isStore()) {
- getVPSingleValue()->printAsOperand(O, SlotTracker);
- O << " = ";
- }
- O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
-
+void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN store ";
printOperands(O, SlotTracker);
}
#endif
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 1256e4d..382bf5a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -60,14 +60,14 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
assert(isa<VPInstruction>(&Ingredient) &&
"only VPInstructions expected here");
assert(!isa<PHINode>(Inst) && "phis should be handled above");
- // Create VPWidenMemoryInstructionRecipe for loads and stores.
+ // Create VPWidenMemoryRecipe for loads and stores.
if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
- NewRecipe = new VPWidenMemoryInstructionRecipe(
+ NewRecipe = new VPWidenLoadRecipe(
*Load, Ingredient.getOperand(0), nullptr /*Mask*/,
false /*Consecutive*/, false /*Reverse*/,
Ingredient.getDebugLoc());
} else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
- NewRecipe = new VPWidenMemoryInstructionRecipe(
+ NewRecipe = new VPWidenStoreRecipe(
*Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
Ingredient.getDebugLoc());
@@ -977,10 +977,9 @@ void VPlanTransforms::truncateToMinimalBitwidths(
vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
- VPWidenSelectRecipe, VPWidenMemoryInstructionRecipe>(&R))
+ VPWidenSelectRecipe, VPWidenMemoryRecipe>(&R))
continue;
- if (isa<VPWidenMemoryInstructionRecipe>(&R) &&
- cast<VPWidenMemoryInstructionRecipe>(&R)->isStore())
+ if (isa<VPWidenStoreRecipe>(&R))
continue;
VPValue *ResultVPV = R.getVPSingleValue();
@@ -1048,10 +1047,9 @@ void VPlanTransforms::truncateToMinimalBitwidths(
assert(cast<VPWidenRecipe>(&R)->getOpcode() == Instruction::ICmp &&
"Only ICmps should not need extending the result.");
- if (isa<VPWidenMemoryInstructionRecipe>(&R)) {
- assert(!cast<VPWidenMemoryInstructionRecipe>(&R)->isStore() && "stores cannot be narrowed");
+ assert(!isa<VPWidenStoreRecipe>(&R) && "stores cannot be narrowed");
+ if (isa<VPWidenLoadRecipe>(&R))
continue;
- }
// Shrink operands by introducing truncates as needed.
unsigned StartIdx = isa<VPWidenSelectRecipe>(&R) ? 1 : 0;
@@ -1315,7 +1313,7 @@ void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) {
ConstantInt::getTrue(CanonicalIVPHI->getScalarType()->getContext());
VPValue *VPTrueMask = Plan.getOrAddLiveIn(TrueMask);
replaceHeaderPredicateWith(Plan, *VPTrueMask, [](VPUser &U, unsigned) {
- return isa<VPWidenMemoryInstructionRecipe>(U);
+ return isa<VPWidenMemoryRecipe>(U);
});
// Now create the ExplicitVectorLengthPhi recipe in the main loop.
auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc());
@@ -1371,8 +1369,7 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
// instruction. Widen memory instructions involved in address computation
// will lead to gather/scatter instructions, which don't need to be
// handled.
- if (isa<VPWidenMemoryInstructionRecipe>(CurRec) ||
- isa<VPInterleaveRecipe>(CurRec) ||
+ if (isa<VPWidenMemoryRecipe>(CurRec) || isa<VPInterleaveRecipe>(CurRec) ||
isa<VPScalarIVStepsRecipe>(CurRec) || isa<VPHeaderPHIRecipe>(CurRec))
continue;
@@ -1420,7 +1417,7 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
auto Iter = vp_depth_first_deep(Plan.getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
for (VPRecipeBase &Recipe : *VPBB) {
- if (auto *WidenRec = dyn_cast<VPWidenMemoryInstructionRecipe>(&Recipe)) {
+ if (auto *WidenRec = dyn_cast<VPWidenMemoryRecipe>(&Recipe)) {
Instruction &UnderlyingInstr = WidenRec->getIngredient();
VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
if (AddrDef && WidenRec->isConsecutive() &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 8b221d3..0bbc7ff 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -23,6 +23,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/iterator_range.h"
@@ -35,7 +36,6 @@ class VPDef;
class VPSlotTracker;
class VPUser;
class VPRecipeBase;
-class VPWidenMemoryInstructionRecipe;
// This is the base class of the VPlan Def/Use graph, used for modeling the data
// flow into, within and out of the VPlan. VPValues can stand for live-ins
@@ -50,7 +50,6 @@ class VPValue {
friend class VPInterleavedAccessInfo;
friend class VPSlotTracker;
friend class VPRecipeBase;
- friend class VPWidenMemoryInstructionRecipe;
const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
@@ -357,11 +356,12 @@ public:
VPWidenCanonicalIVSC,
VPWidenCastSC,
VPWidenGEPSC,
- VPWidenMemoryInstructionSC,
+ VPWidenLoadSC,
+ VPWidenStoreSC,
VPWidenSC,
VPWidenSelectSC,
- // START: Phi-like recipes. Need to be kept together.
VPBlendSC,
+ // START: Phi-like recipes. Need to be kept together.
VPWidenPHISC,
VPPredInstPHISC,
// START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
@@ -375,7 +375,7 @@ public:
VPReductionPHISC,
// END: SubclassID for recipes that inherit VPHeaderPHIRecipe
// END: Phi-like recipes
- VPFirstPHISC = VPBlendSC,
+ VPFirstPHISC = VPWidenPHISC,
VPFirstHeaderPHISC = VPCanonicalIVPHISC,
VPLastHeaderPHISC = VPReductionPHISC,
VPLastPHISC = VPReductionPHISC,
@@ -443,29 +443,36 @@ public:
class VPlan;
class VPBasicBlock;
-/// This class can be used to assign consecutive numbers to all VPValues in a
-/// VPlan and allows querying the numbering for printing, similar to the
+/// This class can be used to assign names to VPValues. For VPValues without
+/// underlying value, assign consecutive numbers and use those as names (wrapped
+/// in vp<>). Otherwise, use the name from the underlying value (wrapped in
+/// ir<>), appending a .V version number if there are multiple uses of the same
+/// name. Allows querying names for VPValues for printing, similar to the
/// ModuleSlotTracker for IR values.
class VPSlotTracker {
- DenseMap<const VPValue *, unsigned> Slots;
+ /// Keep track of versioned names assigned to VPValues with underlying IR
+ /// values.
+ DenseMap<const VPValue *, std::string> VPValue2Name;
+ /// Keep track of the next number to use to version the base name.
+ StringMap<unsigned> BaseName2Version;
+
+ /// Number to assign to the next VPValue without underlying value.
unsigned NextSlot = 0;
- void assignSlot(const VPValue *V);
- void assignSlots(const VPlan &Plan);
- void assignSlots(const VPBasicBlock *VPBB);
+ void assignName(const VPValue *V);
+ void assignNames(const VPlan &Plan);
+ void assignNames(const VPBasicBlock *VPBB);
public:
VPSlotTracker(const VPlan *Plan = nullptr) {
if (Plan)
- assignSlots(*Plan);
+ assignNames(*Plan);
}
- unsigned getSlot(const VPValue *V) const {
- auto I = Slots.find(V);
- if (I == Slots.end())
- return -1;
- return I->second;
- }
+ /// Returns the name assigned to \p V, if there is one; otherwise try to
+ /// construct a name from the underlying value, if there is one, and return
+ /// <badref> if not.
+ std::string getOrCreateName(const VPValue *V) const;
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 12d37fa..5587302 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -128,7 +128,7 @@ static bool verifyVPBasicBlock(const VPBasicBlock *VPBB,
}
return true;
}
- if (isa<VPWidenMemoryInstructionRecipe>(R))
+ if (isa<VPWidenMemoryRecipe>(R))
VPWidenMemRecipe = R;
return true;
};
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index e0e2f50..4918cee 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -886,7 +886,7 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
SafeToSpeculate = isSafeToSpeculativelyExecuteWithOpcode(
*FunctionalOpcode, &VPI, nullptr, &AC, &DT);
if (!SafeToSpeculate &&
- !isKnownNonZero(EVL, /*Depth=*/0, SimplifyQuery(*DL, &DT, &AC, &VPI)))
+ !isKnownNonZero(EVL, SimplifyQuery(*DL, &DT, &AC, &VPI)))
return false;
Value *ScalarVal =