about summary refs log tree commit diff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp 4
-rw-r--r-- llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp 14
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.h 17
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp 31
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp 19
-rw-r--r-- llvm/lib/Transforms/Vectorize/VectorCombine.cpp 10
6 files changed, 92 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index ee1fec0..805bdb4 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1350,6 +1350,10 @@ static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
BB->getTerminator()->eraseFromParent();
SwitchInst *SI = IRB.CreateSwitch(
IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
+ // We can't know the precise weights here, as they would depend on the value
+ // distribution of Call->getArgOperand(1). So we just mark it as "unknown".
+ setExplicitlyUnknownBranchWeightsIfProfiled(*SI, *Call->getFunction(),
+ DEBUG_TYPE);
Type *IndexTy = DL.getIndexType(Call->getType());
SmallVector<DominatorTree::UpdateType, 8> Updates;
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index b988957..cf076b9a 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -5810,10 +5810,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::x86_avx512_vpdpbusds_512:
case Intrinsic::x86_avx2_vpdpbssd_128:
case Intrinsic::x86_avx2_vpdpbssd_256:
+ case Intrinsic::x86_avx10_vpdpbssd_512:
case Intrinsic::x86_avx2_vpdpbssds_128:
case Intrinsic::x86_avx2_vpdpbssds_256:
- case Intrinsic::x86_avx10_vpdpbssd_512:
case Intrinsic::x86_avx10_vpdpbssds_512:
+ case Intrinsic::x86_avx2_vpdpbsud_128:
+ case Intrinsic::x86_avx2_vpdpbsud_256:
+ case Intrinsic::x86_avx10_vpdpbsud_512:
+ case Intrinsic::x86_avx2_vpdpbsuds_128:
+ case Intrinsic::x86_avx2_vpdpbsuds_256:
+ case Intrinsic::x86_avx10_vpdpbsuds_512:
+ case Intrinsic::x86_avx2_vpdpbuud_128:
+ case Intrinsic::x86_avx2_vpdpbuud_256:
+ case Intrinsic::x86_avx10_vpdpbuud_512:
+ case Intrinsic::x86_avx2_vpdpbuuds_128:
+ case Intrinsic::x86_avx2_vpdpbuuds_256:
+ case Intrinsic::x86_avx10_vpdpbuuds_512:
handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/4, /*EltSize=*/8);
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0822511..4c7a083 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2997,6 +2997,10 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
/// vector operands, performing a reduction.add on the result, and adding
/// the scalar result to a chain.
MulAccReduction,
+ /// Represent an inloop multiply-accumulate reduction, multiplying the
+ /// extended vector operands, negating the multiplication, performing a
+ /// reduction.add on the result, and adding the scalar result to a chain.
+ ExtNegatedMulAccReduction,
};
/// Type of the expression.
@@ -3020,6 +3024,19 @@ public:
VPWidenRecipe *Mul, VPReductionRecipe *Red)
: VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
{Ext0, Ext1, Mul, Red}) {}
+ VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
+ VPWidenRecipe *Mul, VPWidenRecipe *Sub,
+ VPReductionRecipe *Red)
+ : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
+ {Ext0, Ext1, Mul, Sub, Red}) {
+ assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
+ assert(Red->getRecurrenceKind() == RecurKind::Add &&
+ "Expected an add reduction");
+ assert(getNumOperands() >= 3 && "Expected at least three operands");
+ auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue());
+ assert(SubConst && SubConst->getValue() == 0 &&
+ Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
+ }
~VPExpressionRecipe() override {
for (auto *R : reverse(ExpressionRecipes))
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b5e30cb..ee03729 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2839,12 +2839,17 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
return Ctx.TTI.getMulAccReductionCost(false, Opcode, RedTy, SrcVecTy,
Ctx.CostKind);
- case ExpressionTypes::ExtMulAccReduction:
+ case ExpressionTypes::ExtNegatedMulAccReduction:
+ assert(Opcode == Instruction::Add && "Unexpected opcode");
+ Opcode = Instruction::Sub;
+ LLVM_FALLTHROUGH;
+ case ExpressionTypes::ExtMulAccReduction: {
return Ctx.TTI.getMulAccReductionCost(
cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
Instruction::ZExt,
Opcode, RedTy, SrcVecTy, Ctx.CostKind);
}
+ }
llvm_unreachable("Unknown VPExpressionRecipe::ExpressionTypes enum");
}
@@ -2890,6 +2895,30 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
O << ")";
break;
}
+ case ExpressionTypes::ExtNegatedMulAccReduction: {
+ getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
+ O << " + reduce."
+ << Instruction::getOpcodeName(
+ RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
+ << " (sub (0, mul";
+ auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]);
+ Mul->printFlags(O);
+ O << "(";
+ getOperand(0)->printAsOperand(O, SlotTracker);
+ auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
+ O << " " << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "
+ << *Ext0->getResultType() << "), (";
+ getOperand(1)->printAsOperand(O, SlotTracker);
+ auto *Ext1 = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);
+ O << " " << Instruction::getOpcodeName(Ext1->getOpcode()) << " to "
+ << *Ext1->getResultType() << ")";
+ if (Red->isConditional()) {
+ O << ", ";
+ Red->getCondOp()->printAsOperand(O, SlotTracker);
+ }
+ O << "))";
+ break;
+ }
case ExpressionTypes::MulAccReduction:
case ExpressionTypes::ExtMulAccReduction: {
getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 5252e1f..969dce4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3543,7 +3543,15 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
};
VPValue *VecOp = Red->getVecOp();
+ VPRecipeBase *Sub = nullptr;
VPValue *A, *B;
+ VPValue *Tmp = nullptr;
+ // Sub reductions could have a sub between the add reduction and vec op.
+ if (match(VecOp,
+ m_Binary<Instruction::Sub>(m_SpecificInt(0), m_VPValue(Tmp)))) {
+ Sub = VecOp->getDefiningRecipe();
+ VecOp = Tmp;
+ }
// Try to match reduce.add(mul(...)).
if (match(VecOp, m_Mul(m_VPValue(A), m_VPValue(B)))) {
auto *RecipeA =
@@ -3560,12 +3568,21 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
IsMulAccValidAndClampRange(RecipeA->getOpcode() ==
Instruction::CastOps::ZExt,
Mul, RecipeA, RecipeB, nullptr)) {
+ if (Sub)
+ return new VPExpressionRecipe(RecipeA, RecipeB, Mul,
+ cast<VPWidenRecipe>(Sub), Red);
return new VPExpressionRecipe(RecipeA, RecipeB, Mul, Red);
}
// Match reduce.add(mul).
- if (IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr))
+ // TODO: Add an expression type for this variant with a negated mul
+ if (!Sub &&
+ IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr))
return new VPExpressionRecipe(Mul, Red);
}
+ // TODO: Add an expression type for negated versions of other expression
+ // variants.
+ if (Sub)
+ return nullptr;
// Match reduce.add(ext(mul(ext(A), ext(B)))).
// All extend recipes must have same opcode or A == B
// which can be transform to reduce.add(zext(mul(sext(A), sext(B)))).
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 32704bd..d6eb00d 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1031,6 +1031,16 @@ bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
// Create the cast operation directly to ensure we get a new instruction
Instruction *NewCast = CastInst::Create(CastOpcode, NewOp, I.getType());
+ // Preserve cast instruction flags
+ if (RHSFlags.NNeg)
+ NewCast->setNonNeg();
+ if (RHSFlags.NUW)
+ NewCast->setHasNoUnsignedWrap();
+ if (RHSFlags.NSW)
+ NewCast->setHasNoSignedWrap();
+
+ NewCast->andIRFlags(LHSCast);
+
// Insert the new instruction
Value *Result = Builder.Insert(NewCast);