diff options
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 14 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlan.h | 17 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 31 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 19 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 10 |
6 files changed, 92 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index ee1fec0..805bdb4 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -1350,6 +1350,10 @@ static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU, BB->getTerminator()->eraseFromParent(); SwitchInst *SI = IRB.CreateSwitch( IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N); + // We can't know the precise weights here, as they would depend on the value + // distribution of Call->getArgOperand(1). So we just mark it as "unknown". + setExplicitlyUnknownBranchWeightsIfProfiled(*SI, *Call->getFunction(), + DEBUG_TYPE); Type *IndexTy = DL.getIndexType(Call->getType()); SmallVector<DominatorTree::UpdateType, 8> Updates; diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index b988957..cf076b9a 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -5810,10 +5810,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { case Intrinsic::x86_avx512_vpdpbusds_512: case Intrinsic::x86_avx2_vpdpbssd_128: case Intrinsic::x86_avx2_vpdpbssd_256: + case Intrinsic::x86_avx10_vpdpbssd_512: case Intrinsic::x86_avx2_vpdpbssds_128: case Intrinsic::x86_avx2_vpdpbssds_256: - case Intrinsic::x86_avx10_vpdpbssd_512: case Intrinsic::x86_avx10_vpdpbssds_512: + case Intrinsic::x86_avx2_vpdpbsud_128: + case Intrinsic::x86_avx2_vpdpbsud_256: + case Intrinsic::x86_avx10_vpdpbsud_512: + case Intrinsic::x86_avx2_vpdpbsuds_128: + case Intrinsic::x86_avx2_vpdpbsuds_256: + case Intrinsic::x86_avx10_vpdpbsuds_512: + case Intrinsic::x86_avx2_vpdpbuud_128: + case Intrinsic::x86_avx2_vpdpbuud_256: + case Intrinsic::x86_avx10_vpdpbuud_512: + case Intrinsic::x86_avx2_vpdpbuuds_128: + case Intrinsic::x86_avx2_vpdpbuuds_256: + case Intrinsic::x86_avx10_vpdpbuuds_512: handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/4, /*EltSize=*/8); break; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 0822511..4c7a083 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2997,6 +2997,10 @@ class VPExpressionRecipe : public VPSingleDefRecipe { /// vector operands, performing a reduction.add on the result, and adding /// the scalar result to a chain. MulAccReduction, + /// Represent an inloop multiply-accumulate reduction, multiplying the + /// extended vector operands, negating the multiplication, performing a + /// reduction.add on the result, and adding the scalar result to a chain. + ExtNegatedMulAccReduction, }; /// Type of the expression. @@ -3020,6 +3024,19 @@ public: VPWidenRecipe *Mul, VPReductionRecipe *Red) : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction, {Ext0, Ext1, Mul, Red}) {} + VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, + VPWidenRecipe *Mul, VPWidenRecipe *Sub, + VPReductionRecipe *Red) + : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction, + {Ext0, Ext1, Mul, Sub, Red}) { + assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul"); + assert(Red->getRecurrenceKind() == RecurKind::Add && + "Expected an add reduction"); + assert(getNumOperands() >= 3 && "Expected at least three operands"); + auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue()); + assert(SubConst && SubConst->getValue() == 0 && + Sub->getOpcode() == Instruction::Sub && "Expected a negating sub"); + } ~VPExpressionRecipe() override { for (auto *R : reverse(ExpressionRecipes)) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index b5e30cb..ee03729 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2839,12 +2839,17 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF, return Ctx.TTI.getMulAccReductionCost(false, Opcode, RedTy, SrcVecTy, Ctx.CostKind); - case ExpressionTypes::ExtMulAccReduction: + case ExpressionTypes::ExtNegatedMulAccReduction: + assert(Opcode == Instruction::Add && "Unexpected opcode"); + Opcode = Instruction::Sub; + LLVM_FALLTHROUGH; + case ExpressionTypes::ExtMulAccReduction: { return Ctx.TTI.getMulAccReductionCost( cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() == Instruction::ZExt, Opcode, RedTy, SrcVecTy, Ctx.CostKind); } + } llvm_unreachable("Unknown VPExpressionRecipe::ExpressionTypes enum"); } @@ -2890,6 +2895,30 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent, O << ")"; break; } + case ExpressionTypes::ExtNegatedMulAccReduction: { + getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker); + O << " + reduce." + << Instruction::getOpcodeName( + RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind())) + << " (sub (0, mul"; + auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]); + Mul->printFlags(O); + O << "("; + getOperand(0)->printAsOperand(O, SlotTracker); + auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]); + O << " " << Instruction::getOpcodeName(Ext0->getOpcode()) << " to " + << *Ext0->getResultType() << "), ("; + getOperand(1)->printAsOperand(O, SlotTracker); + auto *Ext1 = cast<VPWidenCastRecipe>(ExpressionRecipes[1]); + O << " " << Instruction::getOpcodeName(Ext1->getOpcode()) << " to " + << *Ext1->getResultType() << ")"; + if (Red->isConditional()) { + O << ", "; + Red->getCondOp()->printAsOperand(O, SlotTracker); + } + O << "))"; + break; + } case ExpressionTypes::MulAccReduction: case ExpressionTypes::ExtMulAccReduction: { getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 5252e1f..969dce4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -3543,7 +3543,15 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, }; VPValue *VecOp = Red->getVecOp(); + VPRecipeBase *Sub = nullptr; VPValue *A, *B; + VPValue *Tmp = nullptr; + // Sub reductions could have a sub between the add reduction and vec op. + if (match(VecOp, + m_Binary<Instruction::Sub>(m_SpecificInt(0), m_VPValue(Tmp)))) { + Sub = VecOp->getDefiningRecipe(); + VecOp = Tmp; + } // Try to match reduce.add(mul(...)). if (match(VecOp, m_Mul(m_VPValue(A), m_VPValue(B)))) { auto *RecipeA = @@ -3560,12 +3568,21 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, IsMulAccValidAndClampRange(RecipeA->getOpcode() == Instruction::CastOps::ZExt, Mul, RecipeA, RecipeB, nullptr)) { + if (Sub) + return new VPExpressionRecipe(RecipeA, RecipeB, Mul, + cast<VPWidenRecipe>(Sub), Red); return new VPExpressionRecipe(RecipeA, RecipeB, Mul, Red); } // Match reduce.add(mul). - if (IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr)) + // TODO: Add an expression type for this variant with a negated mul + if (!Sub && + IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr)) return new VPExpressionRecipe(Mul, Red); } + // TODO: Add an expression type for negated versions of other expression + // variants. + if (Sub) + return nullptr; // Match reduce.add(ext(mul(ext(A), ext(B)))). // All extend recipes must have same opcode or A == B // which can be transform to reduce.add(zext(mul(sext(A), sext(B)))). diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 32704bd..d6eb00d 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1031,6 +1031,16 @@ bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) { // Create the cast operation directly to ensure we get a new instruction Instruction *NewCast = CastInst::Create(CastOpcode, NewOp, I.getType()); + // Preserve cast instruction flags + if (RHSFlags.NNeg) + NewCast->setNonNeg(); + if (RHSFlags.NUW) + NewCast->setHasNoUnsignedWrap(); + if (RHSFlags.NSW) + NewCast->setHasNoSignedWrap(); + + NewCast->andIRFlags(LHSCast); + // Insert the new instruction Value *Result = Builder.Insert(NewCast); |