diff options
Diffstat (limited to 'llvm/lib/Transforms/InstCombine')
5 files changed, 124 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 73ec451..9bee523 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2760,21 +2760,34 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". Value *LHSOp, *RHSOp; - if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && - match(Op1, m_PtrToInt(m_Value(RHSOp)))) + if (match(Op0, m_PtrToIntOrAddr(m_Value(LHSOp))) && + match(Op1, m_PtrToIntOrAddr(m_Value(RHSOp)))) if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(), I.hasNoUnsignedWrap())) return replaceInstUsesWith(I, Res); // trunc(p)-trunc(q) -> trunc(p-q) - if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && - match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) + if (match(Op0, m_Trunc(m_PtrToIntOrAddr(m_Value(LHSOp)))) && + match(Op1, m_Trunc(m_PtrToIntOrAddr(m_Value(RHSOp))))) if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(), /* IsNUW */ false)) return replaceInstUsesWith(I, Res); - if (match(Op0, m_ZExt(m_PtrToIntSameSize(DL, m_Value(LHSOp)))) && - match(Op1, m_ZExtOrSelf(m_PtrToInt(m_Value(RHSOp))))) { + auto MatchSubOfZExtOfPtrToIntOrAddr = [&]() { + if (match(Op0, m_ZExt(m_PtrToIntSameSize(DL, m_Value(LHSOp)))) && + match(Op1, m_ZExt(m_PtrToIntSameSize(DL, m_Value(RHSOp))))) + return true; + if (match(Op0, m_ZExt(m_PtrToAddr(m_Value(LHSOp)))) && + match(Op1, m_ZExt(m_PtrToAddr(m_Value(RHSOp))))) + return true; + // Special case for non-canonical ptrtoint in constant expression, + // where the zext has been folded into the ptrtoint. + if (match(Op0, m_ZExt(m_PtrToIntSameSize(DL, m_Value(LHSOp)))) && + match(Op1, m_PtrToInt(m_Value(RHSOp)))) + return true; + return false; + }; + if (MatchSubOfZExtOfPtrToIntOrAddr()) { if (auto *GEP = dyn_cast<GEPOperator>(LHSOp)) { if (GEP->getPointerOperand() == RHSOp) { if (GEP->hasNoUnsignedWrap() || GEP->hasNoUnsignedSignedWrap()) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index dab200d..8d9933b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -582,6 +582,18 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1); return BinaryOperator::CreateSub(ConstCtlz, X); } + + // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false) + if (Op0->hasOneUse() && + match(Op0, + m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) { + Type *Ty = II.getType(); + unsigned BitWidth = Ty->getScalarSizeInBits(); + auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty, + {X, IC.Builder.getFalse()}); + auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth)); + return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz)); + } } // cttz(Pow2) -> Log2(Pow2) @@ -4003,18 +4015,29 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // Try to fold intrinsic into select/phi operands. This is legal if: // * The intrinsic is speculatable. - // * The select condition is not a vector, or the intrinsic does not - // perform cross-lane operations. - if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI) && - isNotCrossLaneOperation(II)) + // * The operand is one of the following: + // - a phi. + // - a select with a scalar condition. + // - a select with a vector condition and II is not a cross lane operation. + if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI)) { for (Value *Op : II->args()) { - if (auto *Sel = dyn_cast<SelectInst>(Op)) - if (Instruction *R = FoldOpIntoSelect(*II, Sel)) + if (auto *Sel = dyn_cast<SelectInst>(Op)) { + bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy(); + if (IsVectorCond && !isNotCrossLaneOperation(II)) + continue; + // Don't replace a scalar select with a more expensive vector select if + // we can't simplify both arms of the select. + bool SimplifyBothArms = + !Op->getType()->isVectorTy() && II->getType()->isVectorTy(); + if (Instruction *R = FoldOpIntoSelect( + *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms)) return R; + } if (auto *Phi = dyn_cast<PHINode>(Op)) if (Instruction *R = foldOpIntoPhi(*II, Phi)) return R; } + } if (Instruction *Shuf = foldShuffledIntrinsicOperands(II)) return Shuf; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 943c223..ede73f8 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -664,7 +664,8 @@ public: /// This also works for Cast instructions, which obviously do not have a /// second operand. Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI, - bool FoldWithMultiUse = false); + bool FoldWithMultiUse = false, + bool SimplifyBothArms = false); /// This is a convenience wrapper function for the above two functions. Instruction *foldBinOpIntoSelectOrPhi(BinaryOperator &I); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 5aa8de3..f5130da 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -4697,5 +4697,31 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { cast<IntrinsicInst>(TrueVal)->getParamAlign(0).valueOrOne(), CondVal, FalseVal)); + // Canonicalize sign function ashr pattern: select (icmp slt X, 1), ashr X, + // bitwidth-1, 1 -> scmp(X, 0) + // Also handles: select (icmp sgt X, 0), 1, ashr X, bitwidth-1 -> scmp(X, 0) + unsigned BitWidth = SI.getType()->getScalarSizeInBits(); + CmpPredicate Pred; + Value *CmpLHS, *CmpRHS; + + // Canonicalize sign function ashr patterns: + // select (icmp slt X, 1), ashr X, bitwidth-1, 1 -> scmp(X, 0) + // select (icmp sgt X, 0), 1, ashr X, bitwidth-1 -> scmp(X, 0) + if (match(&SI, m_Select(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)), + m_Value(TrueVal), m_Value(FalseVal))) && + ((Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_One()) && + match(TrueVal, + m_AShr(m_Specific(CmpLHS), m_SpecificInt(BitWidth - 1))) && + match(FalseVal, m_One())) || + (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_Zero()) && + match(TrueVal, m_One()) && + match(FalseVal, + m_AShr(m_Specific(CmpLHS), m_SpecificInt(BitWidth - 1)))))) { + + Function *Scmp = Intrinsic::getOrInsertDeclaration( + SI.getModule(), Intrinsic::scmp, {SI.getType(), SI.getType()}); + return CallInst::Create(Scmp, {CmpLHS, ConstantInt::get(SI.getType(), 0)}); + } + return nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 3f11cae..9c8de45 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1777,7 +1777,8 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, } Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, - bool FoldWithMultiUse) { + bool FoldWithMultiUse, + bool SimplifyBothArms) { // Don't modify shared select instructions unless set FoldWithMultiUse if (!SI->hasOneUse() && !FoldWithMultiUse) return nullptr; @@ -1821,6 +1822,9 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, if (!NewTV && !NewFV) return nullptr; + if (SimplifyBothArms && !(NewTV && NewFV)) + return nullptr; + // Create an instruction for the arm that did not fold. if (!NewTV) NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this); @@ -2323,6 +2327,18 @@ Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C, return ConstantVector::get(NewVecC); } +// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS). +static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector, + Constant *Splat, bool SplatLHS, + const DataLayout &DL) { + ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount(); + Constant *LHS = ConstantVector::getSplat(EC, Splat); + Constant *RHS = Vector; + if (!SplatLHS) + std::swap(LHS, RHS); + return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL); +} + Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { if (!isa<VectorType>(Inst.getType())) return nullptr; @@ -2334,6 +2350,37 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { assert(cast<VectorType>(RHS->getType())->getElementCount() == cast<VectorType>(Inst.getType())->getElementCount()); + auto foldConstantsThroughSubVectorInsertSplat = + [&](Value *MaybeSubVector, Value *MaybeSplat, + bool SplatLHS) -> Instruction * { + Value *Idx; + Constant *Splat, *SubVector, *Dest; + if (!match(MaybeSplat, m_ConstantSplat(m_Constant(Splat))) || + !match(MaybeSubVector, + m_VectorInsert(m_Constant(Dest), m_Constant(SubVector), + m_Value(Idx)))) + return nullptr; + SubVector = + constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL); + Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL); + if (!SubVector || !Dest) + return nullptr; + auto *InsertVector = + Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, Idx); + return replaceInstUsesWith(Inst, InsertVector); + }; + + // If one operand is a constant splat and the other operand is a + // `vector.insert` where both the destination and subvector are constant, + // apply the operation to both the destination and subvector, returning a new + // constant `vector.insert`. This helps constant folding for scalable vectors. + if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat( + /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false)) + return Folded; + if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat( + /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true)) + return Folded; + // If both operands of the binop are vector concatenations, then perform the // narrow binop on each pair of the source operands followed by concatenation // of the results. |
