Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp     |  12
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp    |  26
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp |  43
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp  |  40
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h                    |  14
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp        | 164
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.cpp             |   3
7 files changed, 207 insertions, 95 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 669d4f0..8d9933b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -582,6 +582,18 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
         IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
     return BinaryOperator::CreateSub(ConstCtlz, X);
   }
+
+  // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
+  if (Op0->hasOneUse() &&
+      match(Op0,
+            m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
+    Type *Ty = II.getType();
+    unsigned BitWidth = Ty->getScalarSizeInBits();
+    auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
+                                            {X, IC.Builder.getFalse()});
+    auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
+    return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
+  }
 }
 
 // cttz(Pow2) -> Log2(Pow2)
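The InstCombineCalls.cpp fold rests on a small bit trick: if x has t trailing zeros, ~x & (x - 1) is a mask of exactly t trailing ones, so its leading-zero count is bitwidth - t. The identity even holds at x == 0, where ~0 & (0 - 1) is all-ones, provided cttz(0) is defined as the bit width; that is why the new code passes is_zero_poison = false to cttz. A minimal standalone C++ spot check of the identity (helper names are illustrative, not from the patch):

    #include <cassert>
    #include <cstdint>

    // Portable 32-bit ctlz/cttz, defined at zero (both return 32 for 0),
    // mirroring the is_zero_poison = false semantics relied on by the fold.
    static int ctlz32(uint32_t v) {
      int n = 0;
      while (n < 32 && !(v & 0x80000000u)) { v <<= 1; ++n; }
      return n;
    }
    static int cttz32(uint32_t v) {
      int n = 0;
      while (n < 32 && !(v & 1u)) { v >>= 1; ++n; }
      return n;
    }

    int main() {
      // ctlz(~x & (x - 1)) == 32 - cttz(x), including the x == 0 edge case,
      // where both sides evaluate to 0.
      const uint32_t samples[] = {0u, 1u, 2u, 6u, 8u,
                                  0x80000000u, 0xFFFF0000u, 0xFFFFFFFFu};
      for (uint32_t x : samples)
        assert(ctlz32(~x & (x - 1u)) == 32 - cttz32(x));
      return 0;
    }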
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 5aa8de3..f5130da 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -4697,5 +4697,31 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
         cast<IntrinsicInst>(TrueVal)->getParamAlign(0).valueOrOne(),
         CondVal, FalseVal));
 
+  // Canonicalize sign function ashr pattern: select (icmp slt X, 1), ashr X,
+  // bitwidth-1, 1 -> scmp(X, 0)
+  // Also handles: select (icmp sgt X, 0), 1, ashr X, bitwidth-1 -> scmp(X, 0)
+  unsigned BitWidth = SI.getType()->getScalarSizeInBits();
+  CmpPredicate Pred;
+  Value *CmpLHS, *CmpRHS;
+
+  // Canonicalize sign function ashr patterns:
+  // select (icmp slt X, 1), ashr X, bitwidth-1, 1 -> scmp(X, 0)
+  // select (icmp sgt X, 0), 1, ashr X, bitwidth-1 -> scmp(X, 0)
+  if (match(&SI, m_Select(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)),
+                          m_Value(TrueVal), m_Value(FalseVal))) &&
+      ((Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_One()) &&
+        match(TrueVal,
+              m_AShr(m_Specific(CmpLHS), m_SpecificInt(BitWidth - 1))) &&
+        match(FalseVal, m_One())) ||
+       (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_Zero()) &&
+        match(TrueVal, m_One()) &&
+        match(FalseVal,
+              m_AShr(m_Specific(CmpLHS), m_SpecificInt(BitWidth - 1)))))) {
+
+    Function *Scmp = Intrinsic::getOrInsertDeclaration(
+        SI.getModule(), Intrinsic::scmp, {SI.getType(), SI.getType()});
+    return CallInst::Create(Scmp, {CmpLHS, ConstantInt::get(SI.getType(), 0)});
+  }
+
   return nullptr;
 }
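Both matched select forms compute the signum of X: for X < 1, the arithmetic shift X >> (bitwidth - 1) yields -1 for negative X and 0 for zero, and the other arm supplies 1 for positive X, which is exactly llvm.scmp(X, 0). A short C++ sanity check of the i32 case (assumes arithmetic right shift of signed values, which C++20 guarantees and IR ashr provides):

    #include <cassert>
    #include <cstdint>

    // The matched idiom, spelled out for i32: select (x < 1), ashr x, 31, 1.
    static int32_t signViaSelectAshr(int32_t x) { return x < 1 ? (x >> 31) : 1; }

    // Reference signum, i.e. the value of llvm.scmp(x, 0).
    static int32_t scmpZero(int32_t x) { return (x > 0) - (x < 0); }

    int main() {
      const int32_t samples[] = {INT32_MIN, -42, -1, 0, 1, 42, INT32_MAX};
      for (int32_t x : samples)
        assert(signViaSelectAshr(x) == scmpZero(x));
      return 0;
    }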
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 67e2aae..9c8de45 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2327,6 +2327,18 @@ Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
   return ConstantVector::get(NewVecC);
 }
 
+// Get the result of `Vector Op Splat` (or `Splat Op Vector` if \p SplatLHS).
+static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector,
+                                            Constant *Splat, bool SplatLHS,
+                                            const DataLayout &DL) {
+  ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount();
+  Constant *LHS = ConstantVector::getSplat(EC, Splat);
+  Constant *RHS = Vector;
+  if (!SplatLHS)
+    std::swap(LHS, RHS);
+  return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
+}
+
 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   if (!isa<VectorType>(Inst.getType()))
     return nullptr;
@@ -2338,6 +2350,37 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   assert(cast<VectorType>(RHS->getType())->getElementCount() ==
          cast<VectorType>(Inst.getType())->getElementCount());
 
+  auto foldConstantsThroughSubVectorInsertSplat =
+      [&](Value *MaybeSubVector, Value *MaybeSplat,
+          bool SplatLHS) -> Instruction * {
+    Value *Idx;
+    Constant *Splat, *SubVector, *Dest;
+    if (!match(MaybeSplat, m_ConstantSplat(m_Constant(Splat))) ||
+        !match(MaybeSubVector,
+               m_VectorInsert(m_Constant(Dest), m_Constant(SubVector),
+                              m_Value(Idx))))
+      return nullptr;
+    SubVector =
+        constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL);
+    Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL);
+    if (!SubVector || !Dest)
+      return nullptr;
+    auto *InsertVector =
+        Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, Idx);
+    return replaceInstUsesWith(Inst, InsertVector);
+  };
+
+  // If one operand is a constant splat and the other operand is a
+  // `vector.insert` where both the destination and subvector are constant,
+  // apply the operation to both the destination and subvector, returning a new
+  // constant `vector.insert`. This helps constant folding for scalable vectors.
+  if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
+          /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
+    return Folded;
+  if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
+          /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
+    return Folded;
+
   // If both operands of the binop are vector concatenations, then perform the
   // narrow binop on each pair of the source operands followed by concatenation
   // of the results.
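The new InstructionCombining.cpp fold is sound because a binop against a splat acts independently on every lane, so it commutes with vector.insert: binop(insert(Dest, Sub, Idx), Splat) equals insert(binop(Dest, Splat), binop(Sub, Splat), Idx). A fixed-size C++ model of that algebraic fact (plain arrays stand in for the scalable vectors the fold targets; all names are illustrative):

    #include <array>
    #include <cassert>
    #include <cstddef>

    // insert(Dest, Sub, Idx): overwrite M elements of Dest starting at Idx.
    template <size_t N, size_t M>
    std::array<int, N> insertVector(std::array<int, N> Dest,
                                    const std::array<int, M> &Sub, size_t Idx) {
      for (size_t I = 0; I < M; ++I)
        Dest[Idx + I] = Sub[I];
      return Dest;
    }

    // Elementwise "binop with a splat", using + as the sample opcode.
    template <size_t N>
    std::array<int, N> addSplat(std::array<int, N> V, int Splat) {
      for (int &E : V)
        E += Splat;
      return V;
    }

    int main() {
      std::array<int, 8> Dest = {1, 2, 3, 4, 5, 6, 7, 8};
      std::array<int, 4> Sub = {10, 20, 30, 40};
      // binop(insert(Dest, Sub, 2), Splat) ==
      //     insert(binop(Dest, Splat), binop(Sub, Splat), 2)
      assert(addSplat(insertVector(Dest, Sub, 2), 5) ==
             insertVector(addSplat(Dest, 5), addSplat(Sub, 5), 2));
      return 0;
    }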
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index b6cbecb..10b03bb 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -226,6 +226,7 @@ static const Align kMinOriginAlignment = Align(4);
 static const Align kShadowTLSAlignment = Align(8);
 
 // These constants must be kept in sync with the ones in msan.h.
+// TODO: increase size to match SVE/SVE2/SME/SME2 limits
 static const unsigned kParamTLSSize = 800;
 static const unsigned kRetvalTLSSize = 800;
 
@@ -1544,6 +1545,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     }
   }
 
+  static bool isAArch64SVCount(Type *Ty) {
+    if (TargetExtType *TTy = dyn_cast<TargetExtType>(Ty))
+      return TTy->getName() == "aarch64.svcount";
+    return false;
+  }
+
+  // This is intended to match the "AArch64 Predicate-as-Counter Type" (aka
+  // 'target("aarch64.svcount")'), but not e.g., <vscale x 4 x i32>.
+  static bool isScalableNonVectorType(Type *Ty) {
+    if (!isAArch64SVCount(Ty))
+      LLVM_DEBUG(dbgs() << "isScalableNonVectorType: Unexpected type " << *Ty
+                        << "\n");
+
+    return Ty->isScalableTy() && !isa<VectorType>(Ty);
+  }
+
   void materializeChecks() {
 #ifndef NDEBUG
     // For assert below.
@@ -1672,6 +1689,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
       return Res;
     }
+    if (isScalableNonVectorType(OrigTy)) {
+      LLVM_DEBUG(dbgs() << "getShadowTy: Scalable non-vector type: " << *OrigTy
+                        << "\n");
+      return OrigTy;
+    }
+
     uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
     return IntegerType::get(*MS.C, TypeSize);
   }
@@ -2185,8 +2208,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                         << *OrigIns << "\n");
       return;
     }
-#ifndef NDEBUG
+
     Type *ShadowTy = Shadow->getType();
+    if (isScalableNonVectorType(ShadowTy)) {
+      LLVM_DEBUG(dbgs() << "Skipping check of scalable non-vector " << *Shadow
+                        << " before " << *OrigIns << "\n");
+      return;
+    }
+#ifndef NDEBUG
     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
             isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
            "Can only insert checks for integer, vector, and aggregate shadow "
@@ -6972,6 +7001,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       // an extra "select". This results in much more compact IR.
      // Sa = select Sb, poisoned, (select b, Sc, Sd)
       Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
+    } else if (isScalableNonVectorType(I.getType())) {
+      // This is intended to handle target("aarch64.svcount"), which can't be
+      // handled in the else branch because of incompatibility with CreateXor
+      // ("The supported LLVM operations on this type are limited to load,
+      // store, phi, select and alloca instructions").
+
+      // TODO: this currently underapproximates. Use Arm SVE EOR in the else
+      // branch as needed instead.
+      Sa1 = getCleanShadow(getShadowTy(I.getType()));
     } else {
       // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
       // If Sb (condition is poisoned), look for bits in c and d that are equal
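For context, the else branch that target("aarch64.svcount") must bypass computes Sa = select Sb, (c ^ d) | Sc | Sd, (b ? Sc : Sd): when the condition's shadow Sb is poisoned, every bit where the two select arms can disagree is poisoned as well. A bit-level C++ model of that rule (illustrative only; MSan operates on IR values, not plain integers), which also makes the documented underapproximation of the clean-shadow fallback visible:

    #include <cassert>
    #include <cstdint>

    // Per-bit shadow of `a = select b, c, d` (1-bits mean "uninitialized").
    // If the condition's own shadow Sb is set, the generic rule additionally
    // poisons every bit where the arms may differ: (c ^ d) | Sc | Sd.
    static uint8_t selectShadow(bool b, bool Sb, uint8_t c, uint8_t Sc,
                                uint8_t d, uint8_t Sd) {
      uint8_t Sa = b ? Sc : Sd;    // shadow of the chosen arm
      if (Sb)
        Sa |= (c ^ d) | Sc | Sd;   // poisoned condition: arms may swap
      return Sa;
    }

    int main() {
      // Clean condition: result shadow is just the chosen arm's shadow.
      assert(selectShadow(true, false, 0xF0, 0x0F, 0x00, 0x00) == 0x0F);
      // Poisoned condition: bits where c and d differ become poisoned too.
      assert(selectShadow(true, true, 0xF0, 0x00, 0x0F, 0x00) == 0xFF);
      // The svcount fallback instead contributes a clean shadow for the
      // poisoned-condition case, i.e. it would miss the 0xFF report above;
      // that is the underapproximation the TODO in the patch refers to.
      return 0;
    }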
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a1ad2db..2591df8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -4172,11 +4172,6 @@ class VPlan {
   /// definitions are VPValues that hold a pointer to their underlying IR.
   SmallVector<VPValue *, 16> VPLiveIns;
 
-  /// Mapping from SCEVs to the VPValues representing their expansions.
-  /// NOTE: This mapping is temporary and will be removed once all users have
-  /// been modeled in VPlan directly.
-  DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
-
   /// Blocks allocated and owned by the VPlan. They will be deleted once the
   /// VPlan is destroyed.
   SmallVector<VPBlockBase *> CreatedBlocks;
@@ -4424,15 +4419,6 @@ public:
   LLVM_DUMP_METHOD void dump() const;
 #endif
 
-  VPValue *getSCEVExpansion(const SCEV *S) const {
-    return SCEVToExpansion.lookup(S);
-  }
-
-  void addSCEVExpansion(const SCEV *S, VPValue *V) {
-    assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
-    SCEVToExpansion[S] = V;
-  }
-
   /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
   /// recipes to refer to the clones, and return it.
   VPlan *duplicate();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c385c36..84817d7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -943,12 +943,40 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
   }
 }
 
+/// Get any instruction opcode or intrinsic ID data embedded in recipe \p R.
+/// Returns an optional pair, where the first element indicates whether it is
+/// an intrinsic ID.
+static std::optional<std::pair<bool, unsigned>>
+getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) {
+  return TypeSwitch<const VPSingleDefRecipe *,
+                    std::optional<std::pair<bool, unsigned>>>(R)
+      .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
+            VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>(
+          [](auto *I) { return std::make_pair(false, I->getOpcode()); })
+      .Case<VPWidenIntrinsicRecipe>([](auto *I) {
+        return std::make_pair(true, I->getVectorIntrinsicID());
+      })
+      .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
+        // For recipes that do not directly map to LLVM IR instructions,
+        // assign opcodes after the last VPInstruction opcode (which is also
+        // after the last IR Instruction opcode), based on the VPDefID.
+        return std::make_pair(false,
+                              VPInstruction::OpsEnd + 1 + I->getVPDefID());
+      })
+      .Default([](auto *) { return std::nullopt; });
+}
+
 /// Try to fold \p R using InstSimplifyFolder. Will succeed and return a
-/// non-nullptr Value for a handled \p Opcode if corresponding \p Operands are
-/// foldable live-ins.
-static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
-                               ArrayRef<VPValue *> Operands,
-                               const DataLayout &DL, VPTypeAnalysis &TypeInfo) {
+/// non-nullptr VPValue for a handled opcode or intrinsic ID if corresponding
+/// \p Operands are foldable live-ins.
+static VPValue *tryToFoldLiveIns(VPSingleDefRecipe &R,
+                                 ArrayRef<VPValue *> Operands,
+                                 const DataLayout &DL,
+                                 VPTypeAnalysis &TypeInfo) {
+  auto OpcodeOrIID = getOpcodeOrIntrinsicID(&R);
+  if (!OpcodeOrIID)
+    return nullptr;
+
   SmallVector<Value *, 4> Ops;
   for (VPValue *Op : Operands) {
     if (!Op->isLiveIn() || !Op->getLiveInIRValue())
@@ -956,43 +984,57 @@
     Ops.push_back(Op->getLiveInIRValue());
   }
 
-  InstSimplifyFolder Folder(DL);
-  if (Instruction::isBinaryOp(Opcode))
-    return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode), Ops[0],
+  auto FoldToIRValue = [&]() -> Value * {
+    InstSimplifyFolder Folder(DL);
+    if (OpcodeOrIID->first) {
+      if (R.getNumOperands() != 2)
+        return nullptr;
+      unsigned ID = OpcodeOrIID->second;
+      return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
+                                        TypeInfo.inferScalarType(&R));
+    }
+    unsigned Opcode = OpcodeOrIID->second;
+    if (Instruction::isBinaryOp(Opcode))
+      return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode),
+                              Ops[0], Ops[1]);
+    if (Instruction::isCast(Opcode))
+      return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
+                             TypeInfo.inferScalarType(R.getVPSingleValue()));
+    switch (Opcode) {
+    case VPInstruction::LogicalAnd:
+      return Folder.FoldSelect(Ops[0], Ops[1],
+                               ConstantInt::getNullValue(Ops[1]->getType()));
+    case VPInstruction::Not:
+      return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
+                              Constant::getAllOnesValue(Ops[0]->getType()));
+    case Instruction::Select:
+      return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
+    case Instruction::ICmp:
+    case Instruction::FCmp:
+      return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
                             Ops[1]);
-  if (Instruction::isCast(Opcode))
-    return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
-                           TypeInfo.inferScalarType(R.getVPSingleValue()));
-  switch (Opcode) {
-  case VPInstruction::LogicalAnd:
-    return Folder.FoldSelect(Ops[0], Ops[1],
-                             ConstantInt::getNullValue(Ops[1]->getType()));
-  case VPInstruction::Not:
-    return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
-                            Constant::getAllOnesValue(Ops[0]->getType()));
-  case Instruction::Select:
-    return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
-  case Instruction::ICmp:
-  case Instruction::FCmp:
-    return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
-                          Ops[1]);
-  case Instruction::GetElementPtr: {
-    auto &RFlags = cast<VPRecipeWithIRFlags>(R);
-    auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
-    return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0], drop_begin(Ops),
-                          RFlags.getGEPNoWrapFlags());
-  }
-  case VPInstruction::PtrAdd:
-  case VPInstruction::WidePtrAdd:
-    return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
-                          Ops[1],
-                          cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
-  // An extract of a live-in is an extract of a broadcast, so return the
-  // broadcasted element.
-  case Instruction::ExtractElement:
-    assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");
-    return Ops[0];
-  }
+    case Instruction::GetElementPtr: {
+      auto &RFlags = cast<VPRecipeWithIRFlags>(R);
+      auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
+      return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
+                            drop_begin(Ops), RFlags.getGEPNoWrapFlags());
+    }
+    case VPInstruction::PtrAdd:
+    case VPInstruction::WidePtrAdd:
+      return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()),
+                            Ops[0], Ops[1],
+                            cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
+    // An extract of a live-in is an extract of a broadcast, so return the
+    // broadcasted element.
+    case Instruction::ExtractElement:
+      assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");
+      return Ops[0];
+    }
+    return nullptr;
+  };
+
+  if (Value *V = FoldToIRValue())
+    return R.getParent()->getPlan()->getOrAddLiveIn(V);
   return nullptr;
 }
 
@@ -1006,19 +1048,10 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
 
   // Simplification of live-in IR values for SingleDef recipes using
   // InstSimplifyFolder.
-  if (TypeSwitch<VPRecipeBase *, bool>(&R)
-          .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
-                VPReplicateRecipe, VPWidenSelectRecipe>([&](auto *I) {
-            const DataLayout &DL =
-                Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout();
-            Value *V = tryToFoldLiveIns(*I, I->getOpcode(), I->operands(), DL,
-                                        TypeInfo);
-            if (V)
-              I->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
-            return V;
-          })
-          .Default([](auto *) { return false; }))
-    return;
+  const DataLayout &DL =
+      Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout();
+  if (VPValue *V = tryToFoldLiveIns(*Def, Def->operands(), DL, TypeInfo))
+    return Def->replaceAllUsesWith(V);
 
   // Fold PredPHI LiveIn -> LiveIn.
   if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(&R)) {
@@ -1996,29 +2029,6 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
     return Def == getEmptyKey() || Def == getTombstoneKey();
   }
 
-  /// Get any instruction opcode or intrinsic ID data embedded in recipe \p R.
-  /// Returns an optional pair, where the first element indicates whether it is
-  /// an intrinsic ID.
-  static std::optional<std::pair<bool, unsigned>>
-  getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) {
-    return TypeSwitch<const VPSingleDefRecipe *,
-                      std::optional<std::pair<bool, unsigned>>>(R)
-        .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
-              VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>(
-            [](auto *I) { return std::make_pair(false, I->getOpcode()); })
-        .Case<VPWidenIntrinsicRecipe>([](auto *I) {
-          return std::make_pair(true, I->getVectorIntrinsicID());
-        })
-        .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
-          // For recipes that do not directly map to LLVM IR instructions,
-          // assign opcodes after the last VPInstruction opcode (which is also
-          // after the last IR Instruction opcode), based on the VPDefID.
-          return std::make_pair(false,
-                                VPInstruction::OpsEnd + 1 + I->getVPDefID());
-        })
-        .Default([](auto *) { return std::nullopt; });
-  }
-
   /// If recipe \p R will lower to a GEP with a non-i8 source element type,
   /// return that source element type.
   static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) {
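A note on the hoisted helper: getOpcodeOrIntrinsicID now serves both CSE and live-in folding, encoding each recipe as a {is-intrinsic, number} pair because instruction opcodes and intrinsic IDs are overlapping unsigned namespaces. A toy C++ dispatcher sketching the shape of that encoding and of the new binary-intrinsic fold path (hypothetical enums and values, not LLVM's):

    #include <cassert>
    #include <cstdint>
    #include <optional>
    #include <utility>

    // Opcodes and intrinsic IDs both count from small unsigned values, so a
    // bare number is ambiguous; the bool tag disambiguates, like the pair
    // returned by getOpcodeOrIntrinsicID().
    enum Opcode : unsigned { OpAdd = 0, OpMul = 1 };
    enum IntrID : unsigned { IntrSMax = 0, IntrSMin = 1 };
    using Key = std::pair<bool, unsigned>; // {IsIntrinsic, OpcodeOrID}

    static std::optional<int64_t> foldBinary(Key K, int64_t L, int64_t R) {
      if (K.first) { // intrinsic path, cf. Folder.FoldBinaryIntrinsic
        switch (K.second) {
        case IntrSMax: return L > R ? L : R;
        case IntrSMin: return L < R ? L : R;
        }
        return std::nullopt;
      }
      switch (K.second) { // instruction path, cf. Folder.FoldBinOp
      case OpAdd: return L + R;
      case OpMul: return L * R;
      }
      return std::nullopt; // unhandled: caller keeps the recipe as-is
    }

    int main() {
      assert(*foldBinary({true, IntrSMax}, 7, 5) == 7);  // like an llvm.smax fold
      assert(*foldBinary({false, OpMul}, 7, 5) == 35);
      assert(!foldBinary({true, 99u}, 1, 2));            // unknown ID: no fold
      return 0;
    }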
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 06c3d75..fe66f13 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -32,8 +32,6 @@ bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
 }
 
 VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
-  if (auto *Expanded = Plan.getSCEVExpansion(Expr))
-    return Expanded;
   VPValue *Expanded = nullptr;
   if (auto *E = dyn_cast<SCEVConstant>(Expr))
     Expanded = Plan.getOrAddLiveIn(E->getValue());
@@ -50,7 +48,6 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
       Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe());
     }
   }
-  Plan.addSCEVExpansion(Expr, Expanded);
   return Expanded;
 }
