about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp | 4
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 4
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineInternal.h | 18
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp | 2
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 6
-rw-r--r-- llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp | 30
-rw-r--r-- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 43
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 8
-rw-r--r-- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 70
9 files changed, 118 insertions, 67 deletions
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index ee1fec0..805bdb4 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1350,6 +1350,10 @@ static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
BB->getTerminator()->eraseFromParent();
SwitchInst *SI = IRB.CreateSwitch(
IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
+ // We can't know the precise weights here, as they would depend on the value
+ // distribution of Call->getArgOperand(1). So we just mark it as "unknown".
+ setExplicitlyUnknownBranchWeightsIfProfiled(*SI, *Call->getFunction(),
+ DEBUG_TYPE);
Type *IndexTy = DL.getIndexType(Call->getType());
SmallVector<DominatorTree::UpdateType, 8> Updates;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d1ca0a6..59e103cd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -880,11 +880,11 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
// zext(bool) + C -> bool ? C + 1 : C
if (match(Op0, m_ZExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
- return SelectInst::Create(X, InstCombiner::AddOne(Op1C), Op1);
+ return createSelectInst(X, InstCombiner::AddOne(Op1C), Op1);
// sext(bool) + C -> bool ? C - 1 : C
if (match(Op0, m_SExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
- return SelectInst::Create(X, InstCombiner::SubOne(Op1C), Op1);
+ return createSelectInst(X, InstCombiner::SubOne(Op1C), Op1);
// ~X + C --> (C-1) - X
if (match(Op0, m_Not(m_Value(X)))) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 7a979c1..4f94aa2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -23,6 +23,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
@@ -62,14 +63,14 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
public InstVisitor<InstCombinerImpl, Instruction *> {
public:
InstCombinerImpl(InstructionWorklist &Worklist, BuilderTy &Builder,
- bool MinimizeSize, AAResults *AA, AssumptionCache &AC,
+ Function &F, AAResults *AA, AssumptionCache &AC,
TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
DominatorTree &DT, OptimizationRemarkEmitter &ORE,
BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI,
ProfileSummaryInfo *PSI, const DataLayout &DL,
ReversePostOrderTraversal<BasicBlock *> &RPOT)
- : InstCombiner(Worklist, Builder, MinimizeSize, AA, AC, TLI, TTI, DT, ORE,
- BFI, BPI, PSI, DL, RPOT) {}
+ : InstCombiner(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI, BPI,
+ PSI, DL, RPOT) {}
virtual ~InstCombinerImpl() = default;
@@ -469,6 +470,17 @@ private:
Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable,
unsigned Depth = 0);
+ SelectInst *createSelectInst(Value *C, Value *S1, Value *S2,
+ const Twine &NameStr = "",
+ InsertPosition InsertBefore = nullptr,
+ Instruction *MDFrom = nullptr) {
+ SelectInst *SI =
+ SelectInst::Create(C, S1, S2, NameStr, InsertBefore, MDFrom);
+ if (!MDFrom)
+ setExplicitlyUnknownBranchWeightsIfProfiled(*SI, F, DEBUG_TYPE);
+ return SI;
+ }
+
public:
/// Create and insert the idiom we use to indicate a block is unreachable
/// without having to rewrite the CFG from within InstCombine.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 550f095..d457e0c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1253,7 +1253,7 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
// shl (zext i1 X), C1 --> select (X, 1 << C1, 0)
if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
auto *NewC = Builder.CreateShl(ConstantInt::get(Ty, 1), C1);
- return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty));
+ return createSelectInst(X, NewC, ConstantInt::getNullValue(Ty));
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index f0ddd5c..8fbaf68 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1735,7 +1735,7 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
Constant *Zero = ConstantInt::getNullValue(BO.getType());
Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
- return SelectInst::Create(X, TVal, FVal);
+ return createSelectInst(X, TVal, FVal);
}
static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
@@ -5934,8 +5934,8 @@ static bool combineInstructionsOverFunction(
LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
<< F.getName() << "\n");
- InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
- ORE, BFI, BPI, PSI, DL, RPOT);
+ InstCombinerImpl IC(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI,
+ BPI, PSI, DL, RPOT);
IC.MaxArraySizeForCombine = MaxArraySize;
bool MadeChangeInThisIteration = IC.prepareWorklist(F);
MadeChangeInThisIteration |= IC.run();
diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index e5bf2d1..d842275 100644
--- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -35,6 +35,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Regex.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation/CFGMST.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
@@ -92,8 +93,10 @@ class GCOVFunction;
class GCOVProfiler {
public:
- GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
- GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {}
+ GCOVProfiler()
+ : GCOVProfiler(GCOVOptions::getDefault(), *vfs::getRealFileSystem()) {}
+ GCOVProfiler(const GCOVOptions &Opts, vfs::FileSystem &VFS)
+ : Options(Opts), VFS(VFS) {}
bool
runOnModule(Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
@@ -110,6 +113,7 @@ public:
os->write_zeros(4 - s.size() % 4);
}
void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); }
+ vfs::FileSystem &getVirtualFileSystem() const { return VFS; }
private:
// Create the .gcno files for the Module based on DebugInfo.
@@ -166,6 +170,7 @@ private:
std::vector<Regex> ExcludeRe;
DenseSet<const BasicBlock *> ExecBlocks;
StringMap<bool> InstrumentedFiles;
+ vfs::FileSystem &VFS;
};
struct BBInfo {
@@ -214,10 +219,10 @@ static StringRef getFunctionName(const DISubprogram *SP) {
/// Prefer relative paths in the coverage notes. Clang also may split
/// up absolute paths into a directory and filename component. When
/// the relative path doesn't exist, reconstruct the absolute path.
-static SmallString<128> getFilename(const DIScope *SP) {
+static SmallString<128> getFilename(const DIScope *SP, vfs::FileSystem &VFS) {
SmallString<128> Path;
StringRef RelPath = SP->getFilename();
- if (sys::fs::exists(RelPath))
+ if (VFS.exists(RelPath))
Path = RelPath;
else
sys::path::append(Path, SP->getDirectory(), SP->getFilename());
@@ -357,7 +362,7 @@ namespace {
void writeOut(uint32_t CfgChecksum) {
write(GCOV_TAG_FUNCTION);
- SmallString<128> Filename = getFilename(SP);
+ SmallString<128> Filename = getFilename(SP, P->getVirtualFileSystem());
uint32_t BlockLen = 3 + wordsOfString(getFunctionName(SP));
BlockLen += 1 + wordsOfString(Filename) + 4;
@@ -455,7 +460,7 @@ bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
if (FilterRe.empty() && ExcludeRe.empty()) {
return true;
}
- SmallString<128> Filename = getFilename(F.getSubprogram());
+ SmallString<128> Filename = getFilename(F.getSubprogram(), VFS);
auto It = InstrumentedFiles.find(Filename);
if (It != InstrumentedFiles.end()) {
return It->second;
@@ -467,7 +472,7 @@ bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
// Path can be
// /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for
// such a case we must get the real_path.
- if (sys::fs::real_path(Filename, RealPath)) {
+ if (VFS.getRealPath(Filename, RealPath)) {
// real_path can fail with path like "foo.c".
RealFilename = Filename;
} else {
@@ -524,9 +529,10 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
SmallString<128> Filename = CU->getFilename();
sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
StringRef FName = sys::path::filename(Filename);
- SmallString<128> CurPath;
- if (sys::fs::current_path(CurPath))
+ ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
+ if (!CWD)
return std::string(FName);
+ SmallString<128> CurPath{*CWD};
sys::path::append(CurPath, FName);
return std::string(CurPath);
}
@@ -554,7 +560,7 @@ bool GCOVProfiler::runOnModule(
PreservedAnalyses GCOVProfilerPass::run(Module &M,
ModuleAnalysisManager &AM) {
- GCOVProfiler Profiler(GCOVOpts);
+ GCOVProfiler Profiler(GCOVOpts, *VFS);
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
@@ -789,7 +795,7 @@ bool GCOVProfiler::emitProfileNotes(
// Add the function line number to the lines of the entry block
// to have a counter for the function definition.
uint32_t Line = SP->getLine();
- auto Filename = getFilename(SP);
+ auto Filename = getFilename(SP, VFS);
BranchProbabilityInfo *BPI = GetBPI(F);
BlockFrequencyInfo *BFI = GetBFI(F);
@@ -881,7 +887,7 @@ bool GCOVProfiler::emitProfileNotes(
if (SP != getDISubprogram(Scope))
continue;
- GCOVLines &Lines = Block.getFile(getFilename(Loc->getScope()));
+ GCOVLines &Lines = Block.getFile(getFilename(Loc->getScope(), VFS));
Lines.addLine(Loc.getLine());
}
Line = 0;
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 2d84b4a..216bdf4 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -84,7 +84,6 @@
#include <cstdint>
#include <iterator>
#include <map>
-#include <numeric>
#include <optional>
#include <set>
#include <tuple>
@@ -6356,25 +6355,25 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
if (DefaultResult) {
Value *ValueCompare =
Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
- SelectInst *SelectValueInst = cast<SelectInst>(Builder.CreateSelect(
- ValueCompare, ResultVector[1].first, DefaultResult, "switch.select"));
- SelectValue = SelectValueInst;
- if (HasBranchWeights) {
+ SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
+ DefaultResult, "switch.select");
+ if (auto *SI = dyn_cast<SelectInst>(SelectValue);
+ SI && HasBranchWeights) {
// We start with 3 probabilities, where the numerator is the
// corresponding BranchWeights[i], and the denominator is the sum over
// BranchWeights. We want the probability and negative probability of
// Condition == SecondCase.
assert(BranchWeights.size() == 3);
- setBranchWeights(SelectValueInst, BranchWeights[2],
+ setBranchWeights(SI, BranchWeights[2],
BranchWeights[0] + BranchWeights[1],
/*IsExpected=*/false);
}
}
Value *ValueCompare =
Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
- SelectInst *Ret = cast<SelectInst>(Builder.CreateSelect(
- ValueCompare, ResultVector[0].first, SelectValue, "switch.select"));
- if (HasBranchWeights) {
+ Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
+ SelectValue, "switch.select");
+ if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
// We may have had a DefaultResult. Base the position of the first and
// second's branch weights accordingly. Also the proability that Condition
// != FirstCase needs to take that into account.
@@ -6382,7 +6381,7 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
size_t FirstCasePos = (Condition != nullptr);
size_t SecondCasePos = FirstCasePos + 1;
uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
- setBranchWeights(Ret, BranchWeights[FirstCasePos],
+ setBranchWeights(SI, BranchWeights[FirstCasePos],
DefaultCase + BranchWeights[SecondCasePos],
/*IsExpected=*/false);
}
@@ -6422,13 +6421,13 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
Value *And = Builder.CreateAnd(Condition, AndMask);
Value *Cmp = Builder.CreateICmpEQ(
And, Constant::getIntegerValue(And->getType(), AndMask));
- SelectInst *Ret = cast<SelectInst>(
- Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult));
- if (HasBranchWeights) {
+ Value *Ret =
+ Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
// We know there's a Default case. We base the resulting branch
// weights off its probability.
assert(BranchWeights.size() >= 2);
- setBranchWeights(Ret, accumulate(drop_begin(BranchWeights), 0),
+ setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0),
BranchWeights[0], /*IsExpected=*/false);
}
return Ret;
@@ -6448,11 +6447,11 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
Value *Cmp = Builder.CreateICmpEQ(
And, Constant::getNullValue(And->getType()), "switch.selectcmp");
- SelectInst *Ret = cast<SelectInst>(
- Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult));
- if (HasBranchWeights) {
+ Value *Ret =
+ Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
assert(BranchWeights.size() >= 2);
- setBranchWeights(Ret, accumulate(drop_begin(BranchWeights), 0),
+ setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0),
BranchWeights[0], /*IsExpected=*/false);
}
return Ret;
@@ -6466,11 +6465,11 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
"switch.selectcmp.case2");
Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
- SelectInst *Ret = cast<SelectInst>(
- Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult));
- if (HasBranchWeights) {
+ Value *Ret =
+ Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
assert(BranchWeights.size() >= 2);
- setBranchWeights(Ret, accumulate(drop_begin(BranchWeights), 0),
+ setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0),
BranchWeights[0], /*IsExpected=*/false);
}
return Ret;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 065622e..c547662 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1100,7 +1100,9 @@ class BinOpSameOpcodeHelper {
// constant + x cannot be -constant - x
// instead, it should be x - -constant
if (Pos == 1 ||
- (FromOpcode == Instruction::Add && ToOpcode == Instruction::Sub))
+ ((FromOpcode == Instruction::Add || FromOpcode == Instruction::Or ||
+ FromOpcode == Instruction::Xor) &&
+ ToOpcode == Instruction::Sub))
return SmallVector<Value *>({LHS, RHS});
return SmallVector<Value *>({RHS, LHS});
}
@@ -1188,6 +1190,10 @@ public:
if (CIValue.isAllOnes())
InterchangeableMask = CanBeAll;
break;
+ case Instruction::Xor:
+ if (CIValue.isZero())
+ InterchangeableMask = XorBIT | OrBIT | AndBIT | SubBIT | AddBIT;
+ break;
default:
if (CIValue.isZero())
InterchangeableMask = CanBeAll;
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 0ef933f..32704bd 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2487,21 +2487,31 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask))))
return false;
+ // Check whether this is a binary shuffle.
+ bool IsBinaryShuffle = !isa<UndefValue>(V1);
+
auto *C0 = dyn_cast<CastInst>(V0);
auto *C1 = dyn_cast<CastInst>(V1);
- if (!C0 || !C1)
+ if (!C0 || (IsBinaryShuffle && !C1))
return false;
Instruction::CastOps Opcode = C0->getOpcode();
- if (C0->getSrcTy() != C1->getSrcTy())
+
+ // If this is allowed, foldShuffleOfCastops can get stuck in a loop
+ // with foldBitcastOfShuffle. Reject in favor of foldBitcastOfShuffle.
+ if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
return false;
- // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds.
- if (Opcode != C1->getOpcode()) {
- if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value())))
- Opcode = Instruction::SExt;
- else
+ if (IsBinaryShuffle) {
+ if (C0->getSrcTy() != C1->getSrcTy())
return false;
+ // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds.
+ if (Opcode != C1->getOpcode()) {
+ if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value())))
+ Opcode = Instruction::SExt;
+ else
+ return false;
+ }
}
auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
@@ -2544,23 +2554,31 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
InstructionCost CostC0 =
TTI.getCastInstrCost(C0->getOpcode(), CastDstTy, CastSrcTy,
TTI::CastContextHint::None, CostKind);
- InstructionCost CostC1 =
- TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy,
- TTI::CastContextHint::None, CostKind);
- InstructionCost OldCost = CostC0 + CostC1;
- OldCost +=
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy,
- CastDstTy, OldMask, CostKind, 0, nullptr, {}, &I);
- InstructionCost NewCost =
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, NewShuffleDstTy,
- CastSrcTy, NewMask, CostKind);
+ TargetTransformInfo::ShuffleKind ShuffleKind;
+ if (IsBinaryShuffle)
+ ShuffleKind = TargetTransformInfo::SK_PermuteTwoSrc;
+ else
+ ShuffleKind = TargetTransformInfo::SK_PermuteSingleSrc;
+
+ InstructionCost OldCost = CostC0;
+ OldCost += TTI.getShuffleCost(ShuffleKind, ShuffleDstTy, CastDstTy, OldMask,
+ CostKind, 0, nullptr, {}, &I);
+
+ InstructionCost NewCost = TTI.getShuffleCost(ShuffleKind, NewShuffleDstTy,
+ CastSrcTy, NewMask, CostKind);
NewCost += TTI.getCastInstrCost(Opcode, ShuffleDstTy, NewShuffleDstTy,
TTI::CastContextHint::None, CostKind);
if (!C0->hasOneUse())
NewCost += CostC0;
- if (!C1->hasOneUse())
- NewCost += CostC1;
+ if (IsBinaryShuffle) {
+ InstructionCost CostC1 =
+ TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy,
+ TTI::CastContextHint::None, CostKind);
+ OldCost += CostC1;
+ if (!C1->hasOneUse())
+ NewCost += CostC1;
+ }
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
@@ -2568,14 +2586,20 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
if (NewCost > OldCost)
return false;
- Value *Shuf = Builder.CreateShuffleVector(C0->getOperand(0),
- C1->getOperand(0), NewMask);
+ Value *Shuf;
+ if (IsBinaryShuffle)
+ Shuf = Builder.CreateShuffleVector(C0->getOperand(0), C1->getOperand(0),
+ NewMask);
+ else
+ Shuf = Builder.CreateShuffleVector(C0->getOperand(0), NewMask);
+
Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy);
// Intersect flags from the old casts.
if (auto *NewInst = dyn_cast<Instruction>(Cast)) {
NewInst->copyIRFlags(C0);
- NewInst->andIRFlags(C1);
+ if (IsBinaryShuffle)
+ NewInst->andIRFlags(C1);
}
Worklist.pushValue(Shuf);
@@ -4433,7 +4457,7 @@ bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
// Create new mask using difference of the two incoming masks.
int MaskOffset = NewMask[0u];
- unsigned Index = (InputNumElements - MaskOffset) % InputNumElements;
+ unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
NewMask.clear();
for (unsigned I = 0u; I < InputNumElements; ++I) {