Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp  38
-rw-r--r--  llvm/lib/Target/TargetMachine.cpp                2
2 files changed, 11 insertions, 29 deletions
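For orientation, the net effect of the AMDGPUTargetMachine.cpp hunks below is a reordering of AMDGPUPassConfig::addCodeGenPrepare(): the generic TargetPassConfig::addCodeGenPrepare() call and the LoadStoreVectorizer now run before the AMDGCN-specific lowerings instead of after them. A sketch of the hook as it reads after the patch (assembled from the hunks; comments and code outside their context are elided):

// Sketch only: reconstructed from the hunks below, not a verbatim copy.
void AMDGPUPassConfig::addCodeGenPrepare() {
  if (TM->getTargetTriple().isAMDGCN() && EnableLowerKernelArguments)
    addPass(createAMDGPULowerKernelArgumentsPass());

  // Moved up: generic codegenprepare runs first ...
  TargetPassConfig::addCodeGenPrepare();

  // ... then the LoadStoreVectorizer, so both now precede buffer fat
  // pointer lowering -- the ordering the deleted FIXME asked for.
  if (isPassEnabled(EnableLoadStoreVectorizer))
    addPass(createLoadStoreVectorizerPass());

  if (TM->getTargetTriple().isAMDGCN()) {
    addPass(createAMDGPULowerBufferFatPointersPass());
    addPass(createAMDGPULowerIntrinsicsLegacyPass());
    addPass(new DummyCGSCCPass());
  }
  // ... LowerSwitch and the rest of the hook are unchanged.
}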
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 92a587b..280fbe2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1384,6 +1384,11 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
if (TM->getTargetTriple().isAMDGCN() && EnableLowerKernelArguments)
addPass(createAMDGPULowerKernelArgumentsPass());
+ TargetPassConfig::addCodeGenPrepare();
+
+ if (isPassEnabled(EnableLoadStoreVectorizer))
+ addPass(createLoadStoreVectorizerPass());
+
if (TM->getTargetTriple().isAMDGCN()) {
// This lowering has been placed after codegenprepare to take advantage of
// address mode matching (which is why it isn't put with the LDS lowerings).
@@ -1392,15 +1397,6 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
// but has been put before switch lowering and CFG flattening so that those
// passes can run on the more optimized control flow this pass creates in
// many cases.
- //
- // FIXME: This should ideally be put after the LoadStoreVectorizer.
- // However, due to some annoying facts about ResourceUsageAnalysis,
- // (especially as exercised in the resource-usage-dead-function test),
- // we need all the function passes codegenprepare all the way through
- // said resource usage analysis to run on the call graph produced
- // before codegenprepare runs (because codegenprepare will knock some
- // nodes out of the graph, which leads to function-level passes not
- // being run on them, which causes crashes in the resource usage analysis).
addPass(createAMDGPULowerBufferFatPointersPass());
addPass(createAMDGPULowerIntrinsicsLegacyPass());
// In accordance with the above FIXME, manually force all the
@@ -1408,11 +1404,6 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
addPass(new DummyCGSCCPass());
}
- TargetPassConfig::addCodeGenPrepare();
-
- if (isPassEnabled(EnableLoadStoreVectorizer))
- addPass(createLoadStoreVectorizerPass());
-
// LowerSwitch pass may introduce unreachable blocks that can
// cause unexpected behavior for subsequent passes. Placing it
// here seems better that these blocks would get cleaned up by
@@ -2125,6 +2116,11 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
if (EnableLowerKernelArguments)
addPass(AMDGPULowerKernelArgumentsPass(TM));
+ Base::addCodeGenPrepare(addPass);
+
+ if (isPassEnabled(EnableLoadStoreVectorizer))
+ addPass(LoadStoreVectorizerPass());
+
// This lowering has been placed after codegenprepare to take advantage of
// address mode matching (which is why it isn't put with the LDS lowerings).
// It could be placed anywhere before uniformity annotations (an analysis
@@ -2132,25 +2128,11 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
// but has been put before switch lowering and CFG flattening so that those
// passes can run on the more optimized control flow this pass creates in
// many cases.
- //
- // FIXME: This should ideally be put after the LoadStoreVectorizer.
- // However, due to some annoying facts about ResourceUsageAnalysis,
- // (especially as exercised in the resource-usage-dead-function test),
- // we need all the function passes codegenprepare all the way through
- // said resource usage analysis to run on the call graph produced
- // before codegenprepare runs (because codegenprepare will knock some
- // nodes out of the graph, which leads to function-level passes not
- // being run on them, which causes crashes in the resource usage analysis).
addPass(AMDGPULowerBufferFatPointersPass(TM));
addPass.requireCGSCCOrder();
addPass(AMDGPULowerIntrinsicsPass(TM));
- Base::addCodeGenPrepare(addPass);
-
- if (isPassEnabled(EnableLoadStoreVectorizer))
- addPass(LoadStoreVectorizerPass());
-
// LowerSwitch pass may introduce unreachable blocks that can cause unexpected
// behavior for subsequent passes. Placing it here seems better that these
// blocks would get cleaned up by UnreachableBlockElim inserted next in the
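The new pass manager builder receives the same reordering. After the patch, AMDGPUCodeGenPassBuilder::addCodeGenPrepare() reads roughly as follows (a sketch assembled from the hunks above, limited to the touched region):

// Sketch only: reconstructed from the hunks above, not a verbatim copy.
void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
  if (EnableLowerKernelArguments)
    addPass(AMDGPULowerKernelArgumentsPass(TM));

  // Same reordering as the legacy pipeline: base codegenprepare, then
  // the vectorizer, then the AMDGPU-specific lowerings.
  Base::addCodeGenPrepare(addPass);

  if (isPassEnabled(EnableLoadStoreVectorizer))
    addPass(LoadStoreVectorizerPass());

  addPass(AMDGPULowerBufferFatPointersPass(TM));
  addPass.requireCGSCCOrder();
  addPass(AMDGPULowerIntrinsicsPass(TM));
  // ... LowerSwitch and the rest of the hook are unchanged.
}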
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index ad7e503..cf85691 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -27,7 +27,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
-cl::opt<bool> NoKernelInfoEndLTO(
+cl::opt<bool> llvm::NoKernelInfoEndLTO(
"no-kernel-info-end-lto",
cl::desc("remove the kernel-info pass at the end of the full LTO pipeline"),
cl::init(false), cl::Hidden);
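The TargetMachine.cpp hunk is a linkage fix rather than a behavior change: qualifying the definition with llvm:: makes it define the NoKernelInfoEndLTO that is declared in namespace llvm (the declaration itself is not shown in this diff). A minimal standalone sketch of the pattern:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// The extern declaration this definition must match; in LLVM it lives in
// a header (not shown in this diff).
namespace llvm {
extern cl::opt<bool> NoKernelInfoEndLTO;
} // namespace llvm

// Note the llvm:: qualifier: a using-directive does not let an unqualified
// definition declare a namespace member, so the old spelling created a
// separate global ::NoKernelInfoEndLTO and left llvm::NoKernelInfoEndLTO
// undefined for any code referencing the declaration.
cl::opt<bool> llvm::NoKernelInfoEndLTO(
    "no-kernel-info-end-lto",
    cl::desc("remove the kernel-info pass at the end of the full LTO pipeline"),
    cl::init(false), cl::Hidden);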