Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--   llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp   38
-rw-r--r--   llvm/lib/Target/TargetMachine.cpp                 2
2 files changed, 11 insertions, 29 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 92a587b..280fbe2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1384,6 +1384,11 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
   if (TM->getTargetTriple().isAMDGCN() && EnableLowerKernelArguments)
     addPass(createAMDGPULowerKernelArgumentsPass());

+  TargetPassConfig::addCodeGenPrepare();
+
+  if (isPassEnabled(EnableLoadStoreVectorizer))
+    addPass(createLoadStoreVectorizerPass());
+
   if (TM->getTargetTriple().isAMDGCN()) {
     // This lowering has been placed after codegenprepare to take advantage of
     // address mode matching (which is why it isn't put with the LDS lowerings).
@@ -1392,15 +1397,6 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
     // but has been put before switch lowering and CFG flattening so that those
     // passes can run on the more optimized control flow this pass creates in
     // many cases.
-    //
-    // FIXME: This should ideally be put after the LoadStoreVectorizer.
-    // However, due to some annoying facts about ResourceUsageAnalysis,
-    // (especially as exercised in the resource-usage-dead-function test),
-    // we need all the function passes codegenprepare all the way through
-    // said resource usage analysis to run on the call graph produced
-    // before codegenprepare runs (because codegenprepare will knock some
-    // nodes out of the graph, which leads to function-level passes not
-    // being run on them, which causes crashes in the resource usage analysis).
     addPass(createAMDGPULowerBufferFatPointersPass());
     addPass(createAMDGPULowerIntrinsicsLegacyPass());
     // In accordance with the above FIXME, manually force all the
@@ -1408,11 +1404,6 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
     addPass(new DummyCGSCCPass());
   }

-  TargetPassConfig::addCodeGenPrepare();
-
-  if (isPassEnabled(EnableLoadStoreVectorizer))
-    addPass(createLoadStoreVectorizerPass());
-
   // LowerSwitch pass may introduce unreachable blocks that can
   // cause unexpected behavior for subsequent passes. Placing it
   // here seems better that these blocks would get cleaned up by
@@ -2125,6 +2116,11 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
   if (EnableLowerKernelArguments)
     addPass(AMDGPULowerKernelArgumentsPass(TM));

+  Base::addCodeGenPrepare(addPass);
+
+  if (isPassEnabled(EnableLoadStoreVectorizer))
+    addPass(LoadStoreVectorizerPass());
+
   // This lowering has been placed after codegenprepare to take advantage of
   // address mode matching (which is why it isn't put with the LDS lowerings).
   // It could be placed anywhere before uniformity annotations (an analysis
@@ -2132,25 +2128,11 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
   // but has been put before switch lowering and CFG flattening so that those
   // passes can run on the more optimized control flow this pass creates in
   // many cases.
-  //
-  // FIXME: This should ideally be put after the LoadStoreVectorizer.
-  // However, due to some annoying facts about ResourceUsageAnalysis,
-  // (especially as exercised in the resource-usage-dead-function test),
-  // we need all the function passes codegenprepare all the way through
-  // said resource usage analysis to run on the call graph produced
-  // before codegenprepare runs (because codegenprepare will knock some
-  // nodes out of the graph, which leads to function-level passes not
-  // being run on them, which causes crashes in the resource usage analysis).
   addPass(AMDGPULowerBufferFatPointersPass(TM));
   addPass.requireCGSCCOrder();
   addPass(AMDGPULowerIntrinsicsPass(TM));

-  Base::addCodeGenPrepare(addPass);
-
-  if (isPassEnabled(EnableLoadStoreVectorizer))
-    addPass(LoadStoreVectorizerPass());
-
   // LowerSwitch pass may introduce unreachable blocks that can cause unexpected
   // behavior for subsequent passes. Placing it here seems better that these
   // blocks would get cleaned up by UnreachableBlockElim inserted next in the
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index ad7e503..cf85691 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -27,7 +27,7 @@
 #include "llvm/Target/TargetLoweringObjectFile.h"
 using namespace llvm;

-cl::opt<bool> NoKernelInfoEndLTO(
+cl::opt<bool> llvm::NoKernelInfoEndLTO(
     "no-kernel-info-end-lto",
     cl::desc("remove the kernel-info pass at the end of the full LTO pipeline"),
     cl::init(false), cl::Hidden);
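Taken together, the AMDGPUTargetMachine.cpp hunks move the generic TargetPassConfig::addCodeGenPrepare() step and the LoadStoreVectorizer from after the buffer-fat-pointer and intrinsic lowerings to before them, which is the ordering the now-deleted FIXME had been asking for. A condensed sketch of the resulting legacy-pass-manager order, reconstructed from the hunks above (comments and the unchanged tail of the function elided):

    void AMDGPUPassConfig::addCodeGenPrepare() {
      if (TM->getTargetTriple().isAMDGCN() && EnableLowerKernelArguments)
        addPass(createAMDGPULowerKernelArgumentsPass());

      TargetPassConfig::addCodeGenPrepare();      // generic codegenprepare now runs first

      if (isPassEnabled(EnableLoadStoreVectorizer))
        addPass(createLoadStoreVectorizerPass()); // vectorizes before the lowerings below

      if (TM->getTargetTriple().isAMDGCN()) {
        addPass(createAMDGPULowerBufferFatPointersPass());
        addPass(createAMDGPULowerIntrinsicsLegacyPass());
        addPass(new DummyCGSCCPass());
      }
      // LowerSwitch and the rest of the function are unchanged by this patch.
    }

The new-pass-manager path (AMDGPUCodeGenPassBuilder::addCodeGenPrepare) receives the mirror-image reordering, using Base::addCodeGenPrepare(addPass) and LoadStoreVectorizerPass().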
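The TargetMachine.cpp hunk is a definition fix rather than a behavior change: with using namespace llvm; in effect, the unqualified cl::opt<bool> NoKernelInfoEndLTO(...) defines a brand-new global in the global namespace rather than the NoKernelInfoEndLTO declared in namespace llvm, while qualifying the definition as llvm::NoKernelInfoEndLTO ties it to that declaration. A minimal sketch of the pitfall, with hypothetical names standing in for the cl::opt machinery:

    // flag.h -- hypothetical header declaring the option
    namespace llvm { extern int NoKernelInfoEndLTO; }

    // flag.cpp
    #include "flag.h"
    using namespace llvm;

    // int NoKernelInfoEndLTO = 0;    // WRONG: defines ::NoKernelInfoEndLTO, a second,
    //                                // unrelated global; llvm::NoKernelInfoEndLTO stays
    //                                // undefined and uses of it fail to link
    int llvm::NoKernelInfoEndLTO = 0; // RIGHT: defines the declared namespace member

The option string "no-kernel-info-end-lto", its default, and its description are untouched, so the flag itself behaves as before.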